diff options
Diffstat (limited to 'fs/iomap/buffered-io.c')
-rw-r--r-- | fs/iomap/buffered-io.c | 516 |
1 files changed, 250 insertions, 266 deletions
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 41da4f14c00b..9cc5798423d1 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -36,7 +36,7 @@ static inline struct iomap_page *to_iomap_page(struct page *page) { /* * per-block data is stored in the head page. Callers should - * not be dealing with tail pages (and if they are, they can + * not be dealing with tail pages, and if they are, they can * call thp_head() first. */ VM_BUG_ON_PGFLAGS(PageTail(page), page); @@ -98,7 +98,7 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, unsigned last = (poff + plen - 1) >> block_bits; /* - * If the block size is smaller than the page size we need to check the + * If the block size is smaller than the page size, we need to check the * per-block uptodate status and adjust the offset and length if needed * to avoid reading in already uptodate ranges. */ @@ -126,7 +126,7 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, } /* - * If the extent spans the block that contains the i_size we need to + * If the extent spans the block that contains the i_size, we need to * handle both halves separately so that we properly zero data in the * page cache for blocks that are entirely outside of i_size. */ @@ -205,58 +205,67 @@ struct iomap_readpage_ctx { struct readahead_control *rac; }; -static void -iomap_read_inline_data(struct inode *inode, struct page *page, - struct iomap *iomap) +static loff_t iomap_read_inline_data(const struct iomap_iter *iter, + struct page *page) { - size_t size = i_size_read(inode); + const struct iomap *iomap = iomap_iter_srcmap(iter); + size_t size = i_size_read(iter->inode) - iomap->offset; + size_t poff = offset_in_page(iomap->offset); void *addr; if (PageUptodate(page)) - return; - - BUG_ON(page->index); - BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data)); - - addr = kmap_atomic(page); + return PAGE_SIZE - poff; + + if (WARN_ON_ONCE(size > PAGE_SIZE - poff)) + return -EIO; + if (WARN_ON_ONCE(size > PAGE_SIZE - + offset_in_page(iomap->inline_data))) + return -EIO; + if (WARN_ON_ONCE(size > iomap->length)) + return -EIO; + if (poff > 0) + iomap_page_create(iter->inode, page); + + addr = kmap_local_page(page) + poff; memcpy(addr, iomap->inline_data, size); - memset(addr + size, 0, PAGE_SIZE - size); - kunmap_atomic(addr); - SetPageUptodate(page); + memset(addr + size, 0, PAGE_SIZE - poff - size); + kunmap_local(addr); + iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff); + return PAGE_SIZE - poff; } -static inline bool iomap_block_needs_zeroing(struct inode *inode, - struct iomap *iomap, loff_t pos) +static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter, + loff_t pos) { - return iomap->type != IOMAP_MAPPED || - (iomap->flags & IOMAP_F_NEW) || - pos >= i_size_read(inode); + const struct iomap *srcmap = iomap_iter_srcmap(iter); + + return srcmap->type != IOMAP_MAPPED || + (srcmap->flags & IOMAP_F_NEW) || + pos >= i_size_read(iter->inode); } -static loff_t -iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, - struct iomap *iomap, struct iomap *srcmap) +static loff_t iomap_readpage_iter(const struct iomap_iter *iter, + struct iomap_readpage_ctx *ctx, loff_t offset) { - struct iomap_readpage_ctx *ctx = data; + const struct iomap *iomap = &iter->iomap; + loff_t pos = iter->pos + offset; + loff_t length = iomap_length(iter) - offset; struct page *page = ctx->cur_page; - struct iomap_page *iop = iomap_page_create(inode, page); - bool same_page = false, is_contig = false; + struct iomap_page *iop; loff_t orig_pos = pos; unsigned poff, plen; sector_t sector; - if (iomap->type == IOMAP_INLINE) { - WARN_ON_ONCE(pos); - iomap_read_inline_data(inode, page, iomap); - return PAGE_SIZE; - } + if (iomap->type == IOMAP_INLINE) + return min(iomap_read_inline_data(iter, page), length); /* zero post-eof blocks as the page may be mapped */ - iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen); + iop = iomap_page_create(iter->inode, page); + iomap_adjust_read_range(iter->inode, iop, &pos, length, &poff, &plen); if (plen == 0) goto done; - if (iomap_block_needs_zeroing(inode, iomap, pos)) { + if (iomap_block_needs_zeroing(iter, pos)) { zero_user(page, poff, plen); iomap_set_range_uptodate(page, poff, plen); goto done; @@ -266,16 +275,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (iop) atomic_add(plen, &iop->read_bytes_pending); - /* Try to merge into a previous segment if we can */ sector = iomap_sector(iomap, pos); - if (ctx->bio && bio_end_sector(ctx->bio) == sector) { - if (__bio_try_merge_page(ctx->bio, page, plen, poff, - &same_page)) - goto done; - is_contig = true; - } - - if (!is_contig || bio_full(ctx->bio, plen)) { + if (!ctx->bio || + bio_end_sector(ctx->bio) != sector || + bio_add_page(ctx->bio, page, plen, poff) != plen) { gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); gfp_t orig_gfp = gfp; unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); @@ -299,13 +302,12 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, ctx->bio->bi_iter.bi_sector = sector; bio_set_dev(ctx->bio, iomap->bdev); ctx->bio->bi_end_io = iomap_read_end_io; + __bio_add_page(ctx->bio, page, plen, poff); } - - bio_add_page(ctx->bio, page, plen, poff); done: /* * Move the caller beyond our range so that it keeps making progress. - * For that we have to include any leading non-uptodate ranges, but + * For that, we have to include any leading non-uptodate ranges, but * we can skip trailing ones as they will be handled in the next * iteration. */ @@ -315,23 +317,23 @@ done: int iomap_readpage(struct page *page, const struct iomap_ops *ops) { - struct iomap_readpage_ctx ctx = { .cur_page = page }; - struct inode *inode = page->mapping->host; - unsigned poff; - loff_t ret; + struct iomap_iter iter = { + .inode = page->mapping->host, + .pos = page_offset(page), + .len = PAGE_SIZE, + }; + struct iomap_readpage_ctx ctx = { + .cur_page = page, + }; + int ret; trace_iomap_readpage(page->mapping->host, 1); - for (poff = 0; poff < PAGE_SIZE; poff += ret) { - ret = iomap_apply(inode, page_offset(page) + poff, - PAGE_SIZE - poff, 0, ops, &ctx, - iomap_readpage_actor); - if (ret <= 0) { - WARN_ON_ONCE(ret == 0); - SetPageError(page); - break; - } - } + while ((ret = iomap_iter(&iter, ops)) > 0) + iter.processed = iomap_readpage_iter(&iter, &ctx, 0); + + if (ret < 0) + SetPageError(page); if (ctx.bio) { submit_bio(ctx.bio); @@ -342,23 +344,22 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops) } /* - * Just like mpage_readahead and block_read_full_page we always + * Just like mpage_readahead and block_read_full_page, we always * return 0 and just mark the page as PageError on errors. This - * should be cleaned up all through the stack eventually. + * should be cleaned up throughout the stack eventually. */ return 0; } EXPORT_SYMBOL_GPL(iomap_readpage); -static loff_t -iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length, - void *data, struct iomap *iomap, struct iomap *srcmap) +static loff_t iomap_readahead_iter(const struct iomap_iter *iter, + struct iomap_readpage_ctx *ctx) { - struct iomap_readpage_ctx *ctx = data; + loff_t length = iomap_length(iter); loff_t done, ret; for (done = 0; done < length; done += ret) { - if (ctx->cur_page && offset_in_page(pos + done) == 0) { + if (ctx->cur_page && offset_in_page(iter->pos + done) == 0) { if (!ctx->cur_page_in_bio) unlock_page(ctx->cur_page); put_page(ctx->cur_page); @@ -368,8 +369,7 @@ iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length, ctx->cur_page = readahead_page(ctx->rac); ctx->cur_page_in_bio = false; } - ret = iomap_readpage_actor(inode, pos + done, length - done, - ctx, iomap, srcmap); + ret = iomap_readpage_iter(iter, ctx, done); } return done; @@ -392,25 +392,19 @@ iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length, */ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) { - struct inode *inode = rac->mapping->host; - loff_t pos = readahead_pos(rac); - size_t length = readahead_length(rac); + struct iomap_iter iter = { + .inode = rac->mapping->host, + .pos = readahead_pos(rac), + .len = readahead_length(rac), + }; struct iomap_readpage_ctx ctx = { .rac = rac, }; - trace_iomap_readahead(inode, readahead_count(rac)); + trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); - while (length > 0) { - ssize_t ret = iomap_apply(inode, pos, length, 0, ops, - &ctx, iomap_readahead_actor); - if (ret <= 0) { - WARN_ON_ONCE(ret == 0); - break; - } - pos += ret; - length -= ret; - } + while (iomap_iter(&iter, ops) > 0) + iter.processed = iomap_readahead_iter(&iter, &ctx); if (ctx.bio) submit_bio(ctx.bio); @@ -465,7 +459,7 @@ iomap_releasepage(struct page *page, gfp_t gfp_mask) /* * mm accommodates an old ext3 case where clean pages might not have had * the dirty bit cleared. Thus, it can send actual dirty pages to - * ->releasepage() via shrink_active_list(), skip those here. + * ->releasepage() via shrink_active_list(); skip those here. */ if (PageDirty(page) || PageWriteback(page)) return 0; @@ -480,7 +474,7 @@ iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len) trace_iomap_invalidatepage(page->mapping->host, offset, len); /* - * If we are invalidating the entire page, clear the dirty state from it + * If we're invalidating the entire page, clear the dirty state from it * and release it to avoid unnecessary buildup of the LRU. */ if (offset == 0 && len == PAGE_SIZE) { @@ -514,10 +508,6 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage, EXPORT_SYMBOL_GPL(iomap_migrate_page); #endif /* CONFIG_MIGRATION */ -enum { - IOMAP_WRITE_F_UNSHARE = (1 << 0), -}; - static void iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) { @@ -533,7 +523,7 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) static int iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff, - unsigned plen, struct iomap *iomap) + unsigned plen, const struct iomap *iomap) { struct bio_vec bvec; struct bio bio; @@ -546,12 +536,12 @@ iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff, return submit_bio_wait(&bio); } -static int -__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags, - struct page *page, struct iomap *srcmap) +static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, + unsigned len, struct page *page) { - struct iomap_page *iop = iomap_page_create(inode, page); - loff_t block_size = i_blocksize(inode); + const struct iomap *srcmap = iomap_iter_srcmap(iter); + struct iomap_page *iop = iomap_page_create(iter->inode, page); + loff_t block_size = i_blocksize(iter->inode); loff_t block_start = round_down(pos, block_size); loff_t block_end = round_up(pos + len, block_size); unsigned from = offset_in_page(pos), to = from + len, poff, plen; @@ -561,18 +551,18 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags, ClearPageError(page); do { - iomap_adjust_read_range(inode, iop, &block_start, + iomap_adjust_read_range(iter->inode, iop, &block_start, block_end - block_start, &poff, &plen); if (plen == 0) break; - if (!(flags & IOMAP_WRITE_F_UNSHARE) && + if (!(iter->flags & IOMAP_UNSHARE) && (from <= poff || from >= poff + plen) && (to <= poff || to >= poff + plen)) continue; - if (iomap_block_needs_zeroing(inode, srcmap, block_start)) { - if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE)) + if (iomap_block_needs_zeroing(iter, block_start)) { + if (WARN_ON_ONCE(iter->flags & IOMAP_UNSHARE)) return -EIO; zero_user_segments(page, poff, from, to, poff + plen); } else { @@ -587,41 +577,54 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags, return 0; } -static int -iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, - struct page **pagep, struct iomap *iomap, struct iomap *srcmap) +static int iomap_write_begin_inline(const struct iomap_iter *iter, + struct page *page) +{ + int ret; + + /* needs more work for the tailpacking case; disable for now */ + if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0)) + return -EIO; + ret = iomap_read_inline_data(iter, page); + if (ret < 0) + return ret; + return 0; +} + +static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, + unsigned len, struct page **pagep) { - const struct iomap_page_ops *page_ops = iomap->page_ops; + const struct iomap_page_ops *page_ops = iter->iomap.page_ops; + const struct iomap *srcmap = iomap_iter_srcmap(iter); struct page *page; int status = 0; - BUG_ON(pos + len > iomap->offset + iomap->length); - if (srcmap != iomap) + BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length); + if (srcmap != &iter->iomap) BUG_ON(pos + len > srcmap->offset + srcmap->length); if (fatal_signal_pending(current)) return -EINTR; if (page_ops && page_ops->page_prepare) { - status = page_ops->page_prepare(inode, pos, len, iomap); + status = page_ops->page_prepare(iter->inode, pos, len); if (status) return status; } - page = grab_cache_page_write_begin(inode->i_mapping, pos >> PAGE_SHIFT, - AOP_FLAG_NOFS); + page = grab_cache_page_write_begin(iter->inode->i_mapping, + pos >> PAGE_SHIFT, AOP_FLAG_NOFS); if (!page) { status = -ENOMEM; goto out_no_page; } if (srcmap->type == IOMAP_INLINE) - iomap_read_inline_data(inode, page, srcmap); - else if (iomap->flags & IOMAP_F_BUFFER_HEAD) + status = iomap_write_begin_inline(iter, page); + else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) status = __block_write_begin_int(page, pos, len, NULL, srcmap); else - status = __iomap_write_begin(inode, pos, len, flags, page, - srcmap); + status = __iomap_write_begin(iter, pos, len, page); if (unlikely(status)) goto out_unlock; @@ -632,11 +635,11 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, out_unlock: unlock_page(page); put_page(page); - iomap_write_failed(inode, pos, len); + iomap_write_failed(iter->inode, pos, len); out_no_page: if (page_ops && page_ops->page_done) - page_ops->page_done(inode, pos, 0, NULL, iomap); + page_ops->page_done(iter->inode, pos, 0, NULL); return status; } @@ -648,13 +651,13 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, /* * The blocks that were entirely written will now be uptodate, so we * don't have to worry about a readpage reading them and overwriting a - * partial write. However if we have encountered a short write and only + * partial write. However, if we've encountered a short write and only * partially written into a block, it will not be marked uptodate, so a * readpage might come in and destroy our partial write. * - * Do the simplest thing, and just treat any short write to a non - * uptodate page as a zero-length write, and force the caller to redo - * the whole thing. + * Do the simplest thing and just treat any short write to a + * non-uptodate page as a zero-length write, and force the caller to + * redo the whole thing. */ if (unlikely(copied < len && !PageUptodate(page))) return 0; @@ -663,39 +666,40 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, return copied; } -static size_t iomap_write_end_inline(struct inode *inode, struct page *page, - struct iomap *iomap, loff_t pos, size_t copied) +static size_t iomap_write_end_inline(const struct iomap_iter *iter, + struct page *page, loff_t pos, size_t copied) { + const struct iomap *iomap = &iter->iomap; void *addr; WARN_ON_ONCE(!PageUptodate(page)); - BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data)); + BUG_ON(!iomap_inline_data_valid(iomap)); flush_dcache_page(page); - addr = kmap_atomic(page); - memcpy(iomap->inline_data + pos, addr + pos, copied); - kunmap_atomic(addr); + addr = kmap_local_page(page) + pos; + memcpy(iomap_inline_data(iomap, pos), addr, copied); + kunmap_local(addr); - mark_inode_dirty(inode); + mark_inode_dirty(iter->inode); return copied; } /* Returns the number of bytes copied. May be 0. Cannot be an errno. */ -static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len, - size_t copied, struct page *page, struct iomap *iomap, - struct iomap *srcmap) +static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, + size_t copied, struct page *page) { - const struct iomap_page_ops *page_ops = iomap->page_ops; - loff_t old_size = inode->i_size; + const struct iomap_page_ops *page_ops = iter->iomap.page_ops; + const struct iomap *srcmap = iomap_iter_srcmap(iter); + loff_t old_size = iter->inode->i_size; size_t ret; if (srcmap->type == IOMAP_INLINE) { - ret = iomap_write_end_inline(inode, page, iomap, pos, copied); + ret = iomap_write_end_inline(iter, page, pos, copied); } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { - ret = block_write_end(NULL, inode->i_mapping, pos, len, copied, - page, NULL); + ret = block_write_end(NULL, iter->inode->i_mapping, pos, len, + copied, page, NULL); } else { - ret = __iomap_write_end(inode, pos, len, copied, page); + ret = __iomap_write_end(iter->inode, pos, len, copied, page); } /* @@ -704,29 +708,28 @@ static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len, * preferably after I/O completion so that no stale data is exposed. */ if (pos + ret > old_size) { - i_size_write(inode, pos + ret); - iomap->flags |= IOMAP_F_SIZE_CHANGED; + i_size_write(iter->inode, pos + ret); + iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } unlock_page(page); if (old_size < pos) - pagecache_isize_extended(inode, old_size, pos); + pagecache_isize_extended(iter->inode, old_size, pos); if (page_ops && page_ops->page_done) - page_ops->page_done(inode, pos, ret, page, iomap); + page_ops->page_done(iter->inode, pos, ret, page); put_page(page); if (ret < len) - iomap_write_failed(inode, pos, len); + iomap_write_failed(iter->inode, pos, len); return ret; } -static loff_t -iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, - struct iomap *iomap, struct iomap *srcmap) +static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) { - struct iov_iter *i = data; - long status = 0; + loff_t length = iomap_length(iter); + loff_t pos = iter->pos; ssize_t written = 0; + long status = 0; do { struct page *page; @@ -742,7 +745,7 @@ again: bytes = length; /* - * Bring in the user page that we will copy from _first_. + * Bring in the user page that we'll copy from _first_. * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. @@ -752,18 +755,16 @@ again: break; } - status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, - srcmap); + status = iomap_write_begin(iter, pos, bytes, &page); if (unlikely(status)) break; - if (mapping_writably_mapped(inode->i_mapping)) + if (mapping_writably_mapped(iter->inode->i_mapping)) flush_dcache_page(page); copied = copy_page_from_iter_atomic(page, offset, bytes, i); - status = iomap_write_end(inode, pos, bytes, copied, page, iomap, - srcmap); + status = iomap_write_end(iter, pos, bytes, copied, page); if (unlikely(copied != status)) iov_iter_revert(i, copied - status); @@ -784,36 +785,38 @@ again: written += status; length -= status; - balance_dirty_pages_ratelimited(inode->i_mapping); + balance_dirty_pages_ratelimited(iter->inode->i_mapping); } while (iov_iter_count(i) && length); return written ? written : status; } ssize_t -iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter, +iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, const struct iomap_ops *ops) { - struct inode *inode = iocb->ki_filp->f_mapping->host; - loff_t pos = iocb->ki_pos, ret = 0, written = 0; - - while (iov_iter_count(iter)) { - ret = iomap_apply(inode, pos, iov_iter_count(iter), - IOMAP_WRITE, ops, iter, iomap_write_actor); - if (ret <= 0) - break; - pos += ret; - written += ret; - } + struct iomap_iter iter = { + .inode = iocb->ki_filp->f_mapping->host, + .pos = iocb->ki_pos, + .len = iov_iter_count(i), + .flags = IOMAP_WRITE, + }; + int ret; - return written ? written : ret; + while ((ret = iomap_iter(&iter, ops)) > 0) + iter.processed = iomap_write_iter(&iter, i); + if (iter.pos == iocb->ki_pos) + return ret; + return iter.pos - iocb->ki_pos; } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); -static loff_t -iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data, - struct iomap *iomap, struct iomap *srcmap) +static loff_t iomap_unshare_iter(struct iomap_iter *iter) { + struct iomap *iomap = &iter->iomap; + const struct iomap *srcmap = iomap_iter_srcmap(iter); + loff_t pos = iter->pos; + loff_t length = iomap_length(iter); long status = 0; loff_t written = 0; @@ -829,13 +832,11 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data, unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length); struct page *page; - status = iomap_write_begin(inode, pos, bytes, - IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap); + status = iomap_write_begin(iter, pos, bytes, &page); if (unlikely(status)) return status; - status = iomap_write_end(inode, pos, bytes, bytes, page, iomap, - srcmap); + status = iomap_write_end(iter, pos, bytes, bytes, page); if (WARN_ON_ONCE(status == 0)) return -EIO; @@ -845,7 +846,7 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data, written += status; length -= status; - balance_dirty_pages_ratelimited(inode->i_mapping); + balance_dirty_pages_ratelimited(iter->inode->i_mapping); } while (length); return written; @@ -855,44 +856,43 @@ int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops) { - loff_t ret; - - while (len) { - ret = iomap_apply(inode, pos, len, IOMAP_WRITE, ops, NULL, - iomap_unshare_actor); - if (ret <= 0) - return ret; - pos += ret; - len -= ret; - } + struct iomap_iter iter = { + .inode = inode, + .pos = pos, + .len = len, + .flags = IOMAP_WRITE | IOMAP_UNSHARE, + }; + int ret; - return 0; + while ((ret = iomap_iter(&iter, ops)) > 0) + iter.processed = iomap_unshare_iter(&iter); + return ret; } EXPORT_SYMBOL_GPL(iomap_file_unshare); -static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length, - struct iomap *iomap, struct iomap *srcmap) +static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length) { struct page *page; int status; unsigned offset = offset_in_page(pos); unsigned bytes = min_t(u64, PAGE_SIZE - offset, length); - status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap); + status = iomap_write_begin(iter, pos, bytes, &page); if (status) return status; zero_user(page, offset, bytes); mark_page_accessed(page); - return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap); + return iomap_write_end(iter, pos, bytes, bytes, page); } -static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos, - loff_t length, void *data, struct iomap *iomap, - struct iomap *srcmap) +static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) { - bool *did_zero = data; + struct iomap *iomap = &iter->iomap; + const struct iomap *srcmap = iomap_iter_srcmap(iter); + loff_t pos = iter->pos; + loff_t length = iomap_length(iter); loff_t written = 0; /* already zeroed? we're done. */ @@ -902,10 +902,10 @@ static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos, do { s64 bytes; - if (IS_DAX(inode)) + if (IS_DAX(iter->inode)) bytes = dax_iomap_zero(pos, length, iomap); else - bytes = iomap_zero(inode, pos, length, iomap, srcmap); + bytes = __iomap_zero_iter(iter, pos, length); if (bytes < 0) return bytes; @@ -923,19 +923,17 @@ int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops) { - loff_t ret; - - while (len > 0) { - ret = iomap_apply(inode, pos, len, IOMAP_ZERO, - ops, did_zero, iomap_zero_range_actor); - if (ret <= 0) - return ret; - - pos += ret; - len -= ret; - } + struct iomap_iter iter = { + .inode = inode, + .pos = pos, + .len = len, + .flags = IOMAP_ZERO, + }; + int ret; - return 0; + while ((ret = iomap_iter(&iter, ops)) > 0) + iter.processed = iomap_zero_iter(&iter, did_zero); + return ret; } EXPORT_SYMBOL_GPL(iomap_zero_range); @@ -953,21 +951,20 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, } EXPORT_SYMBOL_GPL(iomap_truncate_page); -static loff_t -iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, - void *data, struct iomap *iomap, struct iomap *srcmap) +static loff_t iomap_page_mkwrite_iter(struct iomap_iter *iter, + struct page *page) { - struct page *page = data; + loff_t length = iomap_length(iter); int ret; - if (iomap->flags & IOMAP_F_BUFFER_HEAD) { - ret = __block_write_begin_int(page, pos, length, NULL, iomap); + if (iter->iomap.flags & IOMAP_F_BUFFER_HEAD) { + ret = __block_write_begin_int(page, iter->pos, length, NULL, + &iter->iomap); if (ret) return ret; block_commit_write(page, 0, length); } else { WARN_ON_ONCE(!PageUptodate(page)); - iomap_page_create(inode, page); set_page_dirty(page); } @@ -976,29 +973,24 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops) { + struct iomap_iter iter = { + .inode = file_inode(vmf->vma->vm_file), + .flags = IOMAP_WRITE | IOMAP_FAULT, + }; struct page *page = vmf->page; - struct inode *inode = file_inode(vmf->vma->vm_file); - unsigned long length; - loff_t offset; ssize_t ret; lock_page(page); - ret = page_mkwrite_check_truncate(page, inode); + ret = page_mkwrite_check_truncate(page, iter.inode); if (ret < 0) goto out_unlock; - length = ret; - - offset = page_offset(page); - while (length > 0) { - ret = iomap_apply(inode, offset, length, - IOMAP_WRITE | IOMAP_FAULT, ops, page, - iomap_page_mkwrite_actor); - if (unlikely(ret <= 0)) - goto out_unlock; - offset += ret; - length -= ret; - } + iter.pos = page_offset(page); + iter.len = ret; + while ((ret = iomap_iter(&iter, ops)) > 0) + iter.processed = iomap_page_mkwrite_iter(&iter, page); + if (ret < 0) + goto out_unlock; wait_for_stable_page(page); return VM_FAULT_LOCKED; out_unlock: @@ -1015,7 +1007,7 @@ iomap_finish_page_writeback(struct inode *inode, struct page *page, if (error) { SetPageError(page); - mapping_set_error(inode->i_mapping, -EIO); + mapping_set_error(inode->i_mapping, error); } WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop); @@ -1152,7 +1144,7 @@ static void iomap_writepage_end_bio(struct bio *bio) * Submit the final bio for an ioend. * * If @error is non-zero, it means that we have a situation where some part of - * the submission process has failed after we have marked paged for writeback + * the submission process has failed after we've marked pages for writeback * and unlocked them. In this situation, we need to fail the bio instead of * submitting it. This typically only happens on a filesystem shutdown. */ @@ -1167,7 +1159,7 @@ iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend, error = wpc->ops->prepare_ioend(ioend, error); if (error) { /* - * If we are failing the IO now, just mark the ioend with an + * If we're failing the IO now, just mark the ioend with an * error and finish it. This will run IO completion immediately * as there is only one reference to the ioend at this point in * time. @@ -1209,7 +1201,7 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc, /* * Allocate a new bio, and chain the old bio to the new one. * - * Note that we have to do perform the chaining in this unintuitive order + * Note that we have to perform the chaining in this unintuitive order * so that the bi_private linkage is set up in the right direction for the * traversal in iomap_finish_ioend(). */ @@ -1248,7 +1240,7 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset, /* * Test to see if we have an existing ioend structure that we could append to - * first, otherwise finish off the current ioend and start another. + * first; otherwise finish off the current ioend and start another. */ static void iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page, @@ -1258,7 +1250,6 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page, sector_t sector = iomap_sector(&wpc->iomap, offset); unsigned len = i_blocksize(inode); unsigned poff = offset & (PAGE_SIZE - 1); - bool merged, same_page = false; if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, offset, sector)) { if (wpc->ioend) @@ -1266,19 +1257,13 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page, wpc->ioend = iomap_alloc_ioend(inode, wpc, offset, sector, wbc); } - merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, - &same_page); - if (iop) - atomic_add(len, &iop->write_bytes_pending); - - if (!merged) { - if (bio_full(wpc->ioend->io_bio, len)) { - wpc->ioend->io_bio = - iomap_chain_bio(wpc->ioend->io_bio); - } - bio_add_page(wpc->ioend->io_bio, page, len, poff); + if (bio_add_page(wpc->ioend->io_bio, page, len, poff) != len) { + wpc->ioend->io_bio = iomap_chain_bio(wpc->ioend->io_bio); + __bio_add_page(wpc->ioend->io_bio, page, len, poff); } + if (iop) + atomic_add(len, &iop->write_bytes_pending); wpc->ioend->io_size += len; wbc_account_cgroup_owner(wbc, page, len); } @@ -1286,9 +1271,9 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page, /* * We implement an immediate ioend submission policy here to avoid needing to * chain multiple ioends and hence nest mempool allocations which can violate - * forward progress guarantees we need to provide. The current ioend we are - * adding blocks to is cached on the writepage context, and if the new block - * does not append to the cached ioend it will create a new ioend and cache that + * the forward progress guarantees we need to provide. The current ioend we're + * adding blocks to is cached in the writepage context, and if the new block + * doesn't append to the cached ioend, it will create a new ioend and cache that * instead. * * If a new ioend is created and cached, the old ioend is returned and queued @@ -1304,14 +1289,13 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct inode *inode, struct page *page, u64 end_offset) { - struct iomap_page *iop = to_iomap_page(page); + struct iomap_page *iop = iomap_page_create(inode, page); struct iomap_ioend *ioend, *next; unsigned len = i_blocksize(inode); u64 file_offset; /* file offset of page */ int error = 0, count = 0, i; LIST_HEAD(submit_list); - WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop); WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0); /* @@ -1351,7 +1335,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, if (unlikely(error)) { /* * Let the filesystem know what portion of the current page - * failed to map. If the page wasn't been added to ioend, it + * failed to map. If the page hasn't been added to ioend, it * won't be affected by I/O completion and we must unlock it * now. */ @@ -1368,7 +1352,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, unlock_page(page); /* - * Preserve the original error if there was one, otherwise catch + * Preserve the original error if there was one; catch * submission errors here and propagate into subsequent ioend * submissions. */ @@ -1395,8 +1379,8 @@ done: /* * Write out a dirty page. * - * For delalloc space on the page we need to allocate space and flush it. - * For unwritten space on the page we need to start the conversion to + * For delalloc space on the page, we need to allocate space and flush it. + * For unwritten space on the page, we need to start the conversion to * regular allocated space. */ static int @@ -1411,7 +1395,7 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) trace_iomap_writepage(inode, page_offset(page), PAGE_SIZE); /* - * Refuse to write the page out if we are called from reclaim context. + * Refuse to write the page out if we're called from reclaim context. * * This avoids stack overflows when called from deeply used stacks in * random callers for direct reclaim or memcg reclaim. We explicitly @@ -1456,20 +1440,20 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) unsigned offset_into_page = offset & (PAGE_SIZE - 1); /* - * Skip the page if it is fully outside i_size, e.g. due to a - * truncate operation that is in progress. We must redirty the + * Skip the page if it's fully outside i_size, e.g. due to a + * truncate operation that's in progress. We must redirty the * page so that reclaim stops reclaiming it. Otherwise * iomap_vm_releasepage() is called on it and gets confused. * - * Note that the end_index is unsigned long, it would overflow - * if the given offset is greater than 16TB on 32-bit system - * and if we do check the page is fully outside i_size or not - * via "if (page->index >= end_index + 1)" as "end_index + 1" - * will be evaluated to 0. Hence this page will be redirtied - * and be written out repeatedly which would result in an - * infinite loop, the user program that perform this operation - * will hang. Instead, we can verify this situation by checking - * if the page to write is totally beyond the i_size or if it's + * Note that the end_index is unsigned long. If the given + * offset is greater than 16TB on a 32-bit system then if we + * checked if the page is fully outside i_size with + * "if (page->index >= end_index + 1)", "end_index + 1" would + * overflow and evaluate to 0. Hence this page would be + * redirtied and written out repeatedly, which would result in + * an infinite loop; the user program performing this operation + * would hang. Instead, we can detect this situation by + * checking if the page is totally beyond i_size or if its * offset is just equal to the EOF. */ if (page->index > end_index || |