summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2018-06-20 00:10:58 +0200
committerDarrick J. Wong <darrick.wong@oracle.com>2018-06-20 18:32:41 +0200
commitc03cea42149de56fbae2301d7123daaa2cfe80e2 (patch)
treefd4293998868758f4e522165babcd4ecbff417a4 /fs
parentiomap: add an iomap-based readpage and readpages implementation (diff)
downloadlinux-c03cea42149de56fbae2301d7123daaa2cfe80e2.tar.xz
linux-c03cea42149de56fbae2301d7123daaa2cfe80e2.zip
iomap: add initial support for writes without buffer heads
For now this is just limited to blocksize == PAGE_SIZE, where we can simply read in the full page in write begin, and just set the whole page dirty after copying data into it. This code is enabled by default and XFS will now be fed pages without buffer heads in ->writepage and ->writepages. If a file system sets the IOMAP_F_BUFFER_HEAD flag on the iomap the old path will still be used; this both helps the transition in XFS and prepares for the gfs2 migration to the iomap infrastructure. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/iomap.c115
-rw-r--r--fs/xfs/xfs_iomap.c6
2 files changed, 112 insertions, 9 deletions
diff --git a/fs/iomap.c b/fs/iomap.c
index 4f10c6b1cf6d..2ebff76039b5 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -349,6 +349,48 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
}
static int
+iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+ unsigned poff, unsigned plen, unsigned from, unsigned to,
+ struct iomap *iomap)
+{
+ struct bio_vec bvec;
+ struct bio bio;
+
+ if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
+ zero_user_segments(page, poff, from, to, poff + plen);
+ return 0;
+ }
+
+ bio_init(&bio, &bvec, 1);
+ bio.bi_opf = REQ_OP_READ;
+ bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
+ bio_set_dev(&bio, iomap->bdev);
+ __bio_add_page(&bio, page, plen, poff);
+ return submit_bio_wait(&bio);
+}
+
+static int
+__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+ struct page *page, struct iomap *iomap)
+{
+ loff_t block_size = i_blocksize(inode);
+ loff_t block_start = pos & ~(block_size - 1);
+ loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+ unsigned poff = block_start & (PAGE_SIZE - 1);
+ unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
+ unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
+
+ WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+
+ if (PageUptodate(page))
+ return 0;
+ if (from <= poff && to >= poff + plen)
+ return 0;
+ return iomap_read_page_sync(inode, block_start, page,
+ poff, plen, from, to, iomap);
+}
+
+static int
iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
struct page **pagep, struct iomap *iomap)
{
@@ -367,9 +409,10 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
if (iomap->type == IOMAP_INLINE)
iomap_read_inline_data(inode, page, iomap);
- else
+ else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
status = __block_write_begin_int(page, pos, len, NULL, iomap);
-
+ else
+ status = __iomap_write_begin(inode, pos, len, page, iomap);
if (unlikely(status)) {
unlock_page(page);
put_page(page);
@@ -382,6 +425,57 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
return status;
}
+int
+iomap_set_page_dirty(struct page *page)
+{
+ struct address_space *mapping = page_mapping(page);
+ int newly_dirty;
+
+ if (unlikely(!mapping))
+ return !TestSetPageDirty(page);
+
+ /*
+ * Lock out page->mem_cgroup migration to keep PageDirty
+ * synchronized with per-memcg dirty page counters.
+ */
+ lock_page_memcg(page);
+ newly_dirty = !TestSetPageDirty(page);
+ if (newly_dirty)
+ __set_page_dirty(page, mapping, 0);
+ unlock_page_memcg(page);
+
+ if (newly_dirty)
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+ return newly_dirty;
+}
+EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
+
+static int
+__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+ unsigned copied, struct page *page, struct iomap *iomap)
+{
+ flush_dcache_page(page);
+
+ /*
+ * The blocks that were entirely written will now be uptodate, so we
+ * don't have to worry about a readpage reading them and overwriting a
+ * partial write. However if we have encountered a short write and only
+ * partially written into a block, it will not be marked uptodate, so a
+ * readpage might come in and destroy our partial write.
+ *
+ * Do the simplest thing, and just treat any short write to a non
+ * uptodate page as a zero-length write, and force the caller to redo
+ * the whole thing.
+ */
+ if (unlikely(copied < len && !PageUptodate(page))) {
+ copied = 0;
+ } else {
+ SetPageUptodate(page);
+ iomap_set_page_dirty(page);
+ }
+ return __generic_write_end(inode, pos, copied, page);
+}
+
static int
iomap_write_end_inline(struct inode *inode, struct page *page,
struct iomap *iomap, loff_t pos, unsigned copied)
@@ -408,9 +502,11 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
if (iomap->type == IOMAP_INLINE) {
ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
- } else {
+ } else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
ret = generic_write_end(NULL, inode->i_mapping, pos, len,
copied, page, NULL);
+ } else {
+ ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
}
if (iomap->page_done)
@@ -703,11 +799,16 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
struct page *page = data;
int ret;
- ret = __block_write_begin_int(page, pos, length, NULL, iomap);
- if (ret)
- return ret;
+ if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+ ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+ if (ret)
+ return ret;
+ block_commit_write(page, 0, length);
+ } else {
+ WARN_ON_ONCE(!PageUptodate(page));
+ WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+ }
- block_commit_write(page, 0, length);
return length;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 49f5492eed3b..8a3613d576af 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -626,7 +626,7 @@ retry:
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
* them out if the write happens to fail.
*/
- iomap->flags = IOMAP_F_NEW;
+ iomap->flags |= IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@@ -1019,6 +1019,8 @@ xfs_file_iomap_begin(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
+ iomap->flags |= IOMAP_F_BUFFER_HEAD;
+
if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
/* Reserve delalloc blocks for regular writeback. */
@@ -1119,7 +1121,7 @@ xfs_file_iomap_begin(
if (error)
return error;
- iomap->flags = IOMAP_F_NEW;
+ iomap->flags |= IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
out_finish: