summaryrefslogtreecommitdiffstats
path: root/fs/nfs/write.c
diff options
context:
space:
mode:
authorWeston Andros Adamson <dros@primarydata.com>2014-05-15 17:56:45 +0200
committerTrond Myklebust <trond.myklebust@primarydata.com>2014-05-29 17:11:44 +0200
commit2bfc6e566daa8386c9cffef2f7de17fc330d3835 (patch)
treea615bb7091787ad574c5b31bcd6a30a5bfb8c2f9 /fs/nfs/write.c
parentnfs: call nfs_can_coalesce_requests for every req (diff)
downloadlinux-2bfc6e566daa8386c9cffef2f7de17fc330d3835.tar.xz
linux-2bfc6e566daa8386c9cffef2f7de17fc330d3835.zip
nfs: add support for multiple nfs reqs per page
Add "page groups" - a circular list of nfs requests (struct nfs_page) that all reference the same page. This gives nfs read and write paths the ability to account for sub-page regions independently. This somewhat follows the design of struct buffer_head's sub-page accounting. Only "head" requests are ever added/removed from the inode list in the buffered write path. "head" and "sub" requests are treated the same through the read path and the rest of the write/commit path. Requests are given an extra reference across the life of the list. Page groups are never rejoined after being split. If the read/write request fails and the client falls back to another path (ie revert to MDS in PNFS case), the already split requests are pushed through the recoalescing code again, which may split them further and then coalesce them into properly sized requests on the wire. Fragmentation shouldn't be a problem with the current design, because we flush all requests in page group when a non-contiguous request is added, so the only time resplitting should occur is on a resend of a read or write. This patch lays the groundwork for sub-page splitting, but does not actually do any splitting. For now all page groups have one request as pg_test functions don't yet split pages. There are several related patches that are needed support multiple requests per page group. Signed-off-by: Weston Andros Adamson <dros@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Diffstat (limited to 'fs/nfs/write.c')
-rw-r--r--fs/nfs/write.c13
1 files changed, 12 insertions, 1 deletions
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e773df207c05..d0f30f12a8b3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -367,6 +367,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
/* Lock the request! */
nfs_lock_request(req);
@@ -383,6 +385,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
set_page_private(req->wb_page, (unsigned long)req);
}
nfsi->npages++;
+ set_bit(PG_INODE_REF, &req->wb_flags);
kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock);
}
@@ -567,6 +570,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_commit_info cinfo;
unsigned long bytes = 0;
+ bool do_destroy;
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out;
@@ -596,6 +600,7 @@ remove_req:
next:
nfs_unlock_request(req);
nfs_end_page_writeback(req->wb_page);
+ do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
nfs_release_request(req);
}
out:
@@ -700,6 +705,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
if (req == NULL)
goto out_unlock;
+ /* should be handled by nfs_flush_incompatible */
+ WARN_ON_ONCE(req->wb_head != req);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
rqend = req->wb_offset + req->wb_bytes;
/*
* Tell the caller to flush out the request if
@@ -761,7 +770,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
req = nfs_try_to_update_request(inode, page, offset, bytes);
if (req != NULL)
goto out;
- req = nfs_create_request(ctx, page, offset, bytes);
+ req = nfs_create_request(ctx, page, NULL, offset, bytes);
if (IS_ERR(req))
goto out;
nfs_inode_add_request(inode, req);
@@ -805,6 +814,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
return 0;
l_ctx = req->wb_lock_context;
do_flush = req->wb_page != page || req->wb_context != ctx;
+ /* for now, flush if more than 1 request in page_group */
+ do_flush |= req->wb_this_page != req;
if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
do_flush |= l_ctx->lockowner.l_owner != current->files
|| l_ctx->lockowner.l_pid != current->tgid;