From f3ddee8dc4e2cff37936afbeed2fdaa95b7fb7c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 6 Apr 2018 14:17:25 +0100 Subject: afs: Fix directory handling AFS directories are structured blobs that are downloaded just like files and then parsed by the lookup and readdir code and, as such, are currently handled in the pagecache like any other file, with the entire directory content being thrown away each time the directory changes. However, since the blob is a known structure and since the data version counter on a directory increases by exactly one for each change committed to that directory, we can actually edit the directory locally rather than fetching it from the server after each locally-induced change. What we can't do, though, is mix data from the server and data from the client since the server is technically at liberty to rearrange or compress a directory if it sees fit, provided it updates the data version number when it does so and breaks the callback (ie. sends a notification). Further, lookup with lookup-ahead, readdir and, when it arrives, local editing are likely want to scan the whole of a directory. So directory handling needs to be improved to maintain the coherency of the directory blob prior to permitting local directory editing. To this end: (1) If any directory page gets discarded, invalidate and reread the entire directory. (2) If readpage notes that if when it fetches a single page that the version number has changed, the entire directory is flagged for invalidation. (3) Read as much of the directory in one go as we can. Note that this removes local caching of directories in fscache for the moment as we can't pass the pages to fscache_read_or_alloc_pages() since page->lru is in use by the LRU. Signed-off-by: David Howells --- fs/afs/write.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/afs/write.c') diff --git a/fs/afs/write.c b/fs/afs/write.c index 9370e2feb999..70a563c14e6f 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -42,10 +42,11 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, if (!req) return -ENOMEM; - atomic_set(&req->usage, 1); + refcount_set(&req->usage, 1); req->pos = pos; req->len = len; req->nr_pages = 1; + req->pages = req->array; req->pages[0] = page; get_page(page); -- cgit v1.2.3 From 76a5cb6fc1e22a2a316fb690fc4cdd5121d1c0ff Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 6 Apr 2018 14:17:26 +0100 Subject: afs: Add stats for data transfer operations Add statistics to /proc/fs/afs/stats for data transfer RPC operations. New lines are added that look like: file-rd : n=55794 nb=10252282150 file-wr : n=9789 nb=3247763645 where n= indicates the number of ops completed and nb= indicates the number of bytes successfully transferred. file-rd is the counts for read/fetch operations and file-wr the counts for write/store operations. Note that directory and symlink downloading are included in the file-rd stats at the moment. Signed-off-by: David Howells --- fs/afs/file.c | 6 ++++++ fs/afs/internal.h | 4 ++++ fs/afs/proc.c | 7 +++++++ fs/afs/write.c | 6 ++++++ 4 files changed, 23 insertions(+) (limited to 'fs/afs/write.c') diff --git a/fs/afs/file.c b/fs/afs/file.c index 91ff1335fd33..e5cac1bc3cf0 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -242,6 +242,12 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de ret = afs_end_vnode_operation(&fc); } + if (ret == 0) { + afs_stat_v(vnode, n_fetches); + atomic_long_add(desc->actual_len, + &afs_v2net(vnode)->n_fetch_bytes); + } + _leave(" = %d", ret); return ret; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6ae023cbf00e..f6b44f47732d 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -273,6 +273,10 @@ struct afs_net { atomic_t n_read_dir; /* Number of directory pages read */ atomic_t n_dir_cr; /* Number of directory entry creation edits */ atomic_t n_dir_rm; /* Number of directory entry removal edits */ + atomic_t n_stores; /* Number of store ops */ + atomic_long_t n_store_bytes; /* Number of bytes stored */ + atomic_long_t n_fetch_bytes; /* Number of bytes fetched */ + atomic_t n_fetches; /* Number of data fetch ops */ }; extern const char afs_init_sysname[]; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 3212bce0d4fb..839a22280606 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -922,6 +922,13 @@ static int afs_proc_stats_show(struct seq_file *m, void *v) seq_printf(m, "dir-edit: cr=%u rm=%u\n", atomic_read(&net->n_dir_cr), atomic_read(&net->n_dir_rm)); + + seq_printf(m, "file-rd : n=%u nb=%lu\n", + atomic_read(&net->n_fetches), + atomic_long_read(&net->n_fetch_bytes)); + seq_printf(m, "file-wr : n=%u nb=%lu\n", + atomic_read(&net->n_stores), + atomic_long_read(&net->n_store_bytes)); return 0; } diff --git a/fs/afs/write.c b/fs/afs/write.c index 70a563c14e6f..eccc16198f68 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -356,6 +356,12 @@ found_key: } switch (ret) { + case 0: + afs_stat_v(vnode, n_stores); + atomic_long_add((last * PAGE_SIZE + to) - + (first * PAGE_SIZE + offset), + &afs_v2net(vnode)->n_store_bytes); + break; case -EACCES: case -EPERM: case -ENOKEY: -- cgit v1.2.3 From 5a8132761609bd7e42db642d6f157140d5bf2ae8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 6 Apr 2018 14:17:26 +0100 Subject: afs: Do better accretion of small writes on newly created content Processes like ld that do lots of small writes that aren't necessarily contiguous result in a lot of small StoreData operations to the server, the idea being that if someone else changes the data on the server, we only write our changes over that and not the space between. Further, we don't want to write back empty space if we can avoid it to make it easier for the server to do sparse files. However, making lots of tiny RPC ops is a lot less efficient for the server than one big one because each op requires allocation of resources and the taking of locks, so we want to compromise a bit. Reduce the load by the following: (1) If a file is just created locally or has just been truncated with O_TRUNC locally, allow subsequent writes to the file to be merged with intervening space if that space doesn't cross an entire intervening page. (2) Don't flush the file on ->flush() but rather on ->release() if the file was open for writing. Just linking vmlinux.o, without this patch, looking in /proc/fs/afs/stats: file-wr : n=441 nb=513581204 and after the patch: file-wr : n=62 nb=513668555 there were 379 fewer StoreData RPC operations at the expense of an extra 87K being written. Signed-off-by: David Howells --- fs/afs/callback.c | 1 + fs/afs/dir.c | 3 +++ fs/afs/file.c | 7 ++++++- fs/afs/internal.h | 2 +- fs/afs/write.c | 32 +++++++++++++------------------- 5 files changed, 24 insertions(+), 21 deletions(-) (limited to 'fs/afs/write.c') diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 6049ca837498..abd9a84f4e88 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -130,6 +130,7 @@ void afs_break_callback(struct afs_vnode *vnode) write_seqlock(&vnode->cb_lock); + clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { vnode->cb_break++; afs_clear_permits(vnode); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 43bb3b23a879..5889f70d4d27 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1092,6 +1092,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, struct afs_file_status *newstatus, struct afs_callback *newcb) { + struct afs_vnode *vnode; struct inode *inode; if (fc->ac.error < 0) @@ -1109,6 +1110,8 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, return; } + vnode = AFS_FS_I(inode); + set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); d_add(new_dentry, inode); } diff --git a/fs/afs/file.c b/fs/afs/file.c index e5cac1bc3cf0..c24c08016dd9 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -30,7 +30,6 @@ static int afs_readpages(struct file *filp, struct address_space *mapping, const struct file_operations afs_file_operations = { .open = afs_open, - .flush = afs_flush, .release = afs_release, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, @@ -146,6 +145,9 @@ int afs_open(struct inode *inode, struct file *file) if (ret < 0) goto error_af; } + + if (file->f_flags & O_TRUNC) + set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); file->private_data = af; _leave(" = 0"); @@ -170,6 +172,9 @@ int afs_release(struct inode *inode, struct file *file) _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); + if ((file->f_mode & FMODE_WRITE)) + return vfs_fsync(file, 0); + file->private_data = NULL; if (af->wb) afs_put_wb_key(af->wb); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index f6b44f47732d..f8086ec95e24 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -506,6 +506,7 @@ struct afs_vnode { #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ #define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */ #define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */ +#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */ struct list_head wb_keys; /* List of keys available for writeback */ struct list_head pending_locks; /* locks waiting to be granted */ @@ -1026,7 +1027,6 @@ extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); -extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); extern int afs_page_mkwrite(struct vm_fault *); extern void afs_prune_wb_keys(struct afs_vnode *); diff --git a/fs/afs/write.c b/fs/afs/write.c index eccc16198f68..160f6cc26c00 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -125,7 +125,12 @@ try_again: page->index, priv); goto flush_conflicting_write; } - if (to < f || from > t) + /* If the file is being filled locally, allow inter-write + * spaces to be merged into writes. If it's not, only write + * back what the user gives us. + */ + if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) && + (to < f || from > t)) goto flush_conflicting_write; if (from < f) f = from; @@ -419,7 +424,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, trace_afs_page_dirty(vnode, tracepoint_string("WARN"), primary_page->index, priv); - if (start >= final_page || to < PAGE_SIZE) + if (start >= final_page || + (to < PAGE_SIZE && !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags))) goto no_more; start++; @@ -440,9 +446,10 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, } for (loop = 0; loop < n; loop++) { - if (to != PAGE_SIZE) - break; page = pages[loop]; + if (to != PAGE_SIZE && + !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) + break; if (page->index > final_page) break; if (!trylock_page(page)) @@ -455,7 +462,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, priv = page_private(page); f = priv & AFS_PRIV_MAX; t = priv >> AFS_PRIV_SHIFT; - if (f != 0) { + if (f != 0 && + !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) { unlock_page(page); break; } @@ -740,20 +748,6 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) return file_write_and_wait_range(file, start, end); } -/* - * Flush out all outstanding writes on a file opened for writing when it is - * closed. - */ -int afs_flush(struct file *file, fl_owner_t id) -{ - _enter(""); - - if ((file->f_mode & FMODE_WRITE) == 0) - return 0; - - return vfs_fsync(file, 0); -} - /* * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal -- cgit v1.2.3