From d9dabc1a01656868d53a1d08309ca24d922b1376 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Mar 2015 15:55:49 -0400 Subject: NFS: File unlock needs to be a metadata synchronisation point File unlock needs to update both data and metadata on the NFS server in order to act as a synchronisation point for other clients. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e679d24c39d3..6959cb76744b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -780,7 +780,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * Flush all pending writes before doing anything * with locks.. */ - nfs_sync_mapping(filp->f_mapping); + vfs_fsync(filp, 0); l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); if (!IS_ERR(l_ctx)) { -- cgit v1.2.3 From a0815d556d1cfb686b46995f86fb081f623fa720 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 18:58:53 -0400 Subject: NFSv4.1/pnfs: Ensure that writes respect the O_SYNC flag when doing O_DIRECT If the caller does not specify the O_SYNC flag, then it is legitimate to return from O_DIRECT without doing a pNFS layoutcommit operation. However if the file is opened O_DIRECT|O_SYNC then we'd better get it right. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 1 + fs/nfs/file.c | 1 + fs/nfs/nfs4file.c | 1 + 3 files changed, 3 insertions(+) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 51f617e45fd1..018a06a062bd 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -1034,6 +1034,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, if (i_size_read(inode) < iocb->ki_pos) i_size_write(inode, iocb->ki_pos); spin_unlock(&inode->i_lock); + generic_write_sync(file, pos, result); } } nfs_direct_req_release(dreq); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6959cb76744b..28228f381266 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -281,6 +281,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) trace_nfs_fsync_enter(inode); + nfs_inode_dio_wait(inode); do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret != 0) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 8da5409e6f1a..befe7a2c6284 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -104,6 +104,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) trace_nfs_fsync_enter(inode); + nfs_inode_dio_wait(inode); do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret != 0) -- cgit v1.2.3 From 2ba48ce513c4e545318d22b138861d5876edf906 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 13:52:01 -0400 Subject: mirror O_APPEND and O_DIRECT into iocb->ki_flags ... avoiding write_iter/fcntl races. Signed-off-by: Al Viro --- fs/aio.c | 2 +- fs/btrfs/file.c | 2 +- fs/ceph/file.c | 8 ++++---- fs/ext4/file.c | 4 ++-- fs/fuse/file.c | 2 +- fs/gfs2/file.c | 2 +- fs/nfs/file.c | 6 +++--- fs/ocfs2/file.c | 10 +++++----- fs/xfs/xfs_file.c | 4 ++-- include/linux/fs.h | 15 +++++++++++++++ mm/filemap.c | 6 +++--- 11 files changed, 38 insertions(+), 23 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/aio.c b/fs/aio.c index 5785c4b58fea..e976185c8e5b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1502,7 +1502,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, } req->common.ki_pos = iocb->aio_offset; req->common.ki_complete = aio_complete; - req->common.ki_flags = 0; + req->common.ki_flags = iocb_flags(req->common.ki_filp); if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c64d11c41eeb..faa7d390841b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1794,7 +1794,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, if (sync) atomic_inc(&BTRFS_I(inode)->sync_writers); - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { num_written = __btrfs_direct_write(iocb, from, pos); } else { num_written = __btrfs_buffered_write(file, from, pos); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3f0b9339d823..b9b8eb225f66 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -457,7 +457,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, if (ret < 0) return ret; - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { while (iov_iter_count(i)) { size_t start; ssize_t n; @@ -828,7 +828,7 @@ again: return ret; if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || - (iocb->ki_filp->f_flags & O_DIRECT) || + (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", @@ -995,12 +995,12 @@ retry_snap: inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || - (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) { + (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { struct iov_iter data; mutex_unlock(&inode->i_mutex); /* we might need to revert back to that point */ data = *from; - if (file->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) written = ceph_sync_direct_write(iocb, &data, pos); else written = ceph_sync_write(iocb, &data, pos); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index c10785f10d1d..53bbc0b1995f 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -95,7 +95,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); struct mutex *aio_mutex = NULL; struct blk_plug plug; - int o_direct = io_is_direct(file); + int o_direct = iocb->ki_flags & IOCB_DIRECT; int overwrite = 0; ssize_t ret; @@ -106,7 +106,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb) && - (file->f_flags & O_APPEND || + (iocb->ki_flags & IOCB_APPEND || ext4_unaligned_aio(inode, from, iocb->ki_pos))) { aio_mutex = ext4_aio_mutex(inode); mutex_lock(aio_mutex); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b86c8e08399a..5ef05b5c4cff 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1177,7 +1177,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos = iocb->ki_pos; written = generic_file_direct_write(iocb, from, pos); if (written < 0 || !iov_iter_count(from)) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 614bb42cb7e1..08329afa1339 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -709,7 +709,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); - if (file->f_flags & O_APPEND) { + if (iocb->ki_flags & IOCB_APPEND) { struct gfs2_holder gh; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f6a3adedf027..14364dc001f7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -170,7 +170,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t result; - if (iocb->ki_filp->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) return nfs_file_direct_read(iocb, to, iocb->ki_pos); dprintk("NFS: read(%pD2, %zu@%lu)\n", @@ -680,7 +680,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result) return result; - if (file->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) return nfs_file_direct_write(iocb, from, pos); dprintk("NFS: write(%pD2, %zu@%Ld)\n", @@ -692,7 +692,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) /* * O_APPEND implies that we must revalidate the file length. */ - if (file->f_flags & O_APPEND) { + if (iocb->ki_flags & IOCB_APPEND) { result = nfs_revalidate_file_size(inode, file); if (result) goto out; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index b93919f50f0f..cd37f6cd4d51 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2274,8 +2274,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, if (count == 0) return 0; - appending = file->f_flags & O_APPEND ? 1 : 0; - direct_io = file->f_flags & O_DIRECT ? 1 : 0; + appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0; + direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; mutex_lock(&inode->i_mutex); @@ -2429,7 +2429,7 @@ relock: out_dio: /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); if (unlikely(written <= 0)) goto no_sync; @@ -2546,7 +2546,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, * buffered reads protect themselves in ->readpage(). O_DIRECT reads * need locks to protect pending reads from racing with truncate. */ - if (filp->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { have_alloc_sem = 1; ocfs2_iocb_set_sem_locked(iocb); @@ -2580,7 +2580,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, trace_generic_file_aio_read_ret(ret); /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); + BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); /* see ocfs2_file_write_iter */ if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 28d157807b42..1f12ad0a8585 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -279,7 +279,7 @@ xfs_file_read_iter( XFS_STATS_INC(xs_read_calls); - if (unlikely(file->f_flags & O_DIRECT)) + if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ioflags |= XFS_IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) ioflags |= XFS_IO_INVIS; @@ -804,7 +804,7 @@ xfs_file_write_iter( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (unlikely(file->f_flags & O_DIRECT)) + if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ret = xfs_file_dio_aio_write(iocb, from); else ret = xfs_file_buffered_aio_write(iocb, from); diff --git a/include/linux/fs.h b/include/linux/fs.h index b4aa400ac723..b1d7db28c13c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,8 @@ struct address_space; struct writeback_control; #define IOCB_EVENTFD (1 << 0) +#define IOCB_APPEND (1 << 1) +#define IOCB_DIRECT (1 << 2) struct kiocb { struct file *ki_filp; @@ -329,10 +331,13 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) return kiocb->ki_complete == NULL; } +static inline int iocb_flags(struct file *file); + static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, + .ki_flags = iocb_flags(filp), }; } @@ -2779,6 +2784,16 @@ static inline bool io_is_direct(struct file *filp) return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp)); } +static inline int iocb_flags(struct file *file) +{ + int res = 0; + if (file->f_flags & O_APPEND) + res |= IOCB_APPEND; + if (io_is_direct(file)) + res |= IOCB_DIRECT; + return res; +} + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; diff --git a/mm/filemap.c b/mm/filemap.c index 243997a26e7c..405de370e657 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1694,7 +1694,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) loff_t *ppos = &iocb->ki_pos; loff_t pos = *ppos; - if (io_is_direct(file)) { + if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); @@ -2271,7 +2271,7 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) return 0; /* FIXME: this is for backwards compatibility with 2.4 */ - if (file->f_flags & O_APPEND) + if (iocb->ki_flags & IOCB_APPEND) iocb->ki_pos = i_size_read(inode); pos = iocb->ki_pos; @@ -2545,7 +2545,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; - if (io_is_direct(file)) { + if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos, endbyte; written = generic_file_direct_write(iocb, from, iocb->ki_pos); -- cgit v1.2.3 From 65a4a1cad7c56e7056fb4b35ac2d93695612612c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 14:11:08 -0400 Subject: nfs: generic_write_checks() shouldn't be done on swapout... Signed-off-by: Al Viro --- fs/nfs/direct.c | 12 +++--------- fs/nfs/file.c | 11 +++++++---- include/linux/nfs_fs.h | 3 +-- 3 files changed, 11 insertions(+), 15 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9634189b8545..682f65fe09b5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -268,7 +268,7 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) if (iov_iter_rw(iter) == READ) return nfs_file_direct_read(iocb, iter, pos); - return nfs_file_direct_write(iocb, iter, pos); + return nfs_file_direct_write(iocb, iter); #endif /* CONFIG_NFS_SWAP */ } @@ -959,8 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) { ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; @@ -968,15 +967,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; - loff_t end; + loff_t pos, end; dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(iocb, iter); - if (result <= 0) - goto out; - nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, iov_iter_count(iter)); @@ -1044,7 +1039,6 @@ out_release: nfs_direct_req_release(dreq); out_unlock: mutex_unlock(&inode->i_mutex); -out: return result; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 14364dc001f7..c40e4363e746 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -674,17 +674,20 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) unsigned long written = 0; ssize_t result; size_t count = iov_iter_count(from); - loff_t pos = iocb->ki_pos; result = nfs_key_timeout_notify(file, inode); if (result) return result; - if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_write(iocb, from, pos); + if (iocb->ki_flags & IOCB_DIRECT) { + result = generic_write_checks(iocb, from); + if (result <= 0) + return result; + return nfs_file_direct_write(iocb, from); + } dprintk("NFS: write(%pD2, %zu@%Ld)\n", - file, count, (long long) pos); + file, count, (long long) iocb->ki_pos); result = -EBUSY; if (IS_SWAPFILE(inode)) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 3d1b0d2fe55e..410abd172feb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -452,8 +452,7 @@ extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, loff_t pos); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - struct iov_iter *iter, - loff_t pos); + struct iov_iter *iter); /* * linux/fs/nfs/dir.c -- cgit v1.2.3