From 3176c3e0ef32963aa5f6f9754142e420a4ba5d64 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jul 2016 11:31:42 +1000 Subject: xfs: kill ioflags Now that we have the direct I/O kiocb flag there is no real need to sample the value inside of XFS, and the invis flag was always just partially used and isn't worth keeping this infrastructure around for. This also splits the read tracepoint into buffered vs direct as we've done for writes a long time ago. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_trace.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'fs/xfs/xfs_trace.h') diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ea94ee0fe5ea..a1bc5c64a573 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1134,15 +1134,14 @@ TRACE_EVENT(xfs_log_assign_tail_lsn, ) DECLARE_EVENT_CLASS(xfs_file_class, - TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), - TP_ARGS(ip, count, offset, flags), + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset), + TP_ARGS(ip, count, offset), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_fsize_t, size) __field(loff_t, offset) __field(size_t, count) - __field(int, flags) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; @@ -1150,23 +1149,21 @@ DECLARE_EVENT_CLASS(xfs_file_class, __entry->size = ip->i_d.di_size; __entry->offset = offset; __entry->count = count; - __entry->flags = flags; ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx " - "offset 0x%llx count 0x%zx ioflags %s", + TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count 0x%zx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, __entry->offset, - __entry->count, - __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) + __entry->count) ) #define DEFINE_RW_EVENT(name) \ DEFINE_EVENT(xfs_file_class, name, \ - TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ - TP_ARGS(ip, count, offset, flags)) -DEFINE_RW_EVENT(xfs_file_read); + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset), \ + TP_ARGS(ip, count, offset)) +DEFINE_RW_EVENT(xfs_file_buffered_read); +DEFINE_RW_EVENT(xfs_file_direct_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_splice_read); -- cgit v1.2.3 From 16d4d43595b4780daac8fcea6d042689124cb094 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jul 2016 11:38:55 +1000 Subject: xfs: split direct I/O and DAX path So far the DAX code overloaded the direct I/O code path. There is very little in common between the two, and untangling them allows to clean up both variants. As a side effect we also get separate trace points for both I/O types. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 139 ++++++++++++++++++++++++++++++++++++++++++----------- fs/xfs/xfs_trace.h | 2 + 2 files changed, 112 insertions(+), 29 deletions(-) (limited to 'fs/xfs/xfs_trace.h') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index dd5185dafc9f..d97e8cb99a59 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -305,13 +305,11 @@ xfs_file_dio_aio_read( else target = ip->i_mount->m_ddev_targp; - if (!IS_DAX(inode)) { - /* DIO must be aligned to device logical sector size */ - if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { - if (iocb->ki_pos == isize) - return 0; - return -EINVAL; - } + /* DIO must be aligned to device logical sector size */ + if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { + if (iocb->ki_pos == isize) + return 0; + return -EINVAL; } /* @@ -360,13 +358,37 @@ xfs_file_dio_aio_read( } data = *to; - if (IS_DAX(inode)) { - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, - NULL, 0); - } else { - ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, - xfs_get_blocks_direct, NULL, NULL, 0); + ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, + xfs_get_blocks_direct, NULL, NULL, 0); + if (ret > 0) { + iocb->ki_pos += ret; + iov_iter_advance(to, ret); } + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); + + file_accessed(iocb->ki_filp); + return ret; +} + +STATIC ssize_t +xfs_file_dax_read( + struct kiocb *iocb, + struct iov_iter *to) +{ + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct iov_iter data = *to; + size_t count = iov_iter_count(to); + ssize_t ret = 0; + + trace_xfs_file_dax_read(ip, count, iocb->ki_pos); + + if (!count) + return 0; /* skip atime */ + + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0); if (ret > 0) { iocb->ki_pos += ret; iov_iter_advance(to, ret); @@ -399,7 +421,8 @@ xfs_file_read_iter( struct kiocb *iocb, struct iov_iter *to) { - struct xfs_mount *mp = XFS_I(file_inode(iocb->ki_filp))->i_mount; + struct inode *inode = file_inode(iocb->ki_filp); + struct xfs_mount *mp = XFS_I(inode)->i_mount; ssize_t ret = 0; XFS_STATS_INC(mp, xs_read_calls); @@ -407,7 +430,9 @@ xfs_file_read_iter( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (iocb->ki_flags & IOCB_DIRECT) + if (IS_DAX(inode)) + ret = xfs_file_dax_read(iocb, to); + else if (iocb->ki_flags & IOCB_DIRECT) ret = xfs_file_dio_aio_read(iocb, to); else ret = xfs_file_buffered_aio_read(iocb, to); @@ -755,8 +780,7 @@ xfs_file_dio_aio_write( mp->m_rtdev_targp : mp->m_ddev_targp; /* DIO must be aligned to device logical sector size */ - if (!IS_DAX(inode) && - ((iocb->ki_pos | count) & target->bt_logical_sectormask)) + if ((iocb->ki_pos | count) & target->bt_logical_sectormask) return -EINVAL; /* "unaligned" here means not aligned to a filesystem block */ @@ -825,14 +849,9 @@ xfs_file_dio_aio_write( trace_xfs_file_direct_write(ip, count, iocb->ki_pos); data = *from; - if (IS_DAX(inode)) { - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, - xfs_end_io_direct_write, 0); - } else { - ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, - xfs_get_blocks_direct, xfs_end_io_direct_write, - NULL, DIO_ASYNC_EXTEND); - } + ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, + xfs_get_blocks_direct, xfs_end_io_direct_write, + NULL, DIO_ASYNC_EXTEND); /* see generic_file_direct_write() for why this is necessary */ if (mapping->nrpages) { @@ -849,10 +868,70 @@ out: xfs_rw_iunlock(ip, iolock); /* - * No fallback to buffered IO on errors for XFS. DAX can result in - * partial writes, but direct IO will either complete fully or fail. + * No fallback to buffered IO on errors for XFS, direct IO will either + * complete fully or fail. + */ + ASSERT(ret < 0 || ret == count); + return ret; +} + +STATIC ssize_t +xfs_file_dax_write( + struct kiocb *iocb, + struct iov_iter *from) +{ + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + ssize_t ret = 0; + int unaligned_io = 0; + int iolock; + struct iov_iter data; + + /* "unaligned" here means not aligned to a filesystem block */ + if ((iocb->ki_pos & mp->m_blockmask) || + ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) { + unaligned_io = 1; + iolock = XFS_IOLOCK_EXCL; + } else if (mapping->nrpages) { + iolock = XFS_IOLOCK_EXCL; + } else { + iolock = XFS_IOLOCK_SHARED; + } + xfs_rw_ilock(ip, iolock); + + ret = xfs_file_aio_write_checks(iocb, from, &iolock); + if (ret) + goto out; + + /* + * Yes, even DAX files can have page cache attached to them: A zeroed + * page is inserted into the pagecache when we have to serve a write + * fault on a hole. It should never be dirtied and can simply be + * dropped from the pagecache once we get real data for the page. */ - ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip))); + if (mapping->nrpages) { + ret = invalidate_inode_pages2(mapping); + WARN_ON_ONCE(ret); + } + + if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) { + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + iolock = XFS_IOLOCK_SHARED; + } + + trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos); + + data = *from; + ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, + xfs_end_io_direct_write, 0); + if (ret > 0) { + iocb->ki_pos += ret; + iov_iter_advance(from, ret); + } +out: + xfs_rw_iunlock(ip, iolock); return ret; } @@ -934,7 +1013,9 @@ xfs_file_write_iter( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode)) + if (IS_DAX(inode)) + ret = xfs_file_dax_write(iocb, from); + else if (iocb->ki_flags & IOCB_DIRECT) ret = xfs_file_dio_aio_write(iocb, from); else ret = xfs_file_buffered_aio_write(iocb, from); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index a1bc5c64a573..c2876917dd89 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1164,8 +1164,10 @@ DEFINE_EVENT(xfs_file_class, name, \ TP_ARGS(ip, count, offset)) DEFINE_RW_EVENT(xfs_file_buffered_read); DEFINE_RW_EVENT(xfs_file_direct_read); +DEFINE_RW_EVENT(xfs_file_dax_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); +DEFINE_RW_EVENT(xfs_file_dax_write); DEFINE_RW_EVENT(xfs_file_splice_read); DECLARE_EVENT_CLASS(xfs_page_class, -- cgit v1.2.3