summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2019-05-17 02:05:21 +0200
committerDan Williams <dan.j.williams@intel.com>2019-05-21 05:43:32 +0200
commit52f476a323f9efc959be1c890d0cdcf12e1582e0 (patch)
treec35935a915a3086d527f6daa7db839f406b7ada8
parentdax: Arrange for dax_supported check to span multiple devices (diff)
downloadlinux-52f476a323f9efc959be1c890d0cdcf12e1582e0.tar.xz
linux-52f476a323f9efc959be1c890d0cdcf12e1582e0.zip
libnvdimm/pmem: Bypass CONFIG_HARDENED_USERCOPY overhead
Jeff discovered that performance improves from ~375K iops to ~519K iops on a simple psync-write fio workload when moving the location of 'struct page' from the default PMEM location to DRAM. This result is surprising because the expectation is that 'struct page' for dax is only needed for third party references to dax mappings. For example, a dax-mapped buffer passed to another system call for direct-I/O requires 'struct page' for sending the request down the driver stack and pinning the page. There is no usage of 'struct page' for first party access to a file via read(2)/write(2) and friends. However, this "no page needed" expectation is violated by CONFIG_HARDENED_USERCOPY and the check_copy_size() performed in copy_from_iter_full_nocache() and copy_to_iter_mcsafe(). The check_heap_object() helper routine assumes the buffer is backed by a slab allocator (DRAM) page and applies some checks. Those checks are invalid, dax pages do not originate from the slab, and redundant, dax_iomap_actor() has already validated that the I/O is within bounds. Specifically that routine validates that the logical file offset is within bounds of the file, then it does a sector-to-pfn translation which validates that the physical mapping is within bounds of the block device. Bypass additional hardened usercopy overhead and call the 'no check' versions of the copy_{to,from}_iter operations directly. Fixes: 0aed55af8834 ("x86, uaccess: introduce copy_from_iter_flushcache...") Cc: <stable@vger.kernel.org> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Matthew Wilcox <willy@infradead.org> Reported-and-tested-by: Jeff Smits <jeff.smits@intel.com> Acked-by: Kees Cook <keescook@chromium.org> Acked-by: Jan Kara <jack@suse.cz> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r--drivers/nvdimm/pmem.c10
1 files changed, 8 insertions, 2 deletions
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 845c5b430cdd..d9d845077b8b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -281,16 +281,22 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
}
+/*
+ * Use the 'no check' versions of copy_from_iter_flushcache() and
+ * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
+ * checking, both file offset and device offset, is handled by
+ * dax_iomap_actor()
+ */
static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
- return copy_from_iter_flushcache(addr, bytes, i);
+ return _copy_from_iter_flushcache(addr, bytes, i);
}
static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
- return copy_to_iter_mcsafe(addr, bytes, i);
+ return _copy_to_iter_mcsafe(addr, bytes, i);
}
static const struct dax_operations pmem_dax_ops = {