From 0aed55af88345b5d673240f90e671d79662fb01e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 12:22:50 -0700 Subject: x86, uaccess: introduce copy_from_iter_flushcache for pmem / cache-bypass operations The pmem driver has a need to transfer data with a persistent memory destination and be able to rely on the fact that the destination writes are not cached. It is sufficient for the writes to be flushed to a cpu-store-buffer (non-temporal / "movnt" in x86 terms), as we expect userspace to call fsync() to ensure data-writes have reached a power-fail-safe zone in the platform. The fsync() triggers a REQ_FUA or REQ_FLUSH to the pmem driver which will turn around and fence previous writes with an "sfence". Implement a __copy_from_user_inatomic_flushcache, memcpy_page_flushcache, and memcpy_flushcache, that guarantee that the destination buffer is not dirty in the cpu cache on completion. The new copy_from_iter_flushcache and sub-routines will be used to replace the "pmem api" (include/linux/pmem.h + arch/x86/include/asm/pmem.h). The availability of copy_from_iter_flushcache() and memcpy_flushcache() are gated by the CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE config symbol, and fallback to copy_from_iter_nocache() and plain memcpy() otherwise. This is meant to satisfy the concern from Linus that if a driver wants to do something beyond the normal nocache semantics it should be something private to that driver [1], and Al's concern that anything uaccess related belongs with the rest of the uaccess code [2]. The first consumer of this interface is a new 'copy_from_iter' dax operation so that pmem can inject cache maintenance operations without imposing this overhead on other dax-capable drivers. [1]: https://lists.01.org/pipermail/linux-nvdimm/2017-January/008364.html [2]: https://lists.01.org/pipermail/linux-nvdimm/2017-April/009942.html Cc: Cc: Jan Kara Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Toshi Kani Cc: "H. Peter Anvin" Cc: Al Viro Cc: Thomas Gleixner Cc: Matthew Wilcox Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 3 +-- drivers/nvdimm/claim.c | 2 +- drivers/nvdimm/pmem.c | 13 +++++++++++-- drivers/nvdimm/region_devs.c | 4 ++-- 4 files changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 656acb5d7166..cbd5596e7562 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1842,8 +1842,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, } if (rw) - memcpy_to_pmem(mmio->addr.aperture + offset, - iobuf + copied, c); + memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c); else { if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) mmio_flush_range((void __force *) diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 7ceb5fa4f2a1..b8b9c8ca7862 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -277,7 +277,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, rc = -EIO; } - memcpy_to_pmem(nsio->addr + offset, buf, size); + memcpy_flushcache(nsio->addr + offset, buf, size); nvdimm_flush(to_nd_region(ndns->dev.parent)); return rc; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index c544d466ea51..2f3aefe565c6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include "pmem.h" @@ -80,7 +81,7 @@ static void write_pmem(void *pmem_addr, struct page *page, { void *mem = kmap_atomic(page); - memcpy_to_pmem(pmem_addr, mem + off, len); + memcpy_flushcache(pmem_addr, mem + off, len); kunmap_atomic(mem); } @@ -235,8 +236,15 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); } +static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_from_iter_flushcache(addr, bytes, i); +} + static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, + .copy_from_iter = pmem_copy_from_iter, }; static void pmem_release_queue(void *q) @@ -294,7 +302,8 @@ static int pmem_attach_disk(struct device *dev, dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); - if (nvdimm_has_flush(nd_region) < 0) + if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) + || nvdimm_has_flush(nd_region) < 0) dev_warn(dev, "unable to guarantee persistence of writes\n"); if (!devm_request_mem_region(dev, res->start, resource_size(res), diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index b550edf2571f..985b0e11bd73 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1015,8 +1015,8 @@ void nvdimm_flush(struct nd_region *nd_region) * The first wmb() is needed to 'sfence' all previous writes * such that they are architecturally visible for the platform * buffer flush. Note that we've already arranged for pmem - * writes to avoid the cache via arch_memcpy_to_pmem(). The - * final wmb() ensures ordering for the NVDIMM flush write. + * writes to avoid the cache via memcpy_flushcache(). The final + * wmb() ensures ordering for the NVDIMM flush write. */ wmb(); for (i = 0; i < nd_region->ndr_mappings; i++) -- cgit v1.2.3