summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2016-01-16 01:56:49 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-16 02:56:32 +0100
commit5c2c2587b13235bf8b5c9027589f22eff68bdf49 (patch)
tree718d85e1921e2d408bb207261f3000a286815aaa /kernel
parentlibnvdimm, pmem: move request_queue allocation earlier in probe (diff)
downloadlinux-5c2c2587b13235bf8b5c9027589f22eff68bdf49.tar.xz
linux-5c2c2587b13235bf8b5c9027589f22eff68bdf49.zip
mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup
get_dev_page() enables paths like get_user_pages() to pin a dynamically mapped pfn-range (devm_memremap_pages()) while the resulting struct page objects are in use. Unlike get_page() it may fail if the device is, or is in the process of being, disabled. While the initial lookup of the range may be an expensive list walk, the result is cached to speed up subsequent lookups which are likely to be in the same mapped range. devm_memremap_pages() now requires a reference counter to be specified at init time. For pmem this means moving request_queue allocation into pmem_alloc() so the existing queue usage counter can track "device pages". ZONE_DEVICE pages always have an elevated count and will never be on an lru reclaim list. That space in 'struct page' can be redirected for other uses, but for safety introduce a poison value that will always trip __list_add() to assert. This allows half of the struct list_head storage to be reclaimed with some assurance to back up the assumption that the page count never goes to zero and a list_add() is never attempted. Signed-off-by: Dan Williams <dan.j.williams@intel.com> Tested-by: Logan Gunthorpe <logang@deltatee.com> Cc: Dave Hansen <dave@sr71.net> Cc: Matthew Wilcox <willy@linux.intel.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/memremap.c53
1 files changed, 49 insertions, 4 deletions
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 562f6471fe90..3eb8944265d5 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -179,6 +179,29 @@ static void pgmap_radix_release(struct resource *res)
mutex_unlock(&pgmap_lock);
}
+static unsigned long pfn_first(struct page_map *page_map)
+{
+ struct dev_pagemap *pgmap = &page_map->pgmap;
+ const struct resource *res = &page_map->res;
+ struct vmem_altmap *altmap = pgmap->altmap;
+ unsigned long pfn;
+
+ pfn = res->start >> PAGE_SHIFT;
+ if (altmap)
+ pfn += vmem_altmap_offset(altmap);
+ return pfn;
+}
+
+static unsigned long pfn_end(struct page_map *page_map)
+{
+ const struct resource *res = &page_map->res;
+
+ return (res->start + resource_size(res)) >> PAGE_SHIFT;
+}
+
+#define for_each_device_pfn(pfn, map) \
+ for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
+
static void devm_memremap_pages_release(struct device *dev, void *data)
{
struct page_map *page_map = data;
@@ -186,6 +209,11 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
resource_size_t align_start, align_size;
struct dev_pagemap *pgmap = &page_map->pgmap;
+ if (percpu_ref_tryget_live(pgmap->ref)) {
+ dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
+ percpu_ref_put(pgmap->ref);
+ }
+
pgmap_radix_release(res);
/* pages are dead and unused, undo the arch mapping */
@@ -211,20 +239,26 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
* devm_memremap_pages - remap and provide memmap backing for the given resource
* @dev: hosting device for @res
* @res: "host memory" address range
+ * @ref: a live per-cpu reference count
* @altmap: optional descriptor for allocating the memmap from @res
*
- * Note, the expectation is that @res is a host memory range that could
- * feasibly be treated as a "System RAM" range, i.e. not a device mmio
- * range, but this is not enforced.
+ * Notes:
+ * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
+ * (or devm release event).
+ *
+ * 2/ @res is expected to be a host memory range that could feasibly be
+ * treated as a "System RAM" range, i.e. not a device mmio range, but
+ * this is not enforced.
*/
void *devm_memremap_pages(struct device *dev, struct resource *res,
- struct vmem_altmap *altmap)
+ struct percpu_ref *ref, struct vmem_altmap *altmap)
{
int is_ram = region_intersects(res->start, resource_size(res),
"System RAM");
resource_size_t key, align_start, align_size;
struct dev_pagemap *pgmap;
struct page_map *page_map;
+ unsigned long pfn;
int error, nid;
if (is_ram == REGION_MIXED) {
@@ -242,6 +276,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
return ERR_PTR(-ENXIO);
}
+ if (!ref)
+ return ERR_PTR(-EINVAL);
+
page_map = devres_alloc_node(devm_memremap_pages_release,
sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
if (!page_map)
@@ -255,6 +292,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
memcpy(&page_map->altmap, altmap, sizeof(*altmap));
pgmap->altmap = &page_map->altmap;
}
+ pgmap->ref = ref;
pgmap->res = &page_map->res;
mutex_lock(&pgmap_lock);
@@ -292,6 +330,13 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (error)
goto err_add_memory;
+ for_each_device_pfn(pfn, page_map) {
+ struct page *page = pfn_to_page(pfn);
+
+ /* ZONE_DEVICE pages must never appear on a slab lru */
+ list_force_poison(&page->lru);
+ page->pgmap = pgmap;
+ }
devres_add(dev, page_map);
return __va(res->start);