diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-08 23:35:59 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-08 23:35:59 +0200 |
commit | 12f03ee606914317e7e6a0815e53a48205c31dae (patch) | |
tree | f8579bf77d29b3921e1877e0ae12ec65b5ebc738 /drivers | |
parent | Merge branch 'misc' of git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/k... (diff) | |
parent | libnvdimm, pmem: direct map legacy pmem by default (diff) | |
download | linux-12f03ee606914317e7e6a0815e53a48205c31dae.tar.xz linux-12f03ee606914317e7e6a0815e53a48205c31dae.zip |
Merge tag 'libnvdimm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"This update has successfully completed a 0day-kbuild run and has
appeared in a linux-next release. The changes outside of the typical
drivers/nvdimm/ and drivers/acpi/nfit.[ch] paths are related to the
removal of IORESOURCE_CACHEABLE, the introduction of memremap(), and
the introduction of ZONE_DEVICE + devm_memremap_pages().
Summary:
- Introduce ZONE_DEVICE and devm_memremap_pages() as a generic
mechanism for adding device-driver-discovered memory regions to the
kernel's direct map.
This facility is used by the pmem driver to enable pfn_to_page()
operations on the page frames returned by DAX ('direct_access' in
'struct block_device_operations').
For now, the 'memmap' allocation for these "device" pages comes
from "System RAM". Support for allocating the memmap from device
memory will arrive in a later kernel.
- Introduce memremap() to replace usages of ioremap_cache() and
ioremap_wt(). memremap() drops the __iomem annotation for these
mappings to memory that do not have i/o side effects. The
replacement of ioremap_cache() with memremap() is limited to the
pmem driver to ease merging the api change in v4.3.
Completion of the conversion is targeted for v4.4.
- Similar to the usage of memcpy_to_pmem() + wmb_pmem() in the pmem
driver, update the VFS DAX implementation and PMEM api to provide
persistence guarantees for kernel operations on a DAX mapping.
- Convert the ACPI NFIT 'BLK' driver to map the block apertures as
cacheable to improve performance.
- Miscellaneous updates and fixes to libnvdimm including support for
issuing "address range scrub" commands, clarifying the optimal
'sector size' of pmem devices, a clarification of the usage of the
ACPI '_STA' (status) property for DIMM devices, and other minor
fixes"
* tag 'libnvdimm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (34 commits)
libnvdimm, pmem: direct map legacy pmem by default
libnvdimm, pmem: 'struct page' for pmem
libnvdimm, pfn: 'struct page' provider infrastructure
x86, pmem: clarify that ARCH_HAS_PMEM_API implies PMEM mapped WB
add devm_memremap_pages
mm: ZONE_DEVICE for "device memory"
mm: move __phys_to_pfn and __pfn_to_phys to asm/generic/memory_model.h
dax: drop size parameter to ->direct_access()
nd_blk: change aperture mapping from WC to WB
nvdimm: change to use generic kvfree()
pmem, dax: have direct_access use __pmem annotation
dax: update I/O path to do proper PMEM flushing
pmem: add copy_from_iter_pmem() and clear_pmem()
pmem, x86: clean up conditional pmem includes
pmem: remove layer when calling arch_has_wmb_pmem()
pmem, x86: move x86 PMEM API to new pmem.h header
libnvdimm, e820: make CONFIG_X86_PMEM_LEGACY a tristate option
pmem: switch to devm_ allocations
devres: add devm_memremap
libnvdimm, btt: write and validate parent_uuid
...
Diffstat (limited to 'drivers')
38 files changed, 1224 insertions, 367 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 54e9729f9634..5d1015c26ff4 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -417,6 +417,7 @@ config ACPI_NFIT tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" depends on PHYS_ADDR_T_64BIT depends on BLK_DEV + depends on ARCH_HAS_MMIO_FLUSH select LIBNVDIMM help Infrastructure to probe ACPI 6 compliant platforms for diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index cf0fd96a7602..c1b8d03e262e 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -20,6 +20,7 @@ #include <linux/sort.h> #include <linux/pmem.h> #include <linux/io.h> +#include <asm/cacheflush.h> #include "nfit.h" /* @@ -764,9 +765,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, struct acpi_device *adev, *adev_dimm; struct device *dev = acpi_desc->dev; const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM); - unsigned long long sta; - int i, rc = -ENODEV; - acpi_status status; + int i; nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en; adev = to_acpi_dev(acpi_desc); @@ -781,25 +780,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return force_enable_dimms ? 0 : -ENODEV; } - status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta); - if (status == AE_NOT_FOUND) { - dev_dbg(dev, "%s missing _STA, assuming enabled...\n", - dev_name(&adev_dimm->dev)); - rc = 0; - } else if (ACPI_FAILURE(status)) - dev_err(dev, "%s failed to retrieve_STA, disabling...\n", - dev_name(&adev_dimm->dev)); - else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0) - dev_info(dev, "%s disabled by firmware\n", - dev_name(&adev_dimm->dev)); - else - rc = 0; - for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++) if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i)) set_bit(i, &nfit_mem->dsm_mask); - return force_enable_dimms ? 0 : rc; + return 0; } static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) @@ -868,6 +853,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) struct acpi_device *adev; int i; + nd_desc->dsm_mask = acpi_desc->bus_dsm_force_en; adev = to_acpi_dev(acpi_desc); if (!adev) return; @@ -1032,7 +1018,7 @@ static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) if (mmio->num_lines) offset = to_interleave_offset(offset, mmio); - return readl(mmio->base + offset); + return readl(mmio->addr.base + offset); } static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, @@ -1057,11 +1043,11 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, if (mmio->num_lines) offset = to_interleave_offset(offset, mmio); - writeq(cmd, mmio->base + offset); + writeq(cmd, mmio->addr.base + offset); wmb_blk(nfit_blk); if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) - readq(mmio->base + offset); + readq(mmio->addr.base + offset); } static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, @@ -1093,11 +1079,16 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, } if (rw) - memcpy_to_pmem(mmio->aperture + offset, + memcpy_to_pmem(mmio->addr.aperture + offset, iobuf + copied, c); - else + else { + if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH) + mmio_flush_range((void __force *) + mmio->addr.aperture + offset, c); + memcpy_from_pmem(iobuf + copied, - mmio->aperture + offset, c); + mmio->addr.aperture + offset, c); + } copied += c; len -= c; @@ -1144,7 +1135,10 @@ static void nfit_spa_mapping_release(struct kref *kref) WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index); - iounmap(spa_map->iomem); + if (spa_map->type == SPA_MAP_APERTURE) + memunmap((void __force *)spa_map->addr.aperture); + else + iounmap(spa_map->addr.base); release_mem_region(spa->address, spa->length); list_del(&spa_map->list); kfree(spa_map); @@ -1190,7 +1184,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, spa_map = find_spa_mapping(acpi_desc, spa); if (spa_map) { kref_get(&spa_map->kref); - return spa_map->iomem; + return spa_map->addr.base; } spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL); @@ -1206,20 +1200,19 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, if (!res) goto err_mem; - if (type == SPA_MAP_APERTURE) { - /* - * TODO: memremap_pmem() support, but that requires cache - * flushing when the aperture is moved. - */ - spa_map->iomem = ioremap_wc(start, n); - } else - spa_map->iomem = ioremap_nocache(start, n); + spa_map->type = type; + if (type == SPA_MAP_APERTURE) + spa_map->addr.aperture = (void __pmem *)memremap(start, n, + ARCH_MEMREMAP_PMEM); + else + spa_map->addr.base = ioremap_nocache(start, n); + - if (!spa_map->iomem) + if (!spa_map->addr.base) goto err_map; list_add_tail(&spa_map->list, &acpi_desc->spa_maps); - return spa_map->iomem; + return spa_map->addr.base; err_map: release_mem_region(start, n); @@ -1282,7 +1275,7 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, nfit_blk->dimm_flags = flags.flags; else if (rc == -ENOTTY) { /* fall back to a conservative default */ - nfit_blk->dimm_flags = ND_BLK_DCR_LATCH; + nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH; rc = 0; } else rc = -ENXIO; @@ -1322,9 +1315,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, /* map block aperture memory */ nfit_blk->bdw_offset = nfit_mem->bdw->offset; mmio = &nfit_blk->mmio[BDW]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw, + mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw, SPA_MAP_APERTURE); - if (!mmio->base) { + if (!mmio->addr.base) { dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, nvdimm_name(nvdimm)); return -ENOMEM; @@ -1345,9 +1338,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; nfit_blk->stat_offset = nfit_mem->dcr->status_offset; mmio = &nfit_blk->mmio[DCR]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr, + mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr, SPA_MAP_CONTROL); - if (!mmio->base) { + if (!mmio->addr.base) { dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, nvdimm_name(nvdimm)); return -ENOMEM; @@ -1379,7 +1372,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return -ENOMEM; } - if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush) + if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush) dev_warn(dev, "unable to guarantee persistence of writes\n"); if (mmio->line_size == 0) @@ -1414,7 +1407,7 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, for (i = 0; i < 2; i++) { struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i]; - if (mmio->base) + if (mmio->addr.base) nfit_spa_unmap(acpi_desc, mmio->spa); } nd_blk_region_set_provider_data(ndbr, NULL); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 79b6d83875c1..7e740156b9c2 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -41,6 +41,7 @@ enum nfit_uuids { }; enum { + ND_BLK_READ_FLUSH = 1, ND_BLK_DCR_LATCH = 2, }; @@ -107,6 +108,7 @@ struct acpi_nfit_desc { struct nvdimm_bus *nvdimm_bus; struct device *dev; unsigned long dimm_dsm_force_en; + unsigned long bus_dsm_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); }; @@ -116,12 +118,16 @@ enum nd_blk_mmio_selector { DCR, }; +struct nd_blk_addr { + union { + void __iomem *base; + void __pmem *aperture; + }; +}; + struct nfit_blk { struct nfit_blk_mmio { - union { - void __iomem *base; - void __pmem *aperture; - }; + struct nd_blk_addr addr; u64 size; u64 base_offset; u32 line_size; @@ -148,7 +154,8 @@ struct nfit_spa_mapping { struct acpi_nfit_system_address *spa; struct list_head list; struct kref kref; - void __iomem *iomem; + enum spa_map_type type; + struct nd_blk_addr addr; }; static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index f9ab74505e69..b9794aeeb878 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -374,7 +374,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, #ifdef CONFIG_BLK_DEV_RAM_DAX static long brd_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct brd_device *brd = bdev->bd_disk->private_data; struct page *page; @@ -384,13 +384,9 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector, page = brd_insert_page(brd, sector); if (!page) return -ENOSPC; - *kaddr = page_address(page); + *kaddr = (void __pmem *)page_address(page); *pfn = page_to_pfn(page); - /* - * TODO: If size > PAGE_SIZE, we could look to see if the next page in - * the file happens to be mapped to the next page of physical RAM. - */ return PAGE_SIZE; } #else diff --git a/drivers/isdn/icn/icn.h b/drivers/isdn/icn/icn.h index b713466997a0..f8f2e76d34bf 100644 --- a/drivers/isdn/icn/icn.h +++ b/drivers/isdn/icn/icn.h @@ -38,7 +38,7 @@ typedef struct icn_cdef { #include <linux/errno.h> #include <linux/fs.h> #include <linux/major.h> -#include <asm/io.h> +#include <linux/io.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/slab.h> diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c index 2fc4957cbe7f..a70eb83e68f1 100644 --- a/drivers/mtd/devices/slram.c +++ b/drivers/mtd/devices/slram.c @@ -41,7 +41,7 @@ #include <linux/fs.h> #include <linux/ioctl.h> #include <linux/init.h> -#include <asm/io.h> +#include <linux/io.h> #include <linux/mtd/mtd.h> diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c index 7da266a53979..0802158a3f75 100644 --- a/drivers/mtd/nand/diskonchip.c +++ b/drivers/mtd/nand/diskonchip.c @@ -24,7 +24,7 @@ #include <linux/rslib.h> #include <linux/moduleparam.h> #include <linux/slab.h> -#include <asm/io.h> +#include <linux/io.h> #include <linux/mtd/mtd.h> #include <linux/mtd/nand.h> diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c index 32a216d31141..ab7bda0bb245 100644 --- a/drivers/mtd/onenand/generic.c +++ b/drivers/mtd/onenand/generic.c @@ -18,7 +18,7 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/onenand.h> #include <linux/mtd/partitions.h> -#include <asm/io.h> +#include <linux/io.h> /* * Note: Driver name and platform data format have been updated! diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 72226acb5c0f..53c11621d5b1 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -21,6 +21,7 @@ config BLK_DEV_PMEM default LIBNVDIMM depends on HAS_IOMEM select ND_BTT if BTT + select ND_PFN if NVDIMM_PFN help Memory ranges for PMEM are described by either an NFIT (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a @@ -47,12 +48,16 @@ config ND_BLK (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode capabilities. +config ND_CLAIM + bool + config ND_BTT tristate config BTT bool "BTT: Block Translation Table (atomic sector updates)" default y if LIBNVDIMM + select ND_CLAIM help The Block Translation Table (BTT) provides atomic sector update semantics for persistent memory devices, so that @@ -65,4 +70,22 @@ config BTT Select Y if unsure +config ND_PFN + tristate + +config NVDIMM_PFN + bool "PFN: Map persistent (device) memory" + default LIBNVDIMM + depends on ZONE_DEVICE + select ND_CLAIM + help + Map persistent memory, i.e. advertise it to the memory + management sub-system. By default persistent memory does + not support direct I/O, RDMA, or any other usage that + requires a 'struct page' to mediate an I/O request. This + driver allocates and initializes the infrastructure needed + to support those use cases. + + Select Y if unsure + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 594bb97c867a..ea84d3c4e8e5 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o +obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o nd_pmem-y := pmem.o @@ -9,6 +10,8 @@ nd_btt-y := btt.o nd_blk-y := blk.o +nd_e820-y := e820.o + libnvdimm-y := core.o libnvdimm-y += bus.o libnvdimm-y += dimm_devs.o @@ -17,4 +20,6 @@ libnvdimm-y += region_devs.o libnvdimm-y += region.o libnvdimm-y += namespace_devs.o libnvdimm-y += label.o +libnvdimm-$(CONFIG_ND_CLAIM) += claim.o libnvdimm-$(CONFIG_BTT) += btt_devs.o +libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 341202ed32b4..254239746020 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -583,33 +583,6 @@ static void free_arenas(struct btt *btt) } /* - * This function checks if the metadata layout is valid and error free - */ -static int arena_is_valid(struct arena_info *arena, struct btt_sb *super, - u8 *uuid, u32 lbasize) -{ - u64 checksum; - - if (memcmp(super->uuid, uuid, 16)) - return 0; - - checksum = le64_to_cpu(super->checksum); - super->checksum = 0; - if (checksum != nd_btt_sb_checksum(super)) - return 0; - super->checksum = cpu_to_le64(checksum); - - if (lbasize != le32_to_cpu(super->external_lbasize)) - return 0; - - /* TODO: figure out action for this */ - if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) - dev_info(to_dev(arena), "Found arena with an error flag\n"); - - return 1; -} - -/* * This function reads an existing valid btt superblock and * populates the corresponding arena_info struct */ @@ -632,8 +605,9 @@ static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super, arena->logoff = arena_off + le64_to_cpu(super->logoff); arena->info2off = arena_off + le64_to_cpu(super->info2off); - arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) : - (arena->info2off - arena->infooff + BTT_PG_SIZE); + arena->size = (le64_to_cpu(super->nextoff) > 0) + ? (le64_to_cpu(super->nextoff)) + : (arena->info2off - arena->infooff + BTT_PG_SIZE); arena->flags = le32_to_cpu(super->flags); } @@ -665,8 +639,7 @@ static int discover_arenas(struct btt *btt) if (ret) goto out; - if (!arena_is_valid(arena, super, btt->nd_btt->uuid, - btt->lbasize)) { + if (!nd_btt_arena_is_valid(btt->nd_btt, super)) { if (remaining == btt->rawsize) { btt->init_state = INIT_NOTFOUND; dev_info(to_dev(arena), "No existing arenas\n"); @@ -755,10 +728,13 @@ static int create_arenas(struct btt *btt) * It is only called for an uninitialized arena when a write * to that arena occurs for the first time. */ -static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) +static int btt_arena_write_layout(struct arena_info *arena) { int ret; + u64 sum; struct btt_sb *super; + struct nd_btt *nd_btt = arena->nd_btt; + const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev); ret = btt_map_init(arena); if (ret) @@ -773,7 +749,8 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) return -ENOMEM; strncpy(super->signature, BTT_SIG, BTT_SIG_LEN); - memcpy(super->uuid, uuid, 16); + memcpy(super->uuid, nd_btt->uuid, 16); + memcpy(super->parent_uuid, parent_uuid, 16); super->flags = cpu_to_le32(arena->flags); super->version_major = cpu_to_le16(arena->version_major); super->version_minor = cpu_to_le16(arena->version_minor); @@ -794,7 +771,8 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) super->info2off = cpu_to_le64(arena->info2off - arena->infooff); super->flags = 0; - super->checksum = cpu_to_le64(nd_btt_sb_checksum(super)); + sum = nd_sb_checksum((struct nd_gen_sb *) super); + super->checksum = cpu_to_le64(sum); ret = btt_info_write(arena, super); @@ -813,7 +791,7 @@ static int btt_meta_init(struct btt *btt) mutex_lock(&btt->init_lock); list_for_each_entry(arena, &btt->arena_list, list) { - ret = btt_arena_write_layout(arena, btt->nd_btt->uuid); + ret = btt_arena_write_layout(arena); if (ret) goto unlock; @@ -1447,8 +1425,6 @@ static int __init nd_btt_init(void) { int rc; - BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K); - btt_major = register_blkdev(0, "btt"); if (btt_major < 0) return btt_major; diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 75b0d80a6bd9..b2f8651e5395 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -182,4 +182,7 @@ struct btt { int init_state; int num_arenas; }; + +bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super); + #endif diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 6ac8c0fea3ec..59ad54a63d9f 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -21,63 +21,13 @@ #include "btt.h" #include "nd.h" -static void __nd_btt_detach_ndns(struct nd_btt *nd_btt) -{ - struct nd_namespace_common *ndns = nd_btt->ndns; - - dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) - || ndns->claim != &nd_btt->dev, - "%s: invalid claim\n", __func__); - ndns->claim = NULL; - nd_btt->ndns = NULL; - put_device(&ndns->dev); -} - -static void nd_btt_detach_ndns(struct nd_btt *nd_btt) -{ - struct nd_namespace_common *ndns = nd_btt->ndns; - - if (!ndns) - return; - get_device(&ndns->dev); - device_lock(&ndns->dev); - __nd_btt_detach_ndns(nd_btt); - device_unlock(&ndns->dev); - put_device(&ndns->dev); -} - -static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt, - struct nd_namespace_common *ndns) -{ - if (ndns->claim) - return false; - dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) - || nd_btt->ndns, - "%s: invalid claim\n", __func__); - ndns->claim = &nd_btt->dev; - nd_btt->ndns = ndns; - get_device(&ndns->dev); - return true; -} - -static bool nd_btt_attach_ndns(struct nd_btt *nd_btt, - struct nd_namespace_common *ndns) -{ - bool claimed; - - device_lock(&ndns->dev); - claimed = __nd_btt_attach_ndns(nd_btt, ndns); - device_unlock(&ndns->dev); - return claimed; -} - static void nd_btt_release(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_btt *nd_btt = to_nd_btt(dev); dev_dbg(dev, "%s\n", __func__); - nd_btt_detach_ndns(nd_btt); + nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns); ida_simple_remove(&nd_region->btt_ida, nd_btt->id); kfree(nd_btt->uuid); kfree(nd_btt); @@ -172,104 +122,15 @@ static ssize_t namespace_show(struct device *dev, return rc; } -static int namespace_match(struct device *dev, void *data) -{ - char *name = data; - - return strcmp(name, dev_name(dev)) == 0; -} - -static bool is_nd_btt_idle(struct device *dev) -{ - struct nd_region *nd_region = to_nd_region(dev->parent); - struct nd_btt *nd_btt = to_nd_btt(dev); - - if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver) - return false; - return true; -} - -static ssize_t __namespace_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - struct nd_btt *nd_btt = to_nd_btt(dev); - struct nd_namespace_common *ndns; - struct device *found; - char *name; - - if (dev->driver) { - dev_dbg(dev, "%s: -EBUSY\n", __func__); - return -EBUSY; - } - - name = kstrndup(buf, len, GFP_KERNEL); - if (!name) - return -ENOMEM; - strim(name); - - if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0) - /* pass */; - else { - len = -EINVAL; - goto out; - } - - ndns = nd_btt->ndns; - if (strcmp(name, "") == 0) { - /* detach the namespace and destroy / reset the btt device */ - nd_btt_detach_ndns(nd_btt); - if (is_nd_btt_idle(dev)) - nd_device_unregister(dev, ND_ASYNC); - else { - nd_btt->lbasize = 0; - kfree(nd_btt->uuid); - nd_btt->uuid = NULL; - } - goto out; - } else if (ndns) { - dev_dbg(dev, "namespace already set to: %s\n", - dev_name(&ndns->dev)); - len = -EBUSY; - goto out; - } - - found = device_find_child(dev->parent, name, namespace_match); - if (!found) { - dev_dbg(dev, "'%s' not found under %s\n", name, - dev_name(dev->parent)); - len = -ENODEV; - goto out; - } - - ndns = to_ndns(found); - if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { - dev_dbg(dev, "%s too small to host btt\n", name); - len = -ENXIO; - goto out_attach; - } - - WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev)); - if (!nd_btt_attach_ndns(nd_btt, ndns)) { - dev_dbg(dev, "%s already claimed\n", - dev_name(&ndns->dev)); - len = -EBUSY; - } - - out_attach: - put_device(&ndns->dev); /* from device_find_child */ - out: - kfree(name); - return len; -} - static ssize_t namespace_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; nvdimm_bus_lock(dev); device_lock(dev); - rc = __namespace_store(dev, attr, buf, len); + rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? "" : "\n"); device_unlock(dev); @@ -324,7 +185,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, dev->type = &nd_btt_device_type; dev->groups = nd_btt_attribute_groups; device_initialize(&nd_btt->dev); - if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) { + if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) { dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", __func__, dev_name(ndns->claim)); put_device(dev); @@ -342,30 +203,54 @@ struct device *nd_btt_create(struct nd_region *nd_region) return dev; } -/* - * nd_btt_sb_checksum: compute checksum for btt info block +static bool uuid_is_null(u8 *uuid) +{ + static const u8 null_uuid[16]; + + return (memcmp(uuid, null_uuid, 16) == 0); +} + +/** + * nd_btt_arena_is_valid - check if the metadata layout is valid + * @nd_btt: device with BTT geometry and backing device info + * @super: pointer to the arena's info block being tested + * + * Check consistency of the btt info block with itself by validating + * the checksum, and with the parent namespace by verifying the + * parent_uuid contained in the info block with the one supplied in. * - * Returns a fletcher64 checksum of everything in the given info block - * except the last field (since that's where the checksum lives). + * Returns: + * false for an invalid info block, true for a valid one */ -u64 nd_btt_sb_checksum(struct btt_sb *btt_sb) +bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) { - u64 sum; - __le64 sum_save; - - sum_save = btt_sb->checksum; - btt_sb->checksum = 0; - sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1); - btt_sb->checksum = sum_save; - return sum; + const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev); + u64 checksum; + + if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0) + return false; + + if (!uuid_is_null(super->parent_uuid)) + if (memcmp(super->parent_uuid, parent_uuid, 16) != 0) + return false; + + checksum = le64_to_cpu(super->checksum); + super->checksum = 0; + if (checksum != nd_sb_checksum((struct nd_gen_sb *) super)) + return false; + super->checksum = cpu_to_le64(checksum); + + /* TODO: figure out action for this */ + if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) + dev_info(&nd_btt->dev, "Found arena with an error flag\n"); + + return true; } -EXPORT_SYMBOL(nd_btt_sb_checksum); +EXPORT_SYMBOL(nd_btt_arena_is_valid); static int __nd_btt_probe(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, struct btt_sb *btt_sb) { - u64 checksum; - if (!btt_sb || !ndns || !nd_btt) return -ENODEV; @@ -375,14 +260,8 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, if (nvdimm_namespace_capacity(ndns) < SZ_16M) return -ENXIO; - if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0) - return -ENODEV; - - checksum = le64_to_cpu(btt_sb->checksum); - btt_sb->checksum = 0; - if (checksum != nd_btt_sb_checksum(btt_sb)) + if (!nd_btt_arena_is_valid(nd_btt, btt_sb)) return -ENODEV; - btt_sb->checksum = cpu_to_le64(checksum); nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize); nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL); @@ -416,7 +295,9 @@ int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__, rc == 0 ? dev_name(dev) : "<none>"); if (rc < 0) { - __nd_btt_detach_ndns(to_nd_btt(dev)); + struct nd_btt *nd_btt = to_nd_btt(dev); + + __nd_detach_ndns(dev, &nd_btt->ndns); put_device(dev); } diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c new file mode 100644 index 000000000000..e8f03b0e95e4 --- /dev/null +++ b/drivers/nvdimm/claim.c @@ -0,0 +1,201 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/device.h> +#include <linux/sizes.h> +#include "nd-core.h" +#include "pfn.h" +#include "btt.h" +#include "nd.h" + +void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns) +{ + struct nd_namespace_common *ndns = *_ndns; + + dev_WARN_ONCE(dev, !mutex_is_locked(&ndns->dev.mutex) + || ndns->claim != dev, + "%s: invalid claim\n", __func__); + ndns->claim = NULL; + *_ndns = NULL; + put_device(&ndns->dev); +} + +void nd_detach_ndns(struct device *dev, + struct nd_namespace_common **_ndns) +{ + struct nd_namespace_common *ndns = *_ndns; + + if (!ndns) + return; + get_device(&ndns->dev); + device_lock(&ndns->dev); + __nd_detach_ndns(dev, _ndns); + device_unlock(&ndns->dev); + put_device(&ndns->dev); +} + +bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns) +{ + if (attach->claim) + return false; + dev_WARN_ONCE(dev, !mutex_is_locked(&attach->dev.mutex) + || *_ndns, + "%s: invalid claim\n", __func__); + attach->claim = dev; + *_ndns = attach; + get_device(&attach->dev); + return true; +} + +bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns) +{ + bool claimed; + + device_lock(&attach->dev); + claimed = __nd_attach_ndns(dev, attach, _ndns); + device_unlock(&attach->dev); + return claimed; +} + +static int namespace_match(struct device *dev, void *data) +{ + char *name = data; + + return strcmp(name, dev_name(dev)) == 0; +} + +static bool is_idle(struct device *dev, struct nd_namespace_common *ndns) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct device *seed = NULL; + + if (is_nd_btt(dev)) + seed = nd_region->btt_seed; + else if (is_nd_pfn(dev)) + seed = nd_region->pfn_seed; + + if (seed == dev || ndns || dev->driver) + return false; + return true; +} + +static void nd_detach_and_reset(struct device *dev, + struct nd_namespace_common **_ndns) +{ + /* detach the namespace and destroy / reset the device */ + nd_detach_ndns(dev, _ndns); + if (is_idle(dev, *_ndns)) { + nd_device_unregister(dev, ND_ASYNC); + } else if (is_nd_btt(dev)) { + struct nd_btt *nd_btt = to_nd_btt(dev); + + nd_btt->lbasize = 0; + kfree(nd_btt->uuid); + nd_btt->uuid = NULL; + } else if (is_nd_pfn(dev)) { + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + kfree(nd_pfn->uuid); + nd_pfn->uuid = NULL; + nd_pfn->mode = PFN_MODE_NONE; + } +} + +ssize_t nd_namespace_store(struct device *dev, + struct nd_namespace_common **_ndns, const char *buf, + size_t len) +{ + struct nd_namespace_common *ndns; + struct device *found; + char *name; + + if (dev->driver) { + dev_dbg(dev, "%s: -EBUSY\n", __func__); + return -EBUSY; + } + + name = kstrndup(buf, len, GFP_KERNEL); + if (!name) + return -ENOMEM; + strim(name); + + if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0) + /* pass */; + else { + len = -EINVAL; + goto out; + } + + ndns = *_ndns; + if (strcmp(name, "") == 0) { + nd_detach_and_reset(dev, _ndns); + goto out; + } else if (ndns) { + dev_dbg(dev, "namespace already set to: %s\n", + dev_name(&ndns->dev)); + len = -EBUSY; + goto out; + } + + found = device_find_child(dev->parent, name, namespace_match); + if (!found) { + dev_dbg(dev, "'%s' not found under %s\n", name, + dev_name(dev->parent)); + len = -ENODEV; + goto out; + } + + ndns = to_ndns(found); + if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { + dev_dbg(dev, "%s too small to host\n", name); + len = -ENXIO; + goto out_attach; + } + + WARN_ON_ONCE(!is_nvdimm_bus_locked(dev)); + if (!nd_attach_ndns(dev, ndns, _ndns)) { + dev_dbg(dev, "%s already claimed\n", + dev_name(&ndns->dev)); + len = -EBUSY; + } + + out_attach: + put_device(&ndns->dev); /* from device_find_child */ + out: + kfree(name); + return len; +} + +/* + * nd_sb_checksum: compute checksum for a generic info block + * + * Returns a fletcher64 checksum of everything in the given info block + * except the last field (since that's where the checksum lives). + */ +u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb) +{ + u64 sum; + __le64 sum_save; + + BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K); + BUILD_BUG_ON(sizeof(struct nd_pfn_sb) != SZ_4K); + BUILD_BUG_ON(sizeof(struct nd_gen_sb) != SZ_4K); + + sum_save = nd_gen_sb->checksum; + nd_gen_sb->checksum = 0; + sum = nd_fletcher64(nd_gen_sb, sizeof(*nd_gen_sb), 1); + nd_gen_sb->checksum = sum_save; + return sum; +} +EXPORT_SYMBOL(nd_sb_checksum); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index c05eb807d674..651b8d19d324 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -241,10 +241,7 @@ void nvdimm_drvdata_release(struct kref *kref) nvdimm_free_dpa(ndd, res); nvdimm_bus_unlock(dev); - if (ndd->data && is_vmalloc_addr(ndd->data)) - vfree(ndd->data); - else - kfree(ndd->data); + kvfree(ndd->data); kfree(ndd); put_device(dev); } diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c new file mode 100644 index 000000000000..8282db2ef99e --- /dev/null +++ b/drivers/nvdimm/e820.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2015, Christoph Hellwig. + * Copyright (c) 2015, Intel Corporation. + */ +#include <linux/platform_device.h> +#include <linux/libnvdimm.h> +#include <linux/module.h> + +static const struct attribute_group *e820_pmem_attribute_groups[] = { + &nvdimm_bus_attribute_group, + NULL, +}; + +static const struct attribute_group *e820_pmem_region_attribute_groups[] = { + &nd_region_attribute_group, + &nd_device_attribute_group, + NULL, +}; + +static int e820_pmem_remove(struct platform_device *pdev) +{ + struct nvdimm_bus *nvdimm_bus = platform_get_drvdata(pdev); + + nvdimm_bus_unregister(nvdimm_bus); + return 0; +} + +static int e820_pmem_probe(struct platform_device *pdev) +{ + static struct nvdimm_bus_descriptor nd_desc; + struct device *dev = &pdev->dev; + struct nvdimm_bus *nvdimm_bus; + struct resource *p; + + nd_desc.attr_groups = e820_pmem_attribute_groups; + nd_desc.provider_name = "e820"; + nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); + if (!nvdimm_bus) + goto err; + platform_set_drvdata(pdev, nvdimm_bus); + + for (p = iomem_resource.child; p ; p = p->sibling) { + struct nd_region_desc ndr_desc; + + if (strncmp(p->name, "Persistent Memory (legacy)", 26) != 0) + continue; + + memset(&ndr_desc, 0, sizeof(ndr_desc)); + ndr_desc.res = p; + ndr_desc.attr_groups = e820_pmem_region_attribute_groups; + ndr_desc.numa_node = NUMA_NO_NODE; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) + goto err; + } + + return 0; + + err: + nvdimm_bus_unregister(nvdimm_bus); + dev_err(dev, "failed to register legacy persistent memory ranges\n"); + return -ENXIO; +} + +static struct platform_driver e820_pmem_driver = { + .probe = e820_pmem_probe, + .remove = e820_pmem_remove, + .driver = { + .name = "e820_pmem", + }, +}; + +static __init int e820_pmem_init(void) +{ + return platform_driver_register(&e820_pmem_driver); +} + +static __exit void e820_pmem_exit(void) +{ + platform_driver_unregister(&e820_pmem_driver); +} + +MODULE_ALIAS("platform:e820_pmem*"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +module_init(e820_pmem_init); +module_exit(e820_pmem_exit); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index fef0dd80d4ad..0955b2cb10fe 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/device.h> #include <linux/slab.h> +#include <linux/pmem.h> #include <linux/nd.h> #include "nd-core.h" #include "nd.h" @@ -76,22 +77,54 @@ static bool is_namespace_io(struct device *dev) return dev ? dev->type == &namespace_io_device_type : false; } +bool pmem_should_map_pages(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + + if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) + return false; + + if (!test_bit(ND_REGION_PAGEMAP, &nd_region->flags)) + return false; + + if (is_nd_pfn(dev) || is_nd_btt(dev)) + return false; + +#ifdef ARCH_MEMREMAP_PMEM + return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; +#else + return false; +#endif +} +EXPORT_SYMBOL(pmem_should_map_pages); + const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name) { struct nd_region *nd_region = to_nd_region(ndns->dev.parent); - const char *suffix = ""; + const char *suffix = NULL; - if (ndns->claim && is_nd_btt(ndns->claim)) - suffix = "s"; + if (ndns->claim) { + if (is_nd_btt(ndns->claim)) + suffix = "s"; + else if (is_nd_pfn(ndns->claim)) + suffix = "m"; + else + dev_WARN_ONCE(&ndns->dev, 1, + "unknown claim type by %s\n", + dev_name(ndns->claim)); + } - if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) - sprintf(name, "pmem%d%s", nd_region->id, suffix); - else if (is_namespace_blk(&ndns->dev)) { + if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) { + if (!suffix && pmem_should_map_pages(&ndns->dev)) + suffix = "m"; + sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : ""); + } else if (is_namespace_blk(&ndns->dev)) { struct nd_namespace_blk *nsblk; nsblk = to_nd_namespace_blk(&ndns->dev); - sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix); + sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, + suffix ? suffix : ""); } else { return NULL; } @@ -100,6 +133,26 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, } EXPORT_SYMBOL(nvdimm_namespace_disk_name); +const u8 *nd_dev_to_uuid(struct device *dev) +{ + static const u8 null_uuid[16]; + + if (!dev) + return null_uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + return nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + return nsblk->uuid; + } else + return null_uuid; +} +EXPORT_SYMBOL(nd_dev_to_uuid); + static ssize_t nstype_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1235,12 +1288,22 @@ static const struct attribute_group *nd_namespace_attribute_groups[] = { struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) { struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL; + struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL; struct nd_namespace_common *ndns; resource_size_t size; - if (nd_btt) { - ndns = nd_btt->ndns; - if (!ndns) + if (nd_btt || nd_pfn) { + struct device *host = NULL; + + if (nd_btt) { + host = &nd_btt->dev; + ndns = nd_btt->ndns; + } else if (nd_pfn) { + host = &nd_pfn->dev; + ndns = nd_pfn->ndns; + } + + if (!ndns || !host) return ERR_PTR(-ENODEV); /* @@ -1251,12 +1314,12 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) device_unlock(&ndns->dev); if (ndns->dev.driver) { dev_dbg(&ndns->dev, "is active, can't bind %s\n", - dev_name(&nd_btt->dev)); + dev_name(host)); return ERR_PTR(-EBUSY); } - if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev, + if (dev_WARN_ONCE(&ndns->dev, ndns->claim != host, "host (%s) vs claim (%s) mismatch\n", - dev_name(&nd_btt->dev), + dev_name(host), dev_name(ndns->claim))) return ERR_PTR(-ENXIO); } else { diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index e1970c71ad1c..159aed532042 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -80,4 +80,13 @@ struct resource *nsblk_add_resource(struct nd_region *nd_region, int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd); void get_ndd(struct nvdimm_drvdata *ndd); resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns); +void nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns); +void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns); +bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns); +bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns); +ssize_t nd_namespace_store(struct device *dev, + struct nd_namespace_common **_ndns, const char *buf, + size_t len); #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index c41f53e74277..417e521d299c 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -29,6 +29,13 @@ enum { ND_MAX_LANES = 256, SECTOR_SHIFT = 9, INT_LBASIZE_ALIGNMENT = 64, +#if IS_ENABLED(CONFIG_NVDIMM_PFN) + ND_PFN_ALIGN = PAGES_PER_SECTION * PAGE_SIZE, + ND_PFN_MASK = ND_PFN_ALIGN - 1, +#else + ND_PFN_ALIGN = 0, + ND_PFN_MASK = 0, +#endif }; struct nvdimm_drvdata { @@ -92,8 +99,11 @@ struct nd_region { struct device dev; struct ida ns_ida; struct ida btt_ida; + struct ida pfn_ida; + unsigned long flags; struct device *ns_seed; struct device *btt_seed; + struct device *pfn_seed; u16 ndr_mappings; u64 ndr_size; u64 ndr_start; @@ -133,6 +143,22 @@ struct nd_btt { int id; }; +enum nd_pfn_mode { + PFN_MODE_NONE, + PFN_MODE_RAM, + PFN_MODE_PMEM, +}; + +struct nd_pfn { + int id; + u8 *uuid; + struct device dev; + unsigned long npfns; + enum nd_pfn_mode mode; + struct nd_pfn_sb *pfn_sb; + struct nd_namespace_common *ndns; +}; + enum nd_async_mode { ND_SYNC, ND_ASYNC, @@ -159,14 +185,19 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); struct nd_btt *to_nd_btt(struct device *dev); -struct btt_sb; -u64 nd_btt_sb_checksum(struct btt_sb *btt_sb); + +struct nd_gen_sb { + char reserved[SZ_4K - 8]; + __le64 checksum; +}; + +u64 nd_sb_checksum(struct nd_gen_sb *sb); #if IS_ENABLED(CONFIG_BTT) int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata); bool is_nd_btt(struct device *dev); struct device *nd_btt_create(struct nd_region *nd_region); #else -static inline nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) +static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) { return -ENODEV; } @@ -180,8 +211,36 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) { return NULL; } +#endif +struct nd_pfn *to_nd_pfn(struct device *dev); +#if IS_ENABLED(CONFIG_NVDIMM_PFN) +int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata); +bool is_nd_pfn(struct device *dev); +struct device *nd_pfn_create(struct nd_region *nd_region); +int nd_pfn_validate(struct nd_pfn *nd_pfn); +#else +static inline int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + return -ENODEV; +} + +static inline bool is_nd_pfn(struct device *dev) +{ + return false; +} + +static inline struct device *nd_pfn_create(struct nd_region *nd_region) +{ + return NULL; +} + +static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) +{ + return -ENODEV; +} #endif + struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); @@ -217,4 +276,6 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) } void nd_iostat_end(struct bio *bio, unsigned long start); resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); +const u8 *nd_dev_to_uuid(struct device *dev); +bool pmem_should_map_pages(struct device *dev); #endif /* __ND_H__ */ diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h new file mode 100644 index 000000000000..cc243754acef --- /dev/null +++ b/drivers/nvdimm/pfn.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __NVDIMM_PFN_H +#define __NVDIMM_PFN_H + +#include <linux/types.h> + +#define PFN_SIG_LEN 16 +#define PFN_SIG "NVDIMM_PFN_INFO\0" + +struct nd_pfn_sb { + u8 signature[PFN_SIG_LEN]; + u8 uuid[16]; + u8 parent_uuid[16]; + __le32 flags; + __le16 version_major; + __le16 version_minor; + __le64 dataoff; + __le64 npfns; + __le32 mode; + u8 padding[4012]; + __le64 checksum; +}; +#endif /* __NVDIMM_PFN_H */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c new file mode 100644 index 000000000000..3fd7d0d81a47 --- /dev/null +++ b/drivers/nvdimm/pfn_devs.c @@ -0,0 +1,337 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/genhd.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include "nd-core.h" +#include "pfn.h" +#include "nd.h" + +static void nd_pfn_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + dev_dbg(dev, "%s\n", __func__); + nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); + ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id); + kfree(nd_pfn->uuid); + kfree(nd_pfn); +} + +static struct device_type nd_pfn_device_type = { + .name = "nd_pfn", + .release = nd_pfn_release, +}; + +bool is_nd_pfn(struct device *dev) +{ + return dev ? dev->type == &nd_pfn_device_type : false; +} +EXPORT_SYMBOL(is_nd_pfn); + +struct nd_pfn *to_nd_pfn(struct device *dev) +{ + struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev); + + WARN_ON(!is_nd_pfn(dev)); + return nd_pfn; +} +EXPORT_SYMBOL(to_nd_pfn); + +static ssize_t mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + switch (nd_pfn->mode) { + case PFN_MODE_RAM: + return sprintf(buf, "ram\n"); + case PFN_MODE_PMEM: + return sprintf(buf, "pmem\n"); + default: + return sprintf(buf, "none\n"); + } +} + +static ssize_t mode_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc = 0; + + device_lock(dev); + nvdimm_bus_lock(dev); + if (dev->driver) + rc = -EBUSY; + else { + size_t n = len - 1; + + if (strncmp(buf, "pmem\n", n) == 0 + || strncmp(buf, "pmem", n) == 0) { + /* TODO: allocate from PMEM support */ + rc = -ENOTTY; + } else if (strncmp(buf, "ram\n", n) == 0 + || strncmp(buf, "ram", n) == 0) + nd_pfn->mode = PFN_MODE_RAM; + else if (strncmp(buf, "none\n", n) == 0 + || strncmp(buf, "none", n) == 0) + nd_pfn->mode = PFN_MODE_NONE; + else + rc = -EINVAL; + } + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(mode); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + if (nd_pfn->uuid) + return sprintf(buf, "%pUb\n", nd_pfn->uuid); + return sprintf(buf, "\n"); +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t namespace_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + rc = sprintf(buf, "%s\n", nd_pfn->ndns + ? dev_name(&nd_pfn->ndns->dev) : ""); + nvdimm_bus_unlock(dev); + return rc; +} + +static ssize_t namespace_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + device_lock(dev); + rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + device_unlock(dev); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RW(namespace); + +static struct attribute *nd_pfn_attributes[] = { + &dev_attr_mode.attr, + &dev_attr_namespace.attr, + &dev_attr_uuid.attr, + NULL, +}; + +static struct attribute_group nd_pfn_attribute_group = { + .attrs = nd_pfn_attributes, +}; + +static const struct attribute_group *nd_pfn_attribute_groups[] = { + &nd_pfn_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +static struct device *__nd_pfn_create(struct nd_region *nd_region, + u8 *uuid, enum nd_pfn_mode mode, + struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn; + struct device *dev; + + /* we can only create pages for contiguous ranged of pmem */ + if (!is_nd_pmem(&nd_region->dev)) + return NULL; + + nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); + if (!nd_pfn) + return NULL; + + nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL); + if (nd_pfn->id < 0) { + kfree(nd_pfn); + return NULL; + } + + nd_pfn->mode = mode; + if (uuid) + uuid = kmemdup(uuid, 16, GFP_KERNEL); + nd_pfn->uuid = uuid; + dev = &nd_pfn->dev; + dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); + dev->parent = &nd_region->dev; + dev->type = &nd_pfn_device_type; + dev->groups = nd_pfn_attribute_groups; + device_initialize(&nd_pfn->dev); + if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { + dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", + __func__, dev_name(ndns->claim)); + put_device(dev); + return NULL; + } + return dev; +} + +struct device *nd_pfn_create(struct nd_region *nd_region) +{ + struct device *dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, + NULL); + + if (dev) + __nd_device_register(dev); + return dev; +} + +int nd_pfn_validate(struct nd_pfn *nd_pfn) +{ + struct nd_namespace_common *ndns = nd_pfn->ndns; + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + struct nd_namespace_io *nsio; + u64 checksum, offset; + + if (!pfn_sb || !ndns) + return -ENODEV; + + if (!is_nd_pmem(nd_pfn->dev.parent)) + return -ENODEV; + + /* section alignment for simple hotplug */ + if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN) + return -ENODEV; + + if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb))) + return -ENXIO; + + if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN) != 0) + return -ENODEV; + + checksum = le64_to_cpu(pfn_sb->checksum); + pfn_sb->checksum = 0; + if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb)) + return -ENODEV; + pfn_sb->checksum = cpu_to_le64(checksum); + + switch (le32_to_cpu(pfn_sb->mode)) { + case PFN_MODE_RAM: + break; + case PFN_MODE_PMEM: + /* TODO: allocate from PMEM support */ + return -ENOTTY; + default: + return -ENXIO; + } + + if (!nd_pfn->uuid) { + /* from probe we allocate */ + nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); + if (!nd_pfn->uuid) + return -ENOMEM; + } else { + /* from init we validate */ + if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) + return -EINVAL; + } + + /* + * These warnings are verbose because they can only trigger in + * the case where the physical address alignment of the + * namespace has changed since the pfn superblock was + * established. + */ + offset = le64_to_cpu(pfn_sb->dataoff); + nsio = to_nd_namespace_io(&ndns->dev); + if (nsio->res.start & ND_PFN_MASK) { + dev_err(&nd_pfn->dev, + "init failed: %s not section aligned\n", + dev_name(&ndns->dev)); + return -EBUSY; + } else if (offset >= resource_size(&nsio->res)) { + dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", + dev_name(&ndns->dev)); + return -EBUSY; + } + + return 0; +} +EXPORT_SYMBOL(nd_pfn_validate); + +int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + int rc; + struct device *dev; + struct nd_pfn *nd_pfn; + struct nd_pfn_sb *pfn_sb; + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + + if (ndns->force_raw) + return -ENODEV; + + nvdimm_bus_lock(&ndns->dev); + dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, ndns); + nvdimm_bus_unlock(&ndns->dev); + if (!dev) + return -ENOMEM; + dev_set_drvdata(dev, drvdata); + pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); + nd_pfn = to_nd_pfn(dev); + nd_pfn->pfn_sb = pfn_sb; + rc = nd_pfn_validate(nd_pfn); + nd_pfn->pfn_sb = NULL; + kfree(pfn_sb); + dev_dbg(&ndns->dev, "%s: pfn: %s\n", __func__, + rc == 0 ? dev_name(dev) : "<none>"); + if (rc < 0) { + __nd_detach_ndns(dev, &nd_pfn->ndns); + put_device(dev); + } else + __nd_device_register(&nd_pfn->dev); + + return rc; +} +EXPORT_SYMBOL(nd_pfn_probe); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 4c079d5cb539..b9525385c0dc 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -21,18 +21,24 @@ #include <linux/init.h> #include <linux/platform_device.h> #include <linux/module.h> +#include <linux/memory_hotplug.h> #include <linux/moduleparam.h> +#include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/pmem.h> #include <linux/nd.h> +#include "pfn.h" #include "nd.h" struct pmem_device { struct request_queue *pmem_queue; struct gendisk *pmem_disk; + struct nd_namespace_common *ndns; /* One contiguous memory region per device */ phys_addr_t phys_addr; + /* when non-zero this device is hosting a 'pfn' instance */ + phys_addr_t data_offset; void __pmem *virt_addr; size_t size; }; @@ -44,7 +50,7 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, sector_t sector) { void *mem = kmap_atomic(page); - size_t pmem_off = sector << 9; + phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; if (rw == READ) { @@ -92,19 +98,26 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, } static long pmem_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct pmem_device *pmem = bdev->bd_disk->private_data; - size_t offset = sector << 9; - - if (!pmem) - return -ENODEV; + resource_size_t offset = sector * 512 + pmem->data_offset; + resource_size_t size; + + if (pmem->data_offset) { + /* + * Limit the direct_access() size to what is covered by + * the memmap + */ + size = (pmem->size - offset) & ~ND_PFN_MASK; + } else + size = pmem->size - offset; /* FIXME convert DAX to comprehend that this mapping has a lifetime */ - *kaddr = (void __force *) pmem->virt_addr + offset; + *kaddr = pmem->virt_addr + offset; *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; - return pmem->size - offset; + return size; } static const struct block_device_operations pmem_fops = { @@ -119,27 +132,33 @@ static struct pmem_device *pmem_alloc(struct device *dev, { struct pmem_device *pmem; - pmem = kzalloc(sizeof(*pmem), GFP_KERNEL); + pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) return ERR_PTR(-ENOMEM); pmem->phys_addr = res->start; pmem->size = resource_size(res); - if (!arch_has_pmem_api()) + if (!arch_has_wmb_pmem()) dev_warn(dev, "unable to guarantee persistence of writes\n"); - if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) { + if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size, + dev_name(dev))) { dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size); - kfree(pmem); return ERR_PTR(-EBUSY); } - pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size); - if (!pmem->virt_addr) { - release_mem_region(pmem->phys_addr, pmem->size); - kfree(pmem); - return ERR_PTR(-ENXIO); + if (pmem_should_map_pages(dev)) { + void *addr = devm_memremap_pages(dev, res); + + if (IS_ERR(addr)) + return addr; + pmem->virt_addr = (void __pmem *) addr; + } else { + pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, + pmem->size); + if (!pmem->virt_addr) + return ERR_PTR(-ENXIO); } return pmem; @@ -147,13 +166,16 @@ static struct pmem_device *pmem_alloc(struct device *dev, static void pmem_detach_disk(struct pmem_device *pmem) { + if (!pmem->pmem_disk) + return; + del_gendisk(pmem->pmem_disk); put_disk(pmem->pmem_disk); blk_cleanup_queue(pmem->pmem_queue); } -static int pmem_attach_disk(struct nd_namespace_common *ndns, - struct pmem_device *pmem) +static int pmem_attach_disk(struct device *dev, + struct nd_namespace_common *ndns, struct pmem_device *pmem) { struct gendisk *disk; @@ -162,6 +184,7 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns, return -ENOMEM; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); + blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX); blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue); @@ -179,8 +202,8 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns, disk->queue = pmem->pmem_queue; disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(ndns, disk->disk_name); - disk->driverfs_dev = &ndns->dev; - set_capacity(disk, pmem->size >> 9); + disk->driverfs_dev = dev; + set_capacity(disk, (pmem->size - pmem->data_offset) / 512); pmem->pmem_disk = disk; add_disk(disk); @@ -209,11 +232,152 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns, return 0; } -static void pmem_free(struct pmem_device *pmem) +static int nd_pfn_init(struct nd_pfn *nd_pfn) +{ + struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); + struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); + struct nd_namespace_common *ndns = nd_pfn->ndns; + struct nd_region *nd_region; + unsigned long npfns; + phys_addr_t offset; + u64 checksum; + int rc; + + if (!pfn_sb) + return -ENOMEM; + + nd_pfn->pfn_sb = pfn_sb; + rc = nd_pfn_validate(nd_pfn); + if (rc == 0 || rc == -EBUSY) + return rc; + + /* section alignment for simple hotplug */ + if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN + || pmem->phys_addr & ND_PFN_MASK) + return -ENODEV; + + nd_region = to_nd_region(nd_pfn->dev.parent); + if (nd_region->ro) { + dev_info(&nd_pfn->dev, + "%s is read-only, unable to init metadata\n", + dev_name(&nd_region->dev)); + goto err; + } + + memset(pfn_sb, 0, sizeof(*pfn_sb)); + npfns = (pmem->size - SZ_8K) / SZ_4K; + /* + * Note, we use 64 here for the standard size of struct page, + * debugging options may cause it to be larger in which case the + * implementation will limit the pfns advertised through + * ->direct_access() to those that are included in the memmap. + */ + if (nd_pfn->mode == PFN_MODE_PMEM) + offset = ALIGN(SZ_8K + 64 * npfns, PMD_SIZE); + else if (nd_pfn->mode == PFN_MODE_RAM) + offset = SZ_8K; + else + goto err; + + npfns = (pmem->size - offset) / SZ_4K; + pfn_sb->mode = cpu_to_le32(nd_pfn->mode); + pfn_sb->dataoff = cpu_to_le64(offset); + pfn_sb->npfns = cpu_to_le64(npfns); + memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN); + memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); + pfn_sb->version_major = cpu_to_le16(1); + checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); + pfn_sb->checksum = cpu_to_le64(checksum); + + rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)); + if (rc) + goto err; + + return 0; + err: + nd_pfn->pfn_sb = NULL; + kfree(pfn_sb); + return -ENXIO; +} + +static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + struct pmem_device *pmem; + + /* free pmem disk */ + pmem = dev_get_drvdata(&nd_pfn->dev); + pmem_detach_disk(pmem); + + /* release nd_pfn resources */ + kfree(nd_pfn->pfn_sb); + nd_pfn->pfn_sb = NULL; + + return 0; +} + +static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) { - memunmap_pmem(pmem->virt_addr); - release_mem_region(pmem->phys_addr, pmem->size); - kfree(pmem); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + struct device *dev = &nd_pfn->dev; + struct vmem_altmap *altmap; + struct nd_region *nd_region; + struct nd_pfn_sb *pfn_sb; + struct pmem_device *pmem; + phys_addr_t offset; + int rc; + + if (!nd_pfn->uuid || !nd_pfn->ndns) + return -ENODEV; + + nd_region = to_nd_region(dev->parent); + rc = nd_pfn_init(nd_pfn); + if (rc) + return rc; + + if (PAGE_SIZE != SZ_4K) { + dev_err(dev, "only supported on systems with 4K PAGE_SIZE\n"); + return -ENXIO; + } + if (nsio->res.start & ND_PFN_MASK) { + dev_err(dev, "%s not memory hotplug section aligned\n", + dev_name(&ndns->dev)); + return -ENXIO; + } + + pfn_sb = nd_pfn->pfn_sb; + offset = le64_to_cpu(pfn_sb->dataoff); + nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); + if (nd_pfn->mode == PFN_MODE_RAM) { + if (offset != SZ_8K) + return -EINVAL; + nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); + altmap = NULL; + } else { + rc = -ENXIO; + goto err; + } + + /* establish pfn range for lookup, and switch to direct map */ + pmem = dev_get_drvdata(dev); + memunmap_pmem(dev, pmem->virt_addr); + pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res); + if (IS_ERR(pmem->virt_addr)) { + rc = PTR_ERR(pmem->virt_addr); + goto err; + } + + /* attach pmem disk in "pfn-mode" */ + pmem->data_offset = offset; + rc = pmem_attach_disk(dev, ndns, pmem); + if (rc) + goto err; + + return rc; + err: + nvdimm_namespace_detach_pfn(ndns); + return rc; } static int nd_pmem_probe(struct device *dev) @@ -222,7 +386,6 @@ static int nd_pmem_probe(struct device *dev) struct nd_namespace_common *ndns; struct nd_namespace_io *nsio; struct pmem_device *pmem; - int rc; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) @@ -233,18 +396,27 @@ static int nd_pmem_probe(struct device *dev) if (IS_ERR(pmem)) return PTR_ERR(pmem); + pmem->ndns = ndns; dev_set_drvdata(dev, pmem); ndns->rw_bytes = pmem_rw_bytes; + if (is_nd_btt(dev)) - rc = nvdimm_namespace_attach_btt(ndns); - else if (nd_btt_probe(ndns, pmem) == 0) { + return nvdimm_namespace_attach_btt(ndns); + + if (is_nd_pfn(dev)) + return nvdimm_namespace_attach_pfn(ndns); + + if (nd_btt_probe(ndns, pmem) == 0) { /* we'll come back as btt-pmem */ - rc = -ENXIO; - } else - rc = pmem_attach_disk(ndns, pmem); - if (rc) - pmem_free(pmem); - return rc; + return -ENXIO; + } + + if (nd_pfn_probe(ndns, pmem) == 0) { + /* we'll come back as pfn-pmem */ + return -ENXIO; + } + + return pmem_attach_disk(dev, ndns, pmem); } static int nd_pmem_remove(struct device *dev) @@ -252,10 +424,11 @@ static int nd_pmem_remove(struct device *dev) struct pmem_device *pmem = dev_get_drvdata(dev); if (is_nd_btt(dev)) - nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + nvdimm_namespace_detach_btt(pmem->ndns); + else if (is_nd_pfn(dev)) + nvdimm_namespace_detach_pfn(pmem->ndns); else pmem_detach_disk(pmem); - pmem_free(pmem); return 0; } diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index f28f78ccff19..7da63eac78ee 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -53,6 +53,7 @@ static int nd_region_probe(struct device *dev) return -ENODEV; nd_region->btt_seed = nd_btt_create(nd_region); + nd_region->pfn_seed = nd_pfn_create(nd_region); if (err == 0) return 0; @@ -84,6 +85,7 @@ static int nd_region_remove(struct device *dev) nvdimm_bus_lock(dev); nd_region->ns_seed = NULL; nd_region->btt_seed = NULL; + nd_region->pfn_seed = NULL; dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 7384455792bf..529f3f02e7b2 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -345,6 +345,23 @@ static ssize_t btt_seed_show(struct device *dev, } static DEVICE_ATTR_RO(btt_seed); +static ssize_t pfn_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->pfn_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(pfn_seed); + static ssize_t read_only_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -373,6 +390,7 @@ static struct attribute *nd_region_attributes[] = { &dev_attr_nstype.attr, &dev_attr_mappings.attr, &dev_attr_btt_seed.attr, + &dev_attr_pfn_seed.attr, &dev_attr_read_only.attr, &dev_attr_set_cookie.attr, &dev_attr_available_size.attr, @@ -740,10 +758,12 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, nd_region->provider_data = ndr_desc->provider_data; nd_region->nd_set = ndr_desc->nd_set; nd_region->num_lanes = ndr_desc->num_lanes; + nd_region->flags = ndr_desc->flags; nd_region->ro = ro; nd_region->numa_node = ndr_desc->numa_node; ida_init(&nd_region->ns_ida); ida_init(&nd_region->btt_ida); + ida_init(&nd_region->pfn_ida); dev = &nd_region->dev; dev_set_name(dev, "region%d", nd_region->id); dev->parent = &nvdimm_bus->dev; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 8177f3b04491..0b2be174d981 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -326,8 +326,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) struct resource *res = &dev->resource[PCI_ROM_RESOURCE]; dev->rom_base_reg = rom; res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH | - IORESOURCE_READONLY | IORESOURCE_CACHEABLE | - IORESOURCE_SIZEALIGN; + IORESOURCE_READONLY | IORESOURCE_SIZEALIGN; __pci_read_base(dev, pci_bar_mem32, res, rom); } } diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index 9357aa779048..7ad3295752ef 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -97,8 +97,6 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) /* ??? rule->flags restricted to 8 bits, all tests bogus ??? */ if (!(rule->flags & IORESOURCE_MEM_WRITEABLE)) res->flags |= IORESOURCE_READONLY; - if (rule->flags & IORESOURCE_MEM_CACHEABLE) - res->flags |= IORESOURCE_CACHEABLE; if (rule->flags & IORESOURCE_MEM_RANGELENGTH) res->flags |= IORESOURCE_RANGELENGTH; if (rule->flags & IORESOURCE_MEM_SHADOWABLE) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 2b744fbba68e..5ed44fe21380 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -29,7 +29,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); static void dcssblk_make_request(struct request_queue *q, struct bio *bio); static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, - void **kaddr, unsigned long *pfn, long size); + void __pmem **kaddr, unsigned long *pfn); static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; @@ -881,18 +881,20 @@ fail: static long dcssblk_direct_access (struct block_device *bdev, sector_t secnum, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct dcssblk_dev_info *dev_info; unsigned long offset, dev_sz; + void *addr; dev_info = bdev->bd_disk->private_data; if (!dev_info) return -ENODEV; dev_sz = dev_info->end - dev_info->start; offset = secnum * 512; - *kaddr = (void *) (dev_info->start + offset); - *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT; + addr = (void *) (dev_info->start + offset); + *pfn = virt_to_phys(addr) >> PAGE_SHIFT; + *kaddr = (void __pmem *) addr; return dev_sz - offset; } diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 31e8576cbaab..f6c336b05d5b 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -100,12 +100,7 @@ static int asd_map_memio(struct asd_ha_struct *asd_ha) pci_name(asd_ha->pcidev)); goto Err; } - if (io_handle->flags & IORESOURCE_CACHEABLE) - io_handle->addr = ioremap(io_handle->start, - io_handle->len); - else - io_handle->addr = ioremap_nocache(io_handle->start, - io_handle->len); + io_handle->addr = ioremap(io_handle->start, io_handle->len); if (!io_handle->addr) { asd_printk("couldn't map MBAR%d of %s\n", i==0?0:1, pci_name(asd_ha->pcidev)); diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 6ac74fb4ea9a..333db5953607 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -259,10 +259,7 @@ static bool arcmsr_remap_pciregion(struct AdapterControlBlock *acb) addr = (unsigned long)pci_resource_start(pdev, 0); range = pci_resource_len(pdev, 0); flags = pci_resource_flags(pdev, 0); - if (flags & IORESOURCE_CACHEABLE) - mem_base0 = ioremap(addr, range); - else - mem_base0 = ioremap_nocache(addr, range); + mem_base0 = ioremap(addr, range); if (!mem_base0) { pr_notice("arcmsr%d: memory mapping region fail\n", acb->host->host_no); diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index f466a6aa8830..e2d555c1bffc 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -324,13 +324,9 @@ int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex) goto err_out; res_flag_ex = pci_resource_flags(pdev, bar_ex); - if (res_flag_ex & IORESOURCE_MEM) { - if (res_flag_ex & IORESOURCE_CACHEABLE) - mvi->regs_ex = ioremap(res_start, res_len); - else - mvi->regs_ex = ioremap_nocache(res_start, - res_len); - } else + if (res_flag_ex & IORESOURCE_MEM) + mvi->regs_ex = ioremap(res_start, res_len); + else mvi->regs_ex = (void *)res_start; if (!mvi->regs_ex) goto err_out; @@ -345,10 +341,7 @@ int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex) } res_flag = pci_resource_flags(pdev, bar); - if (res_flag & IORESOURCE_CACHEABLE) - mvi->regs = ioremap(res_start, res_len); - else - mvi->regs = ioremap_nocache(res_start, res_len); + mvi->regs = ioremap(res_start, res_len); if (!mvi->regs) { if (mvi->regs_ex && (res_flag_ex & IORESOURCE_MEM)) diff --git a/drivers/scsi/sun3x_esp.c b/drivers/scsi/sun3x_esp.c index e26e81de7c45..d50c5ed8f428 100644 --- a/drivers/scsi/sun3x_esp.c +++ b/drivers/scsi/sun3x_esp.c @@ -12,9 +12,9 @@ #include <linux/platform_device.h> #include <linux/dma-mapping.h> #include <linux/interrupt.h> +#include <linux/io.h> #include <asm/sun3x.h> -#include <asm/io.h> #include <asm/dma.h> #include <asm/dvma.h> diff --git a/drivers/staging/comedi/drivers/ii_pci20kc.c b/drivers/staging/comedi/drivers/ii_pci20kc.c index 0768bc42a5db..14ef1f67dd42 100644 --- a/drivers/staging/comedi/drivers/ii_pci20kc.c +++ b/drivers/staging/comedi/drivers/ii_pci20kc.c @@ -28,6 +28,7 @@ */ #include <linux/module.h> +#include <linux/io.h> #include "../comedidev.h" /* diff --git a/drivers/staging/unisys/visorbus/visorchannel.c b/drivers/staging/unisys/visorbus/visorchannel.c index 6da7e49a6627..2693c46afdc0 100644 --- a/drivers/staging/unisys/visorbus/visorchannel.c +++ b/drivers/staging/unisys/visorbus/visorchannel.c @@ -20,6 +20,7 @@ */ #include <linux/uuid.h> +#include <linux/io.h> #include "version.h" #include "visorbus.h" @@ -35,7 +36,7 @@ static const uuid_le spar_video_guid = SPAR_CONSOLEVIDEO_CHANNEL_PROTOCOL_GUID; struct visorchannel { u64 physaddr; ulong nbytes; - void __iomem *mapped; + void *mapped; bool requested; struct channel_header chan_hdr; uuid_le guid; @@ -92,7 +93,7 @@ visorchannel_create_guts(u64 physaddr, unsigned long channel_bytes, } } - channel->mapped = ioremap_cache(physaddr, size); + channel->mapped = memremap(physaddr, size, MEMREMAP_WB); if (!channel->mapped) { release_mem_region(physaddr, size); goto cleanup; @@ -112,7 +113,7 @@ visorchannel_create_guts(u64 physaddr, unsigned long channel_bytes, if (uuid_le_cmp(guid, NULL_UUID_LE) == 0) guid = channel->chan_hdr.chtype; - iounmap(channel->mapped); + memunmap(channel->mapped); if (channel->requested) release_mem_region(channel->physaddr, channel->nbytes); channel->mapped = NULL; @@ -125,7 +126,8 @@ visorchannel_create_guts(u64 physaddr, unsigned long channel_bytes, } } - channel->mapped = ioremap_cache(channel->physaddr, channel_bytes); + channel->mapped = memremap(channel->physaddr, channel_bytes, + MEMREMAP_WB); if (!channel->mapped) { release_mem_region(channel->physaddr, channel_bytes); goto cleanup; @@ -166,7 +168,7 @@ visorchannel_destroy(struct visorchannel *channel) if (!channel) return; if (channel->mapped) { - iounmap(channel->mapped); + memunmap(channel->mapped); if (channel->requested) release_mem_region(channel->physaddr, channel->nbytes); } @@ -240,7 +242,7 @@ visorchannel_read(struct visorchannel *channel, ulong offset, if (offset + nbytes > channel->nbytes) return -EIO; - memcpy_fromio(local, channel->mapped + offset, nbytes); + memcpy(local, channel->mapped + offset, nbytes); return 0; } @@ -262,7 +264,7 @@ visorchannel_write(struct visorchannel *channel, ulong offset, local, copy_size); } - memcpy_toio(channel->mapped + offset, local, nbytes); + memcpy(channel->mapped + offset, local, nbytes); return 0; } diff --git a/drivers/staging/unisys/visorbus/visorchipset.c b/drivers/staging/unisys/visorbus/visorchipset.c index 4b76cb441ed4..94419c36d2af 100644 --- a/drivers/staging/unisys/visorbus/visorchipset.c +++ b/drivers/staging/unisys/visorbus/visorchipset.c @@ -118,7 +118,7 @@ static struct visorchannel *controlvm_channel; /* Manages the request payload in the controlvm channel */ struct visor_controlvm_payload_info { - u8 __iomem *ptr; /* pointer to base address of payload pool */ + u8 *ptr; /* pointer to base address of payload pool */ u64 offset; /* offset from beginning of controlvm * channel to beginning of payload * pool */ u32 bytes; /* number of bytes in payload pool */ @@ -400,21 +400,22 @@ parser_init_byte_stream(u64 addr, u32 bytes, bool local, bool *retry) p = __va((unsigned long) (addr)); memcpy(ctx->data, p, bytes); } else { - void __iomem *mapping; + void *mapping; if (!request_mem_region(addr, bytes, "visorchipset")) { rc = NULL; goto cleanup; } - mapping = ioremap_cache(addr, bytes); + mapping = memremap(addr, bytes, MEMREMAP_WB); if (!mapping) { release_mem_region(addr, bytes); rc = NULL; goto cleanup; } - memcpy_fromio(ctx->data, mapping, bytes); + memcpy(ctx->data, mapping, bytes); release_mem_region(addr, bytes); + memunmap(mapping); } ctx->byte_stream = true; @@ -1327,7 +1328,7 @@ static int initialize_controlvm_payload_info(u64 phys_addr, u64 offset, u32 bytes, struct visor_controlvm_payload_info *info) { - u8 __iomem *payload = NULL; + u8 *payload = NULL; int rc = CONTROLVM_RESP_SUCCESS; if (!info) { @@ -1339,7 +1340,7 @@ initialize_controlvm_payload_info(u64 phys_addr, u64 offset, u32 bytes, rc = -CONTROLVM_RESP_ERROR_PAYLOAD_INVALID; goto cleanup; } - payload = ioremap_cache(phys_addr + offset, bytes); + payload = memremap(phys_addr + offset, bytes, MEMREMAP_WB); if (!payload) { rc = -CONTROLVM_RESP_ERROR_IOREMAP_FAILED; goto cleanup; @@ -1352,7 +1353,7 @@ initialize_controlvm_payload_info(u64 phys_addr, u64 offset, u32 bytes, cleanup: if (rc < 0) { if (payload) { - iounmap(payload); + memunmap(payload); payload = NULL; } } @@ -1363,7 +1364,7 @@ static void destroy_controlvm_payload_info(struct visor_controlvm_payload_info *info) { if (info->ptr) { - iounmap(info->ptr); + memunmap(info->ptr); info->ptr = NULL; } memset(info, 0, sizeof(struct visor_controlvm_payload_info)); diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index cfbb9d728e31..271d12137649 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -36,11 +36,11 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/pm_runtime.h> +#include <linux/io.h> #ifdef CONFIG_SPARC #include <linux/sunserialcore.h> #endif -#include <asm/io.h> #include <asm/irq.h> #include "8250.h" diff --git a/drivers/video/fbdev/ocfb.c b/drivers/video/fbdev/ocfb.c index de9819660ca0..c9293aea8ec3 100644 --- a/drivers/video/fbdev/ocfb.c +++ b/drivers/video/fbdev/ocfb.c @@ -325,7 +325,6 @@ static int ocfb_probe(struct platform_device *pdev) dev_err(&pdev->dev, "I/O resource request failed\n"); return -ENXIO; } - res->flags &= ~IORESOURCE_CACHEABLE; fbdev->regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(fbdev->regs)) return PTR_ERR(fbdev->regs); diff --git a/drivers/video/fbdev/s1d13xxxfb.c b/drivers/video/fbdev/s1d13xxxfb.c index 83433cb0dfba..96aa46dc696c 100644 --- a/drivers/video/fbdev/s1d13xxxfb.c +++ b/drivers/video/fbdev/s1d13xxxfb.c @@ -32,8 +32,7 @@ #include <linux/spinlock_types.h> #include <linux/spinlock.h> #include <linux/slab.h> - -#include <asm/io.h> +#include <linux/io.h> #include <video/s1d13xxxfb.h> diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 735355b0e023..7df4228e25f0 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -64,6 +64,7 @@ #include <linux/fb.h> #include <linux/init.h> #include <linux/ioport.h> +#include <linux/io.h> #include <asm/grfioctl.h> /* for HP-UX compatibility */ #include <asm/uaccess.h> |