/* * Persistent Memory Driver * * Copyright (c) 2014-2015, Intel Corporation. * Copyright (c) 2015, Christoph Hellwig . * Copyright (c) 2015, Boaz Harrosh . * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. */ #include #include #include #include #include #include #include #include #include #include #include "nd.h" struct pmem_device { struct request_queue *pmem_queue; struct gendisk *pmem_disk; /* One contiguous memory region per device */ phys_addr_t phys_addr; void __pmem *virt_addr; size_t size; }; static int pmem_major; static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, sector_t sector) { void *mem = kmap_atomic(page); size_t pmem_off = sector << 9; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; if (rw == READ) { memcpy_from_pmem(mem + off, pmem_addr, len); flush_dcache_page(page); } else { flush_dcache_page(page); memcpy_to_pmem(pmem_addr, mem + off, len); } kunmap_atomic(mem); } static void pmem_make_request(struct request_queue *q, struct bio *bio) { bool do_acct; unsigned long start; struct bio_vec bvec; struct bvec_iter iter; struct block_device *bdev = bio->bi_bdev; struct pmem_device *pmem = bdev->bd_disk->private_data; do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset, bio_data_dir(bio), iter.bi_sector); if (do_acct) nd_iostat_end(bio, start); if (bio_data_dir(bio)) wmb_pmem(); bio_endio(bio, 0); } static int pmem_rw_page(struct block_device *bdev, sector_t sector, struct page *page, int rw) { struct pmem_device *pmem = bdev->bd_disk->private_data; pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector); page_endio(page, rw & WRITE, 0); return 0; } static long pmem_direct_access(struct block_device *bdev, sector_t sector, void **kaddr, unsigned long *pfn, long size) { struct pmem_device *pmem = bdev->bd_disk->private_data; size_t offset = sector << 9; if (!pmem) return -ENODEV; /* FIXME convert DAX to comprehend that this mapping has a lifetime */ *kaddr = (void __force *) pmem->virt_addr + offset; *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; return pmem->size - offset; } static const struct block_device_operations pmem_fops = { .owner = THIS_MODULE, .rw_page = pmem_rw_page, .direct_access = pmem_direct_access, .revalidate_disk = nvdimm_revalidate_disk, }; static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res, int id) { struct pmem_device *pmem; pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) return ERR_PTR(-ENOMEM); pmem->phys_addr = res->start; pmem->size = resource_size(res); if (!arch_has_pmem_api()) dev_warn(dev, "unable to guarantee persistence of writes\n"); if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size, dev_name(dev))) { dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size); return ERR_PTR(-EBUSY); } pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, pmem->size); if (!pmem->virt_addr) return ERR_PTR(-ENXIO); return pmem; } static void pmem_detach_disk(struct pmem_device *pmem) { del_gendisk(pmem->pmem_disk); put_disk(pmem->pmem_disk); blk_cleanup_queue(pmem->pmem_queue); } static int pmem_attach_disk(struct nd_namespace_common *ndns, struct pmem_device *pmem) { struct gendisk *disk; pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL); if (!pmem->pmem_queue) return -ENOMEM; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX); blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue); disk = alloc_disk(0); if (!disk) { blk_cleanup_queue(pmem->pmem_queue); return -ENOMEM; } disk->major = pmem_major; disk->first_minor = 0; disk->fops = &pmem_fops; disk->private_data = pmem; disk->queue = pmem->pmem_queue; disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(ndns, disk->disk_name); disk->driverfs_dev = &ndns->dev; set_capacity(disk, pmem->size >> 9); pmem->pmem_disk = disk; add_disk(disk); revalidate_disk(disk); return 0; } static int pmem_rw_bytes(struct nd_namespace_common *ndns, resource_size_t offset, void *buf, size_t size, int rw) { struct pmem_device *pmem = dev_get_drvdata(ndns->claim); if (unlikely(offset + size > pmem->size)) { dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); return -EFAULT; } if (rw == READ) memcpy_from_pmem(buf, pmem->virt_addr + offset, size); else { memcpy_to_pmem(pmem->virt_addr + offset, buf, size); wmb_pmem(); } return 0; } static int nd_pmem_probe(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_namespace_common *ndns; struct nd_namespace_io *nsio; struct pmem_device *pmem; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) return PTR_ERR(ndns); nsio = to_nd_namespace_io(&ndns->dev); pmem = pmem_alloc(dev, &nsio->res, nd_region->id); if (IS_ERR(pmem)) return PTR_ERR(pmem); dev_set_drvdata(dev, pmem); ndns->rw_bytes = pmem_rw_bytes; if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); if (nd_btt_probe(ndns, pmem) == 0) /* we'll come back as btt-pmem */ return -ENXIO; return pmem_attach_disk(ndns, pmem); } static int nd_pmem_remove(struct device *dev) { struct pmem_device *pmem = dev_get_drvdata(dev); if (is_nd_btt(dev)) nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); else pmem_detach_disk(pmem); return 0; } MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, .drv = { .name = "nd_pmem", }, .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM, }; static int __init pmem_init(void) { int error; pmem_major = register_blkdev(0, "pmem"); if (pmem_major < 0) return pmem_major; error = nd_driver_register(&nd_pmem_driver); if (error) { unregister_blkdev(pmem_major, "pmem"); return error; } return 0; } module_init(pmem_init); static void pmem_exit(void) { driver_unregister(&nd_pmem_driver.drv); unregister_blkdev(pmem_major, "pmem"); } module_exit(pmem_exit); MODULE_AUTHOR("Ross Zwisler "); MODULE_LICENSE("GPL v2");