diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-09 20:39:57 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-09 20:39:57 +0200 |
commit | 2e5fd489a4e5fcc97b035c03ace724c1d481a4c1 (patch) | |
tree | de1705048e3d2c71c74b7547e149b2973ebb29e1 | |
parent | Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma (diff) | |
parent | Merge branch 'for-5.15/fsdax-cleanups' into for-5.15/libnvdimm (diff) | |
download | linux-2e5fd489a4e5fcc97b035c03ace724c1d481a4c1.tar.xz linux-2e5fd489a4e5fcc97b035c03ace724c1d481a4c1.zip |
Merge tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
- Fix a race condition in the teardown path of raw mode pmem
namespaces.
- Clean up the code that filesystems use to detect filesystem-dax
capabilities of their underlying block device.
* tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
dax: remove bdev_dax_supported
xfs: factor out a xfs_buftarg_is_dax helper
dax: stub out dax_supported for !CONFIG_FS_DAX
dax: remove __generic_fsdax_supported
dax: move the dax_read_lock() locking into dax_supported
dax: mark dax_get_by_host static
dm: use fs_dax_get_by_bdev instead of dax_get_by_host
dax: stop using bdevname
fsdax: improve the FS_DAX Kconfig description and help text
libnvdimm/pmem: Fix crash triggered when I/O in-flight during unbind
-rw-r--r-- | drivers/dax/super.c | 191 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 9 | ||||
-rw-r--r-- | drivers/md/dm.c | 2 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 4 | ||||
-rw-r--r-- | fs/Kconfig | 21 | ||||
-rw-r--r-- | fs/erofs/super.c | 2 | ||||
-rw-r--r-- | fs/ext2/super.c | 3 | ||||
-rw-r--r-- | fs/ext4/super.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 16 | ||||
-rw-r--r-- | include/linux/dax.h | 41 |
10 files changed, 120 insertions, 172 deletions
diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 44736cbd446e..fc89e91beea7 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -17,6 +17,24 @@ #include <linux/fs.h> #include "dax-private.h" +/** + * struct dax_device - anchor object for dax services + * @inode: core vfs + * @cdev: optional character interface for "device dax" + * @host: optional name for lookups where the device path is not available + * @private: dax driver private data + * @flags: state and boolean properties + */ +struct dax_device { + struct hlist_node list; + struct inode inode; + struct cdev cdev; + const char *host; + void *private; + unsigned long flags; + const struct dax_operations *ops; +}; + static dev_t dax_devt; DEFINE_STATIC_SRCU(dax_srcu); static struct vfsmount *dax_mnt; @@ -40,6 +58,42 @@ void dax_read_unlock(int id) } EXPORT_SYMBOL_GPL(dax_read_unlock); +static int dax_host_hash(const char *host) +{ + return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE; +} + +/** + * dax_get_by_host() - temporary lookup mechanism for filesystem-dax + * @host: alternate name for the device registered by a dax driver + */ +static struct dax_device *dax_get_by_host(const char *host) +{ + struct dax_device *dax_dev, *found = NULL; + int hash, id; + + if (!host) + return NULL; + + hash = dax_host_hash(host); + + id = dax_read_lock(); + spin_lock(&dax_host_lock); + hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) { + if (!dax_alive(dax_dev) + || strcmp(host, dax_dev->host) != 0) + continue; + + if (igrab(&dax_dev->inode)) + found = dax_dev; + break; + } + spin_unlock(&dax_host_lock); + dax_read_unlock(id); + + return found; +} + #ifdef CONFIG_BLOCK #include <linux/blkdev.h> @@ -65,15 +119,13 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) return dax_get_by_host(bdev->bd_disk->disk_name); } EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); -#endif -bool __generic_fsdax_supported(struct dax_device *dax_dev, +bool generic_fsdax_supported(struct 
dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors) { bool dax_enabled = false; pgoff_t pgoff, pgoff_end; - char buf[BDEVNAME_SIZE]; void *kaddr, *end_kaddr; pfn_t pfn, end_pfn; sector_t last_page; @@ -81,29 +133,25 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, int err, id; if (blocksize != PAGE_SIZE) { - pr_info("%s: error: unsupported blocksize for dax\n", - bdevname(bdev, buf)); + pr_info("%pg: error: unsupported blocksize for dax\n", bdev); return false; } if (!dax_dev) { - pr_debug("%s: error: dax unsupported by block device\n", - bdevname(bdev, buf)); + pr_debug("%pg: error: dax unsupported by block device\n", bdev); return false; } err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff); if (err) { - pr_info("%s: error: unaligned partition for dax\n", - bdevname(bdev, buf)); + pr_info("%pg: error: unaligned partition for dax\n", bdev); return false; } last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512; err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end); if (err) { - pr_info("%s: error: unaligned partition for dax\n", - bdevname(bdev, buf)); + pr_info("%pg: error: unaligned partition for dax\n", bdev); return false; } @@ -112,8 +160,8 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn); if (len < 1 || len2 < 1) { - pr_info("%s: error: dax access failed (%ld)\n", - bdevname(bdev, buf), len < 1 ? len : len2); + pr_info("%pg: error: dax access failed (%ld)\n", + bdev, len < 1 ? 
len : len2); dax_read_unlock(id); return false; } @@ -147,57 +195,32 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, dax_read_unlock(id); if (!dax_enabled) { - pr_info("%s: error: dax support not enabled\n", - bdevname(bdev, buf)); + pr_info("%pg: error: dax support not enabled\n", bdev); return false; } return true; } -EXPORT_SYMBOL_GPL(__generic_fsdax_supported); +EXPORT_SYMBOL_GPL(generic_fsdax_supported); -/** - * __bdev_dax_supported() - Check if the device supports dax for filesystem - * @bdev: block device to check - * @blocksize: The block size of the device - * - * This is a library function for filesystems to check if the block device - * can be mounted with dax option. - * - * Return: true if supported, false if unsupported - */ -bool __bdev_dax_supported(struct block_device *bdev, int blocksize) +bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, + int blocksize, sector_t start, sector_t len) { - struct dax_device *dax_dev; - struct request_queue *q; - char buf[BDEVNAME_SIZE]; - bool ret; + bool ret = false; int id; - q = bdev_get_queue(bdev); - if (!q || !blk_queue_dax(q)) { - pr_debug("%s: error: request queue doesn't support dax\n", - bdevname(bdev, buf)); - return false; - } - - dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); - if (!dax_dev) { - pr_debug("%s: error: device does not support dax\n", - bdevname(bdev, buf)); + if (!dax_dev) return false; - } id = dax_read_lock(); - ret = dax_supported(dax_dev, bdev, blocksize, 0, - i_size_read(bdev->bd_inode) / 512); + if (dax_alive(dax_dev) && dax_dev->ops->dax_supported) + ret = dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, + start, len); dax_read_unlock(id); - - put_dax(dax_dev); - return ret; } -EXPORT_SYMBOL_GPL(__bdev_dax_supported); -#endif +EXPORT_SYMBOL_GPL(dax_supported); +#endif /* CONFIG_FS_DAX */ +#endif /* CONFIG_BLOCK */ enum dax_device_flags { /* !alive + rcu grace period == no new operations / mappings */ @@ -208,24 +231,6 @@ enum 
dax_device_flags { DAXDEV_SYNC, }; -/** - * struct dax_device - anchor object for dax services - * @inode: core vfs - * @cdev: optional character interface for "device dax" - * @host: optional name for lookups where the device path is not available - * @private: dax driver private data - * @flags: state and boolean properties - */ -struct dax_device { - struct hlist_node list; - struct inode inode; - struct cdev cdev; - const char *host; - void *private; - unsigned long flags; - const struct dax_operations *ops; -}; - static ssize_t write_cache_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -323,19 +328,6 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, } EXPORT_SYMBOL_GPL(dax_direct_access); -bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, - int blocksize, sector_t start, sector_t len) -{ - if (!dax_dev) - return false; - - if (!dax_alive(dax_dev)) - return false; - - return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len); -} -EXPORT_SYMBOL_GPL(dax_supported); - size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { @@ -423,11 +415,6 @@ bool dax_alive(struct dax_device *dax_dev) } EXPORT_SYMBOL_GPL(dax_alive); -static int dax_host_hash(const char *host) -{ - return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE; -} - /* * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring * that any fault handlers or operations that might have seen @@ -625,38 +612,6 @@ void put_dax(struct dax_device *dax_dev) EXPORT_SYMBOL_GPL(put_dax); /** - * dax_get_by_host() - temporary lookup mechanism for filesystem-dax - * @host: alternate name for the device registered by a dax driver - */ -struct dax_device *dax_get_by_host(const char *host) -{ - struct dax_device *dax_dev, *found = NULL; - int hash, id; - - if (!host) - return NULL; - - hash = dax_host_hash(host); - - id = dax_read_lock(); - 
spin_lock(&dax_host_lock); - hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) { - if (!dax_alive(dax_dev) - || strcmp(host, dax_dev->host) != 0) - continue; - - if (igrab(&dax_dev->inode)) - found = dax_dev; - break; - } - spin_unlock(&dax_host_lock); - dax_read_unlock(id); - - return found; -} -EXPORT_SYMBOL_GPL(dax_get_by_host); - -/** * inode_dax: convert a public inode into its dax_dev * @inode: An inode with i_cdev pointing to a dax_dev * diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index b03eabc1ed7c..2111daaacaba 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -809,14 +809,9 @@ EXPORT_SYMBOL_GPL(dm_table_set_type); int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - int blocksize = *(int *) data, id; - bool rc; + int blocksize = *(int *) data; - id = dax_read_lock(); - rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len); - dax_read_unlock(id); - - return rc; + return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len); } /* Check devices support synchronous DAX */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 84e9145b1714..a011d09cb0fa 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -654,7 +654,7 @@ static int open_table_device(struct table_device *td, dev_t dev, } td->dm_dev.bdev = bdev; - td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); + td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev); return 0; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 1e0615b8565e..72de88ff0d30 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -450,11 +450,11 @@ static int pmem_attach_disk(struct device *dev, pmem->pfn_flags |= PFN_MAP; bb_range = pmem->pgmap.range; } else { + addr = devm_memremap(dev, pmem->phys_addr, + pmem->size, ARCH_MEMREMAP_PMEM); if (devm_add_action_or_reset(dev, pmem_release_queue, &pmem->pgmap)) return -ENOMEM; - addr = devm_memremap(dev, pmem->phys_addr, - 
pmem->size, ARCH_MEMREMAP_PMEM); bb_range.start = res->start; bb_range.end = res->end; } diff --git a/fs/Kconfig b/fs/Kconfig index d8207a1b8c44..c854725db417 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -43,7 +43,7 @@ source "fs/f2fs/Kconfig" source "fs/zonefs/Kconfig" config FS_DAX - bool "Direct Access (DAX) support" + bool "File system based Direct Access (DAX) support" depends on MMU depends on !(ARM || MIPS || SPARC) select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED) @@ -53,8 +53,23 @@ config FS_DAX Direct Access (DAX) can be used on memory-backed block devices. If the block device supports DAX and the filesystem supports DAX, then you can avoid using the pagecache to buffer I/Os. Turning - on this option will compile in support for DAX; you will need to - mount the filesystem using the -o dax option. + on this option will compile in support for DAX. + + For a DAX device to support file system access it needs to have + struct pages. For the nfit based NVDIMMs this can be enabled + using the ndctl utility: + + # ndctl create-namespace --force --reconfig=namespace0.0 \ + --mode=fsdax --map=mem + + See the 'create-namespace' man page for details on the overhead of + --map=mem: + https://docs.pmem.io/ndctl-user-guide/ndctl-man-pages/ndctl-create-namespace + + For ndctl to work CONFIG_DEV_DAX needs to be enabled as well. For most + file systems DAX support needs to be manually enabled globally or + per-inode using a mount option as well. See the file documentation in + Documentation/filesystems/dax.rst for details. If you do not have a block device that is capable of using this, or if unsure, say N. 
Saying Y will increase the size of the kernel diff --git a/fs/erofs/super.c b/fs/erofs/super.c index a8d49e8fc83a..11b88559f8bf 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -546,7 +546,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return err; if (test_opt(ctx, DAX_ALWAYS) && - !bdev_dax_supported(sb->s_bdev, EROFS_BLKSIZ)) { + !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) { errorfc(fc, "DAX unsupported by block device. Turning off DAX."); clear_opt(ctx, DAX_ALWAYS); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 987bcf32ed46..d8d580b609ba 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -946,7 +946,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); if (test_opt(sb, DAX)) { - if (!bdev_dax_supported(sb->s_bdev, blocksize)) { + if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0, + bdev_nr_sectors(sb->s_bdev))) { ext2_msg(sb, KERN_ERR, "DAX unsupported by block device. Turning off DAX."); clear_opt(sbi->s_mount_opt, DAX); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 136940af00b8..0775950ee84e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4287,7 +4287,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (bdev_dax_supported(sb->s_bdev, blocksize)) + if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0, + bdev_nr_sectors(sb->s_bdev))) set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 9a86d3ec2cb6..c4e0cd1c1c8c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -330,6 +330,15 @@ xfs_set_inode_alloc( return xfs_is_inode32(mp) ? 
maxagi : agcount; } +static bool +xfs_buftarg_is_dax( + struct super_block *sb, + struct xfs_buftarg *bt) +{ + return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0, + bdev_nr_sectors(bt->bt_bdev)); +} + STATIC int xfs_blkdev_get( xfs_mount_t *mp, @@ -1588,11 +1597,10 @@ xfs_fs_fill_super( xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); - datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev, - sb->s_blocksize); + datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp); if (mp->m_rtdev_targp) - rtdev_is_dax = bdev_dax_supported( - mp->m_rtdev_targp->bt_bdev, sb->s_blocksize); + rtdev_is_dax = xfs_buftarg_is_dax(sb, + mp->m_rtdev_targp); if (!rtdev_is_dax && !datadev_is_dax) { xfs_alert(mp, "DAX unsupported by block device. Turning off DAX."); diff --git a/include/linux/dax.h b/include/linux/dax.h index b52f084aa643..2619d94c308d 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -41,7 +41,6 @@ struct dax_operations { extern struct attribute_group dax_attribute_group; #if IS_ENABLED(CONFIG_DAX) -struct dax_device *dax_get_by_host(const char *host); struct dax_device *alloc_dax(void *private, const char *host, const struct dax_operations *ops, unsigned long flags); void put_dax(struct dax_device *dax_dev); @@ -58,8 +57,6 @@ static inline void set_dax_synchronous(struct dax_device *dax_dev) { __set_dax_synchronous(dax_dev); } -bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, - int blocksize, sector_t start, sector_t len); /* * Check if given mapping is supported by the file / underlying device. 
*/ @@ -73,10 +70,6 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, return dax_synchronous(dax_dev); } #else -static inline struct dax_device *dax_get_by_host(const char *host) -{ - return NULL; -} static inline struct dax_device *alloc_dax(void *private, const char *host, const struct dax_operations *ops, unsigned long flags) { @@ -106,12 +99,6 @@ static inline bool dax_synchronous(struct dax_device *dax_dev) static inline void set_dax_synchronous(struct dax_device *dax_dev) { } -static inline bool dax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t len) -{ - return false; -} static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, struct dax_device *dax_dev) { @@ -122,22 +109,12 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, struct writeback_control; int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) -bool __bdev_dax_supported(struct block_device *bdev, int blocksize); -static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize) -{ - return __bdev_dax_supported(bdev, blocksize); -} - -bool __generic_fsdax_supported(struct dax_device *dax_dev, +bool generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors); -static inline bool generic_fsdax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t sectors) -{ - return __generic_fsdax_supported(dax_dev, bdev, blocksize, start, - sectors); -} + +bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, + int blocksize, sector_t start, sector_t len); static inline void fs_put_dax(struct dax_device *dax_dev) { @@ -153,15 +130,11 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st dax_entry_t dax_lock_page(struct page *page); void 
dax_unlock_page(struct page *page, dax_entry_t cookie); #else -static inline bool bdev_dax_supported(struct block_device *bdev, - int blocksize) -{ - return false; -} +#define generic_fsdax_supported NULL -static inline bool generic_fsdax_supported(struct dax_device *dax_dev, +static inline bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, - sector_t sectors) + sector_t len) { return false; } |