From 02fbd139759feb1f331cebd858523b5d774082e6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 May 2016 11:58:48 +0200 Subject: dax: Remove complete_unwritten argument Fault handlers currently take complete_unwritten argument to convert unwritten extents after PTEs are updated. However no filesystem uses this anymore as the code is racy. Remove the unused argument. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Vishal Verma --- fs/block_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 20a2c02b77c4..b25bb230b28a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1746,7 +1746,7 @@ static const struct address_space_operations def_blk_aops = { */ static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - return __dax_fault(vma, vmf, blkdev_get_block, NULL); + return __dax_fault(vma, vmf, blkdev_get_block); } static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma, @@ -1758,7 +1758,7 @@ static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma, static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags) { - return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL); + return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block); } static const struct vm_operations_struct blkdev_dax_vm_ops = { -- cgit v1.2.3 From 2af3a8159cd204fc8437ed2f75863f0fb930f0d0 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 10 May 2016 10:23:52 -0600 Subject: block: Add vfs_msg() interface In preparation of moving DAX capability checks to the block layer from filesystem code, add a VFS message interface that aligns with filesystem's message format. For instance, a vfs_msg() message followed by XFS messages in case of a dax mount error may look like: VFS (pmem0p1): error: unaligned partition for dax XFS (pmem0p1): DAX unsupported by block device. Turning off DAX. XFS (pmem0p1): Mounting V5 Filesystem : vfs_msg() is largely based on ext4_msg(). Signed-off-by: Toshi Kani Reviewed-by: Christoph Hellwig Cc: Alexander Viro Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Jan Kara Cc: Dave Chinner Cc: Dan Williams Cc: Ross Zwisler Cc: Christoph Hellwig Cc: Boaz Harrosh Signed-off-by: Vishal Verma --- fs/block_dev.c | 12 ++++++++++++ include/linux/blkdev.h | 11 +++++++++++ 2 files changed, 23 insertions(+) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index b25bb230b28a..91e0ec0233c0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -50,6 +50,18 @@ struct block_device *I_BDEV(struct inode *inode) } EXPORT_SYMBOL(I_BDEV); +void __vfs_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk_ratelimited("%sVFS (%s): %pV\n", prefix, sb->s_id, &vaf); + va_end(args); +} + static void bdev_write_inode(struct block_device *bdev) { struct inode *inode = bdev->bd_inode; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 669e419d6234..78c48ab22f46 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -767,6 +767,17 @@ static inline void rq_flush_dcache_pages(struct request *rq) } #endif +#ifdef CONFIG_PRINTK +#define vfs_msg(sb, level, fmt, ...) \ + __vfs_msg(sb, level, fmt, ##__VA_ARGS__) +#else +#define vfs_msg(sb, level, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __vfs_msg(sb, "", " "); \ +} while (0) +#endif + extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern blk_qc_t generic_make_request(struct bio *bio); -- cgit v1.2.3 From 2d96afc8f70ef86c66a0b5d80c24a27d6dd13df3 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 10 May 2016 10:23:53 -0600 Subject: block: Add bdev_dax_supported() for dax mount checks DAX imposes additional requirements to a device. Add bdev_dax_supported() which performs all the precondition checks necessary for filesystem to mount the device with dax option. Also add a new check to verify if a partition is aligned by 4KB. When a partition is unaligned, any dax read/write access fails, except for metadata update. Signed-off-by: Toshi Kani Reviewed-by: Christoph Hellwig Cc: Alexander Viro Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Jan Kara Cc: Dave Chinner Cc: Dan Williams Cc: Ross Zwisler Cc: Christoph Hellwig Cc: Boaz Harrosh Signed-off-by: Vishal Verma --- fs/block_dev.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 1 + 2 files changed, 46 insertions(+) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 91e0ec0233c0..518cde62c01c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -509,6 +509,51 @@ long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax) } EXPORT_SYMBOL_GPL(bdev_direct_access); +/** + * bdev_dax_supported() - Check if the device supports dax for filesystem + * @sb: The superblock of the device + * @blocksize: The block size of the device + * + * This is a library function for filesystems to check if the block device + * can be mounted with dax option. + * + * Return: negative errno if unsupported, 0 if supported. + */ +int bdev_dax_supported(struct super_block *sb, int blocksize) +{ + struct blk_dax_ctl dax = { + .sector = 0, + .size = PAGE_SIZE, + }; + int err; + + if (blocksize != PAGE_SIZE) { + vfs_msg(sb, KERN_ERR, "error: unsupported blocksize for dax"); + return -EINVAL; + } + + err = bdev_direct_access(sb->s_bdev, &dax); + if (err < 0) { + switch (err) { + case -EOPNOTSUPP: + vfs_msg(sb, KERN_ERR, + "error: device does not support dax"); + break; + case -EINVAL: + vfs_msg(sb, KERN_ERR, + "error: unaligned partition for dax"); + break; + default: + vfs_msg(sb, KERN_ERR, + "error: dax access failed (%d)", err); + } + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(bdev_dax_supported); + /* * pseudo-fs */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 78c48ab22f46..71231a55debd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1688,6 +1688,7 @@ extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); +extern int bdev_dax_supported(struct super_block *, int); #else /* CONFIG_BLOCK */ struct block_device; -- cgit v1.2.3 From a8078b1fc616da6112eb95f0063cd34531d4ccf0 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 10 May 2016 10:23:57 -0600 Subject: block: Update blkdev_dax_capable() for consistency blkdev_dax_capable() is similar to bdev_dax_supported(), but needs to remain as a separate interface for checking dax capability of a raw block device. Rename and relocate blkdev_dax_capable() to keep them maintained consistently, and call bdev_direct_access() for the dax capability check. There is no change in the behavior. Link: https://lkml.org/lkml/2016/5/9/950 Signed-off-by: Toshi Kani Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Jens Axboe Cc: Andreas Dilger Cc: Jan Kara Cc: Dave Chinner Cc: Dan Williams Cc: Ross Zwisler Cc: Christoph Hellwig Cc: Boaz Harrosh Signed-off-by: Vishal Verma --- block/ioctl.c | 30 ------------------------------ fs/block_dev.c | 39 +++++++++++++++++++++++++++++++++++++-- include/linux/blkdev.h | 1 + include/linux/fs.h | 8 -------- 4 files changed, 38 insertions(+), 40 deletions(-) (limited to 'fs/block_dev.c') diff --git a/block/ioctl.c b/block/ioctl.c index 4ff1f92f89ca..7eeda072dc70 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -407,35 +406,6 @@ static inline int is_unrecognized_ioctl(int ret) ret == -ENOIOCTLCMD; } -#ifdef CONFIG_FS_DAX -bool blkdev_dax_capable(struct block_device *bdev) -{ - struct gendisk *disk = bdev->bd_disk; - - if (!disk->fops->direct_access) - return false; - - /* - * If the partition is not aligned on a page boundary, we can't - * do dax I/O to it. - */ - if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) - || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) - return false; - - /* - * If the device has known bad blocks, force all I/O through the - * driver / page cache. - * - * TODO: support finer grained dax error handling - */ - if (disk->bb && disk->bb->count) - return false; - - return true; -} -#endif - static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { diff --git a/fs/block_dev.c b/fs/block_dev.c index 518cde62c01c..8477d4501b1e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "internal.h" @@ -554,6 +555,40 @@ int bdev_dax_supported(struct super_block *sb, int blocksize) } EXPORT_SYMBOL_GPL(bdev_dax_supported); +/** + * bdev_dax_capable() - Return if the raw device is capable for dax + * @bdev: The device for raw block device access + */ +bool bdev_dax_capable(struct block_device *bdev) +{ + struct gendisk *disk = bdev->bd_disk; + struct blk_dax_ctl dax = { + .size = PAGE_SIZE, + }; + + if (!IS_ENABLED(CONFIG_FS_DAX)) + return false; + + dax.sector = 0; + if (bdev_direct_access(bdev, &dax) < 0) + return false; + + dax.sector = bdev->bd_part->nr_sects - (PAGE_SIZE / 512); + if (bdev_direct_access(bdev, &dax) < 0) + return false; + + /* + * If the device has known bad blocks, force all I/O through the + * driver / page cache. + * + * TODO: support finer grained dax error handling + */ + if (disk->bb && disk->bb->count) + return false; + + return true; +} + /* * pseudo-fs */ @@ -1295,7 +1330,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); - if (!blkdev_dax_capable(bdev)) + if (!bdev_dax_capable(bdev)) bdev->bd_inode->i_flags &= ~S_DAX; } @@ -1332,7 +1367,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); - if (!blkdev_dax_capable(bdev)) + if (!bdev_dax_capable(bdev)) bdev->bd_inode->i_flags &= ~S_DAX; } } else { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 71231a55debd..27cbefe8c985 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1689,6 +1689,7 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); extern int bdev_dax_supported(struct super_block *, int); +extern bool bdev_dax_capable(struct block_device *); #else /* CONFIG_BLOCK */ struct block_device; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9f2813090d1b..17f934fcf564 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2319,14 +2319,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -#ifdef CONFIG_FS_DAX -extern bool blkdev_dax_capable(struct block_device *bdev); -#else -static inline bool blkdev_dax_capable(struct block_device *bdev) -{ - return false; -} -#endif extern struct super_block *blockdev_superblock; -- cgit v1.2.3 From 0a70bd43053331d99881211e1d09f32de531432f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 24 Feb 2016 14:02:11 -0800 Subject: dax: enable dax in the presence of known media errors (badblocks) 1/ If a mapping overlaps a bad sector fail the request. 2/ Do not opportunistically report more dax-capable capacity than is requested when errors present. Reviewed-by: Jeff Moyer Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams [vishal: fix a conflict with system RAM collision patches] [vishal: add a 'size' parameter to ->direct_access] [vishal: fix a conflict with DAX alignment check patches] Signed-off-by: Vishal Verma --- arch/powerpc/sysdev/axonram.c | 2 +- drivers/block/brd.c | 2 +- drivers/nvdimm/pmem.c | 10 +++++++++- drivers/s390/block/dcssblk.c | 4 ++-- fs/block_dev.c | 13 +------------ include/linux/blkdev.h | 2 +- 6 files changed, 15 insertions(+), 18 deletions(-) (limited to 'fs/block_dev.c') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 0d112b94d91d..ff75d70f7285 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -143,7 +143,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static long axon_ram_direct_access(struct block_device *device, sector_t sector, - void __pmem **kaddr, pfn_t *pfn) + void __pmem **kaddr, pfn_t *pfn, long size) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 51a071e32221..c04bd9bc39fd 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -381,7 +381,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, #ifdef CONFIG_BLK_DEV_RAM_DAX static long brd_direct_access(struct block_device *bdev, sector_t sector, - void __pmem **kaddr, pfn_t *pfn) + void __pmem **kaddr, pfn_t *pfn, long size) { struct brd_device *brd = bdev->bd_disk->private_data; struct page *page; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index f798899338ed..c447579bd853 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -182,14 +182,22 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, } static long pmem_direct_access(struct block_device *bdev, sector_t sector, - void __pmem **kaddr, pfn_t *pfn) + void __pmem **kaddr, pfn_t *pfn, long size) { struct pmem_device *pmem = bdev->bd_disk->private_data; resource_size_t offset = sector * 512 + pmem->data_offset; + if (unlikely(is_bad_pmem(&pmem->bb, sector, size))) + return -EIO; *kaddr = pmem->virt_addr + offset; *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); + /* + * If badblocks are present, limit known good range to the + * requested range. + */ + if (unlikely(pmem->bb.count)) + return size; return pmem->size - pmem->pfn_pad - offset; } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 1bce9cf51b1e..6ac33984bc0f 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -31,7 +31,7 @@ static void dcssblk_release(struct gendisk *disk, fmode_t mode); static blk_qc_t dcssblk_make_request(struct request_queue *q, struct bio *bio); static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, - void __pmem **kaddr, pfn_t *pfn); + void __pmem **kaddr, pfn_t *pfn, long size); static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; @@ -883,7 +883,7 @@ fail: static long dcssblk_direct_access (struct block_device *bdev, sector_t secnum, - void __pmem **kaddr, pfn_t *pfn) + void __pmem **kaddr, pfn_t *pfn, long size) { struct dcssblk_dev_info *dev_info; unsigned long offset, dev_sz; diff --git a/fs/block_dev.c b/fs/block_dev.c index 8477d4501b1e..45839b27972c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include "internal.h" @@ -501,7 +500,7 @@ long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax) sector += get_start_sect(bdev); if (sector % (PAGE_SIZE / 512)) return -EINVAL; - avail = ops->direct_access(bdev, sector, &dax->addr, &dax->pfn); + avail = ops->direct_access(bdev, sector, &dax->addr, &dax->pfn, size); if (!avail) return -ERANGE; if (avail > 0 && avail & ~PAGE_MASK) @@ -561,7 +560,6 @@ EXPORT_SYMBOL_GPL(bdev_dax_supported); */ bool bdev_dax_capable(struct block_device *bdev) { - struct gendisk *disk = bdev->bd_disk; struct blk_dax_ctl dax = { .size = PAGE_SIZE, }; @@ -577,15 +575,6 @@ bool bdev_dax_capable(struct block_device *bdev) if (bdev_direct_access(bdev, &dax) < 0) return false; - /* - * If the device has known bad blocks, force all I/O through the - * driver / page cache. - * - * TODO: support finer grained dax error handling - */ - if (disk->bb && disk->bb->count) - return false; - return true; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 27cbefe8c985..cf7c13c2c38d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1668,7 +1668,7 @@ struct block_device_operations { int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); long (*direct_access)(struct block_device *, sector_t, void __pmem **, - pfn_t *); + pfn_t *, long); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ -- cgit v1.2.3