From d8e4bb8103df02a2c509868732dc93fb66110a12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Oct 2015 14:10:47 +0200 Subject: block: cleanup blkdev_ioctl Split out helpers for all non-trivial ioctls to make this function simpler, and also start passing around a pointer version of the argument, as that's what most ioctl handlers actually need. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/ioctl.c | 227 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 127 insertions(+), 100 deletions(-) (limited to 'block') diff --git a/block/ioctl.c b/block/ioctl.c index 8061eba42887..df62b47d2379 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -193,10 +193,20 @@ int blkdev_reread_part(struct block_device *bdev) } EXPORT_SYMBOL(blkdev_reread_part); -static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, - uint64_t len, int secure) +static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, + unsigned long arg, unsigned long flags) { - unsigned long flags = 0; + uint64_t range[2]; + uint64_t start, len; + + if (!(mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(range, (void __user *)arg, sizeof(range))) + return -EFAULT; + + start = range[0]; + len = range[1]; if (start & 511) return -EINVAL; @@ -207,14 +217,24 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, if (start + len > (i_size_read(bdev->bd_inode) >> 9)) return -EINVAL; - if (secure) - flags |= BLKDEV_DISCARD_SECURE; return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); } -static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start, - uint64_t len) +static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, + unsigned long arg) { + uint64_t range[2]; + uint64_t start, len; + + if (!(mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(range, (void __user *)arg, sizeof(range))) + return -EFAULT; + + start = range[0]; + len = range[1]; + if (start & 511) 
return -EINVAL; if (len & 511) @@ -295,89 +315,115 @@ static inline int is_unrecognized_ioctl(int ret) ret == -ENOIOCTLCMD; } -/* - * always keep this in sync with compat_blkdev_ioctl() - */ -int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, - unsigned long arg) +static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, + unsigned cmd, unsigned long arg) { - struct gendisk *disk = bdev->bd_disk; - struct backing_dev_info *bdi; - loff_t size; - int ret, n; - unsigned int max_sectors; + int ret; - switch(cmd) { - case BLKFLSBUF: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); - if (!is_unrecognized_ioctl(ret)) - return ret; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; - fsync_bdev(bdev); - invalidate_bdev(bdev); - return 0; + ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); + if (!is_unrecognized_ioctl(ret)) + return ret; - case BLKROSET: - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); - if (!is_unrecognized_ioctl(ret)) - return ret; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (get_user(n, (int __user *)(arg))) - return -EFAULT; - set_device_ro(bdev, n); - return 0; + fsync_bdev(bdev); + invalidate_bdev(bdev); + return 0; +} - case BLKDISCARD: - case BLKSECDISCARD: { - uint64_t range[2]; +static int blkdev_roset(struct block_device *bdev, fmode_t mode, + unsigned cmd, unsigned long arg) +{ + int ret, n; - if (!(mode & FMODE_WRITE)) - return -EBADF; + ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); + if (!is_unrecognized_ioctl(ret)) + return ret; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (get_user(n, (int __user *)arg)) + return -EFAULT; + set_device_ro(bdev, n); + return 0; +} - if (copy_from_user(range, (void __user *)arg, sizeof(range))) - return -EFAULT; +static int blkdev_getgeo(struct block_device *bdev, + struct hd_geometry __user *argp) +{ + struct gendisk *disk = bdev->bd_disk; + struct hd_geometry geo; + int ret; - return 
blk_ioctl_discard(bdev, range[0], range[1], - cmd == BLKSECDISCARD); - } - case BLKZEROOUT: { - uint64_t range[2]; + if (!argp) + return -EINVAL; + if (!disk->fops->getgeo) + return -ENOTTY; + + /* + * We need to set the startsect first, the driver may + * want to override it. + */ + memset(&geo, 0, sizeof(geo)); + geo.start = get_start_sect(bdev); + ret = disk->fops->getgeo(bdev, &geo); + if (ret) + return ret; + if (copy_to_user(argp, &geo, sizeof(geo))) + return -EFAULT; + return 0; +} - if (!(mode & FMODE_WRITE)) - return -EBADF; +/* set the logical block size */ +static int blkdev_bszset(struct block_device *bdev, fmode_t mode, + int __user *argp) +{ + int ret, n; - if (copy_from_user(range, (void __user *)arg, sizeof(range))) - return -EFAULT; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!argp) + return -EINVAL; + if (get_user(n, argp)) + return -EFAULT; - return blk_ioctl_zeroout(bdev, range[0], range[1]); + if (!(mode & FMODE_EXCL)) { + bdgrab(bdev); + if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) + return -EBUSY; } - case HDIO_GETGEO: { - struct hd_geometry geo; + ret = set_blocksize(bdev, n); + if (!(mode & FMODE_EXCL)) + blkdev_put(bdev, mode | FMODE_EXCL); + return ret; +} - if (!arg) - return -EINVAL; - if (!disk->fops->getgeo) - return -ENOTTY; - - /* - * We need to set the startsect first, the driver may - * want to override it. 
- */ - memset(&geo, 0, sizeof(geo)); - geo.start = get_start_sect(bdev); - ret = disk->fops->getgeo(bdev, &geo); - if (ret) - return ret; - if (copy_to_user((struct hd_geometry __user *)arg, &geo, - sizeof(geo))) - return -EFAULT; - return 0; - } +/* + * always keep this in sync with compat_blkdev_ioctl() + */ +int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, + unsigned long arg) +{ + struct backing_dev_info *bdi; + void __user *argp = (void __user *)arg; + loff_t size; + unsigned int max_sectors; + + switch (cmd) { + case BLKFLSBUF: + return blkdev_flushbuf(bdev, mode, cmd, arg); + case BLKROSET: + return blkdev_roset(bdev, mode, cmd, arg); + case BLKDISCARD: + return blk_ioctl_discard(bdev, mode, arg, 0); + case BLKSECDISCARD: + return blk_ioctl_discard(bdev, mode, arg, + BLKDEV_DISCARD_SECURE); + case BLKZEROOUT: + return blk_ioctl_zeroout(bdev, mode, arg); + case HDIO_GETGEO: + return blkdev_getgeo(bdev, argp); case BLKRAGET: case BLKFRAGET: if (!arg) @@ -414,28 +460,11 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKBSZSET: - /* set the logical block size */ - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (!arg) - return -EINVAL; - if (get_user(n, (int __user *) arg)) - return -EFAULT; - if (!(mode & FMODE_EXCL)) { - bdgrab(bdev); - if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) - return -EBUSY; - } - ret = set_blocksize(bdev, n); - if (!(mode & FMODE_EXCL)) - blkdev_put(bdev, mode | FMODE_EXCL); - return ret; + return blkdev_bszset(bdev, mode, argp); case BLKPG: - ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); - break; + return blkpg_ioctl(bdev, argp); case BLKRRPART: - ret = blkdev_reread_part(bdev); - break; + return blkdev_reread_part(bdev); case BLKGETSIZE: size = i_size_read(bdev->bd_inode); if ((size >> 9) > ~0UL) @@ -447,11 +476,9 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case 
BLKTRACESTOP: case BLKTRACESETUP: case BLKTRACETEARDOWN: - ret = blk_trace_ioctl(bdev, cmd, (char __user *) arg); - break; + return blk_trace_ioctl(bdev, cmd, argp); default: - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); + return __blkdev_driver_ioctl(bdev, mode, cmd, arg); } - return ret; } EXPORT_SYMBOL_GPL(blkdev_ioctl); -- cgit v1.2.3 From bbd3e064362e5057cc4799ba2e4d68c7593e490b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Oct 2015 14:10:48 +0200 Subject: block: add an API for Persistent Reservations This commit adds a driver API and ioctls for controlling Persistent Reservations generically at the block layer. Persistent Reservations are supported by SCSI and NVMe and allow controlling who gets access to a device in a shared storage setup. Note that we add a pr_ops structure to struct block_device_operations instead of adding the members directly to avoid bloating all instances of devices that will never support Persistent Reservations. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- Documentation/block/pr.txt | 119 +++++++++++++++++++++++++++++++++++++++++++++ block/ioctl.c | 103 +++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 2 + include/linux/pr.h | 18 +++++++ include/uapi/linux/pr.h | 48 ++++++++++++++++++ 5 files changed, 290 insertions(+) create mode 100644 Documentation/block/pr.txt create mode 100644 include/linux/pr.h create mode 100644 include/uapi/linux/pr.h (limited to 'block') diff --git a/Documentation/block/pr.txt b/Documentation/block/pr.txt new file mode 100644 index 000000000000..d3eb1ca65051 --- /dev/null +++ b/Documentation/block/pr.txt @@ -0,0 +1,119 @@ + +Block layer support for Persistent Reservations +=============================================== + +The Linux kernel supports a user space interface for simplified +Persistent Reservations which map to block devices that support +these (like SCSI).
Persistent Reservations allow restricting +access to block devices to specific initiators in a shared storage +setup. + +This document gives a general overview of the supported ioctl commands. +For a more detailed reference please refer to the SCSI Primary +Commands standard, specifically the section on Reservations and the +"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands. + +All implementations are expected to ensure the reservations survive +a power loss and cover all connections in a multi path environment. +These behaviors are optional in SPC but will be automatically applied +by Linux. + + +The following types of reservations are supported: +-------------------------------------------------- + + - PR_WRITE_EXCLUSIVE + + Only the initiator that owns the reservation can write to the + device. Any initiator can read from the device. + + - PR_EXCLUSIVE_ACCESS + + Only the initiator that owns the reservation can access the + device. + + - PR_WRITE_EXCLUSIVE_REG_ONLY + + Only initiators with a registered key can write to the device. + Any initiator can read from the device. + + - PR_EXCLUSIVE_ACCESS_REG_ONLY + + Only initiators with a registered key can access the device. + + - PR_WRITE_EXCLUSIVE_ALL_REGS + + Only initiators with a registered key can write to the device. + Any initiator can read from the device. + All initiators with a registered key are considered reservation + holders. + Please reference the SPC spec on the meaning of a reservation + holder if you want to use this type. + + - PR_EXCLUSIVE_ACCESS_ALL_REGS + + Only initiators with a registered key can access the device. + All initiators with a registered key are considered reservation + holders. + Please reference the SPC spec on the meaning of a reservation + holder if you want to use this type. + + +The following ioctls are supported: +----------------------------------- + +1. IOC_PR_REGISTER + +This ioctl command registers a new reservation if the new_key argument +is non-zero.
If no existing reservation exists, old_key must be zero; +if an existing reservation should be replaced, old_key must contain +the old reservation key. + +If the new_key argument is 0, it unregisters the existing reservation passed +in old_key. + + +2. IOC_PR_RESERVE + +This ioctl command reserves the device and thus restricts access for other +devices based on the type argument. The key argument must be the existing +reservation key for the device as acquired by the IOC_PR_REGISTER, +IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands. + + +3. IOC_PR_RELEASE + +This ioctl command releases the reservation specified by key and type +and thus removes any access restriction implied by it. + + +4. IOC_PR_PREEMPT + +This ioctl command releases the existing reservation referred to by +old_key and replaces it with a new reservation of type for the +reservation key new_key. + + +5. IOC_PR_PREEMPT_ABORT + +This ioctl command works like IOC_PR_PREEMPT except that it also aborts +any outstanding command sent over a connection identified by old_key. + +6. IOC_PR_CLEAR + +This ioctl command unregisters both key and any other reservation key +registered with the device and drops any existing reservation. + + +Flags +----- + +All the ioctls have a flags field. Currently only one flag is supported: + + - PR_FL_IGNORE_KEY + + Ignore the existing reservation key. This is commonly supported for + IOC_PR_REGISTER, and some implementations may support the flag for + IOC_PR_RESERVE. + +For all unknown flags the kernel will return -EOPNOTSUPP.
diff --git a/block/ioctl.c b/block/ioctl.c index df62b47d2379..0918aed2d847 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) @@ -295,6 +296,96 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, */ EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); +static int blkdev_pr_register(struct block_device *bdev, + struct pr_registration __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_registration reg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ops || !ops->pr_register) + return -EOPNOTSUPP; + if (copy_from_user(®, arg, sizeof(reg))) + return -EFAULT; + + if (reg.flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); +} + +static int blkdev_pr_reserve(struct block_device *bdev, + struct pr_reservation __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_reservation rsv; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ops || !ops->pr_reserve) + return -EOPNOTSUPP; + if (copy_from_user(&rsv, arg, sizeof(rsv))) + return -EFAULT; + + if (rsv.flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); +} + +static int blkdev_pr_release(struct block_device *bdev, + struct pr_reservation __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_reservation rsv; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ops || !ops->pr_release) + return -EOPNOTSUPP; + if (copy_from_user(&rsv, arg, sizeof(rsv))) + return -EFAULT; + + if (rsv.flags) + return -EOPNOTSUPP; + return ops->pr_release(bdev, rsv.key, rsv.type); +} + +static int blkdev_pr_preempt(struct block_device *bdev, + struct pr_preempt __user *arg, bool abort) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct 
pr_preempt p; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ops || !ops->pr_preempt) + return -EOPNOTSUPP; + if (copy_from_user(&p, arg, sizeof(p))) + return -EFAULT; + + if (p.flags) + return -EOPNOTSUPP; + return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); +} + +static int blkdev_pr_clear(struct block_device *bdev, + struct pr_clear __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_clear c; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ops || !ops->pr_clear) + return -EOPNOTSUPP; + if (copy_from_user(&c, arg, sizeof(c))) + return -EFAULT; + + if (c.flags) + return -EOPNOTSUPP; + return ops->pr_clear(bdev, c.key); +} + /* * Is it an unrecognized ioctl? The correct returns are either * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a @@ -477,6 +568,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKTRACESETUP: case BLKTRACETEARDOWN: return blk_trace_ioctl(bdev, cmd, argp); + case IOC_PR_REGISTER: + return blkdev_pr_register(bdev, argp); + case IOC_PR_RESERVE: + return blkdev_pr_reserve(bdev, argp); + case IOC_PR_RELEASE: + return blkdev_pr_release(bdev, argp); + case IOC_PR_PREEMPT: + return blkdev_pr_preempt(bdev, argp, false); + case IOC_PR_PREEMPT_ABORT: + return blkdev_pr_preempt(bdev, argp, true); + case IOC_PR_CLEAR: + return blkdev_pr_clear(bdev, argp); default: return __blkdev_driver_ioctl(bdev, mode, cmd, arg); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 19c2e947d4d1..fe25da05e823 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -35,6 +35,7 @@ struct sg_io_hdr; struct bsg_job; struct blkcg_gq; struct blk_flush_queue; +struct pr_ops; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -1633,6 +1634,7 @@ struct block_device_operations { /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned 
long); struct module *owner; + const struct pr_ops *pr_ops; }; extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, diff --git a/include/linux/pr.h b/include/linux/pr.h new file mode 100644 index 000000000000..65c01c10b335 --- /dev/null +++ b/include/linux/pr.h @@ -0,0 +1,18 @@ +#ifndef LINUX_PR_H +#define LINUX_PR_H + +#include + +struct pr_ops { + int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key, + u32 flags); + int (*pr_reserve)(struct block_device *bdev, u64 key, + enum pr_type type, u32 flags); + int (*pr_release)(struct block_device *bdev, u64 key, + enum pr_type type); + int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key, + enum pr_type type, bool abort); + int (*pr_clear)(struct block_device *bdev, u64 key); +}; + +#endif /* LINUX_PR_H */ diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h new file mode 100644 index 000000000000..57d7c0f916b6 --- /dev/null +++ b/include/uapi/linux/pr.h @@ -0,0 +1,48 @@ +#ifndef _UAPI_PR_H +#define _UAPI_PR_H + +enum pr_type { + PR_WRITE_EXCLUSIVE = 1, + PR_EXCLUSIVE_ACCESS = 2, + PR_WRITE_EXCLUSIVE_REG_ONLY = 3, + PR_EXCLUSIVE_ACCESS_REG_ONLY = 4, + PR_WRITE_EXCLUSIVE_ALL_REGS = 5, + PR_EXCLUSIVE_ACCESS_ALL_REGS = 6, +}; + +struct pr_reservation { + __u64 key; + __u32 type; + __u32 flags; +}; + +struct pr_registration { + __u64 old_key; + __u64 new_key; + __u32 flags; + __u32 __pad; +}; + +struct pr_preempt { + __u64 old_key; + __u64 new_key; + __u32 type; + __u32 flags; +}; + +struct pr_clear { + __u64 key; + __u32 flags; + __u32 __pad; +}; + +#define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */ + +#define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration) +#define IOC_PR_RESERVE _IOW('p', 201, struct pr_reservation) +#define IOC_PR_RELEASE _IOW('p', 202, struct pr_reservation) +#define IOC_PR_PREEMPT _IOW('p', 203, struct pr_preempt) +#define IOC_PR_PREEMPT_ABORT _IOW('p', 204, struct pr_preempt) +#define IOC_PR_CLEAR _IOW('p', 205, 
struct pr_clear) + +#endif /* _UAPI_PR_H */ -- cgit v1.2.3