Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 3
-rw-r--r-- | block/bdev.c | 69
-rw-r--r-- | block/bio-integrity.c | 59
-rw-r--r-- | block/bio.c | 142
-rw-r--r-- | block/blk-cgroup.c | 34
-rw-r--r-- | block/blk-core.c | 7
-rw-r--r-- | block/blk-crypto-fallback.c | 36
-rw-r--r-- | block/blk-flush.c | 26
-rw-r--r-- | block/blk-iocost.c | 5
-rw-r--r-- | block/blk-iolatency.c | 35
-rw-r--r-- | block/blk-mq.c | 68
-rw-r--r-- | block/blk-settings.c | 7
-rw-r--r-- | block/blk-sysfs.c | 21
-rw-r--r-- | block/blk.h | 10
-rw-r--r-- | block/disk-events.c | 23
-rw-r--r-- | block/elevator.c | 3
-rw-r--r-- | block/fops.c | 150
-rw-r--r-- | block/genhd.c | 45
-rw-r--r-- | block/ioctl.c | 9
-rw-r--r-- | block/mq-deadline.c | 3
-rw-r--r-- | block/opal_proto.h | 4
-rw-r--r-- | block/partitions/cmdline.c | 12
-rw-r--r-- | block/partitions/core.c | 5
-rw-r--r-- | block/sed-opal.c | 252
24 files changed, 705 insertions(+), 323 deletions(-)
diff --git a/block/Kconfig b/block/Kconfig index 86122e459fe0..f1364d1c0d93 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -5,6 +5,7 @@ menuconfig BLOCK bool "Enable the block layer" if EXPERT default y + select FS_IOMAP select SBITMAP help Provide block layer support for the kernel. @@ -183,6 +184,8 @@ config BLK_DEBUG_FS_ZONED config BLK_SED_OPAL bool "Logic for interfacing with Opal enabled SEDs" + depends on KEYS + select PSERIES_PLPKS if PPC_PSERIES help Builds Logic for interfacing with Opal enabled controllers. Enabling this option enables users to setup/unlock/lock diff --git a/block/bdev.c b/block/bdev.c index 979e28a46b98..f3b13aa1b7d4 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -206,23 +206,6 @@ int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend) } EXPORT_SYMBOL(sync_blockdev_range); -/* - * Write out and wait upon all dirty data associated with this - * device. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_bdev(struct block_device *bdev) -{ - struct super_block *sb = get_super(bdev); - if (sb) { - int res = sync_filesystem(sb); - drop_super(sb); - return res; - } - return sync_blockdev(bdev); -} -EXPORT_SYMBOL(fsync_bdev); - /** * freeze_bdev - lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock @@ -248,9 +231,9 @@ int freeze_bdev(struct block_device *bdev) if (!sb) goto sync; if (sb->s_op->freeze_super) - error = sb->s_op->freeze_super(sb); + error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE); else - error = freeze_super(sb); + error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); deactivate_super(sb); if (error) { @@ -291,9 +274,9 @@ int thaw_bdev(struct block_device *bdev) goto out; if (sb->s_op->thaw_super) - error = sb->s_op->thaw_super(sb); + error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE); else - error = thaw_super(sb); + error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); if (error) bdev->bd_fsfreeze_count++; else @@ -960,26 +943,38 @@ out_path_put: } EXPORT_SYMBOL(lookup_bdev); -int __invalidate_device(struct block_device *bdev, bool kill_dirty) +/** + * bdev_mark_dead - mark a block device as dead + * @bdev: block device to operate on + * @surprise: indicate a surprise removal + * + * Tell the file system that this devices or media is dead. If @surprise is set + * to %true the device or media is already gone, if not we are preparing for an + * orderly removal. + * + * This calls into the file system, which then typicall syncs out all dirty data + * and writes back inodes and then invalidates any cached data in the inodes on + * the file system. In addition we also invalidate the block device mapping. + */ +void bdev_mark_dead(struct block_device *bdev, bool surprise) { - struct super_block *sb = get_super(bdev); - int res = 0; + mutex_lock(&bdev->bd_holder_lock); + if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead) + bdev->bd_holder_ops->mark_dead(bdev, surprise); + else + sync_blockdev(bdev); + mutex_unlock(&bdev->bd_holder_lock); - if (sb) { - /* - * no need to lock the super, get_super holds the - * read mutex so the filesystem cannot go away - * under us (->put_super runs with the write lock - * hold). 
- */ - shrink_dcache_sb(sb); - res = invalidate_inodes(sb, kill_dirty); - drop_super(sb); - } invalidate_bdev(bdev); - return res; } -EXPORT_SYMBOL(__invalidate_device); +#ifdef CONFIG_DASD_MODULE +/* + * Drivers should not use this directly, but the DASD driver has historically + * had a shutdown to offline mode that doesn't actually remove the gendisk + * that otherwise looks a lot like a safe device removal. + */ +EXPORT_SYMBOL_GPL(bdev_mark_dead); +#endif void sync_bdevs(bool wait) { diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 4533eb491661..ec8ac8cf6e1b 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -123,20 +123,38 @@ void bio_integrity_free(struct bio *bio) int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { + struct request_queue *q = bdev_get_queue(bio->bi_bdev); struct bio_integrity_payload *bip = bio_integrity(bio); - if (bip->bip_vcnt >= bip->bip_max_vcnt) { - printk(KERN_ERR "%s: bip_vec full\n", __func__); + if (((bip->bip_iter.bi_size + len) >> SECTOR_SHIFT) > + queue_max_hw_sectors(q)) return 0; - } - if (bip->bip_vcnt && - bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits, - &bip->bip_vec[bip->bip_vcnt - 1], offset)) - return 0; + if (bip->bip_vcnt > 0) { + struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1]; + bool same_page = false; + + if (bvec_try_merge_hw_page(q, bv, page, len, offset, + &same_page)) { + bip->bip_iter.bi_size += len; + return len; + } + + if (bip->bip_vcnt >= + min(bip->bip_max_vcnt, queue_max_integrity_segments(q))) + return 0; + + /* + * If the queue doesn't support SG gaps and adding this segment + * would create a gap, disallow it. + */ + if (bvec_gap_to_prev(&q->limits, bv, offset)) + return 0; + } bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset); bip->bip_vcnt++; + bip->bip_iter.bi_size += len; return len; } @@ -199,8 +217,6 @@ bool bio_integrity_prep(struct bio *bio) unsigned long start, end; unsigned int len, nr_pages; unsigned int bytes, offset, i; - unsigned int intervals; - blk_status_t status; if (!bi) return true; @@ -224,12 +240,10 @@ bool bio_integrity_prep(struct bio *bio) !(bi->flags & BLK_INTEGRITY_GENERATE)) return true; } - intervals = bio_integrity_intervals(bi, bio_sectors(bio)); /* Allocate kernel buffer for protection data */ - len = intervals * bi->tuple_size; + len = bio_integrity_bytes(bi, bio_sectors(bio)); buf = kmalloc(len, GFP_NOIO); - status = BLK_STS_RESOURCE; if (unlikely(buf == NULL)) { printk(KERN_ERR "could not allocate integrity buffer\n"); goto err_end_io; @@ -244,12 +258,10 @@ bool bio_integrity_prep(struct bio *bio) if (IS_ERR(bip)) { printk(KERN_ERR "could not allocate data integrity bioset\n"); kfree(buf); - status = BLK_STS_RESOURCE; goto err_end_io; } bip->bip_flags |= BIP_BLOCK_INTEGRITY; - bip->bip_iter.bi_size = len; bip_set_seed(bip, bio->bi_iter.bi_sector); if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM) @@ -257,28 +269,18 @@ bool bio_integrity_prep(struct bio *bio) /* Map it */ offset = offset_in_page(buf); - for (i = 0 ; i < nr_pages ; i++) { - int ret; + for (i = 0; i < nr_pages && len > 0; i++) { bytes = PAGE_SIZE - offset; - if (len <= 0) - break; - if (bytes > len) bytes = len; - ret = bio_integrity_add_page(bio, virt_to_page(buf), - bytes, offset); - - if (ret == 0) { + if (bio_integrity_add_page(bio, virt_to_page(buf), + bytes, offset) < bytes) { printk(KERN_ERR "could not attach integrity payload\n"); - status = BLK_STS_RESOURCE; goto err_end_io; } - if (ret < bytes) - break; - buf += 
bytes; len -= bytes; offset = 0; @@ -294,10 +296,9 @@ bool bio_integrity_prep(struct bio *bio) return true; err_end_io: - bio->bi_status = status; + bio->bi_status = BLK_STS_RESOURCE; bio_endio(bio); return false; - } EXPORT_SYMBOL(bio_integrity_prep); diff --git a/block/bio.c b/block/bio.c index 8672179213b9..816d412c06e9 100644 --- a/block/bio.c +++ b/block/bio.c @@ -606,15 +606,15 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask) } EXPORT_SYMBOL(bio_kmalloc); -void zero_fill_bio(struct bio *bio) +void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start) { struct bio_vec bv; struct bvec_iter iter; - bio_for_each_segment(bv, bio, iter) + __bio_for_each_segment(bv, bio, iter, start) memzero_bvec(&bv); } -EXPORT_SYMBOL(zero_fill_bio); +EXPORT_SYMBOL(zero_fill_bio_iter); /** * bio_truncate - truncate the bio to small size of @new_size @@ -903,9 +903,8 @@ static inline bool bio_full(struct bio *bio, unsigned len) return false; } -static inline bool page_is_mergeable(const struct bio_vec *bv, - struct page *page, unsigned int len, unsigned int off, - bool *same_page) +static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page, + unsigned int len, unsigned int off, bool *same_page) { size_t bv_end = bv->bv_offset + bv->bv_len; phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1; @@ -919,49 +918,15 @@ static inline bool page_is_mergeable(const struct bio_vec *bv, return false; *same_page = ((vec_end_addr & PAGE_MASK) == page_addr); - if (*same_page) - return true; - else if (IS_ENABLED(CONFIG_KMSAN)) - return false; - return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE); -} - -/** - * __bio_try_merge_page - try appending data to an existing bvec. - * @bio: destination bio - * @page: start page to add - * @len: length of the data to add - * @off: offset of the data relative to @page - * @same_page: return if the segment has been merged inside the same page - * - * Try to add the data at @page + @off to the last bvec of @bio. This is a - * useful optimisation for file systems with a block size smaller than the - * page size. - * - * Warn if (@len, @off) crosses pages in case that @same_page is true. - * - * Return %true on success or %false on failure. - */ -static bool __bio_try_merge_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int off, bool *same_page) -{ - if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) - return false; - - if (bio->bi_vcnt > 0) { - struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; - - if (page_is_mergeable(bv, page, len, off, same_page)) { - if (bio->bi_iter.bi_size > UINT_MAX - len) { - *same_page = false; - return false; - } - bv->bv_len += len; - bio->bi_iter.bi_size += len; - return true; - } + if (!*same_page) { + if (IS_ENABLED(CONFIG_KMSAN)) + return false; + if (bv->bv_page + bv_end / PAGE_SIZE != page + off / PAGE_SIZE) + return false; } - return false; + + bv->bv_len += len; + return true; } /* @@ -969,11 +934,10 @@ static bool __bio_try_merge_page(struct bio *bio, struct page *page, * size limit. This is not for normal read/write bios, but for passthrough * or Zone Append operations that we can't split. 
*/ -static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio, - struct page *page, unsigned len, - unsigned offset, bool *same_page) +bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv, + struct page *page, unsigned len, unsigned offset, + bool *same_page) { - struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; unsigned long mask = queue_segment_boundary(q); phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset; phys_addr_t addr2 = page_to_phys(page) + offset + len - 1; @@ -982,7 +946,7 @@ static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio, return false; if (bv->bv_len + len > queue_max_segment_size(q)) return false; - return __bio_try_merge_page(bio, page, len, offset, same_page); + return bvec_try_merge_page(bv, page, len, offset, same_page); } /** @@ -1002,33 +966,33 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, unsigned int max_sectors, bool *same_page) { - struct bio_vec *bvec; - if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) return 0; - if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors) + if (((bio->bi_iter.bi_size + len) >> SECTOR_SHIFT) > max_sectors) return 0; if (bio->bi_vcnt > 0) { - if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page)) + struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; + + if (bvec_try_merge_hw_page(q, bv, page, len, offset, + same_page)) { + bio->bi_iter.bi_size += len; return len; + } + + if (bio->bi_vcnt >= + min(bio->bi_max_vecs, queue_max_segments(q))) + return 0; /* * If the queue doesn't support SG gaps and adding this segment * would create a gap, disallow it. */ - bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; - if (bvec_gap_to_prev(&q->limits, bvec, offset)) + if (bvec_gap_to_prev(&q->limits, bv, offset)) return 0; } - if (bio_full(bio, len)) - return 0; - - if (bio->bi_vcnt >= queue_max_segments(q)) - return 0; - bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset); bio->bi_vcnt++; bio->bi_iter.bi_size += len; @@ -1129,11 +1093,21 @@ int bio_add_page(struct bio *bio, struct page *page, { bool same_page = false; - if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) { - if (bio_full(bio, len)) - return 0; - __bio_add_page(bio, page, len, offset); + if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) + return 0; + if (bio->bi_iter.bi_size > UINT_MAX - len) + return 0; + + if (bio->bi_vcnt > 0 && + bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1], + page, len, offset, &same_page)) { + bio->bi_iter.bi_size += len; + return len; } + + if (bio->bi_vcnt >= bio->bi_max_vecs) + return 0; + __bio_add_page(bio, page, len, offset); return len; } EXPORT_SYMBOL(bio_add_page); @@ -1207,13 +1181,18 @@ static int bio_iov_add_page(struct bio *bio, struct page *page, { bool same_page = false; - if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) { - __bio_add_page(bio, page, len, offset); + if (WARN_ON_ONCE(bio->bi_iter.bi_size > UINT_MAX - len)) + return -EIO; + + if (bio->bi_vcnt > 0 && + bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1], + page, len, offset, &same_page)) { + bio->bi_iter.bi_size += len; + if (same_page) + bio_release_page(bio, page); return 0; } - - if (same_page) - bio_release_page(bio, page); + __bio_add_page(bio, page, len, offset); return 0; } @@ -1252,7 +1231,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) struct page **pages = (struct page **)bv; ssize_t size, left; unsigned len, i = 0; - size_t offset, trim; + 
size_t offset; int ret = 0; /* @@ -1281,10 +1260,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE); - trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1); - iov_iter_revert(iter, trim); + if (bio->bi_bdev) { + size_t trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1); + iov_iter_revert(iter, trim); + size -= trim; + } - size -= trim; if (unlikely(!size)) { ret = -EFAULT; goto out; @@ -1337,6 +1318,9 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { int ret = 0; + if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) + return -EIO; + if (iov_iter_is_bvec(iter)) { bio_iov_bvec_set(bio, iter); iov_iter_advance(iter, bio->bi_iter.bi_size); @@ -1490,6 +1474,7 @@ void bio_set_pages_dirty(struct bio *bio) set_page_dirty_lock(bvec->bv_page); } } +EXPORT_SYMBOL_GPL(bio_set_pages_dirty); /* * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. @@ -1549,6 +1534,7 @@ defer: spin_unlock_irqrestore(&bio_dirty_lock, flags); schedule_work(&bio_dirty_work); } +EXPORT_SYMBOL_GPL(bio_check_pages_dirty); static inline bool bio_remaining_done(struct bio *bio) { diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index fc49be622e05..4a42ea2972ad 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -136,7 +136,9 @@ static void blkg_free_workfn(struct work_struct *work) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->parent) blkg_put(blkg->parent); + spin_lock_irq(&q->queue_lock); list_del_init(&blkg->q_node); + spin_unlock_irq(&q->queue_lock); mutex_unlock(&q->blkcg_mutex); blk_put_queue(q); @@ -1509,7 +1511,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) retry: spin_lock_irq(&q->queue_lock); - /* blkg_list is pushed at the head, reverse walk to allocate parents first */ + /* blkg_list is pushed at the head, reverse walk to initialize parents first */ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { struct blkg_policy_data *pd; @@ -1547,21 +1549,20 @@ retry: goto enomem; } - blkg->pd[pol->plid] = pd; + spin_lock(&blkg->blkcg->lock); + pd->blkg = blkg; pd->plid = pol->plid; - pd->online = false; - } + blkg->pd[pol->plid] = pd; - /* all allocated, init in the same order */ - if (pol->pd_init_fn) - list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) - pol->pd_init_fn(blkg->pd[pol->plid]); + if (pol->pd_init_fn) + pol->pd_init_fn(pd); - list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { if (pol->pd_online_fn) - pol->pd_online_fn(blkg->pd[pol->plid]); - blkg->pd[pol->plid]->online = true; + pol->pd_online_fn(pd); + pd->online = true; + + spin_unlock(&blkg->blkcg->lock); } __set_bit(pol->plid, q->blkcg_pols); @@ -1578,14 +1579,19 @@ out: return ret; enomem: - /* alloc failed, nothing's initialized yet, free everything */ + /* alloc failed, take down everything */ spin_lock_irq(&q->queue_lock); list_for_each_entry(blkg, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; + struct blkg_policy_data *pd; spin_lock(&blkcg->lock); - if (blkg->pd[pol->plid]) { - pol->pd_free_fn(blkg->pd[pol->plid]); + pd = blkg->pd[pol->plid]; + if (pd) { + if (pd->online && pol->pd_offline_fn) + pol->pd_offline_fn(pd); + pd->online = false; + pol->pd_free_fn(pd); blkg->pd[pol->plid] = NULL; } spin_unlock(&blkcg->lock); diff --git a/block/blk-core.c b/block/blk-core.c index 90de50082146..9d51e9894ece 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -208,6 +208,7 @@ const char *blk_status_to_str(blk_status_t status) return 
"<null>"; return blk_errors[idx].name; } +EXPORT_SYMBOL_GPL(blk_status_to_str); /** * blk_sync_queue - cancel any pending callbacks on a queue @@ -722,14 +723,9 @@ void submit_bio_noacct(struct bio *bio) struct block_device *bdev = bio->bi_bdev; struct request_queue *q = bdev_get_queue(bdev); blk_status_t status = BLK_STS_IOERR; - struct blk_plug *plug; might_sleep(); - plug = blk_mq_plug(bio); - if (plug && plug->nowait) - bio->bi_opf |= REQ_NOWAIT; - /* * For a REQ_NOWAIT based request, return -EOPNOTSUPP * if queue does not support NOWAIT. @@ -1059,7 +1055,6 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) plug->rq_count = 0; plug->multiple_queues = false; plug->has_elevator = false; - plug->nowait = false; INIT_LIST_HEAD(&plug->cb_list); /* diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index ad9844c5b40c..e6468eab2681 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -78,7 +78,7 @@ static struct blk_crypto_fallback_keyslot { struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX]; } *blk_crypto_keyslots; -static struct blk_crypto_profile blk_crypto_fallback_profile; +static struct blk_crypto_profile *blk_crypto_fallback_profile; static struct workqueue_struct *blk_crypto_wq; static mempool_t *blk_crypto_bounce_page_pool; static struct bio_set crypto_bio_split; @@ -292,7 +292,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr) * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for * this bio's algorithm and key. */ - blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile, + blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile, bc->bc_key, &slot); if (blk_st != BLK_STS_OK) { src_bio->bi_status = blk_st; @@ -395,7 +395,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work) * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for * this bio's algorithm and key. */ - blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile, + blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile, bc->bc_key, &slot); if (blk_st != BLK_STS_OK) { bio->bi_status = blk_st; @@ -499,7 +499,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr) return false; } - if (!__blk_crypto_cfg_supported(&blk_crypto_fallback_profile, + if (!__blk_crypto_cfg_supported(blk_crypto_fallback_profile, &bc->bc_key->crypto_cfg)) { bio->bi_status = BLK_STS_NOTSUPP; return false; @@ -526,7 +526,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr) int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key) { - return __blk_crypto_evict_key(&blk_crypto_fallback_profile, key); + return __blk_crypto_evict_key(blk_crypto_fallback_profile, key); } static bool blk_crypto_fallback_inited; @@ -534,7 +534,6 @@ static int blk_crypto_fallback_init(void) { int i; int err; - struct blk_crypto_profile *profile = &blk_crypto_fallback_profile; if (blk_crypto_fallback_inited) return 0; @@ -545,18 +544,27 @@ static int blk_crypto_fallback_init(void) if (err) goto out; - err = blk_crypto_profile_init(profile, blk_crypto_num_keyslots); - if (err) + /* Dynamic allocation is needed because of lockdep_register_key(). 
*/ + blk_crypto_fallback_profile = + kzalloc(sizeof(*blk_crypto_fallback_profile), GFP_KERNEL); + if (!blk_crypto_fallback_profile) { + err = -ENOMEM; goto fail_free_bioset; + } + + err = blk_crypto_profile_init(blk_crypto_fallback_profile, + blk_crypto_num_keyslots); + if (err) + goto fail_free_profile; err = -ENOMEM; - profile->ll_ops = blk_crypto_fallback_ll_ops; - profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE; + blk_crypto_fallback_profile->ll_ops = blk_crypto_fallback_ll_ops; + blk_crypto_fallback_profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE; /* All blk-crypto modes have a crypto API fallback. */ for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++) - profile->modes_supported[i] = 0xFFFFFFFF; - profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0; + blk_crypto_fallback_profile->modes_supported[i] = 0xFFFFFFFF; + blk_crypto_fallback_profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0; blk_crypto_wq = alloc_workqueue("blk_crypto_wq", WQ_UNBOUND | WQ_HIGHPRI | @@ -597,7 +605,9 @@ fail_free_keyslots: fail_free_wq: destroy_workqueue(blk_crypto_wq); fail_destroy_profile: - blk_crypto_profile_destroy(profile); + blk_crypto_profile_destroy(blk_crypto_fallback_profile); +fail_free_profile: + kfree(blk_crypto_fallback_profile); fail_free_bioset: bioset_exit(&crypto_bio_split); out: diff --git a/block/blk-flush.c b/block/blk-flush.c index 8220517c2d67..e73dc22d05c1 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -183,13 +183,13 @@ static void blk_flush_complete_seq(struct request *rq, /* queue for flush */ if (list_empty(pending)) fq->flush_pending_since = jiffies; - list_move_tail(&rq->flush.list, pending); + list_move_tail(&rq->queuelist, pending); break; case REQ_FSEQ_DATA: - list_move_tail(&rq->flush.list, &fq->flush_data_in_flight); + fq->flush_data_in_flight++; spin_lock(&q->requeue_lock); - list_add(&rq->queuelist, &q->requeue_list); + list_move(&rq->queuelist, &q->requeue_list); spin_unlock(&q->requeue_lock); blk_mq_kick_requeue_list(q); break; @@ -201,7 +201,7 @@ static void blk_flush_complete_seq(struct request *rq, * flush data request completion path. Restore @rq for * normal completion and end it. */ - list_del_init(&rq->flush.list); + list_del_init(&rq->queuelist); blk_flush_restore_request(rq); blk_mq_end_request(rq, error); break; @@ -257,7 +257,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq, fq->flush_running_idx ^= 1; /* and push the waiting requests to the next stage */ - list_for_each_entry_safe(rq, n, running, flush.list) { + list_for_each_entry_safe(rq, n, running, queuelist) { unsigned int seq = blk_flush_cur_seq(rq); BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); @@ -291,7 +291,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, { struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; struct request *first_rq = - list_first_entry(pending, struct request, flush.list); + list_first_entry(pending, struct request, queuelist); struct request *flush_rq = fq->flush_rq; /* C1 described at the top of this file */ @@ -299,7 +299,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, return; /* C2 and C3 */ - if (!list_empty(&fq->flush_data_in_flight) && + if (fq->flush_data_in_flight && time_before(jiffies, fq->flush_pending_since + FLUSH_PENDING_TIMEOUT)) return; @@ -374,6 +374,12 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq, * the comment in flush_end_io(). 
*/ spin_lock_irqsave(&fq->mq_flush_lock, flags); + fq->flush_data_in_flight--; + /* + * May have been corrupted by rq->rq_next reuse, we need to + * re-initialize rq->queuelist before reusing it here. + */ + INIT_LIST_HEAD(&rq->queuelist); blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error); spin_unlock_irqrestore(&fq->mq_flush_lock, flags); @@ -384,7 +390,6 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq, static void blk_rq_init_flush(struct request *rq) { rq->flush.seq = 0; - INIT_LIST_HEAD(&rq->flush.list); rq->rq_flags |= RQF_FLUSH_SEQ; rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ rq->end_io = mq_flush_data_end_io; @@ -443,9 +448,9 @@ bool blk_insert_flush(struct request *rq) * the post flush, and then just pass the command on. */ blk_rq_init_flush(rq); - rq->flush.seq |= REQ_FSEQ_POSTFLUSH; + rq->flush.seq |= REQ_FSEQ_PREFLUSH; spin_lock_irq(&fq->mq_flush_lock); - list_move_tail(&rq->flush.list, &fq->flush_data_in_flight); + fq->flush_data_in_flight++; spin_unlock_irq(&fq->mq_flush_lock); return false; default: @@ -496,7 +501,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, INIT_LIST_HEAD(&fq->flush_queue[0]); INIT_LIST_HEAD(&fq->flush_queue[1]); - INIT_LIST_HEAD(&fq->flush_data_in_flight); return fq; diff --git a/block/blk-iocost.c b/block/blk-iocost.c index dd64e2066f01..089fcb9cfce3 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3301,11 +3301,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, if (qos[QOS_MIN] > qos[QOS_MAX]) goto einval; - if (enable) { + if (enable && !ioc->enabled) { blk_stat_enable_accounting(disk->queue); blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); ioc->enabled = true; - } else { + } else if (!enable && ioc->enabled) { + blk_stat_disable_accounting(disk->queue); blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); ioc->enabled = false; } diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index fd5fec989e39..c1a6aba1d59e 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -824,29 +824,6 @@ static void iolatency_clear_scaling(struct blkcg_gq *blkg) } } -static int blk_iolatency_try_init(struct blkg_conf_ctx *ctx) -{ - static DEFINE_MUTEX(init_mutex); - int ret; - - ret = blkg_conf_open_bdev(ctx); - if (ret) - return ret; - - /* - * blk_iolatency_init() may fail after rq_qos_add() succeeds which can - * confuse iolat_rq_qos() test. Make the test and init atomic. - */ - mutex_lock(&init_mutex); - - if (!iolat_rq_qos(ctx->bdev->bd_queue)) - ret = blk_iolatency_init(ctx->bdev->bd_disk); - - mutex_unlock(&init_mutex); - - return ret; -} - static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { @@ -861,7 +838,17 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, blkg_conf_init(&ctx, buf); - ret = blk_iolatency_try_init(&ctx); + ret = blkg_conf_open_bdev(&ctx); + if (ret) + goto out; + + /* + * blk_iolatency_init() may fail after rq_qos_add() succeeds which can + * confuse iolat_rq_qos() test. Make the test and init atomic. 
+ */ + lockdep_assert_held(&ctx.bdev->bd_queue->rq_qos_mutex); + if (!iolat_rq_qos(ctx.bdev->bd_queue)) + ret = blk_iolatency_init(ctx.bdev->bd_disk); if (ret) goto out; diff --git a/block/blk-mq.c b/block/blk-mq.c index b04ff6f56926..ec922c6bccbe 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -43,6 +43,7 @@ #include "blk-ioprio.h" static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); +static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd); static void blk_mq_insert_request(struct request *rq, blk_insert_t flags); static void blk_mq_request_bypass_insert(struct request *rq, @@ -681,6 +682,21 @@ out_queue_exit: } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); +static void blk_mq_finish_request(struct request *rq) +{ + struct request_queue *q = rq->q; + + if (rq->rq_flags & RQF_USE_SCHED) { + q->elevator->type->ops.finish_request(rq); + /* + * For postflush request that may need to be + * completed twice, we should clear this flag + * to avoid double finish_request() on the rq. + */ + rq->rq_flags &= ~RQF_USE_SCHED; + } +} + static void __blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; @@ -707,9 +723,7 @@ void blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; - if ((rq->rq_flags & RQF_USE_SCHED) && - q->elevator->type->ops.finish_request) - q->elevator->type->ops.finish_request(rq); + blk_mq_finish_request(rq); if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->disk->bdi); @@ -1020,6 +1034,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) if (blk_mq_need_time_stamp(rq)) __blk_mq_end_request_acct(rq, ktime_get_ns()); + blk_mq_finish_request(rq); + if (rq->end_io) { rq_qos_done(rq->q, rq); if (rq->end_io(rq, error) == RQ_END_IO_FREE) @@ -1074,6 +1090,8 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) if (iob->need_ts) __blk_mq_end_request_acct(rq, now); + blk_mq_finish_request(rq); + rq_qos_done(rq->q, rq); /* @@ -1157,15 +1175,11 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq) static void blk_mq_complete_send_ipi(struct request *rq) { - struct llist_head *list; unsigned int cpu; cpu = rq->mq_ctx->cpu; - list = &per_cpu(blk_cpu_done, cpu); - if (llist_add(&rq->ipi_list, list)) { - INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); - smp_call_function_single_async(cpu, &rq->csd); - } + if (llist_add(&rq->ipi_list, &per_cpu(blk_cpu_done, cpu))) + smp_call_function_single_async(cpu, &per_cpu(blk_cpu_csd, cpu)); } static void blk_mq_raise_softirq(struct request *rq) @@ -1326,7 +1340,7 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head) } blk_mq_insert_request(rq, at_head ? 
BLK_MQ_INSERT_AT_HEAD : 0); - blk_mq_run_hw_queue(hctx, false); + blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); @@ -2225,6 +2239,8 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) */ WARN_ON_ONCE(!async && in_interrupt()); + might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING); + /* * When queue is quiesced, we may be switching io scheduler, or * updating nr_hw_queues, or other things, and we can't run queue @@ -2240,8 +2256,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) if (!need_run) return; - if (async || (hctx->flags & BLK_MQ_F_BLOCKING) || - !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { + if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { blk_mq_delay_run_hw_queue(hctx, 0); return; } @@ -2376,7 +2391,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx) { clear_bit(BLK_MQ_S_STOPPED, &hctx->state); - blk_mq_run_hw_queue(hctx, false); + blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); } EXPORT_SYMBOL(blk_mq_start_hw_queue); @@ -2406,7 +2421,8 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async) unsigned long i; queue_for_each_hw_ctx(q, hctx, i) - blk_mq_start_stopped_hw_queue(hctx, async); + blk_mq_start_stopped_hw_queue(hctx, async || + (hctx->flags & BLK_MQ_F_BLOCKING)); } EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues); @@ -2464,6 +2480,8 @@ static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, list_for_each_entry(rq, list, queuelist) { BUG_ON(rq->mq_ctx != ctx); trace_block_rq_insert(rq); + if (rq->cmd_flags & REQ_NOWAIT) + run_queue_async = true; } spin_lock(&ctx->lock); @@ -2624,7 +2642,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) { blk_mq_insert_request(rq, 0); - blk_mq_run_hw_queue(hctx, false); + blk_mq_run_hw_queue(hctx, rq->cmd_flags & REQ_NOWAIT); return; } @@ -4385,9 +4403,13 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, int new_nr_hw_queues) { struct blk_mq_tags **new_tags; + int i; - if (set->nr_hw_queues >= new_nr_hw_queues) + if (set->nr_hw_queues >= new_nr_hw_queues) { + for (i = new_nr_hw_queues; i < set->nr_hw_queues; i++) + __blk_mq_free_map_and_rqs(set, i); goto done; + } new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); @@ -4399,6 +4421,16 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, sizeof(*set->tags)); kfree(set->tags); set->tags = new_tags; + + for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { + if (!__blk_mq_alloc_map_and_rqs(set, i)) { + while (--i >= set->nr_hw_queues) + __blk_mq_free_map_and_rqs(set, i); + return -ENOMEM; + } + cond_resched(); + } + done: set->nr_hw_queues = new_nr_hw_queues; return 0; @@ -4732,7 +4764,6 @@ fallback: __blk_mq_free_map_and_rqs(set, i); set->nr_hw_queues = prev_nr_hw_queues; - blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); goto fallback; } blk_mq_map_swqueue(q); @@ -4836,6 +4867,9 @@ static int __init blk_mq_init(void) for_each_possible_cpu(i) init_llist_head(&per_cpu(blk_cpu_done, i)); + for_each_possible_cpu(i) + INIT_CSD(&per_cpu(blk_cpu_csd, i), + __blk_mq_complete_request_remote, NULL); open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD, diff --git a/block/blk-settings.c b/block/blk-settings.c index 4dd59059b788..0046b447268f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ 
-830,10 +830,13 @@ EXPORT_SYMBOL(blk_set_queue_depth); */ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) { - if (wc) + if (wc) { + blk_queue_flag_set(QUEUE_FLAG_HW_WC, q); blk_queue_flag_set(QUEUE_FLAG_WC, q); - else + } else { + blk_queue_flag_clear(QUEUE_FLAG_HW_WC, q); blk_queue_flag_clear(QUEUE_FLAG_WC, q); + } if (fua) blk_queue_flag_set(QUEUE_FLAG_FUA, q); else diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index afc797fb0dfc..63e481262336 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -449,21 +449,16 @@ static ssize_t queue_wc_show(struct request_queue *q, char *page) static ssize_t queue_wc_store(struct request_queue *q, const char *page, size_t count) { - int set = -1; - - if (!strncmp(page, "write back", 10)) - set = 1; - else if (!strncmp(page, "write through", 13) || - !strncmp(page, "none", 4)) - set = 0; - - if (set == -1) - return -EINVAL; - - if (set) + if (!strncmp(page, "write back", 10)) { + if (!test_bit(QUEUE_FLAG_HW_WC, &q->queue_flags)) + return -EINVAL; blk_queue_flag_set(QUEUE_FLAG_WC, q); - else + } else if (!strncmp(page, "write through", 13) || + !strncmp(page, "none", 4)) { blk_queue_flag_clear(QUEUE_FLAG_WC, q); + } else { + return -EINVAL; + } return count; } diff --git a/block/blk.h b/block/blk.h index 608c5dcc516b..08a358bc0919 100644 --- a/block/blk.h +++ b/block/blk.h @@ -15,15 +15,14 @@ struct elevator_type; extern struct dentry *blk_debugfs_root; struct blk_flush_queue { + spinlock_t mq_flush_lock; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; blk_status_t rq_status; unsigned long flush_pending_since; struct list_head flush_queue[2]; - struct list_head flush_data_in_flight; + unsigned long flush_data_in_flight; struct request *flush_rq; - - spinlock_t mq_flush_lock; }; bool is_flush_rq(struct request *req); @@ -76,6 +75,10 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, gfp_t gfp_mask); void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs); +bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv, + struct page *page, unsigned len, unsigned offset, + bool *same_page); + static inline bool biovec_phys_mergeable(struct request_queue *q, struct bio_vec *vec1, struct bio_vec *vec2) { @@ -251,7 +254,6 @@ static inline void bio_integrity_free(struct bio *bio) unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); -const char *blk_status_to_str(blk_status_t status); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs); diff --git a/block/disk-events.c b/block/disk-events.c index 0cfac464e6d1..422db8292d09 100644 --- a/block/disk-events.c +++ b/block/disk-events.c @@ -281,9 +281,7 @@ bool disk_check_media_change(struct gendisk *disk) if (!(events & DISK_EVENT_MEDIA_CHANGE)) return false; - if (__invalidate_device(disk->part0, true)) - pr_warn("VFS: busy inodes on changed media %s\n", - disk->disk_name); + bdev_mark_dead(disk->part0, true); set_bit(GD_NEED_PART_SCAN, &disk->state); return true; } @@ -294,25 +292,16 @@ EXPORT_SYMBOL(disk_check_media_change); * @disk: the disk which will raise the event * @events: the events to raise * - * Generate uevents for the disk. If DISK_EVENT_MEDIA_CHANGE is present, - * attempt to free all dentries and inodes and invalidates all block + * Should be called when the media changes for @disk. Generates a uevent + * and attempts to free all dentries and inodes and invalidates all block * device page cache entries in that case. 
- * - * Returns %true if DISK_EVENT_MEDIA_CHANGE was raised, or %false if not. */ -bool disk_force_media_change(struct gendisk *disk, unsigned int events) +void disk_force_media_change(struct gendisk *disk) { - disk_event_uevent(disk, events); - - if (!(events & DISK_EVENT_MEDIA_CHANGE)) - return false; - + disk_event_uevent(disk, DISK_EVENT_MEDIA_CHANGE); inc_diskseq(disk); - if (__invalidate_device(disk->part0, true)) - pr_warn("VFS: busy inodes on changed media %s\n", - disk->disk_name); + bdev_mark_dead(disk->part0, true); set_bit(GD_NEED_PART_SCAN, &disk->state); - return true; } EXPORT_SYMBOL_GPL(disk_force_media_change); diff --git a/block/elevator.c b/block/elevator.c index 8400e303fbcb..5ff093cb3cf8 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -499,6 +499,9 @@ void elv_unregister_queue(struct request_queue *q) int elv_register(struct elevator_type *e) { + /* finish request is mandatory */ + if (WARN_ON_ONCE(!e->ops.finish_request)) + return -EINVAL; /* insert_requests and dispatch_request are mandatory */ if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request)) return -EINVAL; diff --git a/block/fops.c b/block/fops.c index a286bf3325c5..a24a624d3bf7 100644 --- a/block/fops.c +++ b/block/fops.c @@ -15,6 +15,7 @@ #include <linux/falloc.h> #include <linux/suspend.h> #include <linux/fs.h> +#include <linux/iomap.h> #include <linux/module.h> #include "blk.h" @@ -23,15 +24,6 @@ static inline struct inode *bdev_file_inode(struct file *file) return file->f_mapping->host; } -static int blkdev_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) -{ - bh->b_bdev = I_BDEV(inode); - bh->b_blocknr = iblock; - set_buffer_mapped(bh); - return 0; -} - static blk_opf_t dio_bio_write_op(struct kiocb *iocb) { blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; @@ -358,13 +350,14 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, task_io_account_write(bio->bi_iter.bi_size); } + if (iocb->ki_flags & IOCB_NOWAIT) + bio->bi_opf |= REQ_NOWAIT; + if (iocb->ki_flags & IOCB_HIPRI) { - bio->bi_opf |= REQ_POLLED | REQ_NOWAIT; + bio->bi_opf |= REQ_POLLED; submit_bio(bio); WRITE_ONCE(iocb->private, bio); } else { - if (iocb->ki_flags & IOCB_NOWAIT) - bio->bi_opf |= REQ_NOWAIT; submit_bio(bio); } return -EIOCBQUEUED; @@ -386,6 +379,37 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); } +static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, struct iomap *srcmap) +{ + struct block_device *bdev = I_BDEV(inode); + loff_t isize = i_size_read(inode); + + iomap->bdev = bdev; + iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev)); + if (iomap->offset >= isize) + return -EIO; + iomap->type = IOMAP_MAPPED; + iomap->addr = iomap->offset; + iomap->length = isize - iomap->offset; + iomap->flags |= IOMAP_F_BUFFER_HEAD; /* noop for !CONFIG_BUFFER_HEAD */ + return 0; +} + +static const struct iomap_ops blkdev_iomap_ops = { + .iomap_begin = blkdev_iomap_begin, +}; + +#ifdef CONFIG_BUFFER_HEAD +static int blkdev_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + bh->b_bdev = I_BDEV(inode); + bh->b_blocknr = iblock; + set_buffer_mapped(bh); + return 0; +} + static int blkdev_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, blkdev_get_block, wbc); @@ -428,10 +452,58 @@ const struct address_space_operations 
def_blk_aops = { .writepage = blkdev_writepage, .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, - .direct_IO = blkdev_direct_IO, .migrate_folio = buffer_migrate_folio_norefs, .is_dirty_writeback = buffer_check_dirty_writeback, }; +#else /* CONFIG_BUFFER_HEAD */ +static int blkdev_read_folio(struct file *file, struct folio *folio) +{ + return iomap_read_folio(folio, &blkdev_iomap_ops); +} + +static void blkdev_readahead(struct readahead_control *rac) +{ + iomap_readahead(rac, &blkdev_iomap_ops); +} + +static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc, + struct inode *inode, loff_t offset) +{ + loff_t isize = i_size_read(inode); + + if (WARN_ON_ONCE(offset >= isize)) + return -EIO; + if (offset >= wpc->iomap.offset && + offset < wpc->iomap.offset + wpc->iomap.length) + return 0; + return blkdev_iomap_begin(inode, offset, isize - offset, + IOMAP_WRITE, &wpc->iomap, NULL); +} + +static const struct iomap_writeback_ops blkdev_writeback_ops = { + .map_blocks = blkdev_map_blocks, +}; + +static int blkdev_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct iomap_writepage_ctx wpc = { }; + + return iomap_writepages(mapping, wbc, &wpc, &blkdev_writeback_ops); +} + +const struct address_space_operations def_blk_aops = { + .dirty_folio = filemap_dirty_folio, + .release_folio = iomap_release_folio, + .invalidate_folio = iomap_invalidate_folio, + .read_folio = blkdev_read_folio, + .readahead = blkdev_readahead, + .writepages = blkdev_writepages, + .is_partially_uptodate = iomap_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, + .migrate_folio = filemap_migrate_folio, +}; +#endif /* CONFIG_BUFFER_HEAD */ /* * for a block special file file_inode(file)->i_size is zero @@ -505,7 +577,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) * during an unstable branch. */ filp->f_flags |= O_LARGEFILE; - filp->f_mode |= FMODE_BUF_RASYNC; + filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; /* * Use the file private data to store the holder for exclusive openes. @@ -533,6 +605,35 @@ static int blkdev_release(struct inode *inode, struct file *filp) return 0; } +static ssize_t +blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from) +{ + size_t count = iov_iter_count(from); + ssize_t written; + + written = kiocb_invalidate_pages(iocb, count); + if (written) { + if (written == -EBUSY) + return 0; + return written; + } + + written = blkdev_direct_IO(iocb, from); + if (written > 0) { + kiocb_invalidate_post_direct_write(iocb, count); + iocb->ki_pos += written; + count -= written; + } + if (written != -EIOCBQUEUED) + iov_iter_revert(from, count - iov_iter_count(from)); + return written; +} + +static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from) +{ + return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops); +} + /* * Write data to the block device. Only intended for the block device itself * and the raw driver which basically is a fake block device. 
@@ -542,7 +643,8 @@ static int blkdev_release(struct inode *inode, struct file *filp) */ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { - struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); + struct file *file = iocb->ki_filp; + struct block_device *bdev = I_BDEV(file->f_mapping->host); struct inode *bd_inode = bdev->bd_inode; loff_t size = bdev_nr_bytes(bdev); size_t shorted = 0; @@ -569,7 +671,23 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) iov_iter_truncate(from, size); } - ret = __generic_file_write_iter(iocb, from); + ret = file_remove_privs(file); + if (ret) + return ret; + + ret = file_update_time(file); + if (ret) + return ret; + + if (iocb->ki_flags & IOCB_DIRECT) { + ret = blkdev_direct_write(iocb, from); + if (ret >= 0 && iov_iter_count(from)) + ret = direct_write_fallback(iocb, from, ret, + blkdev_buffered_write(iocb, from)); + } else { + ret = blkdev_buffered_write(iocb, from); + } + if (ret > 0) ret = generic_write_sync(iocb, ret); iov_iter_reexpand(from, iov_iter_count(from) + shorted); diff --git a/block/genhd.c b/block/genhd.c index 3d287b32d50d..cc32a0c704eb 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -554,7 +554,7 @@ out_exit_elevator: } EXPORT_SYMBOL(device_add_disk); -static void blk_report_disk_dead(struct gendisk *disk) +static void blk_report_disk_dead(struct gendisk *disk, bool surprise) { struct block_device *bdev; unsigned long idx; @@ -565,10 +565,7 @@ static void blk_report_disk_dead(struct gendisk *disk) continue; rcu_read_unlock(); - mutex_lock(&bdev->bd_holder_lock); - if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead) - bdev->bd_holder_ops->mark_dead(bdev); - mutex_unlock(&bdev->bd_holder_lock); + bdev_mark_dead(bdev, surprise); put_device(&bdev->bd_device); rcu_read_lock(); @@ -576,14 +573,7 @@ static void blk_report_disk_dead(struct gendisk *disk) rcu_read_unlock(); } -/** - * blk_mark_disk_dead - mark a disk as dead - * @disk: disk to mark as dead - * - * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O - * to this disk. - */ -void blk_mark_disk_dead(struct gendisk *disk) +static void __blk_mark_disk_dead(struct gendisk *disk) { /* * Fail any new I/O. @@ -603,8 +593,19 @@ void blk_mark_disk_dead(struct gendisk *disk) * Prevent new I/O from crossing bio_queue_enter(). */ blk_queue_start_drain(disk->queue); +} - blk_report_disk_dead(disk); +/** + * blk_mark_disk_dead - mark a disk as dead + * @disk: disk to mark as dead + * + * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O + * to this disk. + */ +void blk_mark_disk_dead(struct gendisk *disk) +{ + __blk_mark_disk_dead(disk); + blk_report_disk_dead(disk, true); } EXPORT_SYMBOL_GPL(blk_mark_disk_dead); @@ -641,18 +642,20 @@ void del_gendisk(struct gendisk *disk) disk_del_events(disk); /* - * Prevent new openers by unlinked the bdev inode, and write out - * dirty data before marking the disk dead and stopping all I/O. + * Prevent new openers by unlinked the bdev inode. */ mutex_lock(&disk->open_mutex); - xa_for_each(&disk->part_tbl, idx, part) { + xa_for_each(&disk->part_tbl, idx, part) remove_inode_hash(part->bd_inode); - fsync_bdev(part); - __invalidate_device(part, true); - } mutex_unlock(&disk->open_mutex); - blk_mark_disk_dead(disk); + /* + * Tell the file system to write back all dirty data and shut down if + * it hasn't been notified earlier. 
+ */ + if (!test_bit(GD_DEAD, &disk->state)) + blk_report_disk_dead(disk, false); + __blk_mark_disk_dead(disk); /* * Drop all partitions now that the disk is marked dead. diff --git a/block/ioctl.c b/block/ioctl.c index 3be11941fb2d..648670ddb164 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -364,7 +364,14 @@ static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, { if (!capable(CAP_SYS_ADMIN)) return -EACCES; - fsync_bdev(bdev); + + mutex_lock(&bdev->bd_holder_lock); + if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) + bdev->bd_holder_ops->sync(bdev); + else + sync_blockdev(bdev); + mutex_unlock(&bdev->bd_holder_lock); + invalidate_bdev(bdev); return 0; } diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 02a916ba62ee..f958e79277b8 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -646,8 +646,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx) struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags; + unsigned int shift = tags->bitmap_tags.sb.shift; - dd->async_depth = max(1UL, 3 * q->nr_requests / 4); + dd->async_depth = max(1U, 3 * (1U << shift) / 4); sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth); } diff --git a/block/opal_proto.h b/block/opal_proto.h index a4e56845dd82..dec7ce3a3edb 100644 --- a/block/opal_proto.h +++ b/block/opal_proto.h @@ -225,6 +225,10 @@ enum opal_parameter { OPAL_SUM_SET_LIST = 0x060000, }; +enum opal_revertlsp { + OPAL_KEEP_GLOBAL_RANGE_KEY = 0x060000, +}; + /* Packets derived from: * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 * Secion: 3.2.3 ComPackets, Packets & Subpackets diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c index 1af610f0ba8c..c03bc105e575 100644 --- a/block/partitions/cmdline.c +++ b/block/partitions/cmdline.c @@ -81,8 +81,7 @@ static int parse_subpart(struct cmdline_subpart **subpart, char *partdef) length = min_t(int, next - partdef, sizeof(new_subpart->name) - 1); - strncpy(new_subpart->name, partdef, length); - new_subpart->name[length] = '\0'; + strscpy(new_subpart->name, partdef, length); partdef = ++next; } else @@ -140,8 +139,7 @@ static int parse_parts(struct cmdline_parts **parts, const char *bdevdef) } length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1); - strncpy(newparts->name, bdevdef, length); - newparts->name[length] = '\0'; + strscpy(newparts->name, bdevdef, length); newparts->nr_subparts = 0; next_subpart = &newparts->subpart; @@ -153,8 +151,7 @@ static int parse_parts(struct cmdline_parts **parts, const char *bdevdef) length = (!next) ? (sizeof(buf) - 1) : min_t(int, next - bdevdef, sizeof(buf) - 1); - strncpy(buf, bdevdef, length); - buf[length] = '\0'; + strscpy(buf, bdevdef, length); ret = parse_subpart(next_subpart, buf); if (ret) @@ -267,8 +264,7 @@ static int add_part(int slot, struct cmdline_subpart *subpart, label_min = min_t(int, sizeof(info->volname) - 1, sizeof(subpart->name)); - strncpy(info->volname, subpart->name, label_min); - info->volname[label_min] = '\0'; + strscpy(info->volname, subpart->name, label_min); snprintf(tmp, sizeof(tmp), "(%s)", info->volname); strlcat(state->pp_buf, tmp, PAGE_SIZE); diff --git a/block/partitions/core.c b/block/partitions/core.c index 13a7341299a9..e137a87f4db0 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -281,10 +281,7 @@ static void delete_partition(struct block_device *part) * looked up any more even when openers still hold references. 
*/ remove_inode_hash(part->bd_inode); - - fsync_bdev(part); - __invalidate_device(part, true); - + bdev_mark_dead(part, false); drop_partition(part); } diff --git a/block/sed-opal.c b/block/sed-opal.c index c18339446ef3..6d7f25d1711b 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -20,6 +20,9 @@ #include <linux/sed-opal.h> #include <linux/string.h> #include <linux/kdev_t.h> +#include <linux/key.h> +#include <linux/key-type.h> +#include <keys/user-type.h> #include "opal_proto.h" @@ -29,6 +32,8 @@ /* Number of bytes needed by cmd_finalize. */ #define CMD_FINALIZE_BYTES_NEEDED 7 +static struct key *sed_opal_keyring; + struct opal_step { int (*fn)(struct opal_dev *dev, void *data); void *data; @@ -269,6 +274,101 @@ static void print_buffer(const u8 *ptr, u32 length) #endif } +/* + * Allocate/update a SED Opal key and add it to the SED Opal keyring. + */ +static int update_sed_opal_key(const char *desc, u_char *key_data, int keylen) +{ + key_ref_t kr; + + if (!sed_opal_keyring) + return -ENOKEY; + + kr = key_create_or_update(make_key_ref(sed_opal_keyring, true), "user", + desc, (const void *)key_data, keylen, + KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_WRITE, + KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); + if (IS_ERR(kr)) { + pr_err("Error adding SED key (%ld)\n", PTR_ERR(kr)); + return PTR_ERR(kr); + } + + return 0; +} + +/* + * Read a SED Opal key from the SED Opal keyring. + */ +static int read_sed_opal_key(const char *key_name, u_char *buffer, int buflen) +{ + int ret; + key_ref_t kref; + struct key *key; + + if (!sed_opal_keyring) + return -ENOKEY; + + kref = keyring_search(make_key_ref(sed_opal_keyring, true), + &key_type_user, key_name, true); + + if (IS_ERR(kref)) + ret = PTR_ERR(kref); + + key = key_ref_to_ptr(kref); + down_read(&key->sem); + ret = key_validate(key); + if (ret == 0) { + if (buflen > key->datalen) + buflen = key->datalen; + + ret = key->type->read(key, (char *)buffer, buflen); + } + up_read(&key->sem); + + key_ref_put(kref); + + return ret; +} + +static int opal_get_key(struct opal_dev *dev, struct opal_key *key) +{ + int ret = 0; + + switch (key->key_type) { + case OPAL_INCLUDED: + /* the key is ready to use */ + break; + case OPAL_KEYRING: + /* the key is in the keyring */ + ret = read_sed_opal_key(OPAL_AUTH_KEY, key->key, OPAL_KEY_MAX); + if (ret > 0) { + if (ret > U8_MAX) { + ret = -ENOSPC; + goto error; + } + key->key_len = ret; + key->key_type = OPAL_INCLUDED; + } + break; + default: + ret = -EINVAL; + break; + } + if (ret < 0) + goto error; + + /* must have a PEK by now or it's an error */ + if (key->key_type != OPAL_INCLUDED || key->key_len == 0) { + ret = -EINVAL; + goto error; + } + return 0; +error: + pr_debug("Error getting password: %d\n", ret); + return ret; +} + static bool check_tper(const void *data) { const struct d0_tper_features *tper = data; @@ -463,8 +563,11 @@ out_error: return error; } -static int opal_discovery0_end(struct opal_dev *dev) +static int opal_discovery0_end(struct opal_dev *dev, void *data) { + struct opal_discovery *discv_out = data; /* may be NULL */ + u8 __user *buf_out; + u64 len_out; bool found_com_id = false, supported = true, single_user = false; const struct d0_header *hdr = (struct d0_header *)dev->resp; const u8 *epos = dev->resp, *cpos = dev->resp; @@ -480,6 +583,15 @@ static int opal_discovery0_end(struct opal_dev *dev) return -EFAULT; } + if (discv_out) { + buf_out = (u8 __user *)(uintptr_t)discv_out->data; + len_out = min_t(u64, discv_out->size, hlen); + if (buf_out && 
copy_to_user(buf_out, dev->resp, len_out)) + return -EFAULT; + + discv_out->size = hlen; /* actual size of data */ + } + epos += hlen; /* end of buffer */ cpos += sizeof(*hdr); /* current position on buffer */ @@ -565,13 +677,13 @@ static int opal_discovery0(struct opal_dev *dev, void *data) if (ret) return ret; - return opal_discovery0_end(dev); + return opal_discovery0_end(dev, data); } static int opal_discovery0_step(struct opal_dev *dev) { const struct opal_step discovery0_step = { - opal_discovery0, + opal_discovery0, NULL }; return execute_step(dev, &discovery0_step, 0); @@ -1757,6 +1869,26 @@ static int internal_activate_user(struct opal_dev *dev, void *data) return finalize_and_send(dev, parse_and_check_status); } +static int revert_lsp(struct opal_dev *dev, void *data) +{ + struct opal_revert_lsp *rev = data; + int err; + + err = cmd_start(dev, opaluid[OPAL_THISSP_UID], + opalmethod[OPAL_REVERTSP]); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u64(&err, dev, OPAL_KEEP_GLOBAL_RANGE_KEY); + add_token_u8(&err, dev, (rev->options & OPAL_PRESERVE) ? + OPAL_TRUE : OPAL_FALSE); + add_token_u8(&err, dev, OPAL_ENDNAME); + if (err) { + pr_debug("Error building REVERT SP command.\n"); + return err; + } + + return finalize_and_send(dev, parse_and_check_status); +} + static int erase_locking_range(struct opal_dev *dev, void *data) { struct opal_session_info *session = data; @@ -2427,6 +2559,9 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev, }; int ret; + ret = opal_get_key(dev, &opal_session->opal_key); + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); ret = execute_steps(dev, erase_steps, ARRAY_SIZE(erase_steps)); @@ -2435,6 +2570,42 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev, return ret; } +static int opal_get_discv(struct opal_dev *dev, struct opal_discovery *discv) +{ + const struct opal_step discovery0_step = { + opal_discovery0, discv + }; + int ret = 0; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev); + ret = execute_step(dev, &discovery0_step, 0); + mutex_unlock(&dev->dev_lock); + if (ret) + return ret; + return discv->size; /* modified to actual length of data */ +} + +static int opal_revertlsp(struct opal_dev *dev, struct opal_revert_lsp *rev) +{ + /* controller will terminate session */ + const struct opal_step steps[] = { + { start_admin1LSP_opal_session, &rev->key }, + { revert_lsp, rev } + }; + int ret; + + ret = opal_get_key(dev, &rev->key); + if (ret) + return ret; + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev); + ret = execute_steps(dev, steps, ARRAY_SIZE(steps)); + mutex_unlock(&dev->dev_lock); + + return ret; +} + static int opal_erase_locking_range(struct opal_dev *dev, struct opal_session_info *opal_session) { @@ -2445,6 +2616,9 @@ static int opal_erase_locking_range(struct opal_dev *dev, }; int ret; + ret = opal_get_key(dev, &opal_session->opal_key); + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); ret = execute_steps(dev, erase_steps, ARRAY_SIZE(erase_steps)); @@ -2473,6 +2647,9 @@ static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, opal_mbr->enable_disable != OPAL_MBR_DISABLE) return -EINVAL; + ret = opal_get_key(dev, &opal_mbr->key); + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps)); @@ -2498,6 +2675,9 @@ static int opal_set_mbr_done(struct opal_dev *dev, mbr_done->done_flag != OPAL_MBR_NOT_DONE) return -EINVAL; + ret = opal_get_key(dev, &mbr_done->key); + if (ret) 
+ return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps)); @@ -2519,6 +2699,9 @@ static int opal_write_shadow_mbr(struct opal_dev *dev, if (info->size == 0) return 0; + ret = opal_get_key(dev, &info->key); + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps)); @@ -2576,6 +2759,9 @@ static int opal_add_user_to_lr(struct opal_dev *dev, return -EINVAL; } + ret = opal_get_key(dev, &lk_unlk->session.opal_key); + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); ret = execute_steps(dev, steps, ARRAY_SIZE(steps)); @@ -2598,6 +2784,10 @@ static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal, bool psi int ret; + ret = opal_get_key(dev, opal); + + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); if (psid) @@ -2698,6 +2888,9 @@ static int opal_lock_unlock(struct opal_dev *dev, if (lk_unlk->session.who > OPAL_USER9) return -EINVAL; + ret = opal_get_key(dev, &lk_unlk->session.opal_key); + if (ret) + return ret; mutex_lock(&dev->dev_lock); opal_lock_check_for_saved_key(dev, lk_unlk); ret = __opal_lock_unlock(dev, lk_unlk); @@ -2721,6 +2914,9 @@ static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal) if (!dev) return -ENODEV; + ret = opal_get_key(dev, opal); + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); ret = execute_steps(dev, owner_steps, ARRAY_SIZE(owner_steps)); @@ -2743,6 +2939,9 @@ static int opal_activate_lsp(struct opal_dev *dev, if (!opal_lr_act->num_lrs || opal_lr_act->num_lrs > OPAL_MAX_LRS) return -EINVAL; + ret = opal_get_key(dev, &opal_lr_act->key); + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); ret = execute_steps(dev, active_steps, ARRAY_SIZE(active_steps)); @@ -2761,6 +2960,9 @@ static int opal_setup_locking_range(struct opal_dev *dev, }; int ret; + ret = opal_get_key(dev, &opal_lrs->session.opal_key); + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps)); @@ -2814,6 +3016,14 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) ret = execute_steps(dev, pw_steps, ARRAY_SIZE(pw_steps)); mutex_unlock(&dev->dev_lock); + if (ret) + return ret; + + /* update keyring with new password */ + ret = update_sed_opal_key(OPAL_AUTH_KEY, + opal_pw->new_user_pw.opal_key.key, + opal_pw->new_user_pw.opal_key.key_len); + return ret; } @@ -2834,6 +3044,9 @@ static int opal_activate_user(struct opal_dev *dev, return -EINVAL; } + ret = opal_get_key(dev, &opal_session->opal_key); + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); ret = execute_steps(dev, act_steps, ARRAY_SIZE(act_steps)); @@ -2920,6 +3133,9 @@ static int opal_generic_read_write_table(struct opal_dev *dev, { int ret, bit_set; + ret = opal_get_key(dev, &rw_tbl->key); + if (ret) + return ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); @@ -2988,9 +3204,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (!dev) - return -ENOTSUPP; + return -EOPNOTSUPP; if (!(dev->flags & OPAL_FL_SUPPORTED)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (cmd & IOC_IN) { p = memdup_user(arg, _IOC_SIZE(cmd)); @@ -3056,6 +3272,13 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) case IOC_OPAL_GET_GEOMETRY: ret = opal_get_geometry(dev, arg); break; + case 
IOC_OPAL_REVERT_LSP: + ret = opal_revertlsp(dev, p); + break; + case IOC_OPAL_DISCOVERY: + ret = opal_get_discv(dev, p); + break; + default: break; } @@ -3065,3 +3288,22 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) return ret; } EXPORT_SYMBOL_GPL(sed_ioctl); + +static int __init sed_opal_init(void) +{ + struct key *kr; + + kr = keyring_alloc(".sed_opal", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | + KEY_USR_READ | KEY_USR_SEARCH | KEY_USR_WRITE, + KEY_ALLOC_NOT_IN_QUOTA, + NULL, NULL); + if (IS_ERR(kr)) + return PTR_ERR(kr); + + sed_opal_keyring = kr; + + return 0; +} +late_initcall(sed_opal_init); |