diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bfq-iosched.c | 17 | ||||
-rw-r--r-- | block/bfq-iosched.h | 5 | ||||
-rw-r--r-- | block/bfq-wf2q.c | 41 | ||||
-rw-r--r-- | block/bio-integrity.c | 165 | ||||
-rw-r--r-- | block/bio.c | 13 | ||||
-rw-r--r-- | block/blk-core.c | 5 | ||||
-rw-r--r-- | block/blk-lib.c | 23 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 8 | ||||
-rw-r--r-- | block/blk-mq.c | 4 | ||||
-rw-r--r-- | block/blk.h | 11 | ||||
-rw-r--r-- | block/t10-pi.c | 9 |
11 files changed, 173 insertions, 128 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 12bbc6b8657d..436b6ca6b175 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3483,11 +3483,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) } } } - /* Update weight both if it must be raised and if it must be lowered */ + /* + * To improve latency (for this or other queues), immediately + * update weight both if it must be raised and if it must be + * lowered. Since, entity may be on some active tree here, and + * might have a pending change of its ioprio class, invoke + * next function with the last parameter unset (see the + * comments on the function). + */ if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1)) - __bfq_entity_update_weight_prio( - bfq_entity_service_tree(entity), - entity); + __bfq_entity_update_weight_prio(bfq_entity_service_tree(entity), + entity, false); } /* @@ -4293,6 +4299,9 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) bfq_bfqq_expire(bfqd, bfqq, false, BFQQE_NO_MORE_REQUESTS); } + + if (!bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); } static void bfq_put_rq_priv_body(struct bfq_queue *bfqq) diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 5c3bf9861492..63e771ab56d8 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -52,7 +52,7 @@ struct bfq_entity; struct bfq_service_tree { /* tree for active entities (i.e., those backlogged) */ struct rb_root active; - /* tree for idle entities (i.e., not backlogged, with V <= F_i)*/ + /* tree for idle entities (i.e., not backlogged, with V < F_i)*/ struct rb_root idle; /* idle entity with minimum F_i */ @@ -892,7 +892,8 @@ void bfq_put_idle_entity(struct bfq_service_tree *st, struct bfq_entity *entity); struct bfq_service_tree * __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - struct bfq_entity *entity); + struct bfq_entity *entity, + bool update_class_too); void bfq_bfqq_served(struct bfq_queue *bfqq, int served); void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq, unsigned long time_ms); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 8726ede19eef..979f8f21b7e2 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -694,10 +694,28 @@ struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity) return sched_data->service_tree + idx; } - +/* + * Update weight and priority of entity. If update_class_too is true, + * then update the ioprio_class of entity too. + * + * The reason why the update of ioprio_class is controlled through the + * last parameter is as follows. Changing the ioprio class of an + * entity implies changing the destination service trees for that + * entity. If such a change occurred when the entity is already on one + * of the service trees for its previous class, then the state of the + * entity would become more complex: none of the new possible service + * trees for the entity, according to bfq_entity_service_tree(), would + * match any of the possible service trees on which the entity + * is. Complex operations involving these trees, such as entity + * activations and deactivations, should take into account this + * additional complexity. To avoid this issue, this function is + * invoked with update_class_too unset in the points in the code where + * entity may happen to be on some tree. + */ struct bfq_service_tree * __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - struct bfq_entity *entity) + struct bfq_entity *entity, + bool update_class_too) { struct bfq_service_tree *new_st = old_st; @@ -739,9 +757,15 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, bfq_weight_to_ioprio(entity->orig_weight); } - if (bfqq) + if (bfqq && update_class_too) bfqq->ioprio_class = bfqq->new_ioprio_class; - entity->prio_changed = 0; + + /* + * Reset prio_changed only if the ioprio_class change + * is not pending any longer. + */ + if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class) + entity->prio_changed = 0; /* * NOTE: here we may be changing the weight too early, @@ -867,7 +891,12 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - st = __bfq_entity_update_weight_prio(st, entity); + /* + * When this function is invoked, entity is not in any service + * tree, then it is safe to invoke next function with the last + * parameter set (see the comments on the function). + */ + st = __bfq_entity_update_weight_prio(st, entity, true); bfq_calc_finish(entity, entity->budget); /* @@ -1268,7 +1297,7 @@ static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value) * * This function searches the first schedulable entity, starting from the * root of the tree and going on the left every time on this side there is - * a subtree with at least one eligible (start >= vtime) entity. The path on + * a subtree with at least one eligible (start <= vtime) entity. The path on * the right is followed only if a) the left subtree contains no eligible * entities and b) no eligible entity has been found yet. */ diff --git a/block/bio-integrity.c b/block/bio-integrity.c index b8a3a65f7364..83e92beb3c9f 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -102,7 +102,7 @@ EXPORT_SYMBOL(bio_integrity_alloc); * Description: Used to free the integrity portion of a bio. Usually * called from bio_free(). */ -void bio_integrity_free(struct bio *bio) +static void bio_integrity_free(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_set *bs = bio->bi_pool; @@ -120,8 +120,8 @@ void bio_integrity_free(struct bio *bio) } bio->bi_integrity = NULL; + bio->bi_opf &= ~REQ_INTEGRITY; } -EXPORT_SYMBOL(bio_integrity_free); /** * bio_integrity_add_page - Attach integrity metadata @@ -160,44 +160,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, EXPORT_SYMBOL(bio_integrity_add_page); /** - * bio_integrity_enabled - Check whether integrity can be passed - * @bio: bio to check - * - * Description: Determines whether bio_integrity_prep() can be called - * on this bio or not. bio data direction and target device must be - * set prior to calling. The functions honors the write_generate and - * read_verify flags in sysfs. - */ -bool bio_integrity_enabled(struct bio *bio) -{ - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - - if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE) - return false; - - if (!bio_sectors(bio)) - return false; - - /* Already protected? */ - if (bio_integrity(bio)) - return false; - - if (bi == NULL) - return false; - - if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL && - (bi->flags & BLK_INTEGRITY_VERIFY)) - return true; - - if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL && - (bi->flags & BLK_INTEGRITY_GENERATE)) - return true; - - return false; -} -EXPORT_SYMBOL(bio_integrity_enabled); - -/** * bio_integrity_intervals - Return number of integrity intervals for a bio * @bi: blk_integrity profile for device * @sectors: Size of the bio in 512-byte sectors @@ -222,10 +184,11 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, /** * bio_integrity_process - Process integrity metadata for a bio * @bio: bio to generate/verify integrity metadata for + * @proc_iter: iterator to process * @proc_fn: Pointer to the relevant processing function */ static blk_status_t bio_integrity_process(struct bio *bio, - integrity_processing_fn *proc_fn) + struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); struct blk_integrity_iter iter; @@ -238,10 +201,10 @@ static blk_status_t bio_integrity_process(struct bio *bio, iter.disk_name = bio->bi_bdev->bd_disk->disk_name; iter.interval = 1 << bi->interval_exp; - iter.seed = bip_get_seed(bip); + iter.seed = proc_iter->bi_sector; iter.prot_buf = prot_buf; - bio_for_each_segment(bv, bio, bviter) { + __bio_for_each_segment(bv, bio, bviter, *proc_iter) { void *kaddr = kmap_atomic(bv.bv_page); iter.data_buf = kaddr + bv.bv_offset; @@ -262,14 +225,15 @@ static blk_status_t bio_integrity_process(struct bio *bio, * bio_integrity_prep - Prepare bio for integrity I/O * @bio: bio to prepare * - * Description: Allocates a buffer for integrity metadata, maps the - * pages and attaches them to a bio. The bio must have data - * direction, target device and start sector set priot to calling. In - * the WRITE case, integrity metadata will be generated using the - * block device's integrity function. In the READ case, the buffer + * Description: Checks if the bio already has an integrity payload attached. + * If it does, the payload has been generated by another kernel subsystem, + * and we just pass it through. Otherwise allocates integrity payload. + * The bio must have data direction, target device and start sector set priot + * to calling. In the WRITE case, integrity metadata will be generated using + * the block device's integrity function. In the READ case, the buffer * will be prepared for DMA and a suitable end_io handler set up. */ -int bio_integrity_prep(struct bio *bio) +bool bio_integrity_prep(struct bio *bio) { struct bio_integrity_payload *bip; struct blk_integrity *bi; @@ -279,20 +243,41 @@ int bio_integrity_prep(struct bio *bio) unsigned int len, nr_pages; unsigned int bytes, offset, i; unsigned int intervals; + blk_status_t status; bi = bdev_get_integrity(bio->bi_bdev); q = bdev_get_queue(bio->bi_bdev); - BUG_ON(bi == NULL); - BUG_ON(bio_integrity(bio)); + if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE) + return true; + if (!bio_sectors(bio)) + return true; + + /* Already protected? */ + if (bio_integrity(bio)) + return true; + + if (bi == NULL) + return true; + + if (bio_data_dir(bio) == READ) { + if (!bi->profile->verify_fn || + !(bi->flags & BLK_INTEGRITY_VERIFY)) + return true; + } else { + if (!bi->profile->generate_fn || + !(bi->flags & BLK_INTEGRITY_GENERATE)) + return true; + } intervals = bio_integrity_intervals(bi, bio_sectors(bio)); /* Allocate kernel buffer for protection data */ len = intervals * bi->tuple_size; buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); + status = BLK_STS_RESOURCE; if (unlikely(buf == NULL)) { printk(KERN_ERR "could not allocate integrity buffer\n"); - return -ENOMEM; + goto err_end_io; } end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -304,7 +289,8 @@ int bio_integrity_prep(struct bio *bio) if (IS_ERR(bip)) { printk(KERN_ERR "could not allocate data integrity bioset\n"); kfree(buf); - return PTR_ERR(bip); + status = BLK_STS_RESOURCE; + goto err_end_io; } bip->bip_flags |= BIP_BLOCK_INTEGRITY; @@ -330,7 +316,7 @@ int bio_integrity_prep(struct bio *bio) bytes, offset); if (ret == 0) - return 0; + return false; if (ret < bytes) break; @@ -340,17 +326,18 @@ int bio_integrity_prep(struct bio *bio) offset = 0; } - /* Install custom I/O completion handler if read verify is enabled */ - if (bio_data_dir(bio) == READ) { - bip->bip_end_io = bio->bi_end_io; - bio->bi_end_io = bio_integrity_endio; + /* Auto-generate integrity metadata if this is a write */ + if (bio_data_dir(bio) == WRITE) { + bio_integrity_process(bio, &bio->bi_iter, + bi->profile->generate_fn); } + return true; - /* Auto-generate integrity metadata if this is a write */ - if (bio_data_dir(bio) == WRITE) - bio_integrity_process(bio, bi->profile->generate_fn); +err_end_io: + bio->bi_status = status; + bio_endio(bio); + return false; - return 0; } EXPORT_SYMBOL(bio_integrity_prep); @@ -368,16 +355,26 @@ static void bio_integrity_verify_fn(struct work_struct *work) container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct bvec_iter iter = bio->bi_iter; - bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn); + /* + * At the moment verify is called bio's iterator was advanced + * during split and completion, we need to rewind iterator to + * it's original position. + */ + if (bio_rewind_iter(bio, &iter, iter.bi_done)) { + bio->bi_status = bio_integrity_process(bio, &iter, + bi->profile->verify_fn); + } else { + bio->bi_status = BLK_STS_IOERR; + } - /* Restore original bio completion handler */ - bio->bi_end_io = bip->bip_end_io; + bio_integrity_free(bio); bio_endio(bio); } /** - * bio_integrity_endio - Integrity I/O completion function + * __bio_integrity_endio - Integrity I/O completion function * @bio: Protected bio * @error: Pointer to errno * @@ -388,27 +385,19 @@ static void bio_integrity_verify_fn(struct work_struct *work) * in process context. This function postpones completion * accordingly. */ -void bio_integrity_endio(struct bio *bio) +bool __bio_integrity_endio(struct bio *bio) { - struct bio_integrity_payload *bip = bio_integrity(bio); - - BUG_ON(bip->bip_bio != bio); + if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) { + struct bio_integrity_payload *bip = bio_integrity(bio); - /* In case of an I/O error there is no point in verifying the - * integrity metadata. Restore original bio end_io handler - * and run it. - */ - if (bio->bi_status) { - bio->bi_end_io = bip->bip_end_io; - bio_endio(bio); - - return; + INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); + queue_work(kintegrityd_wq, &bip->bip_work); + return false; } - INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); - queue_work(kintegrityd_wq, &bip->bip_work); + bio_integrity_free(bio); + return true; } -EXPORT_SYMBOL(bio_integrity_endio); /** * bio_integrity_advance - Advance integrity vector @@ -425,6 +414,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); + bip->bip_iter.bi_sector += bytes_done >> 9; bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); } EXPORT_SYMBOL(bio_integrity_advance); @@ -432,22 +422,15 @@ EXPORT_SYMBOL(bio_integrity_advance); /** * bio_integrity_trim - Trim integrity vector * @bio: bio whose integrity vector to update - * @offset: offset to first data sector - * @sectors: number of data sectors * * Description: Used to trim the integrity vector in a cloned bio. - * The ivec will be advanced corresponding to 'offset' data sectors - * and the length will be truncated corresponding to 'len' data - * sectors. */ -void bio_integrity_trim(struct bio *bio, unsigned int offset, - unsigned int sectors) +void bio_integrity_trim(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - bio_integrity_advance(bio, offset << 9); - bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors); + bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); } EXPORT_SYMBOL(bio_integrity_trim); diff --git a/block/bio.c b/block/bio.c index 1cfcd0df3f30..9a63597aaacc 100644 --- a/block/bio.c +++ b/block/bio.c @@ -243,9 +243,6 @@ fallback: void bio_uninit(struct bio *bio) { bio_disassociate_task(bio); - - if (bio_integrity(bio)) - bio_integrity_free(bio); } EXPORT_SYMBOL(bio_uninit); @@ -1813,6 +1810,8 @@ void bio_endio(struct bio *bio) again: if (!bio_remaining_done(bio)) return; + if (!bio_integrity_endio(bio)) + return; /* * Need to have a real endio function for chained bios, otherwise @@ -1834,6 +1833,8 @@ again: } blk_throtl_bio_endio(bio); + /* release cgroup info */ + bio_uninit(bio); if (bio->bi_end_io) bio->bi_end_io(bio); } @@ -1868,7 +1869,7 @@ struct bio *bio_split(struct bio *bio, int sectors, split->bi_iter.bi_size = sectors << 9; if (bio_integrity(split)) - bio_integrity_trim(split, 0, sectors); + bio_integrity_trim(split); bio_advance(bio, split->bi_iter.bi_size); @@ -1900,6 +1901,10 @@ void bio_trim(struct bio *bio, int offset, int size) bio_advance(bio, offset << 9); bio->bi_iter.bi_size = size; + + if (bio_integrity(bio)) + bio_integrity_trim(bio); + } EXPORT_SYMBOL_GPL(bio_trim); diff --git a/block/blk-core.c b/block/blk-core.c index af393d5a9680..970b9c9638c5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1787,11 +1787,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) blk_queue_split(q, &bio); - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + if (!bio_integrity_prep(bio)) return BLK_QC_T_NONE; - } if (op_is_flush(bio->bi_opf)) { spin_lock_irq(q->queue_lock); diff --git a/block/blk-lib.c b/block/blk-lib.c index e8caecd71688..3fe0aec90597 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -261,6 +261,19 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev, return 0; } +/* + * Convert a number of 512B sectors to a number of pages. + * The result is limited to a number of pages that can fit into a BIO. + * Also make sure that the result is always at least 1 (page) for the cases + * where nr_sects is lower than the number of sectors in a page. + */ +static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects) +{ + sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1; + + return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES); +} + /** * __blkdev_issue_zeroout - generate number of zero filed write bios * @bdev: blockdev to issue @@ -307,18 +320,18 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, ret = 0; while (nr_sects != 0) { - bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES), - gfp_mask); + bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects), + gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); while (nr_sects != 0) { - sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); - bi_size = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0); + sz = min((sector_t) PAGE_SIZE, nr_sects << 9); + bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0); nr_sects -= bi_size >> 9; sector += bi_size >> 9; - if (bi_size < (sz << 9)) + if (bi_size < sz) break; } cond_resched(); diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 7f0dc48ffb40..4ab69435708c 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -515,10 +515,12 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) } /* - * Default to 256, since we don't split into sync/async like the - * old code did. Additionally, this is a per-hw queue depth. + * Default to double of smaller one between hw queue_depth and 128, + * since we don't split into sync/async like the old code did. + * Additionally, this is a per-hw queue depth. */ - q->nr_requests = 2 * BLKDEV_MAX_RQ; + q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth, + BLKDEV_MAX_RQ); queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_sched_alloc_tags(q, hctx, i); diff --git a/block/blk-mq.c b/block/blk-mq.c index 6cef42f419a5..041f7b7fa0d6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1547,10 +1547,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_queue_split(q, &bio); - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio_io_error(bio); + if (!bio_integrity_prep(bio)) return BLK_QC_T_NONE; - } if (!is_flush_fua && !blk_queue_nomerges(q) && blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) diff --git a/block/blk.h b/block/blk.h index 01ebb8185f6b..3a3d715bd725 100644 --- a/block/blk.h +++ b/block/blk.h @@ -81,10 +81,21 @@ static inline void blk_queue_enter_live(struct request_queue *q) #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); +bool __bio_integrity_endio(struct bio *); +static inline bool bio_integrity_endio(struct bio *bio) +{ + if (bio_integrity(bio)) + return __bio_integrity_endio(bio); + return true; +} #else static inline void blk_flush_integrity(void) { } +static inline bool bio_integrity_endio(struct bio *bio) +{ + return true; +} #endif void blk_timeout_work(struct work_struct *work); diff --git a/block/t10-pi.c b/block/t10-pi.c index 3416dadf7b15..a98db384048f 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -28,9 +28,6 @@ typedef __be16 (csum_fn) (void *, unsigned int); -static const __be16 APP_ESCAPE = (__force __be16) 0xffff; -static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff; - static __be16 t10_pi_crc_fn(void *data, unsigned int len) { return cpu_to_be16(crc_t10dif(data, len)); @@ -82,7 +79,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, switch (type) { case 1: case 2: - if (pi->app_tag == APP_ESCAPE) + if (pi->app_tag == T10_PI_APP_ESCAPE) goto next; if (be32_to_cpu(pi->ref_tag) != @@ -95,8 +92,8 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, } break; case 3: - if (pi->app_tag == APP_ESCAPE && - pi->ref_tag == REF_ESCAPE) + if (pi->app_tag == T10_PI_APP_ESCAPE && + pi->ref_tag == T10_PI_REF_ESCAPE) goto next; break; } |