Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--	block/blk-throttle.c	166
1 file changed, 161 insertions(+), 5 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 6e1c29860eec..140da29f5800 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -28,6 +28,8 @@ static int throtl_quantum = 32;
 /* default latency target is 0, eg, guarantee IO latency by default */
 #define DFL_LATENCY_TARGET (0)
 
+#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
+
 static struct blkcg_policy blkcg_policy_throtl;
 
 /* A workqueue to queue throttle related work */
@@ -165,6 +167,19 @@ struct throtl_grp {
 	unsigned long idletime_threshold; /* us */
 };
 
+/* We measure latency for request size from <= 4k to >= 1M */
+#define LATENCY_BUCKET_SIZE 9
+
+struct latency_bucket {
+	unsigned long total_latency; /* ns / 1024 */
+	int samples;
+};
+
+struct avg_latency_bucket {
+	unsigned long latency; /* ns / 1024 */
+	bool valid;
+};
+
 struct throtl_data
 {
 	/* service tree for active throtl groups */
@@ -188,6 +203,13 @@ struct throtl_data
 	unsigned long low_downgrade_time;
 
 	unsigned int scale;
+
+	struct latency_bucket tmp_buckets[LATENCY_BUCKET_SIZE];
+	struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE];
+	struct latency_bucket __percpu *latency_buckets;
+	unsigned long last_calculate_time;
+
+	bool track_bio_latency;
 };
 
 static void throtl_pending_timer_fn(unsigned long arg);
@@ -306,6 +328,9 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
 	return ret;
 }
 
+#define request_bucket_index(sectors) \
+	clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
+
 /**
  * throtl_log - log debug message via blktrace
  * @sq: the service_queue being reported
@@ -1931,6 +1956,73 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg)
 	tg->checked_last_finish_time = last_finish_time;
 }
 
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+static void throtl_update_latency_buckets(struct throtl_data *td)
+{
+	struct avg_latency_bucket avg_latency[LATENCY_BUCKET_SIZE];
+	int i, cpu;
+	unsigned long last_latency = 0;
+	unsigned long latency;
+
+	if (!blk_queue_nonrot(td->queue))
+		return;
+	if (time_before(jiffies, td->last_calculate_time + HZ))
+		return;
+	td->last_calculate_time = jiffies;
+
+	memset(avg_latency, 0, sizeof(avg_latency));
+	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+		struct latency_bucket *tmp = &td->tmp_buckets[i];
+
+		for_each_possible_cpu(cpu) {
+			struct latency_bucket *bucket;
+
+			/* this isn't race free, but ok in practice */
+			bucket = per_cpu_ptr(td->latency_buckets, cpu);
+			tmp->total_latency += bucket[i].total_latency;
+			tmp->samples += bucket[i].samples;
+			bucket[i].total_latency = 0;
+			bucket[i].samples = 0;
+		}
+
+		if (tmp->samples >= 32) {
+			int samples = tmp->samples;
+
+			latency = tmp->total_latency;
+
+			tmp->total_latency = 0;
+			tmp->samples = 0;
+			latency /= samples;
+			if (latency == 0)
+				continue;
+			avg_latency[i].latency = latency;
+		}
+	}
+
+	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+		if (!avg_latency[i].latency) {
+			if (td->avg_buckets[i].latency < last_latency)
+				td->avg_buckets[i].latency = last_latency;
+			continue;
+		}
+
+		if (!td->avg_buckets[i].valid)
+			latency = avg_latency[i].latency;
+		else
+			latency = (td->avg_buckets[i].latency * 7 +
+				avg_latency[i].latency) >> 3;
+
+		td->avg_buckets[i].latency = max(latency, last_latency);
+		td->avg_buckets[i].valid = true;
+		last_latency = td->avg_buckets[i].latency;
+	}
+}
+#else
+static inline void throtl_update_latency_buckets(struct throtl_data *td)
+{
+}
+#endif
+
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 		    struct bio *bio)
 {
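The hunk above maps each IO to one of LATENCY_BUCKET_SIZE buckets by the power of two of its size in sectors (<= 4k lands in bucket 0, >= 1M in bucket 8) and, once per second, folds each bucket's fresh per-CPU average into avg_buckets[] with a 7/8-old, 1/8-new weighting, then forces the per-bucket results to be non-decreasing across bucket sizes. A minimal user-space sketch of those two calculations follows; the open-coded loop stands in for the kernel's order_base_2()/clamp_t(), and the sample values are made up:

#include <stdio.h>

#define LATENCY_BUCKET_SIZE 9

/* bucket 0 covers <= 4k (8 sectors), bucket 8 covers >= 1M */
static int request_bucket_index(unsigned long sectors)
{
	int order = 0;

	while ((1UL << order) < sectors)	/* ~ order_base_2() */
		order++;
	order -= 3;				/* 8 sectors (4k) -> 0 */
	if (order < 0)				/* ~ clamp_t() */
		order = 0;
	if (order > LATENCY_BUCKET_SIZE - 1)
		order = LATENCY_BUCKET_SIZE - 1;
	return order;
}

int main(void)
{
	unsigned long avg = 100, sample = 500;	/* made-up values */

	/* EWMA with weight 7/8 old, 1/8 new, as in the patch */
	avg = (avg * 7 + sample) >> 3;
	printf("4k -> bucket %d, 1M -> bucket %d, avg: 100 -> %lu\n",
	       request_bucket_index(8), request_bucket_index(2048), avg);
	return 0;
}

Compiled as-is this prints bucket 0 for a 4k IO, bucket 8 for a 1M IO, and an average that moves from 100 to 150, i.e. one eighth of the way toward the new sample.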
@@ -1939,6 +2031,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 	struct throtl_service_queue *sq;
 	bool rw = bio_data_dir(bio);
 	bool throttled = false;
+	struct throtl_data *td = tg->td;
 	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
@@ -1949,6 +2042,8 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 
 	spin_lock_irq(q->queue_lock);
 
+	throtl_update_latency_buckets(td);
+
 	if (unlikely(blk_queue_bypass(q)))
 		goto out_unlock;
 
@@ -1956,6 +2051,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	if (ret == 0 || ret == -EBUSY)
 		bio->bi_cg_private = tg;
+	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
 #endif
 	blk_throtl_update_idletime(tg);
 
@@ -1974,8 +2070,8 @@ again:
 		/* if above limits, break to queue */
 		if (!tg_may_dispatch(tg, bio, NULL)) {
 			tg->last_low_overflow_time[rw] = jiffies;
-			if (throtl_can_upgrade(tg->td, tg)) {
-				throtl_upgrade_state(tg->td);
+			if (throtl_can_upgrade(td, tg)) {
+				throtl_upgrade_state(td);
 				goto again;
 			}
 			break;
@@ -2019,7 +2115,7 @@ again:
 
 	tg->last_low_overflow_time[rw] = jiffies;
 
-	tg->td->nr_queued[rw]++;
+	td->nr_queued[rw]++;
 	throtl_add_bio_tg(bio, qn, tg);
 	throttled = true;
 
@@ -2044,20 +2140,67 @@ out:
 	 */
	if (!throttled)
 		bio_clear_flag(bio, BIO_THROTTLED);
+
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+	if (throttled || !td->track_bio_latency)
+		bio->bi_issue_stat.stat |= SKIP_LATENCY;
+#endif
 	return throttled;
 }
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+static void throtl_track_latency(struct throtl_data *td, sector_t size,
+	int op, unsigned long time)
+{
+	struct latency_bucket *latency;
+	int index;
+
+	if (!td || td->limit_index != LIMIT_LOW || op != REQ_OP_READ ||
+	    !blk_queue_nonrot(td->queue))
+		return;
+
+	index = request_bucket_index(size);
+
+	latency = get_cpu_ptr(td->latency_buckets);
+	latency[index].total_latency += time;
+	latency[index].samples++;
+	put_cpu_ptr(td->latency_buckets);
+}
+
+void blk_throtl_stat_add(struct request *rq, u64 time_ns)
+{
+	struct request_queue *q = rq->q;
+	struct throtl_data *td = q->td;
+
+	throtl_track_latency(td, blk_stat_size(&rq->issue_stat),
+		req_op(rq), time_ns >> 10);
+}
+
 void blk_throtl_bio_endio(struct bio *bio)
 {
 	struct throtl_grp *tg;
+	u64 finish_time_ns;
+	unsigned long finish_time;
+	unsigned long start_time;
+	unsigned long lat;
 
 	tg = bio->bi_cg_private;
 	if (!tg)
 		return;
 	bio->bi_cg_private = NULL;
 
-	tg->last_finish_time = ktime_get_ns() >> 10;
+	finish_time_ns = ktime_get_ns();
+	tg->last_finish_time = finish_time_ns >> 10;
+
+	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
+	finish_time = __blk_stat_time(finish_time_ns) >> 10;
+	/* this is only for bio based driver */
+	if (start_time && finish_time > start_time &&
+	    !(bio->bi_issue_stat.stat & SKIP_LATENCY)) {
+		lat = finish_time - start_time;
+		throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
+			bio_op(bio), lat);
+	}
 }
 #endif
 
@@ -2133,6 +2276,12 @@ int blk_throtl_init(struct request_queue *q)
 	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
 	if (!td)
 		return -ENOMEM;
+	td->latency_buckets = __alloc_percpu(sizeof(struct latency_bucket) *
+		LATENCY_BUCKET_SIZE, __alignof__(u64));
+	if (!td->latency_buckets) {
+		kfree(td);
+		return -ENOMEM;
+	}
 
 	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
 	throtl_service_queue_init(&td->service_queue);
@@ -2147,8 +2296,10 @@ int blk_throtl_init(struct request_queue *q)
 
 	/* activate policy */
 	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
-	if (ret)
+	if (ret) {
+		free_percpu(td->latency_buckets);
 		kfree(td);
+	}
 	return ret;
 }
 
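SKIP_LATENCY, set in the hunks above, is a flag stored in the reserved bits at and above BLK_STAT_RES_SHIFT in the u64 issue stat, so a bio that was throttled, or that belongs to a queue where latency is tracked per request instead, is excluded when blk_throtl_bio_endio() computes a latency sample. A standalone sketch of that bit layout; the shift value 62 and the field split are assumptions for illustration, not the kernel's exact constants:

#include <stdint.h>
#include <stdio.h>

#define RES_SHIFT	62			/* assumed: top bits reserved */
#define TIME_MASK	((UINT64_C(1) << RES_SHIFT) - 1)
#define SKIP_LATENCY	(UINT64_C(1) << RES_SHIFT)

int main(void)
{
	uint64_t stat = 123456789;		/* issue timestamp, low bits */

	stat |= SKIP_LATENCY;			/* mark: skip this sample */
	printf("time=%llu skip=%d\n",
	       (unsigned long long)(stat & TIME_MASK),
	       (int)!!(stat & SKIP_LATENCY));
	return 0;
}

Packing the flag into the same word the timestamp already occupies means the endio path can test and clear it without touching any other per-bio state.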
@@ -2157,6 +2308,7 @@ void blk_throtl_exit(struct request_queue *q)
 	BUG_ON(!q->td);
 	throtl_shutdown_wq(q);
 	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
+	free_percpu(q->td->latency_buckets);
 	kfree(q->td);
 }
 
@@ -2181,6 +2333,10 @@ void blk_throtl_register_queue(struct request_queue *q)
 		td->throtl_slice = DFL_THROTL_SLICE_HD;
 #endif
 
+	td->track_bio_latency = !q->mq_ops && !q->request_fn;
+	if (!td->track_bio_latency)
+		blk_stat_enable_accounting(q);
+
 	/*
 	 * some tg are created before queue is fully initialized, eg, nonrot
 	 * isn't initialized yet
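The registration hunk decides how latency samples reach throtl_track_latency(): bio-based queues (neither mq_ops nor request_fn set) time each bio directly in blk_throtl_bio_endio(), while request-based queues enable blk-stat accounting and are fed through blk_throtl_stat_add() instead. A sketch of that decision with stand-in types; fake_queue and its fields are hypothetical simplifications of struct request_queue:

#include <stdbool.h>
#include <stdio.h>

struct fake_queue {
	void *mq_ops;		/* non-NULL: blk-mq (request-based) */
	void *request_fn;	/* non-NULL: legacy request-based */
};

/* mirrors td->track_bio_latency = !q->mq_ops && !q->request_fn */
static bool track_bio_latency(const struct fake_queue *q)
{
	return !q->mq_ops && !q->request_fn;
}

int main(void)
{
	struct fake_queue bio_based = { 0 };
	struct fake_queue mq = { .mq_ops = (void *)1 };

	printf("bio-based: %d, blk-mq: %d\n",
	       track_bio_latency(&bio_based), track_bio_latency(&mq));
	return 0;
}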