diff options
Diffstat (limited to 'block/blk-iocost.c')
-rw-r--r-- | block/blk-iocost.c | 86 |
1 files changed, 65 insertions, 21 deletions
diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 7c1fe605d0d6..8ac4aad66ebc 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -260,6 +260,7 @@ enum { VTIME_PER_SEC_SHIFT = 37, VTIME_PER_SEC = 1LLU << VTIME_PER_SEC_SHIFT, VTIME_PER_USEC = VTIME_PER_SEC / USEC_PER_SEC, + VTIME_PER_NSEC = VTIME_PER_SEC / NSEC_PER_SEC, /* bound vrate adjustments within two orders of magnitude */ VRATE_MIN_PPM = 10000, /* 1% */ @@ -1206,14 +1207,14 @@ static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost) +static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct blkcg_gq *blkg = iocg_to_blkg(iocg); u64 vtime = atomic64_read(&iocg->vtime); u64 vmargin = ioc->margin_us * now->vrate; u64 margin_ns = ioc->margin_us * NSEC_PER_USEC; - u64 expires, oexpires; + u64 delta_ns, expires, oexpires; u32 hw_inuse; lockdep_assert_held(&iocg->waitq.lock); @@ -1236,15 +1237,10 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost) return false; /* use delay */ - if (cost) { - u64 cost_ns = DIV64_U64_ROUND_UP(cost * NSEC_PER_USEC, - now->vrate); - blkcg_add_delay(blkg, now->now_ns, cost_ns); - } - blkcg_use_delay(blkg); - - expires = now->now_ns + DIV64_U64_ROUND_UP(vtime - now->vnow, - now->vrate) * NSEC_PER_USEC; + delta_ns = DIV64_U64_ROUND_UP(vtime - now->vnow, + now->vrate) * NSEC_PER_USEC; + blkcg_set_delay(blkg, delta_ns); + expires = now->now_ns + delta_ns; /* if already active and close enough, don't bother */ oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer)); @@ -1265,7 +1261,7 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer) spin_lock_irqsave(&iocg->waitq.lock, flags); ioc_now(iocg->ioc, &now); - iocg_kick_delay(iocg, &now, 0); + iocg_kick_delay(iocg, &now); spin_unlock_irqrestore(&iocg->waitq.lock, flags); return HRTIMER_NORESTART; @@ -1383,7 +1379,7 @@ static void ioc_timer_fn(struct timer_list *timer) if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) { /* might be oversleeping vtime / hweight changes, kick */ iocg_kick_waitq(iocg, &now); - iocg_kick_delay(iocg, &now, 0); + iocg_kick_delay(iocg, &now); } else if (iocg_is_idle(iocg)) { /* no waiter and idle, deactivate */ iocg->last_inuse = iocg->inuse; @@ -1543,19 +1539,39 @@ skip_surplus_transfers: if (rq_wait_pct > RQ_WAIT_BUSY_PCT || missed_ppm[READ] > ppm_rthr || missed_ppm[WRITE] > ppm_wthr) { + /* clearly missing QoS targets, slow down vrate */ ioc->busy_level = max(ioc->busy_level, 0); ioc->busy_level++; } else if (rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 && missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 && missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) { - /* take action iff there is contention */ - if (nr_shortages && !nr_lagging) { + /* QoS targets are being met with >25% margin */ + if (nr_shortages) { + /* + * We're throttling while the device has spare + * capacity. If vrate was being slowed down, stop. + */ ioc->busy_level = min(ioc->busy_level, 0); - /* redistribute surpluses first */ - if (!nr_surpluses) + + /* + * If there are IOs spanning multiple periods, wait + * them out before pushing the device harder. If + * there are surpluses, let redistribution work it + * out first. + */ + if (!nr_lagging && !nr_surpluses) ioc->busy_level--; + } else { + /* + * Nobody is being throttled and the users aren't + * issuing enough IOs to saturate the device. We + * simply don't know how close the device is to + * saturation. Coast. + */ + ioc->busy_level = 0; } } else { + /* inside the hysterisis margin, we're good */ ioc->busy_level = 0; } @@ -1678,6 +1694,31 @@ static u64 calc_vtime_cost(struct bio *bio, struct ioc_gq *iocg, bool is_merge) return cost; } +static void calc_size_vtime_cost_builtin(struct request *rq, struct ioc *ioc, + u64 *costp) +{ + unsigned int pages = blk_rq_stats_sectors(rq) >> IOC_SECT_TO_PAGE_SHIFT; + + switch (req_op(rq)) { + case REQ_OP_READ: + *costp = pages * ioc->params.lcoefs[LCOEF_RPAGE]; + break; + case REQ_OP_WRITE: + *costp = pages * ioc->params.lcoefs[LCOEF_WPAGE]; + break; + default: + *costp = 0; + } +} + +static u64 calc_size_vtime_cost(struct request *rq, struct ioc *ioc) +{ + u64 cost; + + calc_size_vtime_cost_builtin(rq, ioc, &cost); + return cost; +} + static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) { struct blkcg_gq *blkg = bio->bi_blkg; @@ -1762,7 +1803,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) */ if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) { iocg->abs_vdebt += abs_cost; - if (iocg_kick_delay(iocg, &now, cost)) + if (iocg_kick_delay(iocg, &now)) blkcg_schedule_throttle(rqos->q, (bio->bi_opf & REQ_SWAP) == REQ_SWAP); spin_unlock_irq(&iocg->waitq.lock); @@ -1850,7 +1891,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, spin_lock_irqsave(&iocg->waitq.lock, flags); if (likely(!list_empty(&iocg->active_list))) { iocg->abs_vdebt += abs_cost; - iocg_kick_delay(iocg, &now, cost); + iocg_kick_delay(iocg, &now); } else { iocg_commit_bio(iocg, bio, cost); } @@ -1868,7 +1909,7 @@ static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio) static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq) { struct ioc *ioc = rqos_to_ioc(rqos); - u64 on_q_ns, rq_wait_ns; + u64 on_q_ns, rq_wait_ns, size_nsec; int pidx, rw; if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns) @@ -1889,8 +1930,10 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq) on_q_ns = ktime_get_ns() - rq->alloc_time_ns; rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns; + size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC); - if (on_q_ns <= ioc->params.qos[pidx] * NSEC_PER_USEC) + if (on_q_ns <= size_nsec || + on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC) this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_met); else this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_missed); @@ -2297,6 +2340,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, spin_lock_irq(&ioc->lock); if (enable) { + blk_stat_enable_accounting(ioc->rqos.q); blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, ioc->rqos.q); ioc->enabled = true; } else { |