Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--   block/blk-throttle.c   128
1 file changed, 71 insertions(+), 57 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index fee3325edf27..b771c4299982 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -15,10 +15,10 @@
 #include "blk-cgroup-rwstat.h"
 
 /* Max dispatch from a group in 1 round */
-static int throtl_grp_quantum = 8;
+#define THROTL_GRP_QUANTUM 8
 
 /* Total max dispatch from all groups in one round */
-static int throtl_quantum = 32;
+#define THROTL_QUANTUM 32
 
 /* Throttling is performed over a slice and after that slice is renewed */
 #define DFL_THROTL_SLICE_HD (HZ / 10)
@@ -150,7 +150,7 @@ struct throtl_grp {
         /* user configured IOPS limits */
         unsigned int iops_conf[2][LIMIT_CNT];
 
-        /* Number of bytes disptached in current slice */
+        /* Number of bytes dispatched in current slice */
         uint64_t bytes_disp[2];
         /* Number of bio's dispatched in current slice */
         unsigned int io_disp[2];
@@ -423,12 +423,13 @@ static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
  */
 static struct bio *throtl_peek_queued(struct list_head *queued)
 {
-        struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node);
+        struct throtl_qnode *qn;
         struct bio *bio;
 
         if (list_empty(queued))
                 return NULL;
 
+        qn = list_first_entry(queued, struct throtl_qnode, node);
         bio = bio_list_peek(&qn->bios);
         WARN_ON_ONCE(!bio);
         return bio;
@@ -451,12 +452,13 @@ static struct bio *throtl_peek_queued(struct list_head *queued)
 static struct bio *throtl_pop_queued(struct list_head *queued,
                                      struct throtl_grp **tg_to_put)
 {
-        struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node);
+        struct throtl_qnode *qn;
         struct bio *bio;
 
         if (list_empty(queued))
                 return NULL;
 
+        qn = list_first_entry(queued, struct throtl_qnode, node);
         bio = bio_list_pop(&qn->bios);
         WARN_ON_ONCE(!bio);
 
@@ -636,9 +638,6 @@ static struct throtl_grp *
 throtl_rb_first(struct throtl_service_queue *parent_sq)
 {
         struct rb_node *n;
-        /* Service tree is empty */
-        if (!parent_sq->nr_pending)
-                return NULL;
 
         n = rb_first_cached(&parent_sq->pending_tree);
         WARN_ON_ONCE(!n);
@@ -692,29 +691,21 @@ static void tg_service_queue_add(struct throtl_grp *tg)
                                  leftmost);
 }
 
-static void __throtl_enqueue_tg(struct throtl_grp *tg)
-{
-        tg_service_queue_add(tg);
-        tg->flags |= THROTL_TG_PENDING;
-        tg->service_queue.parent_sq->nr_pending++;
-}
-
 static void throtl_enqueue_tg(struct throtl_grp *tg)
 {
-        if (!(tg->flags & THROTL_TG_PENDING))
-                __throtl_enqueue_tg(tg);
-}
-
-static void __throtl_dequeue_tg(struct throtl_grp *tg)
-{
-        throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
-        tg->flags &= ~THROTL_TG_PENDING;
+        if (!(tg->flags & THROTL_TG_PENDING)) {
+                tg_service_queue_add(tg);
+                tg->flags |= THROTL_TG_PENDING;
+                tg->service_queue.parent_sq->nr_pending++;
+        }
 }
 
 static void throtl_dequeue_tg(struct throtl_grp *tg)
 {
-        if (tg->flags & THROTL_TG_PENDING)
-                __throtl_dequeue_tg(tg);
+        if (tg->flags & THROTL_TG_PENDING) {
+                throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
+                tg->flags &= ~THROTL_TG_PENDING;
+        }
 }
 
 /* Call with queue lock held */
@@ -817,7 +808,7 @@ static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
 static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
                                        unsigned long jiffy_end)
 {
-        tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice);
+        throtl_set_slice_end(tg, rw, jiffy_end);
         throtl_log(&tg->service_queue,
                    "[%c] extend slice start=%lu end=%lu jiffies=%lu",
                    rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -852,7 +843,7 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
         /*
          * A bio has been dispatched. Also adjust slice_end. It might happen
          * that initially cgroup limit was very low resulting in high
-         * slice_end, but later limit was bumped up and bio was dispached
+         * slice_end, but later limit was bumped up and bio was dispatched
          * sooner, then we need to reduce slice_end. A high bogus slice_end
          * is bad because it does not allow new slice to start.
          */
@@ -894,13 +885,19 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
 }
 
 static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
-                                  unsigned long *wait)
+                                  u32 iops_limit, unsigned long *wait)
 {
         bool rw = bio_data_dir(bio);
         unsigned int io_allowed;
         unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
         u64 tmp;
 
+        if (iops_limit == UINT_MAX) {
+                if (wait)
+                        *wait = 0;
+                return true;
+        }
+
         jiffy_elapsed = jiffies - tg->slice_start[rw];
 
         /* Round up to the next throttle slice, wait time must be nonzero */
@@ -913,7 +910,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
          * have been trimmed.
          */
 
-        tmp = (u64)tg_iops_limit(tg, rw) * jiffy_elapsed_rnd;
+        tmp = (u64)iops_limit * jiffy_elapsed_rnd;
         do_div(tmp, HZ);
 
         if (tmp > UINT_MAX)
@@ -936,13 +933,19 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
 }
 
 static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
-                                 unsigned long *wait)
+                                 u64 bps_limit, unsigned long *wait)
 {
         bool rw = bio_data_dir(bio);
         u64 bytes_allowed, extra_bytes, tmp;
         unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
         unsigned int bio_size = throtl_bio_data_size(bio);
 
+        if (bps_limit == U64_MAX) {
+                if (wait)
+                        *wait = 0;
+                return true;
+        }
+
         jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
 
         /* Slice has just started. Consider one slice interval */
@@ -951,7 +954,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
         jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
 
-        tmp = tg_bps_limit(tg, rw) * jiffy_elapsed_rnd;
+        tmp = bps_limit * jiffy_elapsed_rnd;
         do_div(tmp, HZ);
         bytes_allowed = tmp;
@@ -963,7 +966,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
 
         /* Calc approx time to dispatch */
         extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
-        jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw));
+        jiffy_wait = div64_u64(extra_bytes * HZ, bps_limit);
 
         if (!jiffy_wait)
                 jiffy_wait = 1;
@@ -987,6 +990,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 {
         bool rw = bio_data_dir(bio);
         unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
+        u64 bps_limit = tg_bps_limit(tg, rw);
+        u32 iops_limit = tg_iops_limit(tg, rw);
 
         /*
          * Currently whole state machine of group depends on first bio
@@ -998,8 +1003,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
                bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
 
         /* If tg->bps = -1, then BW is unlimited */
-        if (tg_bps_limit(tg, rw) == U64_MAX &&
-            tg_iops_limit(tg, rw) == UINT_MAX) {
+        if (bps_limit == U64_MAX && iops_limit == UINT_MAX) {
                 if (wait)
                         *wait = 0;
                 return true;
@@ -1021,8 +1025,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
                                 jiffies + tg->td->throtl_slice);
         }
 
-        if (tg_with_in_bps_limit(tg, bio, &bps_wait) &&
-            tg_with_in_iops_limit(tg, bio, &iops_wait)) {
+        if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
+            tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
                 if (wait)
                         *wait = 0;
                 return true;
@@ -1082,7 +1086,7 @@ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
          * If @tg doesn't currently have any bios queued in the same
          * direction, queueing @bio can change when @tg should be
          * dispatched. Mark that @tg was empty. This is automatically
-         * cleaered on the next tg_update_disptime().
+         * cleared on the next tg_update_disptime().
          */
         if (!sq->nr_queued[rw])
                 tg->flags |= THROTL_TG_WAS_EMPTY;
@@ -1175,8 +1179,8 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
 {
         struct throtl_service_queue *sq = &tg->service_queue;
         unsigned int nr_reads = 0, nr_writes = 0;
-        unsigned int max_nr_reads = throtl_grp_quantum*3/4;
-        unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
+        unsigned int max_nr_reads = THROTL_GRP_QUANTUM * 3 / 4;
+        unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads;
         struct bio *bio;
 
         /* Try to dispatch 75% READS and 25% WRITES */
@@ -1209,9 +1213,13 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
         unsigned int nr_disp = 0;
 
         while (1) {
-                struct throtl_grp *tg = throtl_rb_first(parent_sq);
+                struct throtl_grp *tg;
                 struct throtl_service_queue *sq;
 
+                if (!parent_sq->nr_pending)
+                        break;
+
+                tg = throtl_rb_first(parent_sq);
                 if (!tg)
                         break;
@@ -1226,7 +1234,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
                 if (sq->nr_queued[0] || sq->nr_queued[1])
                         tg_update_disptime(tg);
 
-                if (nr_disp >= throtl_quantum)
+                if (nr_disp >= THROTL_QUANTUM)
                         break;
         }
 
@@ -1303,7 +1311,7 @@ again:
                         }
                 }
         } else {
-                /* reached the top-level, queue issueing */
+                /* reached the top-level, queue issuing */
                 queue_work(kthrotld_workqueue, &td->dispatch_work);
         }
 out_unlock:
@@ -1314,8 +1322,8 @@ out_unlock:
  * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
  * @work: work item being executed
  *
- * This function is queued for execution when bio's reach the bio_lists[]
- * of throtl_data->service_queue. Those bio's are ready and issued by this
+ * This function is queued for execution when bios reach the bio_lists[]
+ * of throtl_data->service_queue. Those bios are ready and issued by this
  * function.
  */
 static void blk_throtl_dispatch_work_fn(struct work_struct *work)
@@ -1428,8 +1436,8 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
          * that a group's limit are dropped suddenly and we don't want to
          * account recently dispatched IO with new low rate.
          */
-        throtl_start_new_slice(tg, 0);
-        throtl_start_new_slice(tg, 1);
+        throtl_start_new_slice(tg, READ);
+        throtl_start_new_slice(tg, WRITE);
 
         if (tg->flags & THROTL_TG_PENDING) {
                 tg_update_disptime(tg);
@@ -1674,13 +1682,13 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
                         goto out_finish;
 
                 ret = -EINVAL;
-                if (!strcmp(tok, "rbps"))
+                if (!strcmp(tok, "rbps") && val > 1)
                         v[0] = val;
-                else if (!strcmp(tok, "wbps"))
+                else if (!strcmp(tok, "wbps") && val > 1)
                         v[1] = val;
-                else if (!strcmp(tok, "riops"))
+                else if (!strcmp(tok, "riops") && val > 1)
                         v[2] = min_t(u64, val, UINT_MAX);
-                else if (!strcmp(tok, "wiops"))
+                else if (!strcmp(tok, "wiops") && val > 1)
                         v[3] = min_t(u64, val, UINT_MAX);
                 else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
                         idle_time = val;
@@ -1957,7 +1965,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
         queue_work(kthrotld_workqueue, &td->dispatch_work);
 }
 
-static void throtl_downgrade_state(struct throtl_data *td, int new)
+static void throtl_downgrade_state(struct throtl_data *td)
 {
         td->scale /= 2;
 
@@ -1967,7 +1975,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new)
                 return;
         }
 
-        td->limit_index = new;
+        td->limit_index = LIMIT_LOW;
         td->low_downgrade_time = jiffies;
 }
 
@@ -2054,7 +2062,7 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
          * cgroups
          */
         if (throtl_hierarchy_can_downgrade(tg))
-                throtl_downgrade_state(tg->td, LIMIT_LOW);
+                throtl_downgrade_state(tg->td);
 
         tg->last_bytes_disp[READ] = 0;
         tg->last_bytes_disp[WRITE] = 0;
@@ -2064,10 +2072,14 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
 
 static void blk_throtl_update_idletime(struct throtl_grp *tg)
 {
-        unsigned long now = ktime_get_ns() >> 10;
+        unsigned long now;
         unsigned long last_finish_time = tg->last_finish_time;
 
-        if (now <= last_finish_time || last_finish_time == 0 ||
+        if (last_finish_time == 0)
+                return;
+
+        now = ktime_get_ns() >> 10;
+        if (now <= last_finish_time ||
             last_finish_time == tg->checked_last_finish_time)
                 return;
@@ -2083,7 +2095,7 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
         unsigned long last_latency[2] = { 0 };
         unsigned long latency[2];
 
-        if (!blk_queue_nonrot(td->queue))
+        if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW])
                 return;
         if (time_before(jiffies, td->last_calculate_time + HZ))
                 return;
@@ -2230,7 +2242,7 @@ again:
                 /*
                  * @bio passed through this layer without being throttled.
-                 * Climb up the ladder. If we''re already at the top, it
+                 * Climb up the ladder. If we're already at the top, it
                  * can be executed directly.
                  */
                 qn = &tg->qnode_on_parent[rw];
@@ -2321,6 +2333,8 @@ void blk_throtl_bio_endio(struct bio *bio)
         if (!blkg)
                 return;
         tg = blkg_to_tg(blkg);
+        if (!tg->td->limit_valid[LIMIT_LOW])
+                return;
 
         finish_time_ns = ktime_get_ns();
         tg->last_finish_time = finish_time_ns >> 10;
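
Note on the throtl_peek_queued()/throtl_pop_queued() hunks above: the old code computed list_first_entry() before checking list_empty(). On an empty list, list_first_entry() derives an entry pointer from the list head itself; the result is not a valid struct throtl_qnode, so the patch defers the lookup until after the list_empty() check. The following minimal userspace sketch illustrates the pattern; the simplified list helpers and the qnode/peek_queued names are stand-ins for this illustration, not the kernel's actual definitions.

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's list primitives. */
struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
#define list_first_entry(head, type, member) \
        container_of((head)->next, type, member)

static int list_empty(const struct list_head *head)
{
        return head->next == head;
}

struct qnode {
        struct list_head node;
        int payload;
};

/*
 * Mirrors the patched throtl_peek_queued(): check for emptiness first,
 * and only then compute the entry pointer. Before the patch the pointer
 * was computed unconditionally; on an empty list it aliased the list
 * head itself, which is not a valid struct qnode.
 */
static struct qnode *peek_queued(struct list_head *queued)
{
        if (list_empty(queued))
                return NULL;
        return list_first_entry(queued, struct qnode, node);
}

int main(void)
{
        struct list_head queued = { &queued, &queued }; /* empty list */

        printf("peek on empty list: %p\n", (void *)peek_queued(&queued));
        return 0;
}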
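The tg_with_in_bps_limit()/tg_with_in_iops_limit() hunks show a second pattern: tg_may_dispatch() now reads each limit once and passes it down, and the callees return immediately with zero wait when the limit carries the "unlimited" sentinel (U64_MAX or UINT_MAX), skipping the slice arithmetic. Below is a rough standalone sketch of that shape; the names within_bps_limit and BPS_UNLIMITED are illustrative, not the kernel's, and the accounting is deliberately trivial.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BPS_UNLIMITED UINT64_MAX   /* stands in for the kernel's U64_MAX */

/*
 * The caller samples the configured limit once and hands it down; the
 * sentinel means "no limit", so the hot path can bail out before any
 * slice accounting. *wait is the back-off the caller should apply.
 */
static bool within_bps_limit(uint64_t bps_limit, uint64_t bytes_disp,
                             uint64_t bytes_allowed, unsigned long *wait)
{
        if (bps_limit == BPS_UNLIMITED) {
                if (wait)
                        *wait = 0;
                return true;
        }

        if (bytes_disp <= bytes_allowed) {
                if (wait)
                        *wait = 0;
                return true;
        }

        /* Over budget: report a crude wait of one tick per extra byte. */
        if (wait)
                *wait = (unsigned long)(bytes_disp - bytes_allowed);
        return false;
}

int main(void)
{
        unsigned long wait;

        printf("unlimited: %d\n",
               within_bps_limit(BPS_UNLIMITED, 100, 0, &wait));
        printf("throttled: %d (wait=%lu)\n",
               within_bps_limit(1024, 200, 150, &wait), wait);
        return 0;
}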