diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bfq-iosched.c | 135 | ||||
-rw-r--r-- | block/blk-cgroup.c | 9 | ||||
-rw-r--r-- | block/blk-iolatency.c | 3 | ||||
-rw-r--r-- | block/blk-mq-sched.h | 9 | ||||
-rw-r--r-- | block/blk-mq.c | 10 | ||||
-rw-r--r-- | block/blk-rq-qos.c | 7 | ||||
-rw-r--r-- | block/blk-settings.c | 3 | ||||
-rw-r--r-- | block/genhd.c | 2 |
8 files changed, 111 insertions, 67 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 72860325245a..b33be928d164 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -1924,12 +1924,13 @@ static void bfq_add_request(struct request *rq) * confirmed no later than during the next * I/O-plugging interval for bfqq. */ - if (!bfq_bfqq_has_short_ttime(bfqq) && + if (bfqd->last_completed_rq_bfqq && + !bfq_bfqq_has_short_ttime(bfqq) && ktime_get_ns() - bfqd->last_completion < 200 * NSEC_PER_USEC) { if (bfqd->last_completed_rq_bfqq != bfqq && - bfqd->last_completed_rq_bfqq != - bfqq->waker_bfqq) { + bfqd->last_completed_rq_bfqq != + bfqq->waker_bfqq) { /* * First synchronization detected with * a candidate waker queue, or with a @@ -2250,9 +2251,14 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, blk_rq_pos(container_of(rb_prev(&req->rb_node), struct request, rb_node))) { struct bfq_queue *bfqq = bfq_init_rq(req); - struct bfq_data *bfqd = bfqq->bfqd; + struct bfq_data *bfqd; struct request *prev, *next_rq; + if (!bfqq) + return; + + bfqd = bfqq->bfqd; + /* Reposition request in its sort_list */ elv_rb_del(&bfqq->sort_list, req); elv_rb_add(&bfqq->sort_list, req); @@ -2299,6 +2305,9 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, struct bfq_queue *bfqq = bfq_init_rq(rq), *next_bfqq = bfq_init_rq(next); + if (!bfqq) + return; + /* * If next and rq belong to the same bfq_queue and next is older * than rq, then reposition rq in the fifo (by substituting next @@ -3354,38 +3363,57 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) * there is no active group, then the primary expectation for * this device is probably a high throughput. * - * We are now left only with explaining the additional - * compound condition that is checked below for deciding - * whether the scenario is asymmetric. To explain this - * compound condition, we need to add that the function + * We are now left only with explaining the two sub-conditions in the + * additional compound condition that is checked below for deciding + * whether the scenario is asymmetric. To explain the first + * sub-condition, we need to add that the function * bfq_asymmetric_scenario checks the weights of only - * non-weight-raised queues, for efficiency reasons (see - * comments on bfq_weights_tree_add()). Then the fact that - * bfqq is weight-raised is checked explicitly here. More - * precisely, the compound condition below takes into account - * also the fact that, even if bfqq is being weight-raised, - * the scenario is still symmetric if all queues with requests - * waiting for completion happen to be - * weight-raised. Actually, we should be even more precise - * here, and differentiate between interactive weight raising - * and soft real-time weight raising. + * non-weight-raised queues, for efficiency reasons (see comments on + * bfq_weights_tree_add()). Then the fact that bfqq is weight-raised + * is checked explicitly here. More precisely, the compound condition + * below takes into account also the fact that, even if bfqq is being + * weight-raised, the scenario is still symmetric if all queues with + * requests waiting for completion happen to be + * weight-raised. Actually, we should be even more precise here, and + * differentiate between interactive weight raising and soft real-time + * weight raising. + * + * The second sub-condition checked in the compound condition is + * whether there is a fair amount of already in-flight I/O not + * belonging to bfqq. If so, I/O dispatching is to be plugged, for the + * following reason. The drive may decide to serve in-flight + * non-bfqq's I/O requests before bfqq's ones, thereby delaying the + * arrival of new I/O requests for bfqq (recall that bfqq is sync). If + * I/O-dispatching is not plugged, then, while bfqq remains empty, a + * basically uncontrolled amount of I/O from other queues may be + * dispatched too, possibly causing the service of bfqq's I/O to be + * delayed even longer in the drive. This problem gets more and more + * serious as the speed and the queue depth of the drive grow, + * because, as these two quantities grow, the probability to find no + * queue busy but many requests in flight grows too. By contrast, + * plugging I/O dispatching minimizes the delay induced by already + * in-flight I/O, and enables bfqq to recover the bandwidth it may + * lose because of this delay. * * As a side note, it is worth considering that the above - * device-idling countermeasures may however fail in the - * following unlucky scenario: if idling is (correctly) - * disabled in a time period during which all symmetry - * sub-conditions hold, and hence the device is allowed to - * enqueue many requests, but at some later point in time some - * sub-condition stops to hold, then it may become impossible - * to let requests be served in the desired order until all - * the requests already queued in the device have been served. + * device-idling countermeasures may however fail in the following + * unlucky scenario: if I/O-dispatch plugging is (correctly) disabled + * in a time period during which all symmetry sub-conditions hold, and + * therefore the device is allowed to enqueue many requests, but at + * some later point in time some sub-condition stops to hold, then it + * may become impossible to make requests be served in the desired + * order until all the requests already queued in the device have been + * served. The last sub-condition commented above somewhat mitigates + * this problem for weight-raised queues. */ static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, struct bfq_queue *bfqq) { return (bfqq->wr_coeff > 1 && - bfqd->wr_busy_queues < - bfq_tot_busy_queues(bfqd)) || + (bfqd->wr_busy_queues < + bfq_tot_busy_queues(bfqd) || + bfqd->rq_in_driver >= + bfqq->dispatched + 4)) || bfq_asymmetric_scenario(bfqd, bfqq); } @@ -4745,6 +4773,8 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) */ void bfq_put_queue(struct bfq_queue *bfqq) { + struct bfq_queue *item; + struct hlist_node *n; #ifdef CONFIG_BFQ_GROUP_IOSCHED struct bfq_group *bfqg = bfqq_group(bfqq); #endif @@ -4789,6 +4819,36 @@ void bfq_put_queue(struct bfq_queue *bfqq) bfqq->bfqd->burst_size--; } + /* + * bfqq does not exist any longer, so it cannot be woken by + * any other queue, and cannot wake any other queue. Then bfqq + * must be removed from the woken list of its possible waker + * queue, and all queues in the woken list of bfqq must stop + * having a waker queue. Strictly speaking, these updates + * should be performed when bfqq remains with no I/O source + * attached to it, which happens before bfqq gets freed. In + * particular, this happens when the last process associated + * with bfqq exits or gets associated with a different + * queue. However, both events lead to bfqq being freed soon, + * and dangling references would come out only after bfqq gets + * freed. So these updates are done here, as a simple and safe + * way to handle all cases. + */ + /* remove bfqq from woken list */ + if (!hlist_unhashed(&bfqq->woken_list_node)) + hlist_del_init(&bfqq->woken_list_node); + + /* reset waker for all queues in woken list */ + hlist_for_each_entry_safe(item, n, &bfqq->woken_list, + woken_list_node) { + item->waker_bfqq = NULL; + bfq_clear_bfqq_has_waker(item); + hlist_del_init(&item->woken_list_node); + } + + if (bfqq->bfqd && bfqq->bfqd->last_completed_rq_bfqq == bfqq) + bfqq->bfqd->last_completed_rq_bfqq = NULL; + kmem_cache_free(bfq_pool, bfqq); #ifdef CONFIG_BFQ_GROUP_IOSCHED bfqg_and_blkg_put(bfqg); @@ -4816,9 +4876,6 @@ static void bfq_put_cooperator(struct bfq_queue *bfqq) static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) { - struct bfq_queue *item; - struct hlist_node *n; - if (bfqq == bfqd->in_service_queue) { __bfq_bfqq_expire(bfqd, bfqq, BFQQE_BUDGET_TIMEOUT); bfq_schedule_dispatch(bfqd); @@ -4828,18 +4885,6 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_put_cooperator(bfqq); - /* remove bfqq from woken list */ - if (!hlist_unhashed(&bfqq->woken_list_node)) - hlist_del_init(&bfqq->woken_list_node); - - /* reset waker for all queues in woken list */ - hlist_for_each_entry_safe(item, n, &bfqq->woken_list, - woken_list_node) { - item->waker_bfqq = NULL; - bfq_clear_bfqq_has_waker(item); - hlist_del_init(&item->woken_list_node); - } - bfq_put_queue(bfqq); /* release process reference */ } @@ -5417,12 +5462,12 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, spin_lock_irq(&bfqd->lock); bfqq = bfq_init_rq(rq); - if (at_head || blk_rq_is_passthrough(rq)) { + if (!bfqq || at_head || blk_rq_is_passthrough(rq)) { if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); else list_add_tail(&rq->queuelist, &bfqd->dispatch); - } else { /* bfqq is assumed to be non null here */ + } else { idle_timer_disabled = __bfq_insert_request(bfqd, rq); /* * Update bfqq, because, if a queue merge has occurred diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 24ed26957367..55a7dc227dfb 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -54,7 +54,7 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ -static bool blkcg_debug_stats = false; +bool blkcg_debug_stats = false; static struct workqueue_struct *blkcg_punt_bio_wq; static bool blkcg_policy_enabled(struct request_queue *q, @@ -944,10 +944,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) dbytes, dios); } - if (!blkcg_debug_stats) - goto next; - - if (atomic_read(&blkg->use_delay)) { + if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) { has_stats = true; off += scnprintf(buf+off, size-off, " use_delay=%d delay_nsec=%llu", @@ -967,7 +964,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) has_stats = true; off += written; } -next: + if (has_stats) { if (off < size - 1) { off += scnprintf(buf+off, size-off, "\n"); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index d973c38ee4fd..0fff7b56df0e 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -917,6 +917,9 @@ static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf, unsigned long long avg_lat; unsigned long long cur_win; + if (!blkcg_debug_stats) + return 0; + if (iolat->ssd) return iolatency_ssd_stat(iolat, buf, size); diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index cf22ab00fefb..126021fc3a11 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -61,15 +61,6 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) e->type->ops.completed_request(rq, now); } -static inline void blk_mq_sched_started_request(struct request *rq) -{ - struct request_queue *q = rq->q; - struct elevator_queue *e = q->elevator; - - if (e && e->type->ops.started_request) - e->type->ops.started_request(rq); -} - static inline void blk_mq_sched_requeue_request(struct request *rq) { struct request_queue *q = rq->q; diff --git a/block/blk-mq.c b/block/blk-mq.c index b038ec680e84..f78d3287dd82 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -669,8 +669,6 @@ void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; - blk_mq_sched_started_request(rq); - trace_block_rq_issue(q, rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { @@ -1960,9 +1958,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) rq = blk_mq_get_request(q, bio, &data); if (unlikely(!rq)) { rq_qos_cleanup(q, bio); - if (bio->bi_opf & REQ_NOWAIT) + + cookie = BLK_QC_T_NONE; + if (bio->bi_opf & REQ_NOWAIT_INLINE) + cookie = BLK_QC_T_EAGAIN; + else if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); - return BLK_QC_T_NONE; + return cookie; } trace_block_getrq(q, bio, bio->bi_opf); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 659ccb8b693f..3954c0dc1443 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -202,6 +202,7 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, return -1; data->got_token = true; + smp_wmb(); list_del_init(&curr->entry); wake_up_process(data->task); return 1; @@ -244,7 +245,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, return; prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); + has_sleeper = !wq_has_single_sleeper(&rqw->wait); do { + /* The memory barrier in set_task_state saves us here. */ if (data.got_token) break; if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { @@ -255,12 +258,14 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, * which means we now have two. Put our local token * and wake anyone else potentially waiting for one. */ + smp_rmb(); if (data.got_token) cleanup_cb(rqw, private_data); break; } io_schedule(); - has_sleeper = false; + has_sleeper = true; + set_current_state(TASK_UNINTERRUPTIBLE); } while (1); finish_wait(&rqw->wait, &data.wq); } diff --git a/block/blk-settings.c b/block/blk-settings.c index 2ae348c101a0..2c1831207a8f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -752,7 +752,8 @@ void blk_queue_virt_boundary(struct request_queue *q, unsigned long mask) * page (which might not be idential to the Linux PAGE_SIZE). Because * of that they are not limited by our notion of "segment size". */ - q->limits.max_segment_size = UINT_MAX; + if (mask) + q->limits.max_segment_size = UINT_MAX; } EXPORT_SYMBOL(blk_queue_virt_boundary); diff --git a/block/genhd.c b/block/genhd.c index 97887e59f3b2..54f1f0d381f4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1969,7 +1969,7 @@ static const struct attribute *disk_events_attrs[] = { * The default polling interval can be specified by the kernel * parameter block.events_dfl_poll_msecs which defaults to 0 * (disable). This can also be modified runtime by writing to - * /sys/module/block/events_dfl_poll_msecs. + * /sys/module/block/parameters/events_dfl_poll_msecs. */ static int disk_events_set_dfl_poll_msecs(const char *val, const struct kernel_param *kp) |