summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2008-08-25 12:47:21 +0200
committer Jens Axboe <jens.axboe@oracle.com> 2008-10-09 08:56:06 +0200
commit c9959059161ddd7bf4670cf47367033d6b2f79c4 (patch)
tree 6454db55f8e34361fe472358e10e0c5cfac1e366 /block
parent block: fix disk->part[] dereferencing race (diff)
download linux-c9959059161ddd7bf4670cf47367033d6b2f79c4.tar.xz
linux-c9959059161ddd7bf4670cf47367033d6b2f79c4.zip
block: fix diskstats access
There are two variants of stat functions - ones prefixed with double underbars which don't care about preemption and ones without which disable preemption before manipulating per-cpu counters. It's unclear whether the underbarred ones assume that preemption is disabled on entry as some callers don't do that. This patch unifies diskstats access by implementing disk_stat_lock() and disk_stat_unlock() which take care of both RCU (for partition access) and preemption (for per-cpu counter access). diskstats access should always be enclosed between the two functions. As such, there's no need for the versions which disable preemption. They're removed and double underbars ones are renamed to drop the underbars. As an extra argument is added, there's no danger of using the old version unconverted. disk_stat_lock() uses get_cpu() and returns the cpu index and all diskstat functions which access per-cpu counters now have a @cpu argument to help RT. This change adds RCU or preemption operations at some places but also collapses several preemption ops into one at others. Overall, the performance difference should be negligible as all involved ops are very lightweight per-cpu ones. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
-rw-r--r-- block/blk-core.c 52
-rw-r--r-- block/blk-merge.c 11
-rw-r--r-- block/genhd.c 20
3 files changed, 44 insertions, 39 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index d6128d9ad601..e0a5ee36849c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -56,25 +56,26 @@ static void drive_stat_acct(struct request *rq, int new_io)
{
struct hd_struct *part;
int rw = rq_data_dir(rq);
+ int cpu;
if (!blk_fs_request(rq) || !rq->rq_disk)
return;
- rcu_read_lock();
-
+ cpu = disk_stat_lock();
part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
+
if (!new_io)
- __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
+ all_stat_inc(cpu, rq->rq_disk, part, merges[rw], rq->sector);
else {
- disk_round_stats(rq->rq_disk);
+ disk_round_stats(cpu, rq->rq_disk);
rq->rq_disk->in_flight++;
if (part) {
- part_round_stats(part);
+ part_round_stats(cpu, part);
part->in_flight++;
}
}
- rcu_read_unlock();
+ disk_stat_unlock();
}
void blk_queue_congestion_threshold(struct request_queue *q)
@@ -997,7 +998,7 @@ static inline void add_request(struct request_queue *q, struct request *req)
* /proc/diskstats. This accounts immediately for all queue usage up to
* the current jiffies and restarts the counters again.
*/
-void disk_round_stats(struct gendisk *disk)
+void disk_round_stats(int cpu, struct gendisk *disk)
{
unsigned long now = jiffies;
@@ -1005,15 +1006,15 @@ void disk_round_stats(struct gendisk *disk)
return;
if (disk->in_flight) {
- __disk_stat_add(disk, time_in_queue,
- disk->in_flight * (now - disk->stamp));
- __disk_stat_add(disk, io_ticks, (now - disk->stamp));
+ disk_stat_add(cpu, disk, time_in_queue,
+ disk->in_flight * (now - disk->stamp));
+ disk_stat_add(cpu, disk, io_ticks, (now - disk->stamp));
}
disk->stamp = now;
}
EXPORT_SYMBOL_GPL(disk_round_stats);
-void part_round_stats(struct hd_struct *part)
+void part_round_stats(int cpu, struct hd_struct *part)
{
unsigned long now = jiffies;
@@ -1021,9 +1022,9 @@ void part_round_stats(struct hd_struct *part)
return;
if (part->in_flight) {
- __part_stat_add(part, time_in_queue,
- part->in_flight * (now - part->stamp));
- __part_stat_add(part, io_ticks, (now - part->stamp));
+ part_stat_add(cpu, part, time_in_queue,
+ part->in_flight * (now - part->stamp));
+ part_stat_add(cpu, part, io_ticks, (now - part->stamp));
}
part->stamp = now;
}
@@ -1563,12 +1564,13 @@ static int __end_that_request_first(struct request *req, int error,
if (blk_fs_request(req) && req->rq_disk) {
const int rw = rq_data_dir(req);
struct hd_struct *part;
+ int cpu;
- rcu_read_lock();
+ cpu = disk_stat_lock();
part = disk_map_sector_rcu(req->rq_disk, req->sector);
- all_stat_add(req->rq_disk, part, sectors[rw],
- nr_bytes >> 9, req->sector);
- rcu_read_unlock();
+ all_stat_add(cpu, req->rq_disk, part, sectors[rw],
+ nr_bytes >> 9, req->sector);
+ disk_stat_unlock();
}
total_bytes = bio_nbytes = 0;
@@ -1753,21 +1755,21 @@ static void end_that_request_last(struct request *req, int error)
unsigned long duration = jiffies - req->start_time;
const int rw = rq_data_dir(req);
struct hd_struct *part;
+ int cpu;
- rcu_read_lock();
-
+ cpu = disk_stat_lock();
part = disk_map_sector_rcu(disk, req->sector);
- __all_stat_inc(disk, part, ios[rw], req->sector);
- __all_stat_add(disk, part, ticks[rw], duration, req->sector);
- disk_round_stats(disk);
+ all_stat_inc(cpu, disk, part, ios[rw], req->sector);
+ all_stat_add(cpu, disk, part, ticks[rw], duration, req->sector);
+ disk_round_stats(cpu, disk);
disk->in_flight--;
if (part) {
- part_round_stats(part);
+ part_round_stats(cpu, part);
part->in_flight--;
}
- rcu_read_unlock();
+ disk_stat_unlock();
}
if (req->end_io)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index eb2a3ca58303..d926a24bf1fd 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -388,18 +388,19 @@ static int attempt_merge(struct request_queue *q, struct request *req,
if (req->rq_disk) {
struct hd_struct *part;
+ int cpu;
- rcu_read_lock();
-
+ cpu = disk_stat_lock();
part = disk_map_sector_rcu(req->rq_disk, req->sector);
- disk_round_stats(req->rq_disk);
+
+ disk_round_stats(cpu, req->rq_disk);
req->rq_disk->in_flight--;
if (part) {
- part_round_stats(part);
+ part_round_stats(cpu, part);
part->in_flight--;
}
- rcu_read_unlock();
+ disk_stat_unlock();
}
req->ioprio = ioprio_best(req->ioprio, next->ioprio);
diff --git a/block/genhd.c b/block/genhd.c
index b431d6543942..430626e440f0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -633,10 +633,11 @@ static ssize_t disk_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
+ int cpu;
- preempt_disable();
- disk_round_stats(disk);
- preempt_enable();
+ cpu = disk_stat_lock();
+ disk_round_stats(cpu, disk);
+ disk_stat_unlock();
return sprintf(buf,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
@@ -749,6 +750,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
struct disk_part_iter piter;
struct hd_struct *hd;
char buf[BDEVNAME_SIZE];
+ int cpu;
/*
if (&gp->dev.kobj.entry == block_class.devices.next)
@@ -758,9 +760,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
"\n\n");
*/
- preempt_disable();
- disk_round_stats(gp);
- preempt_enable();
+ cpu = disk_stat_lock();
+ disk_round_stats(cpu, gp);
+ disk_stat_unlock();
seq_printf(seqf, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n",
MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)),
disk_name(gp, 0, buf),
@@ -777,9 +779,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
/* now show all non-0 size partitions of it */
disk_part_iter_init(&piter, gp, 0);
while ((hd = disk_part_iter_next(&piter))) {
- preempt_disable();
- part_round_stats(hd);
- preempt_enable();
+ cpu = disk_stat_lock();
+ part_round_stats(cpu, hd);
+ disk_stat_unlock();
seq_printf(seqf, "%4d %4d %s %lu %lu %llu "
"%u %lu %lu %llu %u %u %u %u\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),