block: fix disk->part[] dereferencing race

disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption. This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions. * Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly. * Use RCU for disk->__part[] dereferencing. * Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively. * Iterators are implemented to help iterate through all partitions safely. * Functions which require RCU readlock are marked with _rcu suffix. * Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
author: Tejun Heo <tj@kernel.org> 2008-09-03 09:03:02 +0200
committer: Jens Axboe <jens.axboe@oracle.com> 2008-10-09 08:56:06 +0200
commit: e71bf0d0ee89e51b92776391c5634938236977d5 (patch)
tree: 9fc62352a40ad388deebdd8ed497cab926cf0470 /block/blk-core.c
parent: block: don't depend on consecutive minor space (diff)
download: linux-e71bf0d0ee89e51b92776391c5634938236977d5.tar.xz
linux-e71bf0d0ee89e51b92776391c5634938236977d5.zip
1 files changed, 16 insertions, 4 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index a0dc2e72fcbb..d6128d9ad601 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -60,7 +60,9 @@ static void drive_stat_acct(struct request *rq, int new_io)
 	if (!blk_fs_request(rq) || !rq->rq_disk)
 		return;
 
-	part = disk_map_sector(rq->rq_disk, rq->sector);
+	rcu_read_lock();
+
+	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
 	if (!new_io)
 		__all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
 	else {
@@ -71,6 +73,8 @@ static void drive_stat_acct(struct request *rq, int new_io)
 			part->in_flight++;
 		}
 	}
+
+	rcu_read_unlock();
 }
 
 void blk_queue_congestion_threshold(struct request_queue *q)
@@ -1557,12 +1561,14 @@ static int __end_that_request_first(struct request *req, int error,
 	}
 
 	if (blk_fs_request(req) && req->rq_disk) {
-		struct hd_struct *part =
-			disk_map_sector(req->rq_disk, req->sector);
 		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
 
+		rcu_read_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
 		all_stat_add(req->rq_disk, part, sectors[rw],
 				nr_bytes >> 9, req->sector);
+		rcu_read_unlock();
 	}
 
 	total_bytes = bio_nbytes = 0;
@@ -1746,7 +1752,11 @@ static void end_that_request_last(struct request *req, int error)
 	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
-		struct hd_struct *part = disk_map_sector(disk, req->sector);
+		struct hd_struct *part;
+
+		rcu_read_lock();
+
+		part = disk_map_sector_rcu(disk, req->sector);
 
 		__all_stat_inc(disk, part, ios[rw], req->sector);
 		__all_stat_add(disk, part, ticks[rw], duration, req->sector);
@@ -1756,6 +1766,8 @@ static void end_that_request_last(struct request *req, int error)
 			part_round_stats(part);
 			part->in_flight--;
 		}
+
+		rcu_read_unlock();
 	}
 
 	if (req->end_io)
author	Tejun Heo <tj@kernel.org>	2008-09-03 09:03:02 +0200
committer	Jens Axboe <jens.axboe@oracle.com>	2008-10-09 08:56:06 +0200
commit	e71bf0d0ee89e51b92776391c5634938236977d5 (patch)
tree	9fc62352a40ad388deebdd8ed497cab926cf0470 /block/blk-core.c
parent	block: don't depend on consecutive minor space (diff)
download	linux-e71bf0d0ee89e51b92776391c5634938236977d5.tar.xz linux-e71bf0d0ee89e51b92776391c5634938236977d5.zip