dm cache: fix race condition in the writeback mode overwrite_bio optimisation

When a DM cache in writeback mode moves data between the slow and fast device it can often avoid a copy if the triggering bio either: i) covers the whole block (no point copying if we're about to overwrite it) ii) the migration is a promotion and the origin block is currently discarded Prior to this fix there was a race with case (ii). The discard status was checked with a shared lock held (rather than exclusive). This meant another bio could run in parallel and write data to the origin, removing the discard state. After the promotion the parallel write would have been lost. With this fix the discard status is re-checked once the exclusive lock has been aquired. If the block is no longer discarded it falls back to the slower full copy path. Fixes: b29d4986d ("dm cache: significant rework to leverage dm-bio-prison-v2") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
author: Joe Thornber <ejt@redhat.com> 2017-11-10 13:53:31 +0100
committer: Mike Snitzer <snitzer@redhat.com> 2017-11-10 21:43:39 +0100
commit: d1260e2a3f85f4c1010510a15f89597001318b1b (patch)
tree: 55b36baaa68a1c813e457e4d20c734ba353886f5 /drivers/md
parent: dm cache: convert dm_cache_metadata.ref_count from atomic_t to refcount_t (diff)
download: linux-d1260e2a3f85f4c1010510a15f89597001318b1b.tar.xz
linux-d1260e2a3f85f4c1010510a15f89597001318b1b.zip
1 files changed, 53 insertions, 33 deletions
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 8785134c9f1f..0b7edfd0b454 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -1201,6 +1201,18 @@ static void background_work_end(struct cache *cache)
 
 /*----------------------------------------------------------------*/
 
+static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
+{
+	return (bio_data_dir(bio) == WRITE) &&
+		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
+}
+
+static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
+{
+	return writeback_mode(&cache->features) &&
+		(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
+}
+
 static void quiesce(struct dm_cache_migration *mg,
 		    void (*continuation)(struct work_struct *))
 {
@@ -1474,13 +1486,51 @@ static void mg_upgrade_lock(struct work_struct *ws)
 	}
 }
 
+static void mg_full_copy(struct work_struct *ws)
+{
+	struct dm_cache_migration *mg = ws_to_mg(ws);
+	struct cache *cache = mg->cache;
+	struct policy_work *op = mg->op;
+	bool is_policy_promote = (op->op == POLICY_PROMOTE);
+
+	if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
+	    is_discarded_oblock(cache, op->oblock)) {
+		mg_upgrade_lock(ws);
+		return;
+	}
+
+	init_continuation(&mg->k, mg_upgrade_lock);
+
+	if (copy(mg, is_policy_promote)) {
+		DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
+		mg->k.input = BLK_STS_IOERR;
+		mg_complete(mg, false);
+	}
+}
+
 static void mg_copy(struct work_struct *ws)
 {
-	int r;
 	struct dm_cache_migration *mg = ws_to_mg(ws);
 
 	if (mg->overwrite_bio) {
 		/*
+		 * No exclusive lock was held when we last checked if the bio
+		 * was optimisable.  So we have to check again in case things
+		 * have changed (eg, the block may no longer be discarded).
+		 */
+		if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
+			/*
+			 * Fallback to a real full copy after doing some tidying up.
+			 */
+			bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
+			BUG_ON(rb); /* An exclussive lock must _not_ be held for this block */
+			mg->overwrite_bio = NULL;
+			inc_io_migrations(mg->cache);
+			mg_full_copy(ws);
+			return;
+		}
+
+		/*
 		 * It's safe to do this here, even though it's new data
 		 * because all IO has been locked out of the block.
 		 *
@@ -1489,26 +1539,8 @@ static void mg_copy(struct work_struct *ws)
 		 */
 		overwrite(mg, mg_update_metadata_after_copy);
 
-	} else {
-		struct cache *cache = mg->cache;
-		struct policy_work *op = mg->op;
-		bool is_policy_promote = (op->op == POLICY_PROMOTE);
-
-		if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
-		    is_discarded_oblock(cache, op->oblock)) {
-			mg_upgrade_lock(ws);
-			return;
-		}
-
-		init_continuation(&mg->k, mg_upgrade_lock);
-
-		r = copy(mg, is_policy_promote);
-		if (r) {
-			DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
-			mg->k.input = BLK_STS_IOERR;
-			mg_complete(mg, false);
-		}
-	}
+	} else
+		mg_full_copy(ws);
 }
 
 static int mg_lock_writes(struct dm_cache_migration *mg)
@@ -1748,18 +1780,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
 
 /*----------------------------------------------------------------*/
 
-static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
-{
-	return (bio_data_dir(bio) == WRITE) &&
-		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
-}
-
-static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
-{
-	return writeback_mode(&cache->features) &&
-		(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
-}
-
 static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
 		   bool *commit_needed)
 {
author	Joe Thornber <ejt@redhat.com>	2017-11-10 13:53:31 +0100
committer	Mike Snitzer <snitzer@redhat.com>	2017-11-10 21:43:39 +0100
commit	d1260e2a3f85f4c1010510a15f89597001318b1b (patch)
tree	55b36baaa68a1c813e457e4d20c734ba353886f5 /drivers/md
parent	dm cache: convert dm_cache_metadata.ref_count from atomic_t to refcount_t (diff)
download	linux-d1260e2a3f85f4c1010510a15f89597001318b1b.tar.xz linux-d1260e2a3f85f4c1010510a15f89597001318b1b.zip