From b844fe691897221ad0d5e0279c8ea9e3e4a46982 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 5 Apr 2013 15:36:32 +0100
Subject: dm cache: fix writes to cache device in writethrough mode

The dm-cache writethrough strategy introduced by commit e2e74d617eadc15
("dm cache: fix race in writethrough implementation") issues a bio to
the origin device and then, once that completes, remaps and reissues
the same bio to the cache device.  This more conservative in-series
approach was selected to favor correctness over the performance of the
previous parallel writethrough implementation.

However, reusing the same bio to write both the origin and cache
devices overlooked the fact that the block layer's req_bio_endio()
modifies a completing bio's bi_sector and bi_size.  The new
writethrough strategy therefore needs to save these bio fields and
restore them before submission to the cache device; otherwise nothing
gets written to the cache (because bi_size is 0).
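
For reference, req_bio_endio() in block/blk-core.c of this vintage
(excerpted and trimmed here) advances the bio as the request
completes, so by the time the origin write has finished, bi_size has
reached 0 and bi_sector points past the end of the I/O:

  static void req_bio_endio(struct request *rq, struct bio *bio,
                            unsigned int nbytes, int error)
  {
          ...
          bio->bi_size -= nbytes;
          bio->bi_sector += (nbytes >> 9);
          ...
          /* don't actually finish bio if it's part of flush sequence */
          if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
                  bio_endio(bio, error);
  }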

This patch adds a struct dm_bio_details field to struct per_bio_data,
and uses dm_bio_record() and dm_bio_restore() to ensure the bio is
restored before being reissued to the cache device.  Adding such a
large structure to the per_bio_data is not ideal, but we can improve
this later; for now, correctness is the important thing.
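
For the curious, drivers/md/dm-bio-record.h in this era looks roughly
like the following (trimmed); the BIO_MAX_PAGES-sized vector array is
what makes struct dm_bio_details so large:

  struct dm_bio_details {
          sector_t bi_sector;
          struct block_device *bi_bdev;
          unsigned int bi_size;
          unsigned short bi_idx;
          unsigned long bi_flags;
          struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES];
  };

  static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
  {
          unsigned i;

          bio->bi_sector = bd->bi_sector;
          bio->bi_bdev = bd->bi_bdev;
          bio->bi_size = bd->bi_size;
          bio->bi_idx = bd->bi_idx;
          bio->bi_flags = bd->bi_flags;

          for (i = 0; i < bio->bi_vcnt; i++) {
                  bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
                  bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
          }
  }

dm_bio_record() is the mirror image, copying the same fields from the
bio into the dm_bio_details.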

This problem initially went unnoticed because the dm-cache test suite
uses a linear DM device as the dm-cache device's origin device.
Writethrough appeared to work because DM submits a *clone* of the
original bio to the origin device, so the original bio, which was
reused for the cache write, was never touched.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-cache-target.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 66120bd46d15..1ab122a75764 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -6,6 +6,7 @@
 
 #include "dm.h"
 #include "dm-bio-prison.h"
+#include "dm-bio-record.h"
 #include "dm-cache-metadata.h"
 
 #include <linux/dm-io.h>
@@ -205,6 +206,7 @@ struct per_bio_data {
 	struct cache *cache;
 	dm_cblock_t cblock;
 	bio_end_io_t *saved_bi_end_io;
+	struct dm_bio_details bio_details;
 };
 
 struct dm_cache_migration {
@@ -643,6 +645,7 @@ static void writethrough_endio(struct bio *bio, int err)
 		return;
 	}
 
+	dm_bio_restore(&pb->bio_details, bio);
 	remap_to_cache(pb->cache, bio, pb->cblock);
 
 	/*
@@ -667,6 +670,7 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
 	pb->cache = cache;
 	pb->cblock = cblock;
 	pb->saved_bi_end_io = bio->bi_end_io;
+	dm_bio_record(&pb->bio_details, bio);
 	bio->bi_end_io = writethrough_endio;
 
 	remap_to_origin_clear_discard(pb->cache, bio, oblock);
-- 
cgit v1.2.3


From 19b0092e265fe9ab129902373c3127c0e0be3376 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 5 Apr 2013 15:36:34 +0100
Subject: dm cache: reduce bio front_pad size in writeback mode

A recent patch to fix the dm cache target's writethrough mode extended
the bio's front_pad to include a 1056-byte struct dm_bio_details.
Writeback mode doesn't need this, so this patch reduces the
per_bio_data_size in that case from 1096 bytes to 16.

The dm_bio_details structure was added by "dm cache: fix writes to
cache device in writethrough mode", which fixed commit e2e74d617e ("dm
cache: fix race in writethrough implementation").  In writeback mode
we now avoid allocating the writethrough-specific members of the
per_bio_data structure (the dm_bio_details structure included).
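
Concretely, the split looks like this (annotated layout; the field
sizes assume a 64-bit build and are illustrative):

  struct per_bio_data {
          bool tick:1;                             /* \                    */
          unsigned req_nr:2;                       /*  > common: 16 bytes  */
          struct dm_deferred_entry *all_io_entry;  /* /                    */

          /*
           * Writethrough-only tail, sliced off in writeback mode by
           * sizing the per-bio data at offsetof(struct per_bio_data,
           * cache).
           */
          struct cache *cache;
          dm_cblock_t cblock;
          bio_end_io_t *saved_bi_end_io;
          struct dm_bio_details bio_details;       /* 1056 bytes */
  };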

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-cache-target.c | 47 ++++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1ab122a75764..10744091e6ca 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -202,7 +202,11 @@ struct per_bio_data {
 	unsigned req_nr:2;
 	struct dm_deferred_entry *all_io_entry;
 
-	/* writethrough fields */
+	/*
+	 * writethrough fields.  These MUST remain at the end of this
+	 * structure and the 'cache' member must be the first as it
+	 * is used to determine the offset of the writethrough fields.
+	 */
 	struct cache *cache;
 	dm_cblock_t cblock;
 	bio_end_io_t *saved_bi_end_io;
@@ -515,16 +519,28 @@ static void save_stats(struct cache *cache)
 /*----------------------------------------------------------------
  * Per bio data
  *--------------------------------------------------------------*/
-static struct per_bio_data *get_per_bio_data(struct bio *bio)
+
+/*
+ * If using writeback, leave out struct per_bio_data's writethrough fields.
+ */
+#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
+#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
+
+static size_t get_per_bio_data_size(struct cache *cache)
+{
+	return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
+}
+
+static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
 {
-	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
+	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
 	BUG_ON(!pb);
 	return pb;
 }
 
-static struct per_bio_data *init_per_bio_data(struct bio *bio)
+static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
 {
-	struct per_bio_data *pb = get_per_bio_data(bio);
+	struct per_bio_data *pb = get_per_bio_data(bio, data_size);
 
 	pb->tick = false;
 	pb->req_nr = dm_bio_get_target_bio_nr(bio);
@@ -558,7 +574,8 @@ static void remap_to_cache(struct cache *cache, struct bio *bio,
 static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
 {
 	unsigned long flags;
-	struct per_bio_data *pb = get_per_bio_data(bio);
+	size_t pb_data_size = get_per_bio_data_size(cache);
+	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
 	spin_lock_irqsave(&cache->lock, flags);
 	if (cache->need_tick_bio &&
@@ -637,7 +654,7 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
 
 static void writethrough_endio(struct bio *bio, int err)
 {
-	struct per_bio_data *pb = get_per_bio_data(bio);
+	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
 	bio->bi_end_io = pb->saved_bi_end_io;
 
 	if (err) {
@@ -665,7 +682,7 @@ static void writethrough_endio(struct bio *bio, int err)
 static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
 				       dm_oblock_t oblock, dm_cblock_t cblock)
 {
-	struct per_bio_data *pb = get_per_bio_data(bio);
+	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
 
 	pb->cache = cache;
 	pb->cblock = cblock;
@@ -1039,7 +1056,8 @@ static void defer_bio(struct cache *cache, struct bio *bio)
 
 static void process_flush_bio(struct cache *cache, struct bio *bio)
 {
-	struct per_bio_data *pb = get_per_bio_data(bio);
+	size_t pb_data_size = get_per_bio_data_size(cache);
+	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
 	BUG_ON(bio->bi_size);
 	if (!pb->req_nr)
@@ -1111,7 +1129,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 	dm_oblock_t block = get_bio_block(cache, bio);
 	struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
 	struct policy_result lookup_result;
-	struct per_bio_data *pb = get_per_bio_data(bio);
+	size_t pb_data_size = get_per_bio_data_size(cache);
+	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 	bool discarded_block = is_discarded_oblock(cache, block);
 	bool can_migrate = discarded_block || spare_migration_bandwidth(cache);
 
@@ -1885,7 +1904,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	cache->ti = ca->ti;
 	ti->private = cache;
-	ti->per_bio_data_size = sizeof(struct per_bio_data);
 	ti->num_flush_bios = 2;
 	ti->flush_supported = true;
 
@@ -1894,6 +1912,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 	ti->discard_zeroes_data_unsupported = true;
 
 	memcpy(&cache->features, &ca->features, sizeof(cache->features));
+	ti->per_bio_data_size = get_per_bio_data_size(cache);
 
 	cache->callbacks.congested_fn = cache_is_congested;
 	dm_table_add_target_callbacks(ti->table, &cache->callbacks);
@@ -2096,6 +2115,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 
 	int r;
 	dm_oblock_t block = get_bio_block(cache, bio);
+	size_t pb_data_size = get_per_bio_data_size(cache);
 	bool can_migrate = false;
 	bool discarded_block;
 	struct dm_bio_prison_cell *cell;
@@ -2112,7 +2132,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 		return DM_MAPIO_REMAPPED;
 	}
 
-	pb = init_per_bio_data(bio);
+	pb = init_per_bio_data(bio, pb_data_size);
 
 	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) {
 		defer_bio(cache, bio);
@@ -2197,7 +2217,8 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	struct cache *cache = ti->private;
 	unsigned long flags;
-	struct per_bio_data *pb = get_per_bio_data(bio);
+	size_t pb_data_size = get_per_bio_data_size(cache);
+	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
 	if (pb->tick) {
 		policy_tick(cache->policy);
-- 
cgit v1.2.3