Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bcache/bcache.h   |   2
-rw-r--r--  drivers/md/bcache/closure.h  |   2
-rw-r--r--  drivers/md/bitmap.c          |   6
-rw-r--r--  drivers/md/dm-bio-prison.c   |  70
-rw-r--r--  drivers/md/dm-bio-prison.h   |   2
-rw-r--r--  drivers/md/dm-bufio.c        |   8
-rw-r--r--  drivers/md/dm-cache-target.c |   3
-rw-r--r--  drivers/md/dm-crypt.c        |  61
-rw-r--r--  drivers/md/dm-era-target.c   |   3
-rw-r--r--  drivers/md/dm-mpath.c        |  23
-rw-r--r--  drivers/md/dm-snap.c         |  71
-rw-r--r--  drivers/md/dm-table.c        |   5
-rw-r--r--  drivers/md/dm-thin.c         | 197
-rw-r--r--  drivers/md/dm-verity.c       |  15
-rw-r--r--  drivers/md/dm.c              |  89
-rw-r--r--  drivers/md/md.c              |  20
-rw-r--r--  drivers/md/raid10.c          |  13
-rw-r--r--  drivers/md/raid5.c           | 160
-rw-r--r--  drivers/md/raid5.h           |   4
19 files changed, 517 insertions(+), 237 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 82c9c5d35251..d2ebcf323094 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -828,7 +828,7 @@ static inline bool cached_dev_get(struct cached_dev *dc)
return false;
/* Paired with the mb in cached_dev_attach */
- smp_mb__after_atomic_inc();
+ smp_mb__after_atomic();
return true;
}
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 7ef7461912be..a08e3eeac3c5 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -243,7 +243,7 @@ static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
cl->fn = fn;
cl->wq = wq;
/* between atomic_dec() in closure_put() */
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
}
static inline void closure_queue(struct closure *cl)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 9a8e66ae04f5..67f8b31e2054 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -669,17 +669,13 @@ static inline unsigned long file_page_offset(struct bitmap_storage *store,
/*
* return a pointer to the page in the filemap that contains the given bit
*
- * this lookup is complicated by the fact that the bitmap sb might be exactly
- * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
- * 0 or page 1
*/
static inline struct page *filemap_get_page(struct bitmap_storage *store,
unsigned long chunk)
{
if (file_page_index(store, chunk) >= store->file_pages)
return NULL;
- return store->filemap[file_page_index(store, chunk)
- - file_page_index(store, 0)];
+ return store->filemap[file_page_index(store, chunk)];
}
static int bitmap_storage_alloc(struct bitmap_storage *store,
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index 85f0b7074257..f752d12081ff 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c
@@ -14,13 +14,17 @@
/*----------------------------------------------------------------*/
-struct dm_bio_prison {
+struct bucket {
spinlock_t lock;
+ struct hlist_head cells;
+};
+
+struct dm_bio_prison {
mempool_t *cell_pool;
unsigned nr_buckets;
unsigned hash_mask;
- struct hlist_head *cells;
+ struct bucket *buckets;
};
/*----------------------------------------------------------------*/
@@ -40,6 +44,12 @@ static uint32_t calc_nr_buckets(unsigned nr_cells)
static struct kmem_cache *_cell_cache;
+static void init_bucket(struct bucket *b)
+{
+ spin_lock_init(&b->lock);
+ INIT_HLIST_HEAD(&b->cells);
+}
+
/*
* @nr_cells should be the number of cells you want in use _concurrently_.
* Don't confuse it with the number of distinct keys.
@@ -49,13 +59,12 @@ struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
unsigned i;
uint32_t nr_buckets = calc_nr_buckets(nr_cells);
size_t len = sizeof(struct dm_bio_prison) +
- (sizeof(struct hlist_head) * nr_buckets);
+ (sizeof(struct bucket) * nr_buckets);
struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL);
if (!prison)
return NULL;
- spin_lock_init(&prison->lock);
prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
if (!prison->cell_pool) {
kfree(prison);
@@ -64,9 +73,9 @@ struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
prison->nr_buckets = nr_buckets;
prison->hash_mask = nr_buckets - 1;
- prison->cells = (struct hlist_head *) (prison + 1);
+ prison->buckets = (struct bucket *) (prison + 1);
for (i = 0; i < nr_buckets; i++)
- INIT_HLIST_HEAD(prison->cells + i);
+ init_bucket(prison->buckets + i);
return prison;
}
@@ -107,40 +116,44 @@ static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs)
(lhs->block == rhs->block);
}
-static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
+static struct bucket *get_bucket(struct dm_bio_prison *prison,
+ struct dm_cell_key *key)
+{
+ return prison->buckets + hash_key(prison, key);
+}
+
+static struct dm_bio_prison_cell *__search_bucket(struct bucket *b,
struct dm_cell_key *key)
{
struct dm_bio_prison_cell *cell;
- hlist_for_each_entry(cell, bucket, list)
+ hlist_for_each_entry(cell, &b->cells, list)
if (keys_equal(&cell->key, key))
return cell;
return NULL;
}
-static void __setup_new_cell(struct dm_bio_prison *prison,
+static void __setup_new_cell(struct bucket *b,
struct dm_cell_key *key,
struct bio *holder,
- uint32_t hash,
struct dm_bio_prison_cell *cell)
{
memcpy(&cell->key, key, sizeof(cell->key));
cell->holder = holder;
bio_list_init(&cell->bios);
- hlist_add_head(&cell->list, prison->cells + hash);
+ hlist_add_head(&cell->list, &b->cells);
}
-static int __bio_detain(struct dm_bio_prison *prison,
+static int __bio_detain(struct bucket *b,
struct dm_cell_key *key,
struct bio *inmate,
struct dm_bio_prison_cell *cell_prealloc,
struct dm_bio_prison_cell **cell_result)
{
- uint32_t hash = hash_key(prison, key);
struct dm_bio_prison_cell *cell;
- cell = __search_bucket(prison->cells + hash, key);
+ cell = __search_bucket(b, key);
if (cell) {
if (inmate)
bio_list_add(&cell->bios, inmate);
@@ -148,7 +161,7 @@ static int __bio_detain(struct dm_bio_prison *prison,
return 1;
}
- __setup_new_cell(prison, key, inmate, hash, cell_prealloc);
+ __setup_new_cell(b, key, inmate, cell_prealloc);
*cell_result = cell_prealloc;
return 0;
}
@@ -161,10 +174,11 @@ static int bio_detain(struct dm_bio_prison *prison,
{
int r;
unsigned long flags;
+ struct bucket *b = get_bucket(prison, key);
- spin_lock_irqsave(&prison->lock, flags);
- r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_lock_irqsave(&b->lock, flags);
+ r = __bio_detain(b, key, inmate, cell_prealloc, cell_result);
+ spin_unlock_irqrestore(&b->lock, flags);
return r;
}
@@ -208,10 +222,11 @@ void dm_cell_release(struct dm_bio_prison *prison,
struct bio_list *bios)
{
unsigned long flags;
+ struct bucket *b = get_bucket(prison, &cell->key);
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irqsave(&b->lock, flags);
__cell_release(cell, bios);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irqrestore(&b->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release);
@@ -230,28 +245,25 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
struct bio_list *inmates)
{
unsigned long flags;
+ struct bucket *b = get_bucket(prison, &cell->key);
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irqsave(&b->lock, flags);
__cell_release_no_holder(cell, inmates);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irqrestore(&b->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
void dm_cell_error(struct dm_bio_prison *prison,
- struct dm_bio_prison_cell *cell)
+ struct dm_bio_prison_cell *cell, int error)
{
struct bio_list bios;
struct bio *bio;
- unsigned long flags;
bio_list_init(&bios);
-
- spin_lock_irqsave(&prison->lock, flags);
- __cell_release(cell, &bios);
- spin_unlock_irqrestore(&prison->lock, flags);
+ dm_cell_release(prison, cell, &bios);
while ((bio = bio_list_pop(&bios)))
- bio_io_error(bio);
+ bio_endio(bio, error);
}
EXPORT_SYMBOL_GPL(dm_cell_error);
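
The rework above replaces the single prison-wide spinlock with one lock per
hash bucket, so bios detained against different keys no longer contend on
the same lock. A minimal sketch of the lock-per-bucket pattern (toy_prison,
toy_get_bucket and the flat u64 key are illustrative, not dm code):

#include <linux/hash.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct toy_bucket {
	spinlock_t lock;
	struct hlist_head cells;
};

struct toy_prison {
	unsigned hash_mask;		/* nr_buckets - 1, power of two */
	struct toy_bucket *buckets;
};

/* The hash selects the bucket; only that bucket's lock is ever taken. */
static struct toy_bucket *toy_get_bucket(struct toy_prison *p, u64 key)
{
	return p->buckets + (hash_64(key, 32) & p->hash_mask);
}
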
diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h
index 3f833190eadf..6805a142b750 100644
--- a/drivers/md/dm-bio-prison.h
+++ b/drivers/md/dm-bio-prison.h
@@ -85,7 +85,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell,
struct bio_list *inmates);
void dm_cell_error(struct dm_bio_prison *prison,
- struct dm_bio_prison_cell *cell);
+ struct dm_bio_prison_cell *cell, int error);
/*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 66c5d130c8c2..4e84095833db 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -607,9 +607,9 @@ static void write_endio(struct bio *bio, int error)
BUG_ON(!test_bit(B_WRITING, &b->state));
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(B_WRITING, &b->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&b->state, B_WRITING);
}
@@ -997,9 +997,9 @@ static void read_endio(struct bio *bio, int error)
BUG_ON(!test_bit(B_READING, &b->state));
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(B_READING, &b->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&b->state, B_READING);
}
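
Both endio paths above use the same idiom, and the hunks also show the
barrier rename: smp_mb__before_clear_bit()/smp_mb__after_clear_bit() became
smp_mb__before_atomic()/smp_mb__after_atomic() kernel-wide, with unchanged
semantics. A minimal sketch of the completion side, assuming a flag word
named state:

#include <linux/bitops.h>
#include <linux/wait.h>

static unsigned long state;

static void toy_io_done(void)
{
	smp_mb__before_atomic();	/* order the I/O's stores before the clear */
	clear_bit(0, &state);
	smp_mb__after_atomic();		/* order the clear before the wakeup */
	wake_up_bit(&state, 0);		/* wake anyone sleeping on bit 0 */
}
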
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1bf4a71919ec..5f054c44b485 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2178,6 +2178,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
ti->num_discard_bios = 1;
ti->discards_supported = true;
ti->discard_zeroes_data_unsupported = true;
+ /* Discard bios must be split on a block boundary */
+ ti->split_discard_bios = true;
cache->features = ca->features;
ti->per_bio_data_size = get_per_bio_data_size(cache);
@@ -2488,6 +2490,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
} else {
inc_hit_counter(cache, bio);
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
!is_dirty(cache, lookup_result.cblock))
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 784695d22fde..53b213226c01 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -19,7 +19,6 @@
#include <linux/crypto.h>
#include <linux/workqueue.h>
#include <linux/backing-dev.h>
-#include <linux/percpu.h>
#include <linux/atomic.h>
#include <linux/scatterlist.h>
#include <asm/page.h>
@@ -43,6 +42,7 @@ struct convert_context {
struct bvec_iter iter_out;
sector_t cc_sector;
atomic_t cc_pending;
+ struct ablkcipher_request *req;
};
/*
@@ -111,15 +111,7 @@ struct iv_tcw_private {
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
/*
- * Duplicated per-CPU state for cipher.
- */
-struct crypt_cpu {
- struct ablkcipher_request *req;
-};
-
-/*
- * The fields in here must be read only after initialization,
- * changing state should be in crypt_cpu.
+ * The fields in here must be read only after initialization.
*/
struct crypt_config {
struct dm_dev *dev;
@@ -150,12 +142,6 @@ struct crypt_config {
sector_t iv_offset;
unsigned int iv_size;
- /*
- * Duplicated per cpu state. Access through
- * per_cpu_ptr() only.
- */
- struct crypt_cpu __percpu *cpu;
-
/* ESSIV: struct crypto_cipher *essiv_tfm */
void *iv_private;
struct crypto_ablkcipher **tfms;
@@ -192,11 +178,6 @@ static void clone_init(struct dm_crypt_io *, struct bio *);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq);
-static struct crypt_cpu *this_crypt_config(struct crypt_config *cc)
-{
- return this_cpu_ptr(cc->cpu);
-}
-
/*
* Use this to access cipher attributes that are the same for each CPU.
*/
@@ -903,16 +884,15 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
static void crypt_alloc_req(struct crypt_config *cc,
struct convert_context *ctx)
{
- struct crypt_cpu *this_cc = this_crypt_config(cc);
unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
- if (!this_cc->req)
- this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO);
+ if (!ctx->req)
+ ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO);
- ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]);
- ablkcipher_request_set_callback(this_cc->req,
+ ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]);
+ ablkcipher_request_set_callback(ctx->req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- kcryptd_async_done, dmreq_of_req(cc, this_cc->req));
+ kcryptd_async_done, dmreq_of_req(cc, ctx->req));
}
/*
@@ -921,7 +901,6 @@ static void crypt_alloc_req(struct crypt_config *cc,
static int crypt_convert(struct crypt_config *cc,
struct convert_context *ctx)
{
- struct crypt_cpu *this_cc = this_crypt_config(cc);
int r;
atomic_set(&ctx->cc_pending, 1);
@@ -932,7 +911,7 @@ static int crypt_convert(struct crypt_config *cc,
atomic_inc(&ctx->cc_pending);
- r = crypt_convert_block(cc, ctx, this_cc->req);
+ r = crypt_convert_block(cc, ctx, ctx->req);
switch (r) {
/* async */
@@ -941,7 +920,7 @@ static int crypt_convert(struct crypt_config *cc,
reinit_completion(&ctx->restart);
/* fall through*/
case -EINPROGRESS:
- this_cc->req = NULL;
+ ctx->req = NULL;
ctx->cc_sector++;
continue;
@@ -1040,6 +1019,7 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
io->sector = sector;
io->error = 0;
io->base_io = NULL;
+ io->ctx.req = NULL;
atomic_set(&io->io_pending, 0);
return io;
@@ -1065,6 +1045,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
if (!atomic_dec_and_test(&io->io_pending))
return;
+ if (io->ctx.req)
+ mempool_free(io->ctx.req, cc->req_pool);
mempool_free(io, cc->io_pool);
if (likely(!base_io))
@@ -1492,8 +1474,6 @@ static int crypt_wipe_key(struct crypt_config *cc)
static void crypt_dtr(struct dm_target *ti)
{
struct crypt_config *cc = ti->private;
- struct crypt_cpu *cpu_cc;
- int cpu;
ti->private = NULL;
@@ -1505,13 +1485,6 @@ static void crypt_dtr(struct dm_target *ti)
if (cc->crypt_queue)
destroy_workqueue(cc->crypt_queue);
- if (cc->cpu)
- for_each_possible_cpu(cpu) {
- cpu_cc = per_cpu_ptr(cc->cpu, cpu);
- if (cpu_cc->req)
- mempool_free(cpu_cc->req, cc->req_pool);
- }
-
crypt_free_tfms(cc);
if (cc->bs)
@@ -1530,9 +1503,6 @@ static void crypt_dtr(struct dm_target *ti)
if (cc->dev)
dm_put_device(ti, cc->dev);
- if (cc->cpu)
- free_percpu(cc->cpu);
-
kzfree(cc->cipher);
kzfree(cc->cipher_string);
@@ -1588,13 +1558,6 @@ static int crypt_ctr_cipher(struct dm_target *ti,
if (tmp)
DMWARN("Ignoring unexpected additional cipher options");
- cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)),
- __alignof__(struct crypt_cpu));
- if (!cc->cpu) {
- ti->error = "Cannot allocate per cpu state";
- goto bad_mem;
- }
-
/*
* For compatibility with the original dm-crypt mapping format, if
* only the cipher name is supplied, use cbc-plain.
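
With the per-CPU slot gone, the in-flight ablkcipher request belongs to the
convert_context embedded in each I/O: allocated lazily from the request
mempool and, as the crypt_dec_pending() hunk shows, freed when the last
reference drops (it may still be set if the request completed
asynchronously). A stripped-down sketch of that lifetime, with illustrative
names:

#include <linux/mempool.h>

struct toy_ctx {
	struct ablkcipher_request *req;	/* NULL until the first block */
};

static void toy_alloc_req(mempool_t *req_pool, struct toy_ctx *ctx)
{
	if (!ctx->req)			/* reused across blocks of one bio */
		ctx->req = mempool_alloc(req_pool, GFP_NOIO);
}

static void toy_io_done(mempool_t *req_pool, struct toy_ctx *ctx)
{
	if (ctx->req)			/* not consumed by an async path */
		mempool_free(ctx->req, req_pool);
}
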
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 414dad4cb49b..ad913cd4aded 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1391,7 +1391,8 @@ static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
static void era_destroy(struct era *era)
{
- metadata_close(era->md);
+ if (era->md)
+ metadata_close(era->md);
if (era->wq)
destroy_workqueue(era->wq);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index aa009e865871..3f6fd9d33ba3 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -445,11 +445,11 @@ static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
else
m->saved_queue_if_no_path = queue_if_no_path;
m->queue_if_no_path = queue_if_no_path;
- if (!m->queue_if_no_path)
- dm_table_run_md_queue_async(m->ti->table);
-
spin_unlock_irqrestore(&m->lock, flags);
+ if (!queue_if_no_path)
+ dm_table_run_md_queue_async(m->ti->table);
+
return 0;
}
@@ -954,7 +954,7 @@ out:
*/
static int reinstate_path(struct pgpath *pgpath)
{
- int r = 0;
+ int r = 0, run_queue = 0;
unsigned long flags;
struct multipath *m = pgpath->pg->m;
@@ -978,7 +978,7 @@ static int reinstate_path(struct pgpath *pgpath)
if (!m->nr_valid_paths++) {
m->current_pgpath = NULL;
- dm_table_run_md_queue_async(m->ti->table);
+ run_queue = 1;
} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
m->pg_init_in_progress++;
@@ -991,6 +991,8 @@ static int reinstate_path(struct pgpath *pgpath)
out:
spin_unlock_irqrestore(&m->lock, flags);
+ if (run_queue)
+ dm_table_run_md_queue_async(m->ti->table);
return r;
}
@@ -1240,17 +1242,8 @@ static int do_end_io(struct multipath *m, struct request *clone,
if (!error && !clone->errors)
return 0; /* I/O complete */
- if (noretry_error(error)) {
- if ((clone->cmd_flags & REQ_WRITE_SAME) &&
- !clone->q->limits.max_write_same_sectors) {
- struct queue_limits *limits;
-
- /* device doesn't really support WRITE SAME, disable it */
- limits = dm_get_queue_limits(dm_table_get_md(m->ti->table));
- limits->max_write_same_sectors = 0;
- }
+ if (noretry_error(error))
return error;
- }
if (mpio->pgpath)
fail_path(mpio->pgpath);
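
Both mpath hunks apply the same shape: note the decision while m->lock is
held, and call dm_table_run_md_queue_async() only after dropping it,
presumably so running the request queue cannot take further locks while
m->lock is held. In outline (toy_mpath and the predicate are illustrative):

static void toy_reinstate(struct toy_mpath *m)
{
	unsigned long flags;
	int run_queue = 0;

	spin_lock_irqsave(&m->lock, flags);
	if (toy_first_usable_path(m))	/* illustrative predicate */
		run_queue = 1;		/* only record it under the lock */
	spin_unlock_irqrestore(&m->lock, flags);

	if (run_queue)			/* act outside the lock */
		dm_table_run_md_queue_async(m->ti->table);
}
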
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ebddef5237e4..5bd2290cfb1e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -642,7 +642,7 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe)
struct dm_snapshot *s = pe->snap;
mempool_free(pe, s->pending_pool);
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
atomic_dec(&s->pending_exceptions_count);
}
@@ -783,7 +783,7 @@ static int init_hash_tables(struct dm_snapshot *s)
static void merge_shutdown(struct dm_snapshot *s)
{
clear_bit_unlock(RUNNING_MERGE, &s->state_bits);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&s->state_bits, RUNNING_MERGE);
}
@@ -2141,6 +2141,11 @@ static int origin_write_extent(struct dm_snapshot *merging_snap,
* Origin: maps a linear range of a device, with hooks for snapshotting.
*/
+struct dm_origin {
+ struct dm_dev *dev;
+ unsigned split_boundary;
+};
+
/*
* Construct an origin mapping: <dev_path>
* The context for an origin is merely a 'struct dm_dev *'
@@ -2149,41 +2154,65 @@ static int origin_write_extent(struct dm_snapshot *merging_snap,
static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
int r;
- struct dm_dev *dev;
+ struct dm_origin *o;
if (argc != 1) {
ti->error = "origin: incorrect number of arguments";
return -EINVAL;
}
- r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev);
+ o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL);
+ if (!o) {
+ ti->error = "Cannot allocate private origin structure";
+ r = -ENOMEM;
+ goto bad_alloc;
+ }
+
+ r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev);
if (r) {
ti->error = "Cannot get target device";
- return r;
+ goto bad_open;
}
- ti->private = dev;
+ ti->private = o;
ti->num_flush_bios = 1;
return 0;
+
+bad_open:
+ kfree(o);
+bad_alloc:
+ return r;
}
static void origin_dtr(struct dm_target *ti)
{
- struct dm_dev *dev = ti->private;
- dm_put_device(ti, dev);
+ struct dm_origin *o = ti->private;
+ dm_put_device(ti, o->dev);
+ kfree(o);
}
static int origin_map(struct dm_target *ti, struct bio *bio)
{
- struct dm_dev *dev = ti->private;
- bio->bi_bdev = dev->bdev;
+ struct dm_origin *o = ti->private;
+ unsigned available_sectors;
- if (bio->bi_rw & REQ_FLUSH)
+ bio->bi_bdev = o->dev->bdev;
+
+ if (unlikely(bio->bi_rw & REQ_FLUSH))
return DM_MAPIO_REMAPPED;
+ if (bio_rw(bio) != WRITE)
+ return DM_MAPIO_REMAPPED;
+
+ available_sectors = o->split_boundary -
+ ((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1));
+
+ if (bio_sectors(bio) > available_sectors)
+ dm_accept_partial_bio(bio, available_sectors);
+
/* Only tell snapshots if this is a write */
- return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
+ return do_origin(o->dev, bio);
}
/*
@@ -2192,15 +2221,15 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
*/
static void origin_resume(struct dm_target *ti)
{
- struct dm_dev *dev = ti->private;
+ struct dm_origin *o = ti->private;
- ti->max_io_len = get_origin_minimum_chunksize(dev->bdev);
+ o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev);
}
static void origin_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
- struct dm_dev *dev = ti->private;
+ struct dm_origin *o = ti->private;
switch (type) {
case STATUSTYPE_INFO:
@@ -2208,7 +2237,7 @@ static void origin_status(struct dm_target *ti, status_type_t type,
break;
case STATUSTYPE_TABLE:
- snprintf(result, maxlen, "%s", dev->name);
+ snprintf(result, maxlen, "%s", o->dev->name);
break;
}
}
@@ -2216,13 +2245,13 @@ static void origin_status(struct dm_target *ti, status_type_t type,
static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
struct bio_vec *biovec, int max_size)
{
- struct dm_dev *dev = ti->private;
- struct request_queue *q = bdev_get_queue(dev->bdev);
+ struct dm_origin *o = ti->private;
+ struct request_queue *q = bdev_get_queue(o->dev->bdev);
if (!q->merge_bvec_fn)
return max_size;
- bvm->bi_bdev = dev->bdev;
+ bvm->bi_bdev = o->dev->bdev;
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
@@ -2230,9 +2259,9 @@ static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
static int origin_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data)
{
- struct dm_dev *dev = ti->private;
+ struct dm_origin *o = ti->private;
- return fn(ti, dev, 0, ti->len, data);
+ return fn(ti, o->dev, 0, ti->len, data);
}
static struct target_type origin_target = {
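
origin_map() above never lets a write straddle a chunk boundary:
split_boundary is a power of two, so the masking arithmetic yields the
sectors remaining before the next boundary and dm_accept_partial_bio()
trims the bio to fit. A worked example, assuming a 16-sector boundary:

/*
 * split_boundary = 16, bio->bi_iter.bi_sector = 100:
 *
 *	available_sectors = 16 - (100 & (16 - 1))
 *			  = 16 - 4
 *			  = 12
 *
 * A 20-sector write is accepted for 12 sectors; dm core resubmits the
 * remaining 8 in a fresh bio starting exactly on the boundary at
 * sector 112.
 */
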
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 50601ec7017a..5f59f1e3e5b1 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -465,8 +465,8 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
}
EXPORT_SYMBOL(dm_get_device);
-int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
+static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
{
struct queue_limits *limits = data;
struct block_device *bdev = dev->bdev;
@@ -499,7 +499,6 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
(unsigned int) (PAGE_SIZE >> 9));
return 0;
}
-EXPORT_SYMBOL_GPL(dm_set_device_limits);
/*
* Decrement a device's use count and remove it if necessary.
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 53728be84dee..fc9c848a60c9 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -27,6 +27,9 @@
#define MAPPING_POOL_SIZE 1024
#define PRISON_CELLS 1024
#define COMMIT_PERIOD HZ
+#define NO_SPACE_TIMEOUT_SECS 60
+
+static unsigned no_space_timeout_secs = NO_SPACE_TIMEOUT_SECS;
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
"A percentage of time allocated for copy on write");
@@ -175,6 +178,7 @@ struct pool {
struct workqueue_struct *wq;
struct work_struct worker;
struct delayed_work waker;
+ struct delayed_work no_space_timeout;
unsigned long last_commit_jiffies;
unsigned ref_count;
@@ -232,6 +236,13 @@ struct thin_c {
struct bio_list deferred_bio_list;
struct bio_list retry_on_resume_list;
struct rb_root sort_bio_list; /* sorted list of deferred bios */
+
+ /*
+ * Ensures the thin is not destroyed until the worker has finished
+ * iterating the active_thins list.
+ */
+ atomic_t refcount;
+ struct completion can_destroy;
};
/*----------------------------------------------------------------*/
@@ -299,13 +310,18 @@ static void cell_defer_no_holder_no_free(struct thin_c *tc,
wake_worker(pool);
}
-static void cell_error(struct pool *pool,
- struct dm_bio_prison_cell *cell)
+static void cell_error_with_code(struct pool *pool,
+ struct dm_bio_prison_cell *cell, int error_code)
{
- dm_cell_error(pool->prison, cell);
+ dm_cell_error(pool->prison, cell, error_code);
dm_bio_prison_free_cell(pool->prison, cell);
}
+static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
+{
+ cell_error_with_code(pool, cell, -EIO);
+}
+
/*----------------------------------------------------------------*/
/*
@@ -928,7 +944,7 @@ static int commit(struct pool *pool)
{
int r;
- if (get_pool_mode(pool) != PM_WRITE)
+ if (get_pool_mode(pool) >= PM_READ_ONLY)
return -EINVAL;
r = dm_pool_commit_metadata(pool->pmd);
@@ -1016,7 +1032,7 @@ static void retry_on_resume(struct bio *bio)
spin_unlock_irqrestore(&tc->lock, flags);
}
-static bool should_error_unserviceable_bio(struct pool *pool)
+static int should_error_unserviceable_bio(struct pool *pool)
{
enum pool_mode m = get_pool_mode(pool);
@@ -1024,25 +1040,27 @@ static bool should_error_unserviceable_bio(struct pool *pool)
case PM_WRITE:
/* Shouldn't get here */
DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
- return true;
+ return -EIO;
case PM_OUT_OF_DATA_SPACE:
- return pool->pf.error_if_no_space;
+ return pool->pf.error_if_no_space ? -ENOSPC : 0;
case PM_READ_ONLY:
case PM_FAIL:
- return true;
+ return -EIO;
default:
/* Shouldn't get here */
DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
- return true;
+ return -EIO;
}
}
static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
{
- if (should_error_unserviceable_bio(pool))
- bio_io_error(bio);
+ int error = should_error_unserviceable_bio(pool);
+
+ if (error)
+ bio_endio(bio, error);
else
retry_on_resume(bio);
}
@@ -1051,18 +1069,21 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
{
struct bio *bio;
struct bio_list bios;
+ int error;
- if (should_error_unserviceable_bio(pool)) {
- cell_error(pool, cell);
+ error = should_error_unserviceable_bio(pool);
+ if (error) {
+ cell_error_with_code(pool, cell, error);
return;
}
bio_list_init(&bios);
cell_release(pool, cell, &bios);
- if (should_error_unserviceable_bio(pool))
+ error = should_error_unserviceable_bio(pool);
+ if (error)
while ((bio = bio_list_pop(&bios)))
- bio_io_error(bio);
+ bio_endio(bio, error);
else
while ((bio = bio_list_pop(&bios)))
retry_on_resume(bio);
@@ -1486,6 +1507,45 @@ static void process_thin_deferred_bios(struct thin_c *tc)
blk_finish_plug(&plug);
}
+static void thin_get(struct thin_c *tc);
+static void thin_put(struct thin_c *tc);
+
+/*
+ * We can't hold rcu_read_lock() around code that can block. So we
+ * find a thin with the rcu lock held; bump a refcount; then drop
+ * the lock.
+ */
+static struct thin_c *get_first_thin(struct pool *pool)
+{
+ struct thin_c *tc = NULL;
+
+ rcu_read_lock();
+ if (!list_empty(&pool->active_thins)) {
+ tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
+ thin_get(tc);
+ }
+ rcu_read_unlock();
+
+ return tc;
+}
+
+static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
+{
+ struct thin_c *old_tc = tc;
+
+ rcu_read_lock();
+ list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
+ thin_get(tc);
+ thin_put(old_tc);
+ rcu_read_unlock();
+ return tc;
+ }
+ thin_put(old_tc);
+ rcu_read_unlock();
+
+ return NULL;
+}
+
static void process_deferred_bios(struct pool *pool)
{
unsigned long flags;
@@ -1493,10 +1553,11 @@ static void process_deferred_bios(struct pool *pool)
struct bio_list bios;
struct thin_c *tc;
- rcu_read_lock();
- list_for_each_entry_rcu(tc, &pool->active_thins, list)
+ tc = get_first_thin(pool);
+ while (tc) {
process_thin_deferred_bios(tc);
- rcu_read_unlock();
+ tc = get_next_thin(pool, tc);
+ }
/*
* If there are any deferred flush bios, we must commit
@@ -1543,49 +1604,79 @@ static void do_waker(struct work_struct *ws)
queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
}
+/*
+ * We're holding onto IO to allow userland time to react. After the
+ * timeout either the pool will have been resized (and thus back in
+ * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
+ */
+static void do_no_space_timeout(struct work_struct *ws)
+{
+ struct pool *pool = container_of(to_delayed_work(ws), struct pool,
+ no_space_timeout);
+
+ if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
+ set_pool_mode(pool, PM_READ_ONLY);
+}
+
/*----------------------------------------------------------------*/
-struct noflush_work {
+struct pool_work {
struct work_struct worker;
- struct thin_c *tc;
+ struct completion complete;
+};
+
+static struct pool_work *to_pool_work(struct work_struct *ws)
+{
+ return container_of(ws, struct pool_work, worker);
+}
+
+static void pool_work_complete(struct pool_work *pw)
+{
+ complete(&pw->complete);
+}
+
+static void pool_work_wait(struct pool_work *pw, struct pool *pool,
+ void (*fn)(struct work_struct *))
+{
+ INIT_WORK_ONSTACK(&pw->worker, fn);
+ init_completion(&pw->complete);
+ queue_work(pool->wq, &pw->worker);
+ wait_for_completion(&pw->complete);
+}
+
+/*----------------------------------------------------------------*/
- atomic_t complete;
- wait_queue_head_t wait;
+struct noflush_work {
+ struct pool_work pw;
+ struct thin_c *tc;
};
-static void complete_noflush_work(struct noflush_work *w)
+static struct noflush_work *to_noflush(struct work_struct *ws)
{
- atomic_set(&w->complete, 1);
- wake_up(&w->wait);
+ return container_of(to_pool_work(ws), struct noflush_work, pw);
}
static void do_noflush_start(struct work_struct *ws)
{
- struct noflush_work *w = container_of(ws, struct noflush_work, worker);
+ struct noflush_work *w = to_noflush(ws);
w->tc->requeue_mode = true;
requeue_io(w->tc);
- complete_noflush_work(w);
+ pool_work_complete(&w->pw);
}
static void do_noflush_stop(struct work_struct *ws)
{
- struct noflush_work *w = container_of(ws, struct noflush_work, worker);
+ struct noflush_work *w = to_noflush(ws);
w->tc->requeue_mode = false;
- complete_noflush_work(w);
+ pool_work_complete(&w->pw);
}
static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
{
struct noflush_work w;
- INIT_WORK(&w.worker, fn);
w.tc = tc;
- atomic_set(&w.complete, 0);
- init_waitqueue_head(&w.wait);
-
- queue_work(tc->pool->wq, &w.worker);
-
- wait_event(w.wait, atomic_read(&w.complete));
+ pool_work_wait(&w.pw, tc->pool, fn);
}
/*----------------------------------------------------------------*/
@@ -1607,6 +1698,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
struct pool_c *pt = pool->ti->private;
bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
enum pool_mode old_mode = get_pool_mode(pool);
+ unsigned long no_space_timeout = ACCESS_ONCE(no_space_timeout_secs) * HZ;
/*
* Never allow the pool to transition to PM_WRITE mode if user
@@ -1668,6 +1760,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
pool->process_discard = process_discard;
pool->process_prepared_mapping = process_prepared_mapping;
pool->process_prepared_discard = process_prepared_discard_passdown;
+
+ if (!pool->pf.error_if_no_space && no_space_timeout)
+ queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
break;
case PM_WRITE:
@@ -2053,6 +2148,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
INIT_WORK(&pool->worker, do_worker);
INIT_DELAYED_WORK(&pool->waker, do_waker);
+ INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
spin_lock_init(&pool->lock);
bio_list_init(&pool->deferred_flush_bios);
INIT_LIST_HEAD(&pool->prepared_mappings);
@@ -2615,6 +2711,7 @@ static void pool_postsuspend(struct dm_target *ti)
struct pool *pool = pt->pool;
cancel_delayed_work(&pool->waker);
+ cancel_delayed_work(&pool->no_space_timeout);
flush_workqueue(pool->wq);
(void) commit(pool);
}
@@ -2997,7 +3094,8 @@ static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
*/
if (pt->adjusted_pf.discard_passdown) {
data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
- limits->discard_granularity = data_limits->discard_granularity;
+ limits->discard_granularity = max(data_limits->discard_granularity,
+ pool->sectors_per_block << SECTOR_SHIFT);
} else
limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
}
@@ -3061,11 +3159,25 @@ static struct target_type pool_target = {
/*----------------------------------------------------------------
* Thin target methods
*--------------------------------------------------------------*/
+static void thin_get(struct thin_c *tc)
+{
+ atomic_inc(&tc->refcount);
+}
+
+static void thin_put(struct thin_c *tc)
+{
+ if (atomic_dec_and_test(&tc->refcount))
+ complete(&tc->can_destroy);
+}
+
static void thin_dtr(struct dm_target *ti)
{
struct thin_c *tc = ti->private;
unsigned long flags;
+ thin_put(tc);
+ wait_for_completion(&tc->can_destroy);
+
spin_lock_irqsave(&tc->pool->lock, flags);
list_del_rcu(&tc->list);
spin_unlock_irqrestore(&tc->pool->lock, flags);
@@ -3101,6 +3213,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
struct thin_c *tc;
struct dm_dev *pool_dev, *origin_dev;
struct mapped_device *pool_md;
+ unsigned long flags;
mutex_lock(&dm_thin_pool_table.mutex);
@@ -3191,9 +3304,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
mutex_unlock(&dm_thin_pool_table.mutex);
- spin_lock(&tc->pool->lock);
+ atomic_set(&tc->refcount, 1);
+ init_completion(&tc->can_destroy);
+
+ spin_lock_irqsave(&tc->pool->lock, flags);
list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
- spin_unlock(&tc->pool->lock);
+ spin_unlock_irqrestore(&tc->pool->lock, flags);
/*
* This synchronize_rcu() call is needed here otherwise we risk a
* wake_worker() call finding no bios to process (because the newly
@@ -3422,6 +3538,9 @@ static void dm_thin_exit(void)
module_init(dm_thin_init);
module_exit(dm_thin_exit);
+module_param_named(no_space_timeout, no_space_timeout_secs, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(no_space_timeout, "Out of data space queue IO timeout in seconds");
+
MODULE_DESCRIPTION(DM_NAME " thin provisioning target");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
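
get_first_thin()/get_next_thin() above are an instance of a general
pattern: rcu_read_lock() cannot be held across blocking code, so the lock
is held only long enough to find the next element and take a reference on
it, and teardown pairs the refcount with a completion so the destructor
waits out any walker. A generic sketch with illustrative names:

#include <linux/completion.h>
#include <linux/rculist.h>

struct toy_node {
	struct list_head list;		/* on an RCU-protected list */
	atomic_t refcount;
	struct completion can_destroy;
};

static void toy_put(struct toy_node *n)
{
	if (atomic_dec_and_test(&n->refcount))
		complete(&n->can_destroy);
}

static struct toy_node *toy_first(struct list_head *head)
{
	struct toy_node *n = NULL;

	rcu_read_lock();
	if (!list_empty(head)) {
		n = list_entry_rcu(head->next, struct toy_node, list);
		atomic_inc(&n->refcount);
	}
	rcu_read_unlock();		/* safe: we hold our own reference */
	return n;
}

static void toy_destroy(struct toy_node *n)
{
	toy_put(n);			/* drop the initial reference */
	wait_for_completion(&n->can_destroy);
	/* now safe to unlink and free the node */
}
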
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 796007a5e0e1..7a7bab8947ae 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -330,15 +330,17 @@ test_block_hash:
return r;
}
}
-
todo = 1 << v->data_dev_block_bits;
- while (io->iter.bi_size) {
+ do {
u8 *page;
+ unsigned len;
struct bio_vec bv = bio_iter_iovec(bio, io->iter);
page = kmap_atomic(bv.bv_page);
- r = crypto_shash_update(desc, page + bv.bv_offset,
- bv.bv_len);
+ len = bv.bv_len;
+ if (likely(len >= todo))
+ len = todo;
+ r = crypto_shash_update(desc, page + bv.bv_offset, len);
kunmap_atomic(page);
if (r < 0) {
@@ -346,8 +348,9 @@ test_block_hash:
return r;
}
- bio_advance_iter(bio, &io->iter, bv.bv_len);
- }
+ bio_advance_iter(bio, &io->iter, len);
+ todo -= len;
+ } while (todo);
if (!v->version) {
r = crypto_shash_update(desc, v->salt, v->salt_size);
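
The rewritten loop hashes exactly one data block (todo bytes) per call
instead of trusting the bio's remaining size, clamping each
crypto_shash_update() so a bio_vec larger than the block cannot bleed the
next block's data into this block's digest. Worked through, assuming
4096-byte blocks:

/*
 * data_dev_block_bits = 12, so todo = 4096; a bio_vec holds 8192 bytes:
 *
 *	len = min(bv.bv_len, todo) = min(8192, 4096) = 4096
 *	todo -= len		   => 0, the loop ends after one block
 *
 * The old 'while (io->iter.bi_size)' form would have fed all 8192 bytes
 * into the first block's hash.
 */
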
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 455e64916498..437d99045ef2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -755,6 +755,14 @@ static void dec_pending(struct dm_io *io, int error)
}
}
+static void disable_write_same(struct mapped_device *md)
+{
+ struct queue_limits *limits = dm_get_queue_limits(md);
+
+ /* device doesn't really support WRITE SAME, disable it */
+ limits->max_write_same_sectors = 0;
+}
+
static void clone_endio(struct bio *bio, int error)
{
int r = 0;
@@ -783,6 +791,10 @@ static void clone_endio(struct bio *bio, int error)
}
}
+ if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) &&
+ !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
+ disable_write_same(md);
+
free_tio(md, tio);
dec_pending(io, error);
}
@@ -977,6 +989,10 @@ static void dm_done(struct request *clone, int error, bool mapped)
r = rq_end_io(tio->ti, clone, error, &tio->info);
}
+ if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) &&
+ !clone->q->limits.max_write_same_sectors))
+ disable_write_same(tio->md);
+
if (r <= 0)
/* The target wants to complete the I/O */
dm_end_request(clone, r);
@@ -1110,6 +1126,46 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
+/*
+ * A target may call dm_accept_partial_bio only from the map routine. It is
+ * allowed for all bio types except REQ_FLUSH.
+ *
+ * dm_accept_partial_bio informs the dm that the target only wants to process
+ * additional n_sectors sectors of the bio and the rest of the data should be
+ * sent in a next bio.
+ *
+ * A diagram that explains the arithmetics:
+ * +--------------------+---------------+-------+
+ * | 1 | 2 | 3 |
+ * +--------------------+---------------+-------+
+ *
+ * <-------------- *tio->len_ptr --------------->
+ * <------- bi_size ------->
+ * <-- n_sectors -->
+ *
+ * Region 1 was already iterated over with bio_advance or similar function.
+ * (it may be empty if the target doesn't use bio_advance)
+ * Region 2 is the remaining bio size that the target wants to process.
+ * (it may be empty if region 1 is non-empty, although there is no reason
+ * to make it empty)
+ * The target requires that region 3 is to be sent in the next bio.
+ *
+ * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
+ * the partially processed part (the sum of regions 1+2) must be the same for all
+ * copies of the bio.
+ */
+void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
+{
+ struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+ unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+ BUG_ON(bio->bi_rw & REQ_FLUSH);
+ BUG_ON(bi_size > *tio->len_ptr);
+ BUG_ON(n_sectors > bi_size);
+ *tio->len_ptr -= bi_size - n_sectors;
+ bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
+}
+EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
+
static void __map_bio(struct dm_target_io *tio)
{
int r;
@@ -1152,10 +1208,10 @@ struct clone_info {
struct bio *bio;
struct dm_io *io;
sector_t sector;
- sector_t sector_count;
+ unsigned sector_count;
};
-static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len)
+static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
{
bio->bi_iter.bi_sector = sector;
bio->bi_iter.bi_size = to_bytes(len);
@@ -1200,11 +1256,13 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
static void __clone_and_map_simple_bio(struct clone_info *ci,
struct dm_target *ti,
- unsigned target_bio_nr, sector_t len)
+ unsigned target_bio_nr, unsigned *len)
{
struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
struct bio *clone = &tio->clone;
+ tio->len_ptr = len;
+
/*
* Discard requests require the bio's inline iovecs be initialized.
* ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
@@ -1212,13 +1270,13 @@ static void __clone_and_map_simple_bio(struct clone_info *ci,
*/
__bio_clone_fast(clone, ci->bio);
if (len)
- bio_setup_sector(clone, ci->sector, len);
+ bio_setup_sector(clone, ci->sector, *len);
__map_bio(tio);
}
static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
- unsigned num_bios, sector_t len)
+ unsigned num_bios, unsigned *len)
{
unsigned target_bio_nr;
@@ -1233,13 +1291,13 @@ static int __send_empty_flush(struct clone_info *ci)
BUG_ON(bio_has_data(ci->bio));
while ((ti = dm_table_get_target(ci->map, target_nr++)))
- __send_duplicate_bios(ci, ti, ti->num_flush_bios, 0);
+ __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
return 0;
}
static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
- sector_t sector, unsigned len)
+ sector_t sector, unsigned *len)
{
struct bio *bio = ci->bio;
struct dm_target_io *tio;
@@ -1254,7 +1312,8 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti
for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
tio = alloc_tio(ci, ti, 0, target_bio_nr);
- clone_bio(tio, bio, sector, len);
+ tio->len_ptr = len;
+ clone_bio(tio, bio, sector, *len);
__map_bio(tio);
}
}
@@ -1283,7 +1342,7 @@ static int __send_changing_extent_only(struct clone_info *ci,
is_split_required_fn is_split_required)
{
struct dm_target *ti;
- sector_t len;
+ unsigned len;
unsigned num_bios;
do {
@@ -1302,11 +1361,11 @@ static int __send_changing_extent_only(struct clone_info *ci,
return -EOPNOTSUPP;
if (is_split_required && !is_split_required(ti))
- len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
+ len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
else
- len = min(ci->sector_count, max_io_len(ci->sector, ti));
+ len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
- __send_duplicate_bios(ci, ti, num_bios, len);
+ __send_duplicate_bios(ci, ti, num_bios, &len);
ci->sector += len;
} while (ci->sector_count -= len);
@@ -1345,7 +1404,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
- __clone_and_map_data_bio(ci, ti, ci->sector, len);
+ __clone_and_map_data_bio(ci, ti, ci->sector, &len);
ci->sector += len;
ci->sector_count -= len;
@@ -1439,7 +1498,6 @@ static int dm_merge_bvec(struct request_queue *q,
* just one page.
*/
else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
-
max_size = 0;
out:
@@ -1544,7 +1602,6 @@ static int setup_clone(struct request *clone, struct request *rq,
clone->cmd = rq->cmd;
clone->cmd_len = rq->cmd_len;
clone->sense = rq->sense;
- clone->buffer = rq->buffer;
clone->end_io = end_clone_request;
clone->end_io_data = tio;
@@ -2447,7 +2504,7 @@ static void dm_wq_work(struct work_struct *work)
static void dm_queue_flush(struct mapped_device *md)
{
clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
queue_work(md->wq, &md->work);
}
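
Plugging numbers into the dm_accept_partial_bio() diagram above (all
values in sectors):

/*
 * *tio->len_ptr = 100	(regions 1+2+3)
 * bi_size	 =  70	(regions 2+3: 30 sectors already advanced past)
 * n_sectors	 =  20	(region 2, what the target accepts)
 *
 *	*tio->len_ptr -= bi_size - n_sectors;	=> 100 - 50 = 50 (1+2)
 *	bio->bi_iter.bi_size = n_sectors << 9;	=> bio trimmed to 20 sectors
 *
 * dm core then sends the remaining 50 sectors (region 3) in the next bio.
 */
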
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8fda38d23e38..34846856dbc6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3448,6 +3448,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->level = LEVEL_NONE;
return rv;
}
+ if (mddev->ro)
+ return -EROFS;
/* request to change the personality. Need to ensure:
* - array is not engaged in resync/recovery/reshape
@@ -3634,6 +3636,8 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
int err;
if (mddev->pers->check_reshape == NULL)
return -EBUSY;
+ if (mddev->ro)
+ return -EROFS;
mddev->new_layout = n;
err = mddev->pers->check_reshape(mddev);
if (err) {
@@ -3723,6 +3727,8 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
int err;
if (mddev->pers->check_reshape == NULL)
return -EBUSY;
+ if (mddev->ro)
+ return -EROFS;
mddev->new_chunk_sectors = n >> 9;
err = mddev->pers->check_reshape(mddev);
if (err) {
@@ -6135,6 +6141,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
*/
if (mddev->sync_thread)
return -EBUSY;
+ if (mddev->ro)
+ return -EROFS;
rdev_for_each(rdev, mddev) {
sector_t avail = rdev->sectors;
@@ -6157,6 +6165,8 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
/* change the number of raid disks */
if (mddev->pers->check_reshape == NULL)
return -EINVAL;
+ if (mddev->ro)
+ return -EROFS;
if (raid_disks <= 0 ||
(mddev->max_disks && raid_disks >= mddev->max_disks))
return -EINVAL;
@@ -7381,8 +7391,10 @@ void md_do_sync(struct md_thread *thread)
/* just incase thread restarts... */
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
return;
- if (mddev->ro) /* never try to sync a read-only array */
+ if (mddev->ro) {/* never try to sync a read-only array */
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
return;
+ }
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
@@ -7824,6 +7836,7 @@ void md_check_recovery(struct mddev *mddev)
/* There is no thread, but we need to call
* ->spare_active and clear saved_raid_disk
*/
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
goto unlock;
@@ -8330,7 +8343,7 @@ static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
if (a < s) {
/* we need to split this range */
if (bb->count >= MD_MAX_BADBLOCKS) {
- rv = 0;
+ rv = -ENOSPC;
goto out;
}
memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
@@ -8516,7 +8529,8 @@ static int md_notify_reboot(struct notifier_block *this,
if (mddev_trylock(mddev)) {
if (mddev->pers)
__md_stop_writes(mddev);
- mddev->safemode = 2;
+ if (mddev->persistent)
+ mddev->safemode = 2;
mddev_unlock(mddev);
}
need_delay = 1;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 33fc408e5eac..cb882aae9e20 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1172,6 +1172,13 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
int max_sectors;
int sectors;
+ /*
+ * Register the new request and wait if the reconstruction
+ * thread has put up a bar for new requests.
+ * Continue immediately if no resync is active currently.
+ */
+ wait_barrier(conf);
+
sectors = bio_sectors(bio);
while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
bio->bi_iter.bi_sector < conf->reshape_progress &&
@@ -1552,12 +1559,6 @@ static void make_request(struct mddev *mddev, struct bio *bio)
md_write_start(mddev, bio);
- /*
- * Register the new request and wait if the reconstruction
- * thread has put up a bar for new requests.
- * Continue immediately if no resync is active currently.
- */
- wait_barrier(conf);
do {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ad1b9bea446e..6234b2e84587 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
BUG_ON(atomic_read(&conf->active_stripes)==0);
if (test_bit(STRIPE_HANDLE, &sh->state)) {
if (test_bit(STRIPE_DELAYED, &sh->state) &&
- !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+ !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
list_add_tail(&sh->lru, &conf->delayed_list);
- else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+ if (atomic_read(&conf->preread_active_stripes)
+ < IO_THRESHOLD)
+ md_wakeup_thread(conf->mddev->thread);
+ } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0)
list_add_tail(&sh->lru, &conf->bitmap_list);
else {
@@ -413,6 +416,11 @@ static void release_stripe(struct stripe_head *sh)
int hash;
bool wakeup;
+ /* Avoid release_list until the last reference.
+ */
+ if (atomic_add_unless(&sh->count, -1, 1))
+ return;
+
if (unlikely(!conf->mddev->thread) ||
test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
goto slow_path;
@@ -479,6 +487,7 @@ static void shrink_buffers(struct stripe_head *sh)
int num = sh->raid_conf->pool_size;
for (i = 0; i < num ; i++) {
+ WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
p = sh->dev[i].page;
if (!p)
continue;
@@ -499,6 +508,7 @@ static int grow_buffers(struct stripe_head *sh)
return 1;
}
sh->dev[i].page = page;
+ sh->dev[i].orig_page = page;
}
return 0;
}
@@ -855,6 +865,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
bi->bi_rw |= REQ_NOMERGE;
+ if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+ WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+ sh->dev[i].vec.bv_page = sh->dev[i].page;
bi->bi_vcnt = 1;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
@@ -899,6 +912,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
else
rbi->bi_iter.bi_sector = (sh->sector
+ rrdev->data_offset);
+ if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+ WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+ sh->dev[i].rvec.bv_page = sh->dev[i].page;
rbi->bi_vcnt = 1;
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
rbi->bi_io_vec[0].bv_offset = 0;
@@ -927,8 +943,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
}
static struct dma_async_tx_descriptor *
-async_copy_data(int frombio, struct bio *bio, struct page *page,
- sector_t sector, struct dma_async_tx_descriptor *tx)
+async_copy_data(int frombio, struct bio *bio, struct page **page,
+ sector_t sector, struct dma_async_tx_descriptor *tx,
+ struct stripe_head *sh)
{
struct bio_vec bvl;
struct bvec_iter iter;
@@ -965,11 +982,16 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
if (clen > 0) {
b_offset += bvl.bv_offset;
bio_page = bvl.bv_page;
- if (frombio)
- tx = async_memcpy(page, bio_page, page_offset,
+ if (frombio) {
+ if (sh->raid_conf->skip_copy &&
+ b_offset == 0 && page_offset == 0 &&
+ clen == STRIPE_SIZE)
+ *page = bio_page;
+ else
+ tx = async_memcpy(*page, bio_page, page_offset,
b_offset, clen, &submit);
- else
- tx = async_memcpy(bio_page, page, b_offset,
+ } else
+ tx = async_memcpy(bio_page, *page, b_offset,
page_offset, clen, &submit);
}
/* chain the operations */
@@ -1045,8 +1067,8 @@ static void ops_run_biofill(struct stripe_head *sh)
spin_unlock_irq(&sh->stripe_lock);
while (rbi && rbi->bi_iter.bi_sector <
dev->sector + STRIPE_SECTORS) {
- tx = async_copy_data(0, rbi, dev->page,
- dev->sector, tx);
+ tx = async_copy_data(0, rbi, &dev->page,
+ dev->sector, tx, sh);
rbi = r5_next_bio(rbi, dev->sector);
}
}
@@ -1384,6 +1406,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
BUG_ON(dev->written);
wbi = dev->written = chosen;
spin_unlock_irq(&sh->stripe_lock);
+ WARN_ON(dev->page != dev->orig_page);
while (wbi && wbi->bi_iter.bi_sector <
dev->sector + STRIPE_SECTORS) {
@@ -1393,9 +1416,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
set_bit(R5_SyncIO, &dev->flags);
if (wbi->bi_rw & REQ_DISCARD)
set_bit(R5_Discard, &dev->flags);
- else
- tx = async_copy_data(1, wbi, dev->page,
- dev->sector, tx);
+ else {
+ tx = async_copy_data(1, wbi, &dev->page,
+ dev->sector, tx, sh);
+ if (dev->page != dev->orig_page) {
+ set_bit(R5_SkipCopy, &dev->flags);
+ clear_bit(R5_UPTODATE, &dev->flags);
+ clear_bit(R5_OVERWRITE, &dev->flags);
+ }
+ }
wbi = r5_next_bio(wbi, dev->sector);
}
}
@@ -1426,7 +1455,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
struct r5dev *dev = &sh->dev[i];
if (dev->written || i == pd_idx || i == qd_idx) {
- if (!discard)
+ if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
set_bit(R5_UPTODATE, &dev->flags);
if (fua)
set_bit(R5_WantFUA, &dev->flags);
@@ -1839,8 +1868,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
osh = get_free_stripe(conf, hash);
unlock_device_hash_lock(conf, hash);
atomic_set(&nsh->count, 1);
- for(i=0; i<conf->pool_size; i++)
+ for(i=0; i<conf->pool_size; i++) {
nsh->dev[i].page = osh->dev[i].page;
+ nsh->dev[i].orig_page = osh->dev[i].page;
+ }
for( ; i<newsize; i++)
nsh->dev[i].page = NULL;
nsh->hash_lock_index = hash;
@@ -1896,6 +1927,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
if (nsh->dev[i].page == NULL) {
struct page *p = alloc_page(GFP_NOIO);
nsh->dev[i].page = p;
+ nsh->dev[i].orig_page = p;
if (!p)
err = -ENOMEM;
}
@@ -2133,24 +2165,20 @@ static void raid5_end_write_request(struct bio *bi, int error)
}
static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
-
+
static void raid5_build_block(struct stripe_head *sh, int i, int previous)
{
struct r5dev *dev = &sh->dev[i];
bio_init(&dev->req);
dev->req.bi_io_vec = &dev->vec;
- dev->req.bi_vcnt++;
- dev->req.bi_max_vecs++;
+ dev->req.bi_max_vecs = 1;
dev->req.bi_private = sh;
- dev->vec.bv_page = dev->page;
bio_init(&dev->rreq);
dev->rreq.bi_io_vec = &dev->rvec;
- dev->rreq.bi_vcnt++;
- dev->rreq.bi_max_vecs++;
+ dev->rreq.bi_max_vecs = 1;
dev->rreq.bi_private = sh;
- dev->rvec.bv_page = dev->page;
dev->flags = 0;
dev->sector = compute_blocknr(sh, i, previous);
@@ -2750,6 +2778,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
/* and fail all 'written' */
bi = sh->dev[i].written;
sh->dev[i].written = NULL;
+ if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
+ WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+ sh->dev[i].page = sh->dev[i].orig_page;
+ }
+
if (bi) bitmap_end = 1;
while (bi && bi->bi_iter.bi_sector <
sh->dev[i].sector + STRIPE_SECTORS) {
@@ -2886,8 +2919,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
(s->failed >= 1 && fdev[0]->toread) ||
(s->failed >= 2 && fdev[1]->toread) ||
(sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
+ (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
!test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
- (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
+ (sh->raid_conf->level == 6 && s->failed && s->to_write &&
+ s->to_write < sh->raid_conf->raid_disks - 2 &&
+ (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
/* we would like to get this block, possibly by computing it,
* otherwise read it if the backing disk is insync
*/
@@ -2991,12 +3027,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
dev = &sh->dev[i];
if (!test_bit(R5_LOCKED, &dev->flags) &&
(test_bit(R5_UPTODATE, &dev->flags) ||
- test_bit(R5_Discard, &dev->flags))) {
+ test_bit(R5_Discard, &dev->flags) ||
+ test_bit(R5_SkipCopy, &dev->flags))) {
/* We can return any write requests */
struct bio *wbi, *wbi2;
pr_debug("Return write for disc %d\n", i);
if (test_and_clear_bit(R5_Discard, &dev->flags))
clear_bit(R5_UPTODATE, &dev->flags);
+ if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
+ WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
+ dev->page = dev->orig_page;
+ }
wbi = dev->written;
dev->written = NULL;
while (wbi && wbi->bi_iter.bi_sector <
@@ -3015,6 +3056,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
0);
} else if (test_bit(R5_Discard, &dev->flags))
discard_pending = 1;
+ WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
+ WARN_ON(dev->page != dev->orig_page);
}
if (!discard_pending &&
test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
@@ -3086,7 +3129,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
!test_bit(R5_LOCKED, &dev->flags) &&
!(test_bit(R5_UPTODATE, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags))) {
- if (test_bit(R5_Insync, &dev->flags)) rcw++;
+ if (test_bit(R5_Insync, &dev->flags))
+ rcw++;
else
rcw += 2*disks;
}
@@ -3107,10 +3151,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
!(test_bit(R5_UPTODATE, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags)) &&
test_bit(R5_Insync, &dev->flags)) {
- if (
- test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- pr_debug("Read_old block "
- "%d for r-m-w\n", i);
+ if (test_bit(STRIPE_PREREAD_ACTIVE,
+ &sh->state)) {
+ pr_debug("Read_old block %d for r-m-w\n",
+ i);
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
@@ -3133,10 +3177,9 @@ static void handle_stripe_dirtying(struct r5conf *conf,
!(test_bit(R5_UPTODATE, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags))) {
rcw++;
- if (!test_bit(R5_Insync, &dev->flags))
- continue; /* it's a failed drive */
- if (
- test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ if (test_bit(R5_Insync, &dev->flags) &&
+ test_bit(STRIPE_PREREAD_ACTIVE,
+ &sh->state)) {
pr_debug("Read_old block "
"%d for Reconstruct\n", i);
set_bit(R5_LOCKED, &dev->flags);
@@ -4400,7 +4443,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
* STRIPE_ON_UNPLUG_LIST clear but the stripe
* is still in our list
*/
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
/*
* STRIPE_ON_RELEASE_LIST could be set here. In that
@@ -5031,8 +5074,8 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
+ set_bit(STRIPE_HANDLE, &sh->state);
- handle_stripe(sh);
release_stripe(sh);
return STRIPE_SECTORS;
@@ -5072,7 +5115,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
/* already done this stripe */
continue;
- sh = get_active_stripe(conf, sector, 0, 1, 0);
+ sh = get_active_stripe(conf, sector, 0, 1, 1);
if (!sh) {
/* failed to get a stripe - must wait */
@@ -5355,6 +5398,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
raid5_store_preread_threshold);
static ssize_t
+raid5_show_skip_copy(struct mddev *mddev, char *page)
+{
+ struct r5conf *conf = mddev->private;
+ if (conf)
+ return sprintf(page, "%d\n", conf->skip_copy);
+ else
+ return 0;
+}
+
+static ssize_t
+raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
+{
+ struct r5conf *conf = mddev->private;
+ unsigned long new;
+ if (len >= PAGE_SIZE)
+ return -EINVAL;
+ if (!conf)
+ return -ENODEV;
+
+ if (kstrtoul(page, 10, &new))
+ return -EINVAL;
+ new = !!new;
+ if (new == conf->skip_copy)
+ return len;
+
+ mddev_suspend(mddev);
+ conf->skip_copy = new;
+ if (new)
+ mddev->queue->backing_dev_info.capabilities |=
+ BDI_CAP_STABLE_WRITES;
+ else
+ mddev->queue->backing_dev_info.capabilities &=
+ ~BDI_CAP_STABLE_WRITES;
+ mddev_resume(mddev);
+ return len;
+}
+
+static struct md_sysfs_entry
+raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR,
+ raid5_show_skip_copy,
+ raid5_store_skip_copy);
+
+
+static ssize_t
stripe_cache_active_show(struct mddev *mddev, char *page)
{
struct r5conf *conf = mddev->private;
@@ -5439,6 +5526,7 @@ static struct attribute *raid5_attrs[] = {
&raid5_stripecache_active.attr,
&raid5_preread_bypass_threshold.attr,
&raid5_group_thread_cnt.attr,
+ &raid5_skip_copy.attr,
NULL,
};
static struct attribute_group raid5_attrs_group = {
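
skip_copy lets the stripe cache reference the bio's page instead of copying
it, which is only safe if the page cannot change until the write completes;
that is why raid5_store_skip_copy() toggles BDI_CAP_STABLE_WRITES on the
backing device. The write-side (frombio) decision in async_copy_data(),
distilled:

/* Borrow the bio page only when it exactly covers one stripe unit;
 * partial or misaligned chunks still go through async_memcpy(). */
if (sh->raid_conf->skip_copy &&
    b_offset == 0 && page_offset == 0 && clen == STRIPE_SIZE)
	*page = bio_page;		/* zero-copy: R5_SkipCopy is set later */
else
	tx = async_memcpy(*page, bio_page, page_offset,
			  b_offset, clen, &submit);
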
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 01ad8ae8f578..bc72cd4be5f8 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -232,7 +232,7 @@ struct stripe_head {
*/
struct bio req, rreq;
struct bio_vec vec, rvec;
- struct page *page;
+ struct page *page, *orig_page;
struct bio *toread, *read, *towrite, *written;
sector_t sector; /* sector of this page */
unsigned long flags;
@@ -299,6 +299,7 @@ enum r5dev_flags {
* data in, and now is a good time to write it out.
*/
R5_Discard, /* Discard the stripe */
+ R5_SkipCopy, /* Don't copy data from bio to stripe cache */
};
/*
@@ -436,6 +437,7 @@ struct r5conf {
atomic_t pending_full_writes; /* full write backlog */
int bypass_count; /* bypassed prereads */
int bypass_threshold; /* preread nice */
+ int skip_copy; /* Don't copy data from bio to stripe cache */
struct list_head *last_hold; /* detect hold_list promotions */
atomic_t reshape_stripes; /* stripes with pending writes for reshape */