diff options
Diffstat (limited to '')
-rw-r--r-- | drivers/md/dm-cache-policy-smq.c | 821 |
1 files changed, 461 insertions, 360 deletions
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index f19c6930a67c..e0c40aec5e96 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -4,8 +4,9 @@ * This file is released under the GPL. */ -#include "dm-cache-policy.h" +#include "dm-cache-background-tracker.h" #include "dm-cache-policy-internal.h" +#include "dm-cache-policy.h" #include "dm.h" #include <linux/hash.h> @@ -38,10 +39,11 @@ struct entry { unsigned hash_next:28; unsigned prev:28; unsigned next:28; - unsigned level:7; + unsigned level:6; bool dirty:1; bool allocated:1; bool sentinel:1; + bool pending_work:1; dm_oblock_t oblock; }; @@ -279,14 +281,28 @@ static unsigned q_size(struct queue *q) */ static void q_push(struct queue *q, struct entry *e) { + BUG_ON(e->pending_work); + if (!e->sentinel) q->nr_elts++; l_add_tail(q->es, q->qs + e->level, e); } +static void q_push_front(struct queue *q, struct entry *e) +{ + BUG_ON(e->pending_work); + + if (!e->sentinel) + q->nr_elts++; + + l_add_head(q->es, q->qs + e->level, e); +} + static void q_push_before(struct queue *q, struct entry *old, struct entry *e) { + BUG_ON(e->pending_work); + if (!e->sentinel) q->nr_elts++; @@ -336,19 +352,6 @@ static struct entry *q_pop(struct queue *q) } /* - * Pops an entry from a level that is not past a sentinel. - */ -static struct entry *q_pop_old(struct queue *q, unsigned max_level) -{ - struct entry *e = q_peek(q, max_level, false); - - if (e) - q_del(q, e); - - return e; -} - -/* * This function assumes there is a non-sentinel entry to pop. It's only * used by redistribute, so we know this is true. It also doesn't adjust * the q->nr_elts count. @@ -446,45 +449,49 @@ static void q_redistribute(struct queue *q) break; e->level = level + 1u; - l_add_head(q->es, l_above, e); + l_add_tail(q->es, l_above, e); } } } -static void q_requeue_before(struct queue *q, struct entry *dest, struct entry *e, unsigned extra_levels) +static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels, + struct entry *s1, struct entry *s2) { struct entry *de; - unsigned new_level; - - q_del(q, e); + unsigned sentinels_passed = 0; + unsigned new_level = min(q->nr_levels - 1u, e->level + extra_levels); + /* try and find an entry to swap with */ if (extra_levels && (e->level < q->nr_levels - 1u)) { - new_level = min(q->nr_levels - 1u, e->level + extra_levels); - for (de = l_head(q->es, q->qs + new_level); de; de = l_next(q->es, de)) { - if (de->sentinel) - continue; + for (de = l_head(q->es, q->qs + new_level); de && de->sentinel; de = l_next(q->es, de)) + sentinels_passed++; + if (de) { q_del(q, de); de->level = e->level; + if (s1) { + switch (sentinels_passed) { + case 0: + q_push_before(q, s1, de); + break; + + case 1: + q_push_before(q, s2, de); + break; - if (dest) - q_push_before(q, dest, de); - else + default: + q_push(q, de); + } + } else q_push(q, de); - break; } - - e->level = new_level; } + q_del(q, e); + e->level = new_level; q_push(q, e); } -static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels) -{ - q_requeue_before(q, NULL, e, extra_levels); -} - /*----------------------------------------------------------------*/ #define FP_SHIFT 8 @@ -550,7 +557,7 @@ static enum performance stats_assess(struct stats *s) /*----------------------------------------------------------------*/ -struct hash_table { +struct smq_hash_table { struct entry_space *es; unsigned long long hash_bits; unsigned *buckets; @@ -560,7 +567,7 @@ struct hash_table { * All cache entries are stored in a chained hash table. To save space we * use indexing again, and only store indexes to the next entry. */ -static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_entries) +static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned nr_entries) { unsigned i, nr_buckets; @@ -578,34 +585,34 @@ static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_ent return 0; } -static void h_exit(struct hash_table *ht) +static void h_exit(struct smq_hash_table *ht) { vfree(ht->buckets); } -static struct entry *h_head(struct hash_table *ht, unsigned bucket) +static struct entry *h_head(struct smq_hash_table *ht, unsigned bucket) { return to_entry(ht->es, ht->buckets[bucket]); } -static struct entry *h_next(struct hash_table *ht, struct entry *e) +static struct entry *h_next(struct smq_hash_table *ht, struct entry *e) { return to_entry(ht->es, e->hash_next); } -static void __h_insert(struct hash_table *ht, unsigned bucket, struct entry *e) +static void __h_insert(struct smq_hash_table *ht, unsigned bucket, struct entry *e) { e->hash_next = ht->buckets[bucket]; ht->buckets[bucket] = to_index(ht->es, e); } -static void h_insert(struct hash_table *ht, struct entry *e) +static void h_insert(struct smq_hash_table *ht, struct entry *e) { unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits); __h_insert(ht, h, e); } -static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t oblock, +static struct entry *__h_lookup(struct smq_hash_table *ht, unsigned h, dm_oblock_t oblock, struct entry **prev) { struct entry *e; @@ -621,7 +628,7 @@ static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t o return NULL; } -static void __h_unlink(struct hash_table *ht, unsigned h, +static void __h_unlink(struct smq_hash_table *ht, unsigned h, struct entry *e, struct entry *prev) { if (prev) @@ -633,7 +640,7 @@ static void __h_unlink(struct hash_table *ht, unsigned h, /* * Also moves each entry to the front of the bucket. */ -static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock) +static struct entry *h_lookup(struct smq_hash_table *ht, dm_oblock_t oblock) { struct entry *e, *prev; unsigned h = hash_64(from_oblock(oblock), ht->hash_bits); @@ -651,7 +658,7 @@ static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock) return e; } -static void h_remove(struct hash_table *ht, struct entry *e) +static void h_remove(struct smq_hash_table *ht, struct entry *e) { unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits); struct entry *prev; @@ -699,7 +706,10 @@ static void init_entry(struct entry *e) e->next = INDEXER_NULL; e->prev = INDEXER_NULL; e->level = 0u; + e->dirty = true; /* FIXME: audit */ e->allocated = true; + e->sentinel = false; + e->pending_work = false; } static struct entry *alloc_entry(struct entry_alloc *ea) @@ -762,11 +772,11 @@ static struct entry *get_entry(struct entry_alloc *ea, unsigned index) #define NR_HOTSPOT_LEVELS 64u #define NR_CACHE_LEVELS 64u -#define WRITEBACK_PERIOD (10 * HZ) -#define DEMOTE_PERIOD (60 * HZ) +#define WRITEBACK_PERIOD (10ul * HZ) +#define DEMOTE_PERIOD (60ul * HZ) #define HOTSPOT_UPDATE_PERIOD (HZ) -#define CACHE_UPDATE_PERIOD (10u * HZ) +#define CACHE_UPDATE_PERIOD (60ul * HZ) struct smq_policy { struct dm_cache_policy policy; @@ -814,8 +824,8 @@ struct smq_policy { * The hash tables allows us to quickly find an entry by origin * block. */ - struct hash_table table; - struct hash_table hotspot_table; + struct smq_hash_table table; + struct smq_hash_table hotspot_table; bool current_writeback_sentinels; unsigned long next_writeback_period; @@ -828,6 +838,10 @@ struct smq_policy { unsigned long next_hotspot_period; unsigned long next_cache_period; + + struct background_tracker *bg_work; + + bool migrations_allowed; }; /*----------------------------------------------------------------*/ @@ -876,15 +890,15 @@ static void __update_demote_sentinels(struct smq_policy *mq) static void update_sentinels(struct smq_policy *mq) { if (time_after(jiffies, mq->next_writeback_period)) { - __update_writeback_sentinels(mq); mq->next_writeback_period = jiffies + WRITEBACK_PERIOD; mq->current_writeback_sentinels = !mq->current_writeback_sentinels; + __update_writeback_sentinels(mq); } if (time_after(jiffies, mq->next_demote_period)) { - __update_demote_sentinels(mq); mq->next_demote_period = jiffies + DEMOTE_PERIOD; mq->current_demote_sentinels = !mq->current_demote_sentinels; + __update_demote_sentinels(mq); } } @@ -920,55 +934,40 @@ static void sentinels_init(struct smq_policy *mq) /*----------------------------------------------------------------*/ -/* - * These methods tie together the dirty queue, clean queue and hash table. - */ -static void push_new(struct smq_policy *mq, struct entry *e) +static void del_queue(struct smq_policy *mq, struct entry *e) { - struct queue *q = e->dirty ? &mq->dirty : &mq->clean; - h_insert(&mq->table, e); - q_push(q, e); + q_del(e->dirty ? &mq->dirty : &mq->clean, e); } -static void push(struct smq_policy *mq, struct entry *e) +static void push_queue(struct smq_policy *mq, struct entry *e) { - struct entry *sentinel; - - h_insert(&mq->table, e); - - /* - * Punch this into the queue just in front of the sentinel, to - * ensure it's cleaned straight away. - */ - if (e->dirty) { - sentinel = writeback_sentinel(mq, e->level); - q_push_before(&mq->dirty, sentinel, e); - } else { - sentinel = demote_sentinel(mq, e->level); - q_push_before(&mq->clean, sentinel, e); - } + if (e->dirty) + q_push(&mq->dirty, e); + else + q_push(&mq->clean, e); } -/* - * Removes an entry from cache. Removes from the hash table. - */ -static void __del(struct smq_policy *mq, struct queue *q, struct entry *e) +// !h, !q, a -> h, q, a +static void push(struct smq_policy *mq, struct entry *e) { - q_del(q, e); - h_remove(&mq->table, e); + h_insert(&mq->table, e); + if (!e->pending_work) + push_queue(mq, e); } -static void del(struct smq_policy *mq, struct entry *e) +static void push_queue_front(struct smq_policy *mq, struct entry *e) { - __del(mq, e->dirty ? &mq->dirty : &mq->clean, e); + if (e->dirty) + q_push_front(&mq->dirty, e); + else + q_push_front(&mq->clean, e); } -static struct entry *pop_old(struct smq_policy *mq, struct queue *q, unsigned max_level) +static void push_front(struct smq_policy *mq, struct entry *e) { - struct entry *e = q_pop_old(q, max_level); - if (e) - h_remove(&mq->table, e); - return e; + h_insert(&mq->table, e); + if (!e->pending_work) + push_queue_front(mq, e); } static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e) @@ -978,16 +977,21 @@ static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e) static void requeue(struct smq_policy *mq, struct entry *e) { - struct entry *sentinel; + /* + * Pending work has temporarily been taken out of the queues. + */ + if (e->pending_work) + return; if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) { - if (e->dirty) { - sentinel = writeback_sentinel(mq, e->level); - q_requeue_before(&mq->dirty, sentinel, e, 1u); - } else { - sentinel = demote_sentinel(mq, e->level); - q_requeue_before(&mq->clean, sentinel, e, 1u); + if (!e->dirty) { + q_requeue(&mq->clean, e, 1u, NULL, NULL); + return; } + + q_requeue(&mq->dirty, e, 1u, + get_sentinel(&mq->writeback_sentinel_alloc, e->level, !mq->current_writeback_sentinels), + get_sentinel(&mq->writeback_sentinel_alloc, e->level, mq->current_writeback_sentinels)); } } @@ -1026,6 +1030,8 @@ static void update_promote_levels(struct smq_policy *mq) unsigned threshold_level = allocator_empty(&mq->cache_alloc) ? default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u); + threshold_level = max(threshold_level, NR_HOTSPOT_LEVELS); + /* * If the hotspot queue is performing badly then we have little * confidence that we know which blocks to promote. So we cut down @@ -1045,7 +1051,7 @@ static void update_promote_levels(struct smq_policy *mq) } mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level; - mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level) + 2u; + mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level); } /* @@ -1095,34 +1101,142 @@ static void end_cache_period(struct smq_policy *mq) } } -static int demote_cblock(struct smq_policy *mq, - struct policy_locker *locker, - dm_oblock_t *oblock) +/*----------------------------------------------------------------*/ + +/* + * Targets are given as a percentage. + */ +#define CLEAN_TARGET 25u +#define FREE_TARGET 25u + +static unsigned percent_to_target(struct smq_policy *mq, unsigned p) { - struct entry *demoted = q_peek(&mq->clean, mq->clean.nr_levels, false); - if (!demoted) - /* - * We could get a block from mq->dirty, but that - * would add extra latency to the triggering bio as it - * waits for the writeback. Better to not promote this - * time and hope there's a clean block next time this block - * is hit. - */ - return -ENOSPC; + return from_cblock(mq->cache_size) * p / 100u; +} + +static bool clean_target_met(struct smq_policy *mq, bool idle) +{ + /* + * Cache entries may not be populated. So we cannot rely on the + * size of the clean queue. + */ + unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty); - if (locker->fn(locker, demoted->oblock)) + if (idle) /* - * We couldn't lock this block. + * We'd like to clean everything. */ - return -EBUSY; + return q_size(&mq->dirty) == 0u; + else + return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >= + percent_to_target(mq, CLEAN_TARGET); +} - del(mq, demoted); - *oblock = demoted->oblock; - free_entry(&mq->cache_alloc, demoted); +static bool free_target_met(struct smq_policy *mq, bool idle) +{ + unsigned nr_free = from_cblock(mq->cache_size) - + mq->cache_alloc.nr_allocated; - return 0; + if (idle) + return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >= + percent_to_target(mq, FREE_TARGET); + else + return true; } +/*----------------------------------------------------------------*/ + +static void mark_pending(struct smq_policy *mq, struct entry *e) +{ + BUG_ON(e->sentinel); + BUG_ON(!e->allocated); + BUG_ON(e->pending_work); + e->pending_work = true; +} + +static void clear_pending(struct smq_policy *mq, struct entry *e) +{ + BUG_ON(!e->pending_work); + e->pending_work = false; +} + +static void queue_writeback(struct smq_policy *mq) +{ + int r; + struct policy_work work; + struct entry *e; + + e = q_peek(&mq->dirty, mq->dirty.nr_levels, !mq->migrations_allowed); + if (e) { + mark_pending(mq, e); + q_del(&mq->dirty, e); + + work.op = POLICY_WRITEBACK; + work.oblock = e->oblock; + work.cblock = infer_cblock(mq, e); + + r = btracker_queue(mq->bg_work, &work, NULL); + WARN_ON_ONCE(r); // FIXME: finish, I think we have to get rid of this race. + } +} + +static void queue_demotion(struct smq_policy *mq) +{ + struct policy_work work; + struct entry *e; + + if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed))) + return; + + e = q_peek(&mq->clean, mq->clean.nr_levels, true); + if (!e) { + if (!clean_target_met(mq, false)) + queue_writeback(mq); + return; + } + + mark_pending(mq, e); + q_del(&mq->clean, e); + + work.op = POLICY_DEMOTE; + work.oblock = e->oblock; + work.cblock = infer_cblock(mq, e); + btracker_queue(mq->bg_work, &work, NULL); +} + +static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock, + struct policy_work **workp) +{ + struct entry *e; + struct policy_work work; + + if (!mq->migrations_allowed) + return; + + if (allocator_empty(&mq->cache_alloc)) { + if (!free_target_met(mq, false)) + queue_demotion(mq); + return; + } + + if (btracker_promotion_already_present(mq->bg_work, oblock)) + return; + + /* + * We allocate the entry now to reserve the cblock. If the + * background work is aborted we must remember to free it. + */ + e = alloc_entry(&mq->cache_alloc); + BUG_ON(!e); + e->pending_work = true; + work.op = POLICY_PROMOTE; + work.oblock = oblock; + work.cblock = infer_cblock(mq, e); + btracker_queue(mq->bg_work, &work, workp); +} + +/*----------------------------------------------------------------*/ + enum promote_result { PROMOTE_NOT, PROMOTE_TEMPORARY, @@ -1137,49 +1251,18 @@ static enum promote_result maybe_promote(bool promote) return promote ? PROMOTE_PERMANENT : PROMOTE_NOT; } -static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e, struct bio *bio, - bool fast_promote) +static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e, + int data_dir, bool fast_promote) { - if (bio_data_dir(bio) == WRITE) { + if (data_dir == WRITE) { if (!allocator_empty(&mq->cache_alloc) && fast_promote) return PROMOTE_TEMPORARY; - else - return maybe_promote(hs_e->level >= mq->write_promote_level); + return maybe_promote(hs_e->level >= mq->write_promote_level); } else return maybe_promote(hs_e->level >= mq->read_promote_level); } -static void insert_in_cache(struct smq_policy *mq, dm_oblock_t oblock, - struct policy_locker *locker, - struct policy_result *result, enum promote_result pr) -{ - int r; - struct entry *e; - - if (allocator_empty(&mq->cache_alloc)) { - result->op = POLICY_REPLACE; - r = demote_cblock(mq, locker, &result->old_oblock); - if (r) { - result->op = POLICY_MISS; - return; - } - - } else - result->op = POLICY_NEW; - - e = alloc_entry(&mq->cache_alloc); - BUG_ON(!e); - e->oblock = oblock; - - if (pr == PROMOTE_TEMPORARY) - push(mq, e); - else - push_new(mq, e); - - result->cblock = infer_cblock(mq, e); -} - static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b) { sector_t r = from_oblock(b); @@ -1187,7 +1270,7 @@ static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b) return to_oblock(r); } -static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b, struct bio *bio) +static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b) { unsigned hi; dm_oblock_t hb = to_hblock(mq, b); @@ -1199,7 +1282,8 @@ static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b, hi = get_index(&mq->hotspot_alloc, e); q_requeue(&mq->hotspot, e, test_and_set_bit(hi, mq->hotspot_hit_bits) ? - 0u : mq->hotspot_level_jump); + 0u : mq->hotspot_level_jump, + NULL, NULL); } else { stats_miss(&mq->hotspot_stats); @@ -1225,47 +1309,6 @@ static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b, return e; } -/* - * Looks the oblock up in the hash table, then decides whether to put in - * pre_cache, or cache etc. - */ -static int map(struct smq_policy *mq, struct bio *bio, dm_oblock_t oblock, - bool can_migrate, bool fast_promote, - struct policy_locker *locker, struct policy_result *result) -{ - struct entry *e, *hs_e; - enum promote_result pr; - - hs_e = update_hotspot_queue(mq, oblock, bio); - - e = h_lookup(&mq->table, oblock); - if (e) { - stats_level_accessed(&mq->cache_stats, e->level); - - requeue(mq, e); - result->op = POLICY_HIT; - result->cblock = infer_cblock(mq, e); - - } else { - stats_miss(&mq->cache_stats); - - pr = should_promote(mq, hs_e, bio, fast_promote); - if (pr == PROMOTE_NOT) - result->op = POLICY_MISS; - - else { - if (!can_migrate) { - result->op = POLICY_MISS; - return -EWOULDBLOCK; - } - - insert_in_cache(mq, oblock, locker, result, pr); - } - } - - return 0; -} - /*----------------------------------------------------------------*/ /* @@ -1282,6 +1325,7 @@ static void smq_destroy(struct dm_cache_policy *p) { struct smq_policy *mq = to_smq_policy(p); + btracker_destroy(mq->bg_work); h_exit(&mq->hotspot_table); h_exit(&mq->table); free_bitset(mq->hotspot_hit_bits); @@ -1290,234 +1334,247 @@ static void smq_destroy(struct dm_cache_policy *p) kfree(mq); } -static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock, - bool can_block, bool can_migrate, bool fast_promote, - struct bio *bio, struct policy_locker *locker, - struct policy_result *result) -{ - int r; - unsigned long flags; - struct smq_policy *mq = to_smq_policy(p); - - result->op = POLICY_MISS; - - spin_lock_irqsave(&mq->lock, flags); - r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result); - spin_unlock_irqrestore(&mq->lock, flags); - - return r; -} +/*----------------------------------------------------------------*/ -static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock) +static int __lookup(struct smq_policy *mq, dm_oblock_t oblock, dm_cblock_t *cblock, + int data_dir, bool fast_copy, + struct policy_work **work, bool *background_work) { - int r; - unsigned long flags; - struct smq_policy *mq = to_smq_policy(p); - struct entry *e; + struct entry *e, *hs_e; + enum promote_result pr; + + *background_work = false; - spin_lock_irqsave(&mq->lock, flags); e = h_lookup(&mq->table, oblock); if (e) { + stats_level_accessed(&mq->cache_stats, e->level); + + requeue(mq, e); *cblock = infer_cblock(mq, e); - r = 0; - } else - r = -ENOENT; - spin_unlock_irqrestore(&mq->lock, flags); + return 0; - return r; -} + } else { + stats_miss(&mq->cache_stats); -static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, bool set) -{ - struct entry *e; + /* + * The hotspot queue only gets updated with misses. + */ + hs_e = update_hotspot_queue(mq, oblock); - e = h_lookup(&mq->table, oblock); - BUG_ON(!e); + pr = should_promote(mq, hs_e, data_dir, fast_copy); + if (pr != PROMOTE_NOT) { + queue_promotion(mq, oblock, work); + *background_work = true; + } - del(mq, e); - e->dirty = set; - push(mq, e); + return -ENOENT; + } } -static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock, + int data_dir, bool fast_copy, + bool *background_work) { + int r; unsigned long flags; struct smq_policy *mq = to_smq_policy(p); spin_lock_irqsave(&mq->lock, flags); - __smq_set_clear_dirty(mq, oblock, true); + r = __lookup(mq, oblock, cblock, + data_dir, fast_copy, + NULL, background_work); spin_unlock_irqrestore(&mq->lock, flags); + + return r; } -static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +static int smq_lookup_with_work(struct dm_cache_policy *p, + dm_oblock_t oblock, dm_cblock_t *cblock, + int data_dir, bool fast_copy, + struct policy_work **work) { - struct smq_policy *mq = to_smq_policy(p); + int r; + bool background_queued; unsigned long flags; + struct smq_policy *mq = to_smq_policy(p); spin_lock_irqsave(&mq->lock, flags); - __smq_set_clear_dirty(mq, oblock, false); + r = __lookup(mq, oblock, cblock, data_dir, fast_copy, work, &background_queued); spin_unlock_irqrestore(&mq->lock, flags); -} -static unsigned random_level(dm_cblock_t cblock) -{ - return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1); + return r; } -static int smq_load_mapping(struct dm_cache_policy *p, - dm_oblock_t oblock, dm_cblock_t cblock, - uint32_t hint, bool hint_valid) +static int smq_get_background_work(struct dm_cache_policy *p, bool idle, + struct policy_work **result) { + int r; + unsigned long flags; struct smq_policy *mq = to_smq_policy(p); - struct entry *e; - e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock)); - e->oblock = oblock; - e->dirty = false; /* this gets corrected in a minute */ - e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock); - push(mq, e); - - return 0; -} + spin_lock_irqsave(&mq->lock, flags); + r = btracker_issue(mq->bg_work, result); + if (r == -ENODATA) { + /* find some writeback work to do */ + if (mq->migrations_allowed && !free_target_met(mq, idle)) + queue_demotion(mq); -static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock) -{ - struct smq_policy *mq = to_smq_policy(p); - struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock)); + else if (!clean_target_met(mq, idle)) + queue_writeback(mq); - if (!e->allocated) - return 0; + r = btracker_issue(mq->bg_work, result); + } + spin_unlock_irqrestore(&mq->lock, flags); - return e->level; + return r; } -static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock) -{ - struct entry *e; +/* + * We need to clear any pending work flags that have been set, and in the + * case of promotion free the entry for the destination cblock. + */ +static void __complete_background_work(struct smq_policy *mq, + struct policy_work *work, + bool success) +{ + struct entry *e = get_entry(&mq->cache_alloc, + from_cblock(work->cblock)); + + switch (work->op) { + case POLICY_PROMOTE: + // !h, !q, a + clear_pending(mq, e); + if (success) { + e->oblock = work->oblock; + push(mq, e); + // h, q, a + } else { + free_entry(&mq->cache_alloc, e); + // !h, !q, !a + } + break; - e = h_lookup(&mq->table, oblock); - BUG_ON(!e); + case POLICY_DEMOTE: + // h, !q, a + if (success) { + h_remove(&mq->table, e); + free_entry(&mq->cache_alloc, e); + // !h, !q, !a + } else { + clear_pending(mq, e); + push_queue(mq, e); + // h, q, a + } + break; - del(mq, e); - free_entry(&mq->cache_alloc, e); + case POLICY_WRITEBACK: + // h, !q, a + clear_pending(mq, e); + push_queue(mq, e); + // h, q, a + break; + } + + btracker_complete(mq->bg_work, work); } -static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) +static void smq_complete_background_work(struct dm_cache_policy *p, + struct policy_work *work, + bool success) { - struct smq_policy *mq = to_smq_policy(p); unsigned long flags; + struct smq_policy *mq = to_smq_policy(p); spin_lock_irqsave(&mq->lock, flags); - __remove_mapping(mq, oblock); + __complete_background_work(mq, work, success); spin_unlock_irqrestore(&mq->lock, flags); } -static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock) +// in_hash(oblock) -> in_hash(oblock) +static void __smq_set_clear_dirty(struct smq_policy *mq, dm_cblock_t cblock, bool set) { struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock)); - if (!e || !e->allocated) - return -ENODATA; - - del(mq, e); - free_entry(&mq->cache_alloc, e); - - return 0; + if (e->pending_work) + e->dirty = set; + else { + del_queue(mq, e); + e->dirty = set; + push_queue(mq, e); + } } -static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock) +static void smq_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock) { - int r; unsigned long flags; struct smq_policy *mq = to_smq_policy(p); spin_lock_irqsave(&mq->lock, flags); - r = __remove_cblock(mq, cblock); + __smq_set_clear_dirty(mq, cblock, true); spin_unlock_irqrestore(&mq->lock, flags); - - return r; } - -#define CLEAN_TARGET_CRITICAL 5u /* percent */ - -static bool clean_target_met(struct smq_policy *mq, bool critical) +static void smq_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock) { - if (critical) { - /* - * Cache entries may not be populated. So we're cannot rely on the - * size of the clean queue. - */ - unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty); - unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_CRITICAL / 100u; + struct smq_policy *mq = to_smq_policy(p); + unsigned long flags; - return nr_clean >= target; - } else - return !q_size(&mq->dirty); + spin_lock_irqsave(&mq->lock, flags); + __smq_set_clear_dirty(mq, cblock, false); + spin_unlock_irqrestore(&mq->lock, flags); } -static int __smq_writeback_work(struct smq_policy *mq, dm_oblock_t *oblock, - dm_cblock_t *cblock, bool critical_only) +static unsigned random_level(dm_cblock_t cblock) { - struct entry *e = NULL; - bool target_met = clean_target_met(mq, critical_only); - - if (critical_only) - /* - * Always try and keep the bottom level clean. - */ - e = pop_old(mq, &mq->dirty, target_met ? 1u : mq->dirty.nr_levels); + return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1); +} - else - e = pop_old(mq, &mq->dirty, mq->dirty.nr_levels); +static int smq_load_mapping(struct dm_cache_policy *p, + dm_oblock_t oblock, dm_cblock_t cblock, + bool dirty, uint32_t hint, bool hint_valid) +{ + struct smq_policy *mq = to_smq_policy(p); + struct entry *e; - if (!e) - return -ENODATA; + e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock)); + e->oblock = oblock; + e->dirty = dirty; + e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock); + e->pending_work = false; - *oblock = e->oblock; - *cblock = infer_cblock(mq, e); - e->dirty = false; - push_new(mq, e); + /* + * When we load mappings we push ahead of both sentinels in order to + * allow demotions and cleaning to occur immediately. + */ + push_front(mq, e); return 0; } -static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock, - dm_cblock_t *cblock, bool critical_only) +static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock) { - int r; - unsigned long flags; struct smq_policy *mq = to_smq_policy(p); + struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock)); - spin_lock_irqsave(&mq->lock, flags); - r = __smq_writeback_work(mq, oblock, cblock, critical_only); - spin_unlock_irqrestore(&mq->lock, flags); - - return r; -} - -static void __force_mapping(struct smq_policy *mq, - dm_oblock_t current_oblock, dm_oblock_t new_oblock) -{ - struct entry *e = h_lookup(&mq->table, current_oblock); + if (!e->allocated) + return -ENODATA; - if (e) { - del(mq, e); - e->oblock = new_oblock; - e->dirty = true; - push(mq, e); - } + // FIXME: what if this block has pending background work? + del_queue(mq, e); + h_remove(&mq->table, e); + free_entry(&mq->cache_alloc, e); + return 0; } -static void smq_force_mapping(struct dm_cache_policy *p, - dm_oblock_t current_oblock, dm_oblock_t new_oblock) +static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock) { - unsigned long flags; struct smq_policy *mq = to_smq_policy(p); + struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock)); - spin_lock_irqsave(&mq->lock, flags); - __force_mapping(mq, current_oblock, new_oblock); - spin_unlock_irqrestore(&mq->lock, flags); + if (!e->allocated) + return 0; + + return e->level; } static dm_cblock_t smq_residency(struct dm_cache_policy *p) @@ -1546,6 +1603,12 @@ static void smq_tick(struct dm_cache_policy *p, bool can_block) spin_unlock_irqrestore(&mq->lock, flags); } +static void smq_allow_migrations(struct dm_cache_policy *p, bool allow) +{ + struct smq_policy *mq = to_smq_policy(p); + mq->migrations_allowed = allow; +} + /* * smq has no config values, but the old mq policy did. To avoid breaking * software we continue to accept these configurables for the mq policy, @@ -1590,18 +1653,18 @@ static int mq_emit_config_values(struct dm_cache_policy *p, char *result, static void init_policy_functions(struct smq_policy *mq, bool mimic_mq) { mq->policy.destroy = smq_destroy; - mq->policy.map = smq_map; mq->policy.lookup = smq_lookup; + mq->policy.lookup_with_work = smq_lookup_with_work; + mq->policy.get_background_work = smq_get_background_work; + mq->policy.complete_background_work = smq_complete_background_work; mq->policy.set_dirty = smq_set_dirty; mq->policy.clear_dirty = smq_clear_dirty; mq->policy.load_mapping = smq_load_mapping; + mq->policy.invalidate_mapping = smq_invalidate_mapping; mq->policy.get_hint = smq_get_hint; - mq->policy.remove_mapping = smq_remove_mapping; - mq->policy.remove_cblock = smq_remove_cblock; - mq->policy.writeback_work = smq_writeback_work; - mq->policy.force_mapping = smq_force_mapping; mq->policy.residency = smq_residency; mq->policy.tick = smq_tick; + mq->policy.allow_migrations = smq_allow_migrations; if (mimic_mq) { mq->policy.set_config_value = mq_set_config_value; @@ -1633,7 +1696,8 @@ static void calc_hotspot_params(sector_t origin_size, static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size, - bool mimic_mq) + bool mimic_mq, + bool migrations_allowed) { unsigned i; unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS; @@ -1658,11 +1722,11 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, } init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue); - for (i = 0; i < nr_sentinels_per_queue; i++) + for (i = 0; i < nr_sentinels_per_queue; i++) get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true; init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels); - for (i = 0; i < nr_sentinels_per_queue; i++) + for (i = 0; i < nr_sentinels_per_queue; i++) get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true; init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels, @@ -1715,8 +1779,16 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, mq->next_hotspot_period = jiffies; mq->next_cache_period = jiffies; + mq->bg_work = btracker_create(10240); /* FIXME: hard coded value */ + if (!mq->bg_work) + goto bad_btracker; + + mq->migrations_allowed = migrations_allowed; + return &mq->policy; +bad_btracker: + h_exit(&mq->hotspot_table); bad_alloc_hotspot_table: h_exit(&mq->table); bad_alloc_table: @@ -1735,21 +1807,28 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size) { - return __smq_create(cache_size, origin_size, cache_block_size, false); + return __smq_create(cache_size, origin_size, cache_block_size, false, true); } static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size) { - return __smq_create(cache_size, origin_size, cache_block_size, true); + return __smq_create(cache_size, origin_size, cache_block_size, true, true); +} + +static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size, + sector_t origin_size, + sector_t cache_block_size) +{ + return __smq_create(cache_size, origin_size, cache_block_size, false, false); } /*----------------------------------------------------------------*/ static struct dm_cache_policy_type smq_policy_type = { .name = "smq", - .version = {1, 5, 0}, + .version = {2, 0, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = smq_create @@ -1757,15 +1836,23 @@ static struct dm_cache_policy_type smq_policy_type = { static struct dm_cache_policy_type mq_policy_type = { .name = "mq", - .version = {1, 5, 0}, + .version = {2, 0, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = mq_create, }; +static struct dm_cache_policy_type cleaner_policy_type = { + .name = "cleaner", + .version = {2, 0, 0}, + .hint_size = 4, + .owner = THIS_MODULE, + .create = cleaner_create, +}; + static struct dm_cache_policy_type default_policy_type = { .name = "default", - .version = {1, 5, 0}, + .version = {2, 0, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = smq_create, @@ -1785,23 +1872,36 @@ static int __init smq_init(void) r = dm_cache_policy_register(&mq_policy_type); if (r) { DMERR("register failed (as mq) %d", r); - dm_cache_policy_unregister(&smq_policy_type); - return -ENOMEM; + goto out_mq; + } + + r = dm_cache_policy_register(&cleaner_policy_type); + if (r) { + DMERR("register failed (as cleaner) %d", r); + goto out_cleaner; } r = dm_cache_policy_register(&default_policy_type); if (r) { DMERR("register failed (as default) %d", r); - dm_cache_policy_unregister(&mq_policy_type); - dm_cache_policy_unregister(&smq_policy_type); - return -ENOMEM; + goto out_default; } return 0; + +out_default: + dm_cache_policy_unregister(&cleaner_policy_type); +out_cleaner: + dm_cache_policy_unregister(&mq_policy_type); +out_mq: + dm_cache_policy_unregister(&smq_policy_type); + + return -ENOMEM; } static void __exit smq_exit(void) { + dm_cache_policy_unregister(&cleaner_policy_type); dm_cache_policy_unregister(&smq_policy_type); dm_cache_policy_unregister(&mq_policy_type); dm_cache_policy_unregister(&default_policy_type); @@ -1816,3 +1916,4 @@ MODULE_DESCRIPTION("smq cache policy"); MODULE_ALIAS("dm-cache-default"); MODULE_ALIAS("dm-cache-mq"); +MODULE_ALIAS("dm-cache-cleaner"); |