From 75da39bf256c27e25f395b191ead79f323772672 Mon Sep 17 00:00:00 2001
From: Joe Thornber
Date: Fri, 20 Feb 2015 12:58:03 +0000
Subject: dm cache policy mq: keep track of the number of entries in a multiqueue

Small optimisation: now queue_empty() doesn't need to walk all levels of
the multiqueue.

Signed-off-by: Joe Thornber
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-policy-mq.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

(limited to 'drivers/md/dm-cache-policy-mq.c')

diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 13f547a4eeb6..ca05d69191e8 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -126,6 +126,7 @@ static void iot_examine_bio(struct io_tracker *t, struct bio *bio)
 #define NR_QUEUE_LEVELS 16u
 
 struct queue {
+        unsigned nr_elts;
         struct list_head qs[NR_QUEUE_LEVELS];
 };
 
@@ -133,23 +134,14 @@ static void queue_init(struct queue *q)
 {
         unsigned i;
 
+        q->nr_elts = 0;
         for (i = 0; i < NR_QUEUE_LEVELS; i++)
                 INIT_LIST_HEAD(q->qs + i);
 }
 
-/*
- * Checks to see if the queue is empty.
- * FIXME: reduce cpu usage.
- */
 static bool queue_empty(struct queue *q)
 {
-        unsigned i;
-
-        for (i = 0; i < NR_QUEUE_LEVELS; i++)
-                if (!list_empty(q->qs + i))
-                        return false;
-
-        return true;
+        return q->nr_elts == 0;
 }
 
 /*
@@ -157,11 +149,13 @@ static bool queue_empty(struct queue *q)
  */
 static void queue_push(struct queue *q, unsigned level, struct list_head *elt)
 {
+        q->nr_elts++;
         list_add_tail(elt, q->qs + level);
 }
 
-static void queue_remove(struct list_head *elt)
+static void queue_remove(struct queue *q, struct list_head *elt)
 {
+        q->nr_elts--;
         list_del(elt);
 }
 
@@ -197,6 +191,7 @@ static struct list_head *queue_pop(struct queue *q)
         struct list_head *r = queue_peek(q);
 
         if (r) {
+                q->nr_elts--;
                 list_del(r);
 
                 /* have we just emptied the bottom level? */
@@ -496,7 +491,11 @@ static void push(struct mq_policy *mq, struct entry *e)
  */
 static void del(struct mq_policy *mq, struct entry *e)
 {
-        queue_remove(&e->list);
+        if (in_cache(mq, e))
+                queue_remove(e->dirty ? &mq->cache_dirty : &mq->cache_clean, &e->list);
+        else
+                queue_remove(&mq->pre_cache, &e->list);
+
         hash_remove(e);
 }
 
--
cgit v1.2.3

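The optimisation above is easy to see in isolation. Below is a minimal user-space sketch of the same idea (hypothetical names, plain C instead of the kernel's <linux/list.h> lists, so it is an illustration rather than the patch's code): as long as every insert and removal keeps a per-queue element count up to date, the emptiness test becomes a single comparison instead of a walk over all sixteen levels.

#include <stdbool.h>

#define NR_LEVELS 16u

struct node {
        struct node *next;
};

struct counted_queue {
        unsigned nr_elts;               /* total entries across all levels */
        struct node *heads[NR_LEVELS];  /* one list per level */
};

static void cq_push(struct counted_queue *q, unsigned level, struct node *n)
{
        n->next = q->heads[level];
        q->heads[level] = n;
        q->nr_elts++;                   /* every insert bumps the counter... */
}

static bool cq_empty(const struct counted_queue *q)
{
        return q->nr_elts == 0;         /* ...so emptiness needs no walk */
}

int main(void)
{
        struct counted_queue q = { 0 };
        struct node n;

        cq_push(&q, 3, &n);
        return cq_empty(&q);            /* exits 0: the queue is not empty */
}

The patch itself applies this same bookkeeping in queue_push(), queue_remove() and queue_pop().
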
From c74ffc5c63b0b2753bedd49bdc1196d570f66803 Mon Sep 17 00:00:00 2001
From: Joe Thornber
Date: Fri, 20 Feb 2015 13:01:22 +0000
Subject: dm cache policy mq: remove queue_shift_down()

queue_shift_down() didn't adjust the hit_counts to the new levels, so it
just had the effect of scrambling levels.

Signed-off-by: Joe Thornber
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-policy-mq.c | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'drivers/md/dm-cache-policy-mq.c')

diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index ca05d69191e8..3c86b5efe78f 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -159,18 +159,6 @@ static void queue_remove(struct queue *q, struct list_head *elt)
         list_del(elt);
 }
 
-/*
- * Shifts all regions down one level.  This has no effect on the order of
- * the queue.
- */
-static void queue_shift_down(struct queue *q)
-{
-        unsigned level;
-
-        for (level = 1; level < NR_QUEUE_LEVELS; level++)
-                list_splice_init(q->qs + level, q->qs + level - 1);
-}
-
 /*
  * Gives us the oldest entry of the lowest popoulated level.  If the first
  * level is emptied then we shift down one level.
@@ -193,10 +181,6 @@ static struct list_head *queue_pop(struct queue *q)
         if (r) {
                 q->nr_elts--;
                 list_del(r);
-
-                /* have we just emptied the bottom level? */
-                if (list_empty(q->qs))
-                        queue_shift_down(q);
         }
 
         return r;
--
cgit v1.2.3

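A brief illustration of the rationale above, assuming, as the commit message implies, that an entry's queue level is meant to follow from its hit count and is re-derived when the entry is requeued. The mapping and names below are hypothetical, not the kernel code:

#include <stdio.h>

#define NR_LEVELS 4u

/* hypothetical: the level an entry is supposed to occupy, derived from
 * how often it has been hit */
static unsigned level_for(unsigned hit_count)
{
        return hit_count < NR_LEVELS ? hit_count : NR_LEVELS - 1;
}

int main(void)
{
        unsigned hit_count = 3;
        unsigned level = level_for(hit_count);  /* entry sits at level 3 */

        /* a queue_shift_down() style demotion moves the entry to level 2,
         * but its hit count is left alone */
        unsigned shifted = level - 1;

        /* the next requeue derives the level from the unchanged hit count,
         * so the entry jumps straight back up */
        printf("parked at level %u, but its hit count still implies level %u\n",
               shifted, level_for(hit_count));
        return 0;
}

Because the splice left hit counts untouched, entries ended up on levels their counts no longer justified, which is the "scrambling" the commit message refers to.
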
From 3e45c91e5cdd0cfd3cc1228628602c8e7e587157 Mon Sep 17 00:00:00 2001
From: Joe Thornber
Date: Fri, 20 Feb 2015 13:49:45 +0000
Subject: dm cache policy mq: track entries hit this 'tick' via sentinel objects

A sentinel object is placed on each level of the multiqueues.  When an
object is hit it is requeued behind the sentinel.  When the tick is
incremented we iterate through all objects behind the sentinel and update
the hit_count, then reposition the sentinel at the very back.

This saves memory by avoiding tracking the tick explicitly for every
struct entry object in the multiqueues.

Signed-off-by: Joe Thornber
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-policy-mq.c | 117 ++++++++++++++++++++++++++++------------
 1 file changed, 82 insertions(+), 35 deletions(-)

(limited to 'drivers/md/dm-cache-policy-mq.c')

diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 3c86b5efe78f..97b14309df90 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -124,10 +124,12 @@ static void iot_examine_bio(struct io_tracker *t, struct bio *bio)
  * sorted queue.
  */
 #define NR_QUEUE_LEVELS 16u
+#define NR_SENTINELS NR_QUEUE_LEVELS * 3
 
 struct queue {
         unsigned nr_elts;
         struct list_head qs[NR_QUEUE_LEVELS];
+        struct list_head sentinels[NR_SENTINELS];
 };
 
 static void queue_init(struct queue *q)
@@ -135,8 +137,10 @@ static void queue_init(struct queue *q)
 {
         unsigned i;
 
         q->nr_elts = 0;
-        for (i = 0; i < NR_QUEUE_LEVELS; i++)
+        for (i = 0; i < NR_QUEUE_LEVELS; i++) {
                 INIT_LIST_HEAD(q->qs + i);
+                INIT_LIST_HEAD(q->sentinels + i);
+        }
 }
 
 static bool queue_empty(struct queue *q)
 {
         return q->nr_elts == 0;
@@ -159,6 +163,11 @@ static void queue_remove(struct queue *q, struct list_head *elt)
         list_del(elt);
 }
 
+static bool is_sentinel(struct queue *q, struct list_head *h)
+{
+        return (h >= q->sentinels) && (h < (q->sentinels + NR_SENTINELS));
+}
+
 /*
  * Gives us the oldest entry of the lowest popoulated level.  If the first
  * level is emptied then we shift down one level.
@@ -166,10 +175,12 @@ static void queue_remove(struct queue *q, struct list_head *elt)
 static struct list_head *queue_peek(struct queue *q)
 {
         unsigned level;
+        struct list_head *h;
 
         for (level = 0; level < NR_QUEUE_LEVELS; level++)
-                if (!list_empty(q->qs + level))
-                        return q->qs[level].next;
+                list_for_each(h, q->qs + level)
+                        if (!is_sentinel(q, h))
+                                return h;
 
         return NULL;
 }
@@ -196,6 +207,37 @@ static struct list_head *list_pop(struct list_head *lh)
         return r;
 }
 
+/*
+ * Sometimes we want to iterate through entries that have been pushed since
+ * a certain event.  We use sentinel entries on the queues to delimit these
+ * 'tick' events.
+ */
+static void queue_tick(struct queue *q)
+{
+        unsigned i;
+
+        for (i = 0; i < NR_QUEUE_LEVELS; i++) {
+                list_del(q->sentinels + i);
+                list_add_tail(q->sentinels + i, q->qs + i);
+        }
+}
+
+typedef void (*iter_fn)(struct list_head *, void *);
+static void queue_iterate_tick(struct queue *q, iter_fn fn, void *context)
+{
+        unsigned i;
+        struct list_head *h;
+
+        for (i = 0; i < NR_QUEUE_LEVELS; i++) {
+                list_for_each_prev(h, q->qs + i) {
+                        if (is_sentinel(q, h))
+                                break;
+
+                        fn(h, context);
+                }
+        }
+}
+
 /*----------------------------------------------------------------*/
 
 /*
@@ -212,7 +254,6 @@ struct entry {
         bool dirty:1;
         unsigned hit_count;
         unsigned generation;
-        unsigned tick;
 };
 
 /*
@@ -460,7 +501,6 @@ static bool in_cache(struct mq_policy *mq, struct entry *e)
  */
 static void push(struct mq_policy *mq, struct entry *e)
 {
-        e->tick = mq->tick;
         hash_insert(mq, e);
 
         if (in_cache(mq, e))
@@ -507,14 +547,6 @@ static struct entry *peek(struct queue *q)
         return h ? container_of(h, struct entry, list) : NULL;
 }
 
-/*
- * Has this entry already been updated?
- */
-static bool updated_this_tick(struct mq_policy *mq, struct entry *e)
-{
-        return mq->tick == e->tick;
-}
-
 /*
  * The promotion threshold is adjusted every generation.  As are the counts
  * of the entries.
@@ -566,20 +598,9 @@ static void check_generation(struct mq_policy *mq)
  * Whenever we use an entry we bump up it's hit counter, and push it to the
  * back to it's current level.
 */
-static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
+static void requeue(struct mq_policy *mq, struct entry *e)
 {
-        if (updated_this_tick(mq, e))
-                return;
-
-        e->hit_count++;
-        mq->hit_count++;
         check_generation(mq);
-
-        /* generation adjustment, to stop the counts increasing forever. */
-        /* FIXME: divide? */
-        /* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */
-        e->generation = mq->generation;
-
         del(mq, e);
         push(mq, e);
 }
@@ -686,7 +707,7 @@ static int cache_entry_found(struct mq_policy *mq,
                              struct entry *e,
                              struct policy_result *result)
 {
-        requeue_and_update_tick(mq, e);
+        requeue(mq, e);
 
         if (in_cache(mq, e)) {
                 result->op = POLICY_HIT;
@@ -724,7 +745,6 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
         new_e->dirty = false;
         new_e->hit_count = e->hit_count;
         new_e->generation = e->generation;
-        new_e->tick = e->tick;
 
         del(mq, e);
         free_entry(&mq->pre_cache_pool, e);
@@ -740,18 +760,16 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
                                  int data_dir, struct policy_result *result)
 {
         int r = 0;
-        bool updated = updated_this_tick(mq, e);
 
-        if ((!discarded_oblock && updated) ||
-            !should_promote(mq, e, discarded_oblock, data_dir)) {
-                requeue_and_update_tick(mq, e);
+        if (!should_promote(mq, e, discarded_oblock, data_dir)) {
+                requeue(mq, e);
                 result->op = POLICY_MISS;
 
         } else if (!can_migrate)
                 r = -EWOULDBLOCK;
 
         else {
-                requeue_and_update_tick(mq, e);
+                requeue(mq, e);
                 r = pre_cache_to_cache(mq, e, result);
         }
 
@@ -888,12 +906,36 @@ static void mq_destroy(struct dm_cache_policy *p)
         kfree(mq);
 }
 
+static void update_pre_cache_hits(struct list_head *h, void *context)
+{
+        struct entry *e = container_of(h, struct entry, list);
+        e->hit_count++;
+}
+
+static void update_cache_hits(struct list_head *h, void *context)
+{
+        struct mq_policy *mq = context;
+        struct entry *e = container_of(h, struct entry, list);
+        e->hit_count++;
+        mq->hit_count++;
+}
+
 static void copy_tick(struct mq_policy *mq)
 {
-        unsigned long flags;
+        unsigned long flags, tick;
 
         spin_lock_irqsave(&mq->tick_lock, flags);
-        mq->tick = mq->tick_protected;
+        tick = mq->tick_protected;
+        if (tick != mq->tick) {
+                queue_iterate_tick(&mq->pre_cache, update_pre_cache_hits, mq);
+                queue_iterate_tick(&mq->cache_dirty, update_cache_hits, mq);
+                queue_iterate_tick(&mq->cache_clean, update_cache_hits, mq);
+                mq->tick = tick;
+        }
+
+        queue_tick(&mq->pre_cache);
+        queue_tick(&mq->cache_dirty);
+        queue_tick(&mq->cache_clean);
         spin_unlock_irqrestore(&mq->tick_lock, flags);
 }
 
@@ -995,10 +1037,15 @@ static int mq_save_hints(struct mq_policy *mq, struct queue *q,
 {
         int r;
         unsigned level;
+        struct list_head *h;
         struct entry *e;
 
         for (level = 0; level < NR_QUEUE_LEVELS; level++)
-                list_for_each_entry(e, q->qs + level, list) {
+                list_for_each(h, q->qs + level) {
+                        if (is_sentinel(q, h))
+                                continue;
+
+                        e = container_of(h, struct entry, list);
                         r = fn(context, infer_cblock(&mq->cache_pool, e),
                                e->oblock, e->hit_count);
                         if (r)
--
cgit v1.2.3

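The sentinel scheme described above, reduced to a single queue level in a self-contained sketch (hypothetical names and a hand-rolled list instead of <linux/list.h>; the kernel patch applies the same idea to each of the sixteen levels via queue_tick() and queue_iterate_tick()): entries touched since the last tick are requeued behind the sentinel, a tick walks only those entries to bump their hit counts, and the sentinel is then parked at the tail again.

#include <stdio.h>

struct list_node {
        struct list_node *prev, *next;
};

static void list_init(struct list_node *head)
{
        head->prev = head->next = head;
}

static void list_remove(struct list_node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

static void list_append(struct list_node *n, struct list_node *head)
{
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
}

struct entry {
        struct list_node list;          /* must stay the first member */
        unsigned hit_count;
};

static struct list_node level;          /* one queue level */
static struct list_node sentinel;       /* threaded through that level */

/* a hit requeues the entry at the tail, which is behind the sentinel */
static void on_hit(struct entry *e)
{
        list_remove(&e->list);
        list_append(&e->list, &level);
}

/* a tick walks everything behind the sentinel (i.e. hit since the last
 * tick), bumps the counters, then parks the sentinel at the tail again */
static void on_tick(void)
{
        struct list_node *n;

        for (n = sentinel.next; n != &level; n = n->next)
                ((struct entry *)n)->hit_count++;

        list_remove(&sentinel);
        list_append(&sentinel, &level);
}

int main(void)
{
        struct entry a = { .hit_count = 0 }, b = { .hit_count = 0 };

        list_init(&level);
        list_append(&a.list, &level);
        list_append(&b.list, &level);
        list_append(&sentinel, &level); /* nothing has been hit yet */

        on_hit(&a);                     /* a is requeued behind the sentinel */
        on_tick();                      /* only a gets its hit count bumped */

        printf("a: %u hit(s), b: %u hit(s)\n", a.hit_count, b.hit_count);
        return 0;
}

Running it prints "a: 1 hit(s), b: 0 hit(s)": only the entry hit during the period is updated, and no per-entry tick field is needed.
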
From fdecee3224d90e51c611198baeb0c38e568ca0e8 Mon Sep 17 00:00:00 2001
From: Joe Thornber
Date: Fri, 20 Feb 2015 13:54:14 +0000
Subject: dm cache policy mq: remove unused generation member of struct entry

Remove it to stop wasting memory.

Signed-off-by: Joe Thornber
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-policy-mq.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'drivers/md/dm-cache-policy-mq.c')

diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 97b14309df90..6bfb39411fa9 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -253,7 +253,6 @@ struct entry {
          */
         bool dirty:1;
         unsigned hit_count;
-        unsigned generation;
 };
 
@@ -744,7 +743,6 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
         new_e->oblock = e->oblock;
         new_e->dirty = false;
         new_e->hit_count = e->hit_count;
-        new_e->generation = e->generation;
 
         del(mq, e);
         free_entry(&mq->pre_cache_pool, e);
@@ -796,7 +794,6 @@ static void insert_in_pre_cache(struct mq_policy *mq,
         e->dirty = false;
         e->oblock = oblock;
         e->hit_count = 1;
-        e->generation = mq->generation;
         push(mq, e);
 }
 
@@ -829,7 +826,6 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
         e->oblock = oblock;
         e->dirty = false;
         e->hit_count = 1;
-        e->generation = mq->generation;
         push(mq, e);
 
         result->cblock = infer_cblock(&mq->cache_pool, e);
@@ -1026,7 +1022,6 @@ static int mq_load_mapping(struct dm_cache_policy *p,
         e->oblock = oblock;
         e->dirty = false;       /* this gets corrected in a minute */
         e->hit_count = hint_valid ? hint : 1;
-        e->generation = mq->generation;
         push(mq, e);
 
         return 0;
--
cgit v1.2.3

From e65ff8703f56273c6dc8b77373f4d2bef6e35107 Mon Sep 17 00:00:00 2001
From: Joe Thornber
Date: Fri, 20 Feb 2015 14:22:17 +0000
Subject: dm cache policy mq: try not to writeback data that changed in the last second

Writeback takes out a lock on the cache block, so will increase the
latency for any concurrent io.  This patch works by placing 2 sentinel
objects on each level of the multiqueues.  Every WRITEBACK_PERIOD the
oldest sentinel gets moved to the newest end of the queue level.

When looking for writeback work:

  if less than 25% of the cache is clean:
    we select the oldest object with the lowest hit count

  otherwise:
    we select the oldest object that is not past a writeback sentinel.

Signed-off-by: Joe Thornber
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-policy-mq.c | 94 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 93 insertions(+), 1 deletion(-)

(limited to 'drivers/md/dm-cache-policy-mq.c')

diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 6bfb39411fa9..3ddd1162334d 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -8,6 +8,7 @@
 #include "dm.h"
 
 #include <linux/hash.h>
+#include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -126,8 +127,12 @@ static void iot_examine_bio(struct io_tracker *t, struct bio *bio)
 #define NR_QUEUE_LEVELS 16u
 #define NR_SENTINELS NR_QUEUE_LEVELS * 3
 
+#define WRITEBACK_PERIOD HZ
+
 struct queue {
         unsigned nr_elts;
+        bool current_writeback_sentinels;
+        unsigned long next_writeback;
         struct list_head qs[NR_QUEUE_LEVELS];
         struct list_head sentinels[NR_SENTINELS];
 };
@@ -137,12 +142,21 @@ static void queue_init(struct queue *q)
         unsigned i;
 
         q->nr_elts = 0;
+        q->current_writeback_sentinels = false;
+        q->next_writeback = 0;
         for (i = 0; i < NR_QUEUE_LEVELS; i++) {
                 INIT_LIST_HEAD(q->qs + i);
                 INIT_LIST_HEAD(q->sentinels + i);
+                INIT_LIST_HEAD(q->sentinels + NR_QUEUE_LEVELS + i);
+                INIT_LIST_HEAD(q->sentinels + (2 * NR_QUEUE_LEVELS) + i);
         }
 }
 
+static unsigned queue_size(struct queue *q)
+{
+        return q->nr_elts;
+}
+
 static bool queue_empty(struct queue *q)
 {
         return q->nr_elts == 0;
@@ -197,6 +211,27 @@ static struct list_head *queue_pop(struct queue *q)
         return r;
 }
 
+/*
+ * Pops an entry from a level that is not past a sentinel.
+ */
+static struct list_head *queue_pop_old(struct queue *q)
+{
+        unsigned level;
+        struct list_head *h;
+
+        for (level = 0; level < NR_QUEUE_LEVELS; level++)
+                list_for_each(h, q->qs + level) {
+                        if (is_sentinel(q, h))
+                                break;
+
+                        q->nr_elts--;
+                        list_del(h);
+                        return h;
+                }
+
+        return NULL;
+}
+
 static struct list_head *list_pop(struct list_head *lh)
 {
         struct list_head *r = lh->next;
@@ -207,6 +242,31 @@ static struct list_head *list_pop(struct list_head *lh)
         return r;
 }
 
+static struct list_head *writeback_sentinel(struct queue *q, unsigned level)
+{
+        if (q->current_writeback_sentinels)
+                return q->sentinels + NR_QUEUE_LEVELS + level;
+        else
+                return q->sentinels + 2 * NR_QUEUE_LEVELS + level;
+}
+
+static void queue_update_writeback_sentinels(struct queue *q)
+{
+        unsigned i;
+        struct list_head *h;
+
+        if (time_after(jiffies, q->next_writeback)) {
+                for (i = 0; i < NR_QUEUE_LEVELS; i++) {
+                        h = writeback_sentinel(q, i);
+                        list_del(h);
+                        list_add_tail(h, q->qs + i);
+                }
+
+                q->next_writeback = jiffies + WRITEBACK_PERIOD;
+                q->current_writeback_sentinels = !q->current_writeback_sentinels;
+        }
+}
+
 /*
  * Sometimes we want to iterate through entries that have been pushed since
  * a certain event.  We use sentinel entries on the queues to delimit these
@@ -540,6 +600,20 @@ static struct entry *pop(struct mq_policy *mq, struct queue *q)
         return e;
 }
 
+static struct entry *pop_old(struct mq_policy *mq, struct queue *q)
+{
+        struct entry *e;
+        struct list_head *h = queue_pop_old(q);
+
+        if (!h)
+                return NULL;
+
+        e = container_of(h, struct entry, list);
+        hash_remove(e);
+
+        return e;
+}
+
 static struct entry *peek(struct queue *q)
 {
         struct list_head *h = queue_peek(q);
@@ -932,6 +1006,7 @@ static void copy_tick(struct mq_policy *mq)
         queue_tick(&mq->pre_cache);
         queue_tick(&mq->cache_dirty);
         queue_tick(&mq->cache_clean);
+        queue_update_writeback_sentinels(&mq->cache_dirty);
         spin_unlock_irqrestore(&mq->tick_lock, flags);
 }
 
@@ -1112,10 +1187,27 @@ static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
         return r;
 }
 
+#define CLEAN_TARGET_PERCENTAGE 25
+
+static bool clean_target_met(struct mq_policy *mq)
+{
+        /*
+         * Cache entries may not be populated.  So we're cannot rely on the
+         * size of the clean queue.
+         */
+        unsigned nr_clean = from_cblock(mq->cache_size) - queue_size(&mq->cache_dirty);
+        unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_PERCENTAGE / 100;
+
+        return nr_clean >= target;
+}
+
 static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
                               dm_cblock_t *cblock)
 {
-        struct entry *e = pop(mq, &mq->cache_dirty);
+        struct entry *e = pop_old(mq, &mq->cache_dirty);
+
+        if (!e && !clean_target_met(mq))
+                e = pop(mq, &mq->cache_dirty);
 
         if (!e)
                 return -ENODATA;
--
cgit v1.2.3
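
To make the selection rule concrete, here is a small self-contained sketch of just the decision logic (a toy model with hypothetical names; the real code drives the dirty multiqueue through pop_old(), pop() and clean_target_met()): prefer dirty blocks that have aged past a writeback sentinel, and fall back to the overall oldest dirty block only when less than 25% of the cache is clean.

#include <stdbool.h>
#include <stdio.h>

#define CLEAN_TARGET_PERCENTAGE 25u

/* toy model: just the counters the writeback decision looks at */
struct cache_state {
        unsigned cache_size;    /* total cache blocks */
        unsigned nr_dirty;      /* entries on the dirty queue */
        unsigned nr_dirty_old;  /* dirty entries already past a writeback
                                 * sentinel, i.e. dirty for at least a period */
};

static bool clean_target_met(const struct cache_state *s)
{
        /* the clean count is inferred from the total, mirroring the patch's
         * comment that the clean queue's size alone can't be relied on */
        unsigned nr_clean = s->cache_size - s->nr_dirty;
        unsigned target = s->cache_size * CLEAN_TARGET_PERCENTAGE / 100;

        return nr_clean >= target;
}

/* roughly the decision __mq_writeback_work() makes: should a block be
 * handed out for writeback right now? */
static bool should_writeback(const struct cache_state *s)
{
        if (s->nr_dirty_old)            /* an aged dirty block: always ok */
                return true;

        /* only recently written blocks remain dirty: touch them anyway
         * if less than 25% of the cache is clean */
        return s->nr_dirty && !clean_target_met(s);
}

int main(void)
{
        /* 1000 blocks, 800 dirtied within the last second: only 200 clean
         * (below the 250 target), so writeback proceeds despite their age */
        struct cache_state busy = { 1000, 800, 0 };

        /* 1000 blocks, 100 recently dirtied: 900 clean, so leave them to
         * age past a sentinel before writing them back */
        struct cache_state idle = { 1000, 100, 0 };

        printf("busy: %d, idle: %d\n", should_writeback(&busy), should_writeback(&idle));
        return 0;
}

With a 1000-block cache the clean target is 250 blocks, so the first state triggers writeback even though every dirty block is recent, while the second leaves recently written blocks alone until they age past a writeback sentinel.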