From 31b90956b124240aa8c63250243ae1a53585c5e2 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 10 Jun 2019 06:13:34 +0800 Subject: bcache: fix stack corruption by PRECEDING_KEY() Recently people report bcache code compiled with gcc9 is broken, one of the buggy behavior I observe is that two adjacent 4KB I/Os should merge into one but they don't. Finally it turns out to be a stack corruption caused by macro PRECEDING_KEY(). See how PRECEDING_KEY() is defined in bset.h, 437 #define PRECEDING_KEY(_k) \ 438 ({ \ 439 struct bkey *_ret = NULL; \ 440 \ 441 if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \ 442 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \ 443 \ 444 if (!_ret->low) \ 445 _ret->high--; \ 446 _ret->low--; \ 447 } \ 448 \ 449 _ret; \ 450 }) At line 442, _ret points to address of a on-stack variable combined by KEY(), the life range of this on-stack variable is in line 442-446, once _ret is returned to bch_btree_insert_key(), the returned address points to an invalid stack address and this address is overwritten in the following called bch_btree_iter_init(). Then argument 'search' of bch_btree_iter_init() points to some address inside stackframe of bch_btree_iter_init(), exact address depends on how the compiler allocates stack space. Now the stack is corrupted. Fixes: 0eacac22034c ("bcache: PRECEDING_KEY()") Signed-off-by: Coly Li Reviewed-by: Rolf Fokkens Reviewed-by: Pierre JUHEN Tested-by: Shenghui Wang Tested-by: Pierre JUHEN Cc: Kent Overstreet Cc: Nix Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/bset.c | 16 +++++++++++++--- drivers/md/bcache/bset.h | 34 ++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 17 deletions(-) (limited to 'drivers/md/bcache') diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 8f07fa6e1739..268f1b685084 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, struct bset *i = bset_tree_last(b)->data; struct bkey *m, *prev = NULL; struct btree_iter iter; + struct bkey preceding_key_on_stack = ZERO_KEY; + struct bkey *preceding_key_p = &preceding_key_on_stack; BUG_ON(b->ops->is_extents && !KEY_SIZE(k)); - m = bch_btree_iter_init(b, &iter, b->ops->is_extents - ? PRECEDING_KEY(&START_KEY(k)) - : PRECEDING_KEY(k)); + /* + * If k has preceding key, preceding_key_p will be set to address + * of k's preceding key; otherwise preceding_key_p will be set + * to NULL inside preceding_key(). + */ + if (b->ops->is_extents) + preceding_key(&START_KEY(k), &preceding_key_p); + else + preceding_key(k, &preceding_key_p); + + m = bch_btree_iter_init(b, &iter, preceding_key_p); if (b->ops->insert_fixup(b, k, &iter, replace_key)) return status; diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index bac76aabca6d..c71365e7c1fa 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) return __bch_cut_back(where, k); } -#define PRECEDING_KEY(_k) \ -({ \ - struct bkey *_ret = NULL; \ - \ - if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \ - _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \ - \ - if (!_ret->low) \ - _ret->high--; \ - _ret->low--; \ - } \ - \ - _ret; \ -}) +/* + * Pointer '*preceding_key_p' points to a memory object to store preceding + * key of k. If the preceding key does not exist, set '*preceding_key_p' to + * NULL. So the caller of preceding_key() needs to take care of memory + * which '*preceding_key_p' pointed to before calling preceding_key(). + * Currently the only caller of preceding_key() is bch_btree_insert_key(), + * and it points to an on-stack variable, so the memory release is handled + * by stackframe itself. + */ +static inline void preceding_key(struct bkey *k, struct bkey **preceding_key_p) +{ + if (KEY_INODE(k) || KEY_OFFSET(k)) { + (**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0); + if (!(*preceding_key_p)->low) + (*preceding_key_p)->high--; + (*preceding_key_p)->low--; + } else { + (*preceding_key_p) = NULL; + } +} static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k) { -- cgit v1.2.3 From 1f0ffa67349c56ea54c03ccfd1e073c990e7411e Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 10 Jun 2019 06:13:35 +0800 Subject: bcache: only set BCACHE_DEV_WB_RUNNING when cached device attached MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When people set a writeback percent via sysfs file, /sys/block/bcache/bcache/writeback_percent current code directly sets BCACHE_DEV_WB_RUNNING to dc->disk.flags and schedules kworker dc->writeback_rate_update. If there is no cache set attached to, the writeback kernel thread is not running indeed, running dc->writeback_rate_update does not make sense and may cause NULL pointer deference when reference cache set pointer inside update_writeback_rate(). This patch checks whether the cache set point (dc->disk.c) is NULL in sysfs interface handler, and only set BCACHE_DEV_WB_RUNNING and schedule dc->writeback_rate_update when dc->disk.c is not NULL (it means the cache device is attached to a cache set). This problem might be introduced from initial bcache commit, but commit 3fd47bfe55b0 ("bcache: stop dc->writeback_rate_update properly") changes part of the original code piece, so I add 'Fixes: 3fd47bfe55b0' to indicate from which commit this patch can be applied. Fixes: 3fd47bfe55b0 ("bcache: stop dc->writeback_rate_update properly") Reported-by: Bjørn Forsman Signed-off-by: Coly Li Reviewed-by: Bjørn Forsman Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/sysfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/md/bcache') diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 6cd44d3cf906..bfb437ffb13c 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -431,8 +431,13 @@ STORE(bch_cached_dev) bch_writeback_queue(dc); } + /* + * Only set BCACHE_DEV_WB_RUNNING when cached device attached to + * a cache set, otherwise it doesn't make sense. + */ if (attr == &sysfs_writeback_percent) - if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) + if ((dc->disk.c != NULL) && + (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))) schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); -- cgit v1.2.3