summaryrefslogtreecommitdiffstats
path: root/fs/bcachefs
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2022-04-17 23:30:49 +0200
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 23:09:32 +0200
commitc0960603e2d42d097fea4afd6b720619441061bd (patch)
tree075cb4958bc4111afc41077897393c082483157b /fs/bcachefs
parentbcachefs: Fix hash_check_key() (diff)
downloadlinux-c0960603e2d42d097fea4afd6b720619441061bd.tar.xz
linux-c0960603e2d42d097fea4afd6b720619441061bd.zip
bcachefs: Shutdown path improvements
We're seeing occasional firings of the assertion in the key cache shutdown code that nr_dirty == 0, which means we must sometimes be doing transaction commits after we've gone read only. Cleanups & changes: - BCH_FS_ALLOC_CLEAN renamed to BCH_FS_CLEAN_SHUTDOWN - new helper bch2_btree_interior_updates_flush(), which returns true if it had to wait - bch2_btree_flush_writes() now also returns true if there were btree writes in flight - __bch2_fs_read_only now checks if btree writes were in flight in the shutdown loop: btree write completion does a transaction update, to update the pointer in the parent node - assert that !BCH_FS_CLEAN_SHUTDOWN in __bch2_trans_commit Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'fs/bcachefs')
-rw-r--r--fs/bcachefs/bcachefs.h2
-rw-r--r--fs/bcachefs/btree_gc.c4
-rw-r--r--fs/bcachefs/btree_io.c14
-rw-r--r--fs/bcachefs/btree_io.h4
-rw-r--r--fs/bcachefs/btree_update_interior.c18
-rw-r--r--fs/bcachefs/btree_update_interior.h2
-rw-r--r--fs/bcachefs/btree_update_leaf.c2
-rw-r--r--fs/bcachefs/migrate.c5
-rw-r--r--fs/bcachefs/move.c4
-rw-r--r--fs/bcachefs/super.c48
10 files changed, 43 insertions, 60 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 04d297b1da94..e7300a9f427c 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -494,7 +494,7 @@ struct bch_dev {
enum {
/* startup: */
- BCH_FS_ALLOC_CLEAN,
+ BCH_FS_CLEAN_SHUTDOWN,
BCH_FS_INITIAL_GC_DONE,
BCH_FS_INITIAL_GC_UNFIXED,
BCH_FS_TOPOLOGY_REPAIR_DONE,
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 5199f0240fcd..21afc7200570 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1751,9 +1751,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
down_write(&c->gc_lock);
- /* flush interior btree updates: */
- closure_wait_event(&c->btree_interior_update_wait,
- !bch2_btree_interior_updates_nr_pending(c));
+ bch2_btree_interior_updates_flush(c);
ret = bch2_gc_start(c, metadata_only) ?:
bch2_gc_alloc_start(c, metadata_only) ?:
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index f847928ab743..33c54803e0a2 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -2099,29 +2099,33 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
}
}
-static void __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
+static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
{
struct bucket_table *tbl;
struct rhash_head *pos;
struct btree *b;
unsigned i;
+ bool ret = false;
restart:
rcu_read_lock();
for_each_cached_btree(b, c, tbl, i, pos)
if (test_bit(flag, &b->flags)) {
rcu_read_unlock();
wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE);
+ ret = true;
goto restart;
}
rcu_read_unlock();
+
+ return ret;
}
-void bch2_btree_flush_all_reads(struct bch_fs *c)
+bool bch2_btree_flush_all_reads(struct bch_fs *c)
{
- __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight);
+ return __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight);
}
-void bch2_btree_flush_all_writes(struct bch_fs *c)
+bool bch2_btree_flush_all_writes(struct bch_fs *c)
{
- __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
+ return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
}
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index d818d87661e8..8af853642123 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -152,8 +152,8 @@ static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
}
-void bch2_btree_flush_all_reads(struct bch_fs *);
-void bch2_btree_flush_all_writes(struct bch_fs *);
+bool bch2_btree_flush_all_reads(struct bch_fs *);
+bool bch2_btree_flush_all_writes(struct bch_fs *);
static inline void compat_bformat(unsigned level, enum btree_id btree_id,
unsigned version, unsigned big_endian,
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 7a092d852930..27ab1cde2217 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -2175,19 +2175,27 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
mutex_unlock(&c->btree_interior_update_lock);
}
-size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c)
+static bool bch2_btree_interior_updates_pending(struct bch_fs *c)
{
- size_t ret = 0;
- struct list_head *i;
+ bool ret;
mutex_lock(&c->btree_interior_update_lock);
- list_for_each(i, &c->btree_interior_update_list)
- ret++;
+ ret = !list_empty(&c->btree_interior_update_list);
mutex_unlock(&c->btree_interior_update_lock);
return ret;
}
+bool bch2_btree_interior_updates_flush(struct bch_fs *c)
+{
+ bool ret = bch2_btree_interior_updates_pending(c);
+
+ if (ret)
+ closure_wait_event(&c->btree_interior_update_wait,
+ !bch2_btree_interior_updates_pending(c));
+ return ret;
+}
+
void bch2_journal_entries_to_btree_roots(struct bch_fs *c, struct jset *jset)
{
struct btree_root *r;
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index e72eb8795616..adfc6c24a7a4 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -309,7 +309,7 @@ static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);
-size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
+bool bch2_btree_interior_updates_flush(struct bch_fs *);
void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *);
struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index ba6a9218610a..9a2955f4ae6b 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -1117,6 +1117,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
goto out_reset;
}
+ EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
+
memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
trans->journal_u64s = trans->extra_journal_entries.nr;
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 6defc33322b3..5345697f2712 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -175,10 +175,7 @@ next:
goto err;
}
- /* flush relevant btree updates */
- closure_wait_event(&c->btree_interior_update_wait,
- !bch2_btree_interior_updates_nr_pending(c));
-
+ bch2_btree_interior_updates_flush(c);
ret = 0;
err:
bch2_trans_exit(&trans);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index a219c10a7135..f18d603624c0 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -942,9 +942,7 @@ next:
if (ret)
bch_err(c, "error %i in bch2_move_btree", ret);
- /* flush relevant btree updates */
- closure_wait_event(&c->btree_interior_update_wait,
- !bch2_btree_interior_updates_nr_pending(c));
+ bch2_btree_interior_updates_flush(c);
progress_list_del(c, stats);
return ret;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index e4ccdc966fdb..77b7bd61bf43 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -195,58 +195,34 @@ static void __bch2_fs_read_only(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned i, clean_passes = 0;
+ u64 seq = 0;
bch2_rebalance_stop(c);
bch2_copygc_stop(c);
bch2_gc_thread_stop(c);
- /*
- * Flush journal before stopping allocators, because flushing journal
- * blacklist entries involves allocating new btree nodes:
- */
- bch2_journal_flush_all_pins(&c->journal);
-
bch_verbose(c, "flushing journal and stopping allocators");
- bch2_journal_flush_all_pins(&c->journal);
-
do {
clean_passes++;
- if (bch2_journal_flush_all_pins(&c->journal))
- clean_passes = 0;
-
- /*
- * In flight interior btree updates will generate more journal
- * updates and btree updates (alloc btree):
- */
- if (bch2_btree_interior_updates_nr_pending(c)) {
- closure_wait_event(&c->btree_interior_update_wait,
- !bch2_btree_interior_updates_nr_pending(c));
+ if (bch2_btree_interior_updates_flush(c) ||
+ bch2_journal_flush_all_pins(&c->journal) ||
+ bch2_btree_flush_all_writes(c) ||
+ seq != atomic64_read(&c->journal.seq)) {
+ seq = atomic64_read(&c->journal.seq);
clean_passes = 0;
}
- flush_work(&c->btree_interior_update_work);
-
- if (bch2_journal_flush_all_pins(&c->journal))
- clean_passes = 0;
} while (clean_passes < 2);
- bch_verbose(c, "flushing journal and stopping allocators complete");
- set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
-
- closure_wait_event(&c->btree_interior_update_wait,
- !bch2_btree_interior_updates_nr_pending(c));
- flush_work(&c->btree_interior_update_work);
+ bch_verbose(c, "flushing journal and stopping allocators complete");
+ if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
+ set_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
bch2_fs_journal_stop(&c->journal);
/*
- * the journal kicks off btree writes via reclaim - wait for in flight
- * writes after stopping journal:
- */
- bch2_btree_flush_all_writes(c);
-
- /*
* After stopping journal:
*/
for_each_member_device(ca, c, i)
@@ -304,7 +280,7 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_ERROR, &c->flags) &&
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
test_bit(BCH_FS_STARTED, &c->flags) &&
- test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) &&
+ test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags) &&
!c->opts.norecovery) {
bch_verbose(c, "marking filesystem clean");
bch2_fs_mark_clean(c);
@@ -395,7 +371,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
if (ret)
goto err;
- clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
+ clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
for_each_rw_member(ca, c, i)
bch2_dev_allocator_add(c, ca);