From 5e388e95815408c27f3612190d089afc0774b870 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 18 Apr 2018 09:41:54 +0300 Subject: btrfs: Fix race condition between delayed refs and blockgroup removal When the delayed refs for a head are all run, eventually cleanup_ref_head is called which (in case of deletion) obtains a reference for the relevant btrfs_space_info struct by querying the bg for the range. This is problematic because when the last extent of a bg is deleted a race window emerges between removal of that bg and the subsequent invocation of cleanup_ref_head. This can result in cache being null and either a null pointer dereference or assertion failure. task: ffff8d04d31ed080 task.stack: ffff9e5dc10cc000 RIP: 0010:assfail.constprop.78+0x18/0x1a [btrfs] RSP: 0018:ffff9e5dc10cfbe8 EFLAGS: 00010292 RAX: 0000000000000044 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff8d04ffc1f868 RSI: ffff8d04ffc178c8 RDI: ffff8d04ffc178c8 RBP: ffff8d04d29e5ea0 R08: 00000000000001f0 R09: 0000000000000001 R10: ffff9e5dc0507d58 R11: 0000000000000001 R12: ffff8d04d29e5ea0 R13: ffff8d04d29e5f08 R14: ffff8d04efe29b40 R15: ffff8d04efe203e0 FS: 00007fbf58ead500(0000) GS:ffff8d04ffc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe6c6975648 CR3: 0000000013b2a000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __btrfs_run_delayed_refs+0x10e7/0x12c0 [btrfs] btrfs_run_delayed_refs+0x68/0x250 [btrfs] btrfs_should_end_transaction+0x42/0x60 [btrfs] btrfs_truncate_inode_items+0xaac/0xfc0 [btrfs] btrfs_evict_inode+0x4c6/0x5c0 [btrfs] evict+0xc6/0x190 do_unlinkat+0x19c/0x300 do_syscall_64+0x74/0x140 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7fbf589c57a7 To fix this, introduce a new flag "is_system" to head_ref structs, which is populated at insertion time. 
This allows decoupling the lookup of the space_info from the lookup of the possibly deleted bg. Fixes: d7eae3403f46 ("Btrfs: rework delayed ref total_bytes_pinned accounting") CC: stable@vger.kernel.org # 4.14+ Suggested-by: Omar Sandoval Signed-off-by: Nikolay Borisov Reviewed-by: Omar Sandoval Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 055494ddcace..f99102063366 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, trace_run_delayed_ref_head(fs_info, head, 0); if (head->total_ref_mod < 0) { - struct btrfs_block_group_cache *cache; + struct btrfs_space_info *space_info; + u64 flags; - cache = btrfs_lookup_block_group(fs_info, head->bytenr); - ASSERT(cache); - percpu_counter_add(&cache->space_info->total_bytes_pinned, + if (head->is_data) + flags = BTRFS_BLOCK_GROUP_DATA; + else if (head->is_system) + flags = BTRFS_BLOCK_GROUP_SYSTEM; + else + flags = BTRFS_BLOCK_GROUP_METADATA; + space_info = __find_space_info(fs_info, flags); + ASSERT(space_info); + percpu_counter_add(&space_info->total_bytes_pinned, -head->num_bytes); - btrfs_put_block_group(cache); if (head->is_data) { spin_lock(&delayed_refs->lock); -- cgit v1.2.3