diff options
author | Josef Bacik <josef@redhat.com> | 2012-05-23 20:26:42 +0200 |
---|---|---|
committer | Josef Bacik <josef@redhat.com> | 2012-05-30 16:23:37 +0200 |
commit | 8a35d95ff4680a456d3ce47df9638f33d4f54f20 (patch) | |
tree | 8adb116747b84209406d29767b56e2a6ade2f2f4 /fs/btrfs/inode.c | |
parent | Btrfs: convert the inode bit field to use the actual bit operations (diff) | |
download | linux-8a35d95ff4680a456d3ce47df9638f33d4f54f20.tar.xz linux-8a35d95ff4680a456d3ce47df9638f33d4f54f20.zip |
Btrfs: fix how we deal with the orphan block rsv
Ceph was hitting this race where we would remove an inode from the per-root
orphan list before we would release the space we had reserved for the inode.
We actually don't need a list or anything, we just need to make sure the
root doesn't try to free up the orphan reserve until after the inodes have
released their reservations. So use an atomic counter instead of a list on
the root and only decrement the counter after we've released our
reservation. I've tested this as well as several others and we no longer
see the warnings that you would see while running ceph. Thanks,
Btrfs: fix how we deal with the orphan block rsv
Ceph was hitting this race where we would remove an inode from the per-root
orphan list before we would release the space we had reserved for the inode.
We actually don't need a list or anything, we just need to make sure the
root doesn't try to free up the orphan reserve until after the inodes have
released their reservations. So use an atomic counter instead of a list on
the root and only decrement the counter after we've released our
reservation. I've tested this as well as several others and we no longer
see the warnings that you would see while running ceph. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 38 |
1 files changed, 21 insertions, 17 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 91ad63901756..029892887fc1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2104,12 +2104,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_block_rsv *block_rsv; int ret; - if (!list_empty(&root->orphan_list) || + if (atomic_read(&root->orphan_inodes) || root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) return; spin_lock(&root->orphan_lock); - if (!list_empty(&root->orphan_list)) { + if (atomic_read(&root->orphan_inodes)) { spin_unlock(&root->orphan_lock); return; } @@ -2166,8 +2166,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) block_rsv = NULL; } - if (list_empty(&BTRFS_I(inode)->i_orphan)) { - list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); + if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags)) { #if 0 /* * For proper ENOSPC handling, we should do orphan @@ -2180,6 +2180,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) insert = 1; #endif insert = 1; + atomic_dec(&root->orphan_inodes); } if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, @@ -2197,6 +2198,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); if (ret && ret != -EEXIST) { + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); btrfs_abort_transaction(trans, root, ret); return ret; } @@ -2227,10 +2230,9 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) int ret = 0; spin_lock(&root->orphan_lock); - if (!list_empty(&BTRFS_I(inode)->i_orphan)) { - list_del_init(&BTRFS_I(inode)->i_orphan); + if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags)) delete_item = 1; - } if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &BTRFS_I(inode)->runtime_flags)) @@ -2242,8 +2244,10 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ } - if (release_rsv) + if (release_rsv) { btrfs_orphan_release_metadata(inode); + atomic_dec(&root->orphan_inodes); + } return 0; } @@ -2371,6 +2375,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = PTR_ERR(trans); goto out; } + printk(KERN_ERR "auto deleting %Lu\n", + found_key.objectid); ret = btrfs_del_orphan_item(trans, root, found_key.objectid); BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ @@ -2382,9 +2388,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * add this inode to the orphan list so btrfs_orphan_del does * the proper thing when we hit it */ - spin_lock(&root->orphan_lock); - list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); - spin_unlock(&root->orphan_lock); + set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { @@ -3706,7 +3711,8 @@ void btrfs_evict_inode(struct inode *inode) btrfs_wait_ordered_range(inode, 0, (u64)-1); if (root->fs_info->log_root_recovering) { - BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); + BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags)); goto no_delete; } @@ -6903,7 +6909,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) mutex_init(&ei->log_mutex); mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); - INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->delalloc_inodes); INIT_LIST_HEAD(&ei->ordered_operations); RB_CLEAR_NODE(&ei->rb_node); @@ -6948,13 +6953,12 @@ void btrfs_destroy_inode(struct inode *inode) spin_unlock(&root->fs_info->ordered_extent_lock); } - spin_lock(&root->orphan_lock); - if (!list_empty(&BTRFS_I(inode)->i_orphan)) { + if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags)) { printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", (unsigned long long)btrfs_ino(inode)); - list_del_init(&BTRFS_I(inode)->i_orphan); + atomic_dec(&root->orphan_inodes); } - spin_unlock(&root->orphan_lock); while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |