diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 211 |
1 files changed, 172 insertions, 39 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b214ab178f3a..686331f22b15 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, static int btrfs_dirty_inode(struct inode *inode); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +void btrfs_test_inode_set_ops(struct inode *inode) +{ + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; +} +#endif + static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir, const struct qstr *qstr) @@ -1530,10 +1537,32 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, static void btrfs_split_extent_hook(struct inode *inode, struct extent_state *orig, u64 split) { + u64 size; + /* not delalloc, ignore it */ if (!(orig->state & EXTENT_DELALLOC)) return; + size = orig->end - orig->start + 1; + if (size > BTRFS_MAX_EXTENT_SIZE) { + u64 num_extents; + u64 new_size; + + /* + * See the explanation in btrfs_merge_extent_hook, the same + * applies here, just in reverse. 
+ */ + new_size = orig->end - split + 1; + num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + new_size = split - orig->start; + num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE) >= num_extents) + return; + } + spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); @@ -1549,10 +1578,55 @@ static void btrfs_merge_extent_hook(struct inode *inode, struct extent_state *new, struct extent_state *other) { + u64 new_size, old_size; + u64 num_extents; + /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) return; + if (new->start > other->start) + new_size = new->end - other->start + 1; + else + new_size = other->end - new->start + 1; + + /* we're not bigger than the max, unreserve the space and go */ + if (new_size <= BTRFS_MAX_EXTENT_SIZE) { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->lock); + return; + } + + /* + * We have to add up either side to figure out how many extents were + * accounted for before we merged into one big extent. If the number of + * extents we accounted for is <= the amount we need for the new range + * then we can return, otherwise drop. Think of it like this + * + * [ 4k][MAX_SIZE] + * + * So we've grown the extent by a MAX_SIZE extent, this would mean we + * need 2 outstanding extents, on one side we have 1 and the other side + * we have 1 so they are == and we can return. But in this case + * + * [MAX_SIZE+4k][MAX_SIZE+4k] + * + * Each range on their own accounts for 2 extents, but merged together + * they are only 3 extents worth of accounting, so we need to drop in + * this case. 
+ */ + old_size = other->end - other->start + 1; + num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + old_size = new->end - new->start + 1; + num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + + if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE) >= num_extents) + return; + spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->lock); @@ -1604,7 +1678,7 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root, * have pending delalloc work to be done. */ static void btrfs_set_bit_hook(struct inode *inode, - struct extent_state *state, unsigned long *bits) + struct extent_state *state, unsigned *bits) { if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) @@ -1627,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode, spin_unlock(&BTRFS_I(inode)->lock); } + /* For sanity tests */ + if (btrfs_test_is_dummy_root(root)) + return; + __percpu_counter_add(&root->fs_info->delalloc_bytes, len, root->fs_info->delalloc_batch); spin_lock(&BTRFS_I(inode)->lock); @@ -1645,9 +1723,11 @@ static void btrfs_set_bit_hook(struct inode *inode, */ static void btrfs_clear_bit_hook(struct inode *inode, struct extent_state *state, - unsigned long *bits) + unsigned *bits) { u64 len = state->end + 1 - state->start; + u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1, + BTRFS_MAX_EXTENT_SIZE); spin_lock(&BTRFS_I(inode)->lock); if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) @@ -1667,7 +1747,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, *bits &= ~EXTENT_FIRST_DELALLOC; } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents--; + BTRFS_I(inode)->outstanding_extents -= num_extents; spin_unlock(&BTRFS_I(inode)->lock); } @@ -1680,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode, root != root->fs_info->tree_root) 
btrfs_delalloc_release_metadata(inode, len); + /* For sanity tests. */ + if (btrfs_test_is_dummy_root(root)) + return; + if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && do_list && !(state->state & EXTENT_NORESERVE)) btrfs_free_reserved_data_space(inode, len); @@ -2945,7 +3029,7 @@ static int __readpage_endio_check(struct inode *inode, return 0; zeroit: if (__ratelimit(&_rs)) - btrfs_info(BTRFS_I(inode)->root->fs_info, + btrfs_warn(BTRFS_I(inode)->root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", btrfs_ino(inode), start, csum, csum_expected); memset(kaddr + pgoff, 1, len); @@ -3407,7 +3491,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) out: if (ret) - btrfs_crit(root->fs_info, + btrfs_err(root->fs_info, "could not do orphan cleanup %d", ret); btrfs_free_path(path); return ret; @@ -3490,7 +3574,6 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_inode_item *inode_item; - struct btrfs_timespec *tspec; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; unsigned long ptr; @@ -3527,17 +3610,19 @@ static void btrfs_read_locked_inode(struct inode *inode) i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); - tspec = btrfs_inode_atime(inode_item); - inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); - inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime); - tspec = btrfs_inode_mtime(inode_item); - inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec); - inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime); - tspec = btrfs_inode_ctime(inode_item); - 
inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); - inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime); + inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime); + + BTRFS_I(inode)->i_otime.tv_sec = + btrfs_timespec_sec(leaf, &inode_item->otime); + BTRFS_I(inode)->i_otime.tv_nsec = + btrfs_timespec_nsec(leaf, &inode_item->otime); inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); @@ -3656,21 +3741,26 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); - btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), + btrfs_set_token_timespec_sec(leaf, &item->atime, inode->i_atime.tv_sec, &token); - btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), + btrfs_set_token_timespec_nsec(leaf, &item->atime, inode->i_atime.tv_nsec, &token); - btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), + btrfs_set_token_timespec_sec(leaf, &item->mtime, inode->i_mtime.tv_sec, &token); - btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), + btrfs_set_token_timespec_nsec(leaf, &item->mtime, inode->i_mtime.tv_nsec, &token); - btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), + btrfs_set_token_timespec_sec(leaf, &item->ctime, inode->i_ctime.tv_sec, &token); - btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), + btrfs_set_token_timespec_nsec(leaf, &item->ctime, inode->i_ctime.tv_nsec, &token); + btrfs_set_token_timespec_sec(leaf, &item->otime, + BTRFS_I(inode)->i_otime.tv_sec, &token); + btrfs_set_token_timespec_nsec(leaf, &item->otime, + BTRFS_I(inode)->i_otime.tv_nsec, &token); + btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), &token); btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, 
@@ -5007,6 +5097,7 @@ static int fixup_tree_root_location(struct btrfs_root *root, struct btrfs_root *new_root; struct btrfs_root_ref *ref; struct extent_buffer *leaf; + struct btrfs_key key; int ret; int err = 0; @@ -5017,9 +5108,12 @@ static int fixup_tree_root_location(struct btrfs_root *root, } err = -ENOENT; - ret = btrfs_find_item(root->fs_info->tree_root, path, - BTRFS_I(dir)->root->root_key.objectid, - location->objectid, BTRFS_ROOT_REF_KEY, NULL); + key.objectid = BTRFS_I(dir)->root->root_key.objectid; + key.type = BTRFS_ROOT_REF_KEY; + key.offset = location->objectid; + + ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path, + 0, 0); if (ret) { if (ret < 0) err = ret; @@ -5258,7 +5352,10 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_op = &btrfs_dir_ro_inode_operations; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mtime = CURRENT_TIME; + inode->i_atime = inode->i_mtime; + inode->i_ctime = inode->i_mtime; + BTRFS_I(inode)->i_otime = inode->i_mtime; return inode; } @@ -5826,7 +5923,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode_init_owner(inode, dir, mode); inode_set_bytes(inode, 0); - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + + inode->i_mtime = CURRENT_TIME; + inode->i_atime = inode->i_mtime; + inode->i_ctime = inode->i_mtime; + BTRFS_I(inode)->i_otime = inode->i_mtime; + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item, @@ -7134,17 +7236,28 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; u64 len = bh_result->b_size; + u64 *outstanding_extents = NULL; int unlock_bits = EXTENT_LOCKED; int ret = 0; if (create) - unlock_bits |= EXTENT_DELALLOC | 
EXTENT_DIRTY; + unlock_bits |= EXTENT_DIRTY; else len = min_t(u64, len, root->sectorsize); lockstart = start; lockend = start + len - 1; + if (current->journal_info) { + /* + * Need to pull our outstanding extents and set journal_info to NULL so + * that anything that needs to check if there's a transaction doesn't get + * confused. + */ + outstanding_extents = current->journal_info; + current->journal_info = NULL; + } + /* * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered. @@ -7205,7 +7318,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && em->block_start != EXTENT_MAP_HOLE)) { int type; - int ret; u64 block_start, orig_start, orig_block_len, ram_bytes; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) @@ -7269,14 +7381,21 @@ unlock: if (start + len > i_size_read(inode)) i_size_write(inode, start + len); - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents++; - spin_unlock(&BTRFS_I(inode)->lock); + /* + * If we have an outstanding_extents count still set then we're + * within our reservation, otherwise we need to adjust our inode + * counter appropriately. 
+ */ + if (*outstanding_extents) { + (*outstanding_extents)--; + } else { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); + } - ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, - lockstart + len - 1, EXTENT_DELALLOC, NULL, - &cached_state, GFP_NOFS); - BUG_ON(ret); + current->journal_info = outstanding_extents; + btrfs_free_reserved_data_space(inode, len); } /* @@ -7299,6 +7418,8 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, &cached_state, GFP_NOFS); + if (outstanding_extents) + current->journal_info = outstanding_extents; return ret; } @@ -7805,8 +7926,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, } /* async crcs make it difficult to collect full stripe writes. */ - if (btrfs_get_alloc_profile(root, 1) & - (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) + if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK) async_submit = 0; else async_submit = 1; @@ -7999,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + u64 outstanding_extents = 0; size_t count = 0; int flags = 0; bool wakeup = true; @@ -8036,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ret = btrfs_delalloc_reserve_space(inode, count); if (ret) goto out; + outstanding_extents = div64_u64(count + + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + + /* + * We need to know how many extents we reserved so that we can + * do the accounting properly if we go over the number we + * originally calculated. Abuse current->journal_info for this. 
+ */ + current->journal_info = &outstanding_extents; } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { inode_dio_done(inode); @@ -8048,13 +8179,12 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, iter, offset, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (rw & WRITE) { + current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) btrfs_delalloc_release_space(inode, count); else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, count - (size_t)ret); - else - btrfs_delalloc_release_metadata(inode, 0); } out: if (wakeup) @@ -8575,6 +8705,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->delayed_node = NULL; + ei->i_otime.tv_sec = 0; + ei->i_otime.tv_nsec = 0; + inode = &ei->vfs_inode; extent_map_tree_init(&ei->extent_tree); extent_io_tree_init(&ei->io_tree, &inode->i_data); |