diff options
author | Jens Axboe <axboe@fb.com> | 2014-09-11 17:31:18 +0200 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2014-09-11 17:31:18 +0200 |
commit | b207892b061da7608878e273ae22ba9bf9be264b (patch) | |
tree | 51daa46b89b83cad422941f52110b19571b85b79 /fs/btrfs | |
parent | bdi: reimplement bdev_inode_switch_bdi() (diff) | |
parent | blk-mq: scale depth and rq map appropriate if low on memory (diff) | |
download | linux-b207892b061da7608878e273ae22ba9bf9be264b.tar.xz linux-b207892b061da7608878e273ae22ba9bf9be264b.zip |
Merge branch 'for-linus' into for-3.18/core
A bit of churn on the for-linus side that would be nice to have
in the core bits for 3.18, so pull it in to catch us up and make
forward progress easier.
Signed-off-by: Jens Axboe <axboe@fb.com>
Conflicts:
block/scsi_ioctl.c
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/async-thread.c | 44 | ||||
-rw-r--r-- | fs/btrfs/async-thread.h | 28 | ||||
-rw-r--r-- | fs/btrfs/delayed-inode.c | 4 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 56 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 23 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 5 | ||||
-rw-r--r-- | fs/btrfs/file.c | 17 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 109 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 36 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 1 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 3 | ||||
-rw-r--r-- | fs/btrfs/raid56.c | 9 | ||||
-rw-r--r-- | fs/btrfs/reada.c | 3 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 25 | ||||
-rw-r--r-- | fs/btrfs/sysfs.c | 2 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 17 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 65 |
17 files changed, 312 insertions, 135 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 5a201d81049c..fbd76ded9a34 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -22,7 +22,6 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/freezer.h> -#include <linux/workqueue.h> #include "async-thread.h" #include "ctree.h" @@ -55,8 +54,39 @@ struct btrfs_workqueue { struct __btrfs_workqueue *high; }; -static inline struct __btrfs_workqueue -*__btrfs_alloc_workqueue(const char *name, int flags, int max_active, +static void normal_work_helper(struct btrfs_work *work); + +#define BTRFS_WORK_HELPER(name) \ +void btrfs_##name(struct work_struct *arg) \ +{ \ + struct btrfs_work *work = container_of(arg, struct btrfs_work, \ + normal_work); \ + normal_work_helper(work); \ +} + +BTRFS_WORK_HELPER(worker_helper); +BTRFS_WORK_HELPER(delalloc_helper); +BTRFS_WORK_HELPER(flush_delalloc_helper); +BTRFS_WORK_HELPER(cache_helper); +BTRFS_WORK_HELPER(submit_helper); +BTRFS_WORK_HELPER(fixup_helper); +BTRFS_WORK_HELPER(endio_helper); +BTRFS_WORK_HELPER(endio_meta_helper); +BTRFS_WORK_HELPER(endio_meta_write_helper); +BTRFS_WORK_HELPER(endio_raid56_helper); +BTRFS_WORK_HELPER(rmw_helper); +BTRFS_WORK_HELPER(endio_write_helper); +BTRFS_WORK_HELPER(freespace_write_helper); +BTRFS_WORK_HELPER(delayed_meta_helper); +BTRFS_WORK_HELPER(readahead_helper); +BTRFS_WORK_HELPER(qgroup_rescan_helper); +BTRFS_WORK_HELPER(extent_refs_helper); +BTRFS_WORK_HELPER(scrub_helper); +BTRFS_WORK_HELPER(scrubwrc_helper); +BTRFS_WORK_HELPER(scrubnc_helper); + +static struct __btrfs_workqueue * +__btrfs_alloc_workqueue(const char *name, int flags, int max_active, int thresh) { struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); @@ -232,13 +262,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) spin_unlock_irqrestore(lock, flags); } -static void normal_work_helper(struct work_struct *arg) +static void normal_work_helper(struct btrfs_work *work) { - struct btrfs_work *work; struct __btrfs_workqueue *wq; int need_order = 0; - work = container_of(arg, struct btrfs_work, normal_work); /* * We should not touch things inside work in the following cases: * 1) after work->func() if it has no ordered_free @@ -262,7 +290,7 @@ static void normal_work_helper(struct work_struct *arg) trace_btrfs_all_work_done(work); } -void btrfs_init_work(struct btrfs_work *work, +void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, btrfs_func_t func, btrfs_func_t ordered_func, btrfs_func_t ordered_free) @@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *work, work->func = func; work->ordered_func = ordered_func; work->ordered_free = ordered_free; - INIT_WORK(&work->normal_work, normal_work_helper); + INIT_WORK(&work->normal_work, uniq_func); INIT_LIST_HEAD(&work->ordered_list); work->flags = 0; } diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 9c6b66d15fb0..e9e31c94758f 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -19,12 +19,14 @@ #ifndef __BTRFS_ASYNC_THREAD_ #define __BTRFS_ASYNC_THREAD_ +#include <linux/workqueue.h> struct btrfs_workqueue; /* Internal use only */ struct __btrfs_workqueue; struct btrfs_work; typedef void (*btrfs_func_t)(struct btrfs_work *arg); +typedef void (*btrfs_work_func_t)(struct work_struct *arg); struct btrfs_work { btrfs_func_t func; @@ -38,11 +40,35 @@ struct btrfs_work { unsigned long flags; }; +#define BTRFS_WORK_HELPER_PROTO(name) \ +void btrfs_##name(struct work_struct *arg) + +BTRFS_WORK_HELPER_PROTO(worker_helper); +BTRFS_WORK_HELPER_PROTO(delalloc_helper); +BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper); +BTRFS_WORK_HELPER_PROTO(cache_helper); +BTRFS_WORK_HELPER_PROTO(submit_helper); +BTRFS_WORK_HELPER_PROTO(fixup_helper); +BTRFS_WORK_HELPER_PROTO(endio_helper); +BTRFS_WORK_HELPER_PROTO(endio_meta_helper); +BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper); +BTRFS_WORK_HELPER_PROTO(endio_raid56_helper); +BTRFS_WORK_HELPER_PROTO(rmw_helper); +BTRFS_WORK_HELPER_PROTO(endio_write_helper); +BTRFS_WORK_HELPER_PROTO(freespace_write_helper); +BTRFS_WORK_HELPER_PROTO(delayed_meta_helper); +BTRFS_WORK_HELPER_PROTO(readahead_helper); +BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper); +BTRFS_WORK_HELPER_PROTO(extent_refs_helper); +BTRFS_WORK_HELPER_PROTO(scrub_helper); +BTRFS_WORK_HELPER_PROTO(scrubwrc_helper); +BTRFS_WORK_HELPER_PROTO(scrubnc_helper); + struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, int flags, int max_active, int thresh); -void btrfs_init_work(struct btrfs_work *work, +void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper, btrfs_func_t func, btrfs_func_t ordered_func, btrfs_func_t ordered_free); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index da775bfdebc9..a2e90f855d7d 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, return -ENOMEM; async_work->delayed_root = delayed_root; - btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, - NULL, NULL); + btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper, + btrfs_async_run_delayed_root, NULL, NULL); async_work->nr = nr; btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 39ff591ae1b4..7e221b090308 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -39,7 +39,6 @@ #include "btrfs_inode.h" #include "volumes.h" #include "print-tree.h" -#include "async-thread.h" #include "locking.h" #include "tree-log.h" #include "free-space-cache.h" @@ -693,35 +692,41 @@ static void end_workqueue_bio(struct bio *bio, int err) { struct end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; + struct btrfs_workqueue *wq; + btrfs_work_func_t func; fs_info = end_io_wq->info; end_io_wq->error = err; - btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); if (bio->bi_rw & REQ_WRITE) { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) - btrfs_queue_work(fs_info->endio_meta_write_workers, - &end_io_wq->work); - else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) - btrfs_queue_work(fs_info->endio_freespace_worker, - &end_io_wq->work); - else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - btrfs_queue_work(fs_info->endio_raid56_workers, - &end_io_wq->work); - else - btrfs_queue_work(fs_info->endio_write_workers, - &end_io_wq->work); + if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { + wq = fs_info->endio_meta_write_workers; + func = btrfs_endio_meta_write_helper; + } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) { + wq = fs_info->endio_freespace_worker; + func = btrfs_freespace_write_helper; + } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + wq = fs_info->endio_raid56_workers; + func = btrfs_endio_raid56_helper; + } else { + wq = fs_info->endio_write_workers; + func = btrfs_endio_write_helper; + } } else { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - btrfs_queue_work(fs_info->endio_raid56_workers, - &end_io_wq->work); - else if (end_io_wq->metadata) - btrfs_queue_work(fs_info->endio_meta_workers, - &end_io_wq->work); - else - btrfs_queue_work(fs_info->endio_workers, - &end_io_wq->work); + if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + wq = fs_info->endio_raid56_workers; + func = btrfs_endio_raid56_helper; + } else if (end_io_wq->metadata) { + wq = fs_info->endio_meta_workers; + func = btrfs_endio_meta_helper; + } else { + wq = fs_info->endio_workers; + func = btrfs_endio_helper; + } } + + btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL); + btrfs_queue_work(wq, &end_io_wq->work); } /* @@ -828,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->submit_bio_start = submit_bio_start; async->submit_bio_done = submit_bio_done; - btrfs_init_work(&async->work, run_one_async_start, + btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start, run_one_async_done, run_one_async_free); async->bio_flags = bio_flags; @@ -3450,7 +3455,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) btrfs_set_stack_device_generation(dev_item, 0); btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); - btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); + btrfs_set_stack_device_total_bytes(dev_item, + dev->disk_total_bytes); btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 102ed3143976..3efe1c3877bf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, caching_ctl->block_group = cache; caching_ctl->progress = cache->key.objectid; atomic_set(&caching_ctl->count, 1); - btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); + btrfs_init_work(&caching_ctl->work, btrfs_cache_helper, + caching_thread, NULL, NULL); spin_lock(&cache->lock); /* @@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root, async->sync = 0; init_completion(&async->wait); - btrfs_init_work(&async->work, delayed_ref_async_start, - NULL, NULL); + btrfs_init_work(&async->work, btrfs_extent_refs_helper, + delayed_ref_async_start, NULL, NULL); btrfs_queue_work(root->fs_info->extent_workers, &async->work); @@ -3586,13 +3587,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags) */ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) { - /* - * we add in the count of missing devices because we want - * to make sure that any RAID levels on a degraded FS - * continue to be honored. - */ - u64 num_devices = root->fs_info->fs_devices->rw_devices + - root->fs_info->fs_devices->missing_devices; + u64 num_devices = root->fs_info->fs_devices->rw_devices; u64 target; u64 tmp; @@ -8440,13 +8435,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) if (stripped) return extended_to_chunk(stripped); - /* - * we add in the count of missing devices because we want - * to make sure that any RAID levels on a degraded FS - * continue to be honored. - */ - num_devices = root->fs_info->fs_devices->rw_devices + - root->fs_info->fs_devices->missing_devices; + num_devices = root->fs_info->fs_devices->rw_devices; stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e11aab9f391..af0359dcf337 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2532,6 +2532,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; + offset += len; continue; } } @@ -4207,8 +4208,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return -ENOMEM; path->leave_spinning = 1; - start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); - len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); + start = round_down(start, BTRFS_I(inode)->root->sectorsize); + len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start; /* * lookup the last file extent. We're not using i_size here diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d3afac292d67..36861b7a6757 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1840,7 +1840,15 @@ int btrfs_release_file(struct inode *inode, struct file *filp) { if (filp->private_data) btrfs_ioctl_trans_end(filp); - filemap_flush(inode->i_mapping); + /* + * ordered_data_close is set by settattr when we are about to truncate + * a file from a non-zero size to a zero size. This tries to + * flush down new bytes that may have been written if the + * application were using truncate to replace a file in place. + */ + if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, + &BTRFS_I(inode)->runtime_flags)) + filemap_flush(inode->i_mapping); return 0; } @@ -2088,10 +2096,9 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, goto out; } - if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { + if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) { u64 num_bytes; - path->slots[0]++; key.offset = offset; btrfs_set_item_key_safe(root, path, &key); fi = btrfs_item_ptr(leaf, path->slots[0], @@ -2216,7 +2223,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) goto out_only_mutex; } - lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize); + lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); lockend = round_down(offset + len, BTRFS_I(inode)->root->sectorsize) - 1; same_page = ((offset >> PAGE_CACHE_SHIFT) == @@ -2277,7 +2284,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) tail_start + tail_len, 0, 1); if (ret) goto out_only_mutex; - } + } } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 03708ef3deef..9c194bd74d6e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1096,8 +1096,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->end = cur_end; INIT_LIST_HEAD(&async_cow->extents); - btrfs_init_work(&async_cow->work, async_cow_start, - async_cow_submit, async_cow_free); + btrfs_init_work(&async_cow->work, + btrfs_delalloc_helper, + async_cow_start, async_cow_submit, + async_cow_free); nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT; @@ -1881,7 +1883,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) SetPageChecked(page); page_cache_get(page); - btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); + btrfs_init_work(&fixup->work, btrfs_fixup_helper, + btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); return -EBUSY; @@ -2822,7 +2825,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered_extent = NULL; - struct btrfs_workqueue *workers; + struct btrfs_workqueue *wq; + btrfs_work_func_t func; trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); @@ -2831,13 +2835,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, end - start + 1, uptodate)) return 0; - btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); + if (btrfs_is_free_space_inode(inode)) { + wq = root->fs_info->endio_freespace_worker; + func = btrfs_freespace_write_helper; + } else { + wq = root->fs_info->endio_write_workers; + func = btrfs_endio_write_helper; + } - if (btrfs_is_free_space_inode(inode)) - workers = root->fs_info->endio_freespace_worker; - else - workers = root->fs_info->endio_write_workers; - btrfs_queue_work(workers, &ordered_extent->work); + btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, + NULL); + btrfs_queue_work(wq, &ordered_extent->work); return 0; } @@ -4674,6 +4682,11 @@ static void evict_inode_truncate_pages(struct inode *inode) clear_bit(EXTENT_FLAG_LOGGING, &em->flags); remove_extent_mapping(map_tree, em); free_extent_map(em); + if (need_resched()) { + write_unlock(&map_tree->lock); + cond_resched(); + write_lock(&map_tree->lock); + } } write_unlock(&map_tree->lock); @@ -4696,6 +4709,7 @@ static void evict_inode_truncate_pages(struct inode *inode) &cached_state, GFP_NOFS); free_extent_state(state); + cond_resched(); spin_lock(&io_tree->lock); } spin_unlock(&io_tree->lock); @@ -5181,6 +5195,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) iput(inode); inode = ERR_PTR(ret); } + /* + * If orphan cleanup did remove any orphans, it means the tree + * was modified and therefore the commit root is not the same as + * the current root anymore. This is a problem, because send + * uses the commit root and therefore can see inode items that + * don't exist in the current root anymore, and for example make + * calls to btrfs_iget, which will do tree lookups based on the + * current root and not on the commit root. Those lookups will + * fail, returning a -ESTALE error, and making send fail with + * that error. So make sure a send does not see any orphans we + * have just removed, and that it will see the same inodes + * regardless of whether a transaction commit happened before + * it started (meaning that the commit root will be the same as + * the current root) or not. + */ + if (sub_root->node != sub_root->commit_root) { + u64 sub_flags = btrfs_root_flags(&sub_root->root_item); + + if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) { + struct extent_buffer *eb; + + /* + * Assert we can't have races between dentry + * lookup called through the snapshot creation + * ioctl and the VFS. + */ + ASSERT(mutex_is_locked(&dir->i_mutex)); + + down_write(&root->fs_info->commit_root_sem); + eb = sub_root->commit_root; + sub_root->commit_root = + btrfs_root_node(sub_root); + up_write(&root->fs_info->commit_root_sem); + free_extent_buffer(eb); + } + } } return inode; @@ -5606,6 +5656,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, } /* + * O_TMPFILE, set link count to 0, so that after this point, + * we fill in an inode item with the correct link count. + */ + if (!name) + set_nlink(inode, 0); + + /* * we have to initialize this early, so we can reclaim the inode * number if we fail afterwards in this function. */ @@ -6097,14 +6154,14 @@ out_fail: static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, struct extent_map *em, - u64 map_start, u64 map_len) + u64 map_start) { u64 start_diff; BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); start_diff = map_start - em->start; em->start = map_start; - em->len = map_len; + em->len = existing->start - em->start; if (em->block_start < EXTENT_MAP_LAST_BYTE && !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { em->block_start += start_diff; @@ -6275,6 +6332,8 @@ next: goto not_found; if (start + len <= found_key.offset) goto not_found; + if (start > found_key.offset) + goto next; em->start = start; em->orig_start = start; em->len = found_key.offset - start; @@ -6390,8 +6449,7 @@ insert: em->len); if (existing) { err = merge_extent_mapping(em_tree, existing, - em, start, - root->sectorsize); + em, start); free_extent_map(existing); if (err) { free_extent_map(em); @@ -7158,7 +7216,8 @@ again: if (!ret) goto out_test; - btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); + btrfs_init_work(&ordered->work, btrfs_endio_write_helper, + finish_ordered_fn, NULL, NULL); btrfs_queue_work(root->fs_info->endio_write_workers, &ordered->work); out_test: @@ -7306,10 +7365,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, map_length = orig_bio->bi_iter.bi_size; ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, &map_length, NULL, 0); - if (ret) { - bio_put(orig_bio); + if (ret) return -EIO; - } if (map_length >= orig_bio->bi_iter.bi_size) { bio = orig_bio; @@ -7326,6 +7383,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); if (!bio) return -ENOMEM; + bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; atomic_inc(&dip->pending_bios); @@ -7534,7 +7592,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, count = iov_iter_count(iter); if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags)) - filemap_fdatawrite_range(inode->i_mapping, offset, count); + filemap_fdatawrite_range(inode->i_mapping, offset, + offset + count - 1); if (rw & WRITE) { /* @@ -8495,7 +8554,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, work->inode = inode; work->wait = wait; work->delay_iput = delay_iput; - btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); + WARN_ON_ONCE(!inode); + btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, + btrfs_run_delalloc_work, NULL, NULL); return work; } @@ -8979,6 +9040,14 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) if (ret) goto out; + /* + * We set number of links to 0 in btrfs_new_inode(), and here we set + * it to 1 because d_tmpfile() will issue a warning if the count is 0, + * through: + * + * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() + */ + set_nlink(inode, 1); d_tmpfile(dentry, inode); mark_inode_dirty(inode); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 47aceb494d1d..fce6fd0e3f50 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (ret) goto fail; - ret = btrfs_orphan_cleanup(pending_snapshot->snap); - if (ret) - goto fail; - - /* - * If orphan cleanup did remove any orphans, it means the tree was - * modified and therefore the commit root is not the same as the - * current root anymore. This is a problem, because send uses the - * commit root and therefore can see inode items that don't exist - * in the current root anymore, and for example make calls to - * btrfs_iget, which will do tree lookups based on the current root - * and not on the commit root. Those lookups will fail, returning a - * -ESTALE error, and making send fail with that error. So make sure - * a send does not see any orphans we have just removed, and that it - * will see the same inodes regardless of whether a transaction - * commit happened before it started (meaning that the commit root - * will be the same as the current root) or not. - */ - if (readonly && pending_snapshot->snap->node != - pending_snapshot->snap->commit_root) { - trans = btrfs_join_transaction(pending_snapshot->snap); - if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { - ret = PTR_ERR(trans); - goto fail; - } - if (!IS_ERR(trans)) { - ret = btrfs_commit_transaction(trans, - pending_snapshot->snap); - if (ret) - goto fail; - } - } - inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); if (IS_ERR(inode)) { ret = PTR_ERR(inode); @@ -3527,7 +3494,8 @@ process_slot: btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); - last_dest_end = new_key.offset + datal; + last_dest_end = ALIGN(new_key.offset + datal, + root->sectorsize); ret = clone_finish_inode_update(trans, inode, last_dest_end, destoff, olen); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 963895c1f801..ac734ec4cc20 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -615,6 +615,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) spin_unlock(&root->ordered_extent_lock); btrfs_init_work(&ordered->flush_work, + btrfs_flush_delalloc_helper, btrfs_run_ordered_extent_work, NULL, NULL); list_add_tail(&ordered->work_list, &works); btrfs_queue_work(root->fs_info->flush_workers, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b497498484be..ded5c601d916 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1973,7 +1973,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, elem.seq, &roots); btrfs_put_tree_mod_seq(fs_info, &elem); if (ret < 0) - return ret; + goto out; if (roots->nnodes != 1) goto out; @@ -2720,6 +2720,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, memset(&fs_info->qgroup_rescan_work, 0, sizeof(fs_info->qgroup_rescan_work)); btrfs_init_work(&fs_info->qgroup_rescan_work, + btrfs_qgroup_rescan_helper, btrfs_qgroup_rescan_worker, NULL, NULL); if (ret) { diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 4a88f073fdd7..0a6b6e4bcbb9 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1416,7 +1416,8 @@ cleanup: static void async_rmw_stripe(struct btrfs_raid_bio *rbio) { - btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); + btrfs_init_work(&rbio->work, btrfs_rmw_helper, + rmw_work, NULL, NULL); btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); @@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrfs_raid_bio *rbio) static void async_read_rebuild(struct btrfs_raid_bio *rbio) { - btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); + btrfs_init_work(&rbio->work, btrfs_rmw_helper, + read_rebuild_work, NULL, NULL); btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); @@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) plug = container_of(cb, struct btrfs_plug_cb, cb); if (from_schedule) { - btrfs_init_work(&plug->work, unplug_work, NULL, NULL); + btrfs_init_work(&plug->work, btrfs_rmw_helper, + unplug_work, NULL, NULL); btrfs_queue_work(plug->info->rmw_workers, &plug->work); return; diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 09230cf3a244..20408c6b665a 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -798,7 +798,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) /* FIXME we cannot handle this properly right now */ BUG(); } - btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); + btrfs_init_work(&rmw->work, btrfs_readahead_helper, + reada_start_machine_worker, NULL, NULL); rmw->fs_info = fs_info; btrfs_queue_work(fs_info->readahead_workers, &rmw->work); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b6d198f5181e..f4a41f37be22 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -428,8 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) sbio->index = i; sbio->sctx = sctx; sbio->page_count = 0; - btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, - NULL, NULL); + btrfs_init_work(&sbio->work, btrfs_scrub_helper, + scrub_bio_end_io_worker, NULL, NULL); if (i != SCRUB_BIOS_PER_SCTX - 1) sctx->bios[i]->next_free = i + 1; @@ -999,8 +999,8 @@ nodatasum_case: fixup_nodatasum->root = fs_info->extent_root; fixup_nodatasum->mirror_num = failed_mirror_index + 1; scrub_pending_trans_workers_inc(sctx); - btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, - NULL, NULL); + btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper, + scrub_fixup_nodatasum, NULL, NULL); btrfs_queue_work(fs_info->scrub_workers, &fixup_nodatasum->work); goto out; @@ -1616,7 +1616,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) sbio->err = err; sbio->bio = bio; - btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); + btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, + scrub_wr_bio_end_io_worker, NULL, NULL); btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); } @@ -2904,6 +2905,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, struct scrub_ctx *sctx; int ret; struct btrfs_device *dev; + struct rcu_string *name; if (btrfs_fs_closing(fs_info)) return -EINVAL; @@ -2965,6 +2967,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, return -ENODEV; } + if (!is_dev_replace && !readonly && !dev->writeable) { + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + rcu_read_lock(); + name = rcu_dereference(dev->name); + btrfs_err(fs_info, "scrub: device %s is not writable", + name->str); + rcu_read_unlock(); + return -EROFS; + } + mutex_lock(&fs_info->scrub_lock); if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { mutex_unlock(&fs_info->scrub_lock); @@ -3203,7 +3215,8 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, nocow_ctx->len = len; nocow_ctx->mirror_num = mirror_num; nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; - btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); + btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper, + copy_nocow_pages_worker, NULL, NULL); INIT_LIST_HEAD(&nocow_ctx->inodes); btrfs_queue_work(fs_info->scrub_nocow_workers, &nocow_ctx->work); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 78699364f537..12e53556e214 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -614,7 +614,7 @@ int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, if (!fs_info->device_dir_kobj) return -EINVAL; - if (one_device) { + if (one_device && one_device->bdev) { disk = one_device->bdev->bd_part; disk_kobj = &part_to_dev(disk)->kobj; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9e1f2cd5e67a..7e0e6e3029dd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3298,7 +3298,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct list_head ordered_sums; int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; bool has_extents = false; - bool need_find_last_extent = (*last_extent == 0); + bool need_find_last_extent = true; bool done = false; INIT_LIST_HEAD(&ordered_sums); @@ -3352,8 +3352,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, */ if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { has_extents = true; - if (need_find_last_extent && - first_key.objectid == (u64)-1) + if (first_key.objectid == (u64)-1) first_key = ins_keys[i]; } else { need_find_last_extent = false; @@ -3427,6 +3426,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (!has_extents) return ret; + if (need_find_last_extent && *last_extent == first_key.offset) { + /* + * We don't have any leafs between our current one and the one + * we processed before that can have file extent items for our + * inode (and have a generation number smaller than our current + * transaction id). + */ + need_find_last_extent = false; + } + /* * Because we use btrfs_search_forward we could skip leaves that were * not modified and then assume *last_extent is valid when it really @@ -3537,7 +3546,7 @@ fill_holes: 0, 0); if (ret) break; - *last_extent = offset + len; + *last_extent = extent_end; } /* * Need to let the callers know we dropped the path so they should diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6cb82f62cb7c..340a92d08e84 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -508,6 +508,44 @@ static noinline int device_list_add(const char *path, ret = 1; device->fs_devices = fs_devices; } else if (!device->name || strcmp(device->name->str, path)) { + /* + * When FS is already mounted. + * 1. If you are here and if the device->name is NULL that + * means this device was missing at time of FS mount. + * 2. If you are here and if the device->name is different + * from 'path' that means either + * a. The same device disappeared and reappeared with + * different name. or + * b. The missing-disk-which-was-replaced, has + * reappeared now. + * + * We must allow 1 and 2a above. But 2b would be a spurious + * and unintentional. + * + * Further in case of 1 and 2a above, the disk at 'path' + * would have missed some transaction when it was away and + * in case of 2a the stale bdev has to be updated as well. + * 2b must not be allowed at all time. + */ + + /* + * As of now don't allow update to btrfs_fs_device through + * the btrfs dev scan cli, after FS has been mounted. + */ + if (fs_devices->opened) { + return -EBUSY; + } else { + /* + * That is if the FS is _not_ mounted and if you + * are here, that means there is more than one + * disk with same uuid and devid.We keep the one + * with larger generation number or the last-in if + * generation are equal. + */ + if (found_transid < device->generation) + return -EEXIST; + } + name = rcu_string_strdup(path, GFP_NOFS); if (!name) return -ENOMEM; @@ -519,6 +557,15 @@ static noinline int device_list_add(const char *path, } } + /* + * Unmount does not free the btrfs_device struct but would zero + * generation along with most of the other members. So just update + * it back. We need it to pick the disk with largest generation + * (as above). + */ + if (!fs_devices->opened) + device->generation = found_transid; + if (found_transid > fs_devices->latest_trans) { fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; @@ -1436,7 +1483,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); - btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); btrfs_set_device_group(leaf, dev_item, 0); btrfs_set_device_seek_speed(leaf, dev_item, 0); @@ -1671,7 +1718,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) device->fs_devices->total_devices--; if (device->missing) - root->fs_info->fs_devices->missing_devices--; + device->fs_devices->missing_devices--; next_device = list_entry(root->fs_info->fs_devices->devices.next, struct btrfs_device, dev_list); @@ -1801,8 +1848,12 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, if (srcdev->bdev) { fs_info->fs_devices->open_devices--; - /* zero out the old super */ - btrfs_scratch_superblock(srcdev); + /* + * zero out the old super if it is not writable + * (e.g. seed device) + */ + if (srcdev->writeable) + btrfs_scratch_superblock(srcdev); } call_rcu(&srcdev->rcu, free_device); @@ -1941,6 +1992,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) fs_devices->seeding = 0; fs_devices->num_devices = 0; fs_devices->open_devices = 0; + fs_devices->missing_devices = 0; + fs_devices->num_can_discard = 0; + fs_devices->rotating = 0; fs_devices->seed = seed_devices; generate_random_uuid(fs_devices->fsid); @@ -5800,7 +5854,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, else generate_random_uuid(dev->uuid); - btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); + btrfs_init_work(&dev->work, btrfs_submit_helper, + pending_bios_fn, NULL, NULL); return dev; } |