summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c3
-rw-r--r--fs/9p/vfs_inode_dotl.c3
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/disk-io.c41
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/inode-map.c17
-rw-r--r--fs/btrfs/inode.c89
-rw-r--r--fs/btrfs/ioctl.c241
-rw-r--r--fs/btrfs/ordered-data.c5
-rw-r--r--fs/btrfs/qgroup.c49
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/scrub.c39
-rw-r--r--fs/btrfs/tree-log.c226
-rw-r--r--fs/btrfs/volumes.c50
-rw-r--r--fs/compat_ioctl.c1
-rw-r--r--fs/dcache.c7
-rw-r--r--fs/ecryptfs/file.c1
-rw-r--r--fs/ext4/extents.c6
-rw-r--r--fs/ext4/inode.c22
-rw-r--r--fs/ext4/ioctl.c1
-rw-r--r--fs/ext4/mballoc.c16
-rw-r--r--fs/ext4/migrate.c17
-rw-r--r--fs/hpfs/alloc.c95
-rw-r--r--fs/hpfs/dir.c1
-rw-r--r--fs/hpfs/file.c1
-rw-r--r--fs/hpfs/hpfs_fn.h4
-rw-r--r--fs/hpfs/super.c47
-rw-r--r--fs/jfs/ioctl.c3
-rw-r--r--fs/nilfs2/ioctl.c1
-rw-r--r--fs/ocfs2/ioctl.c1
-rw-r--r--fs/overlayfs/inode.c3
32 files changed, 833 insertions, 165 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 510040b04c96..b1dc51888048 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -540,8 +540,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
unlock_new_inode(inode);
return inode;
error:
- unlock_new_inode(inode);
- iput(inode);
+ iget_failed(inode);
return ERR_PTR(retval);
}
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 09e4433717b8..e8aa57dc8d6d 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -149,8 +149,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
unlock_new_inode(inode);
return inode;
error:
- unlock_new_inode(inode);
- iput(inode);
+ iget_failed(inode);
return ERR_PTR(retval);
}
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 0ef5cc13fae2..81220b2203c6 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,6 +44,8 @@
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
#define BTRFS_INODE_HAS_PROPS 11
+/* DIO is ready to submit */
+#define BTRFS_INODE_DIO_READY 12
/*
* The following 3 bits are meant only for the btree inode.
* When any of them is set, it means an error happened while writing an
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80a9aefb0c46..aac314e14188 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1778,6 +1778,7 @@ struct btrfs_fs_info {
spinlock_t unused_bgs_lock;
struct list_head unused_bgs;
struct mutex unused_bg_unpin_mutex;
+ struct mutex delete_unused_bgs_mutex;
/* For btrfs to record security options */
struct security_mnt_opts security_opts;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3f43bfea3684..a9aadb2ad525 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1751,6 +1751,7 @@ static int cleaner_kthread(void *arg)
{
struct btrfs_root *root = arg;
int again;
+ struct btrfs_trans_handle *trans;
do {
again = 0;
@@ -1772,7 +1773,6 @@ static int cleaner_kthread(void *arg)
}
btrfs_run_delayed_iputs(root);
- btrfs_delete_unused_bgs(root->fs_info);
again = btrfs_clean_one_deleted_snapshot(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -1781,6 +1781,16 @@ static int cleaner_kthread(void *arg)
* needn't do anything special here.
*/
btrfs_run_defrag_inodes(root->fs_info);
+
+ /*
+ * Acquires fs_info->delete_unused_bgs_mutex to avoid racing
+ * with relocation (btrfs_relocate_chunk) and relocation
+ * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
+ * after acquiring fs_info->delete_unused_bgs_mutex. So we
+ * can't hold, nor need to, fs_info->cleaner_mutex when deleting
+ * unused block groups.
+ */
+ btrfs_delete_unused_bgs(root->fs_info);
sleep:
if (!try_to_freeze() && !again) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -1789,6 +1799,34 @@ sleep:
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());
+
+ /*
+ * Transaction kthread is stopped before us and wakes us up.
+ * However we might have started a new transaction and COWed some
+ * tree blocks when deleting unused block groups for example. So
+ * make sure we commit the transaction we started to have a clean
+ * shutdown when evicting the btree inode - if it has dirty pages
+ * when we do the final iput() on it, eviction will trigger a
+ * writeback for it which will fail with null pointer dereferences
+ * since work queues and other resources were already released and
+ * destroyed by the time the iput/eviction/writeback is made.
+ */
+ trans = btrfs_attach_transaction(root);
+ if (IS_ERR(trans)) {
+ if (PTR_ERR(trans) != -ENOENT)
+ btrfs_err(root->fs_info,
+ "cleaner transaction attach returned %ld",
+ PTR_ERR(trans));
+ } else {
+ int ret;
+
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ btrfs_err(root->fs_info,
+ "cleaner open transaction commit returned %d",
+ ret);
+ }
+
return 0;
}
@@ -2492,6 +2530,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->unused_bgs_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex);
+ mutex_init(&fs_info->delete_unused_bgs_mutex);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
seqlock_init(&fs_info->profiles_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 38b76cc02f48..1c2bd1723e40 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9889,6 +9889,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
}
spin_unlock(&fs_info->unused_bgs_lock);
+ mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
+
/* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem);
spin_lock(&block_group->lock);
@@ -9983,6 +9985,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
end_trans:
btrfs_end_transaction(trans, root);
next:
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
btrfs_put_block_group(block_group);
spin_lock(&fs_info->unused_bgs_lock);
}
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index f6a596d5a637..d4a582ac3f73 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
+ spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
struct btrfs_free_space *info;
struct rb_node *n;
u64 count;
@@ -254,24 +255,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
return;
while (1) {
+ bool add_to_ctl = true;
+
+ spin_lock(rbroot_lock);
n = rb_first(rbroot);
- if (!n)
+ if (!n) {
+ spin_unlock(rbroot_lock);
break;
+ }
info = rb_entry(n, struct btrfs_free_space, offset_index);
BUG_ON(info->bitmap); /* Logic error */
if (info->offset > root->ino_cache_progress)
- goto free;
+ add_to_ctl = false;
else if (info->offset + info->bytes > root->ino_cache_progress)
count = root->ino_cache_progress - info->offset + 1;
else
count = info->bytes;
- __btrfs_add_free_space(ctl, info->offset, count);
-free:
rb_erase(&info->offset_index, rbroot);
- kfree(info);
+ spin_unlock(rbroot_lock);
+ if (add_to_ctl)
+ __btrfs_add_free_space(ctl, info->offset, count);
+ kmem_cache_free(btrfs_free_space_cachep, info);
}
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 855935f6671a..b33c0cf02668 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4989,8 +4989,9 @@ static void evict_inode_truncate_pages(struct inode *inode)
/*
* Keep looping until we have no more ranges in the io tree.
* We can have ongoing bios started by readpages (called from readahead)
- * that didn't get their end io callbacks called yet or they are still
- * in progress ((extent_io.c:end_bio_extent_readpage()). This means some
+ * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+ * still in progress (unlocked the pages in the bio but did not yet
+ * unlocked the ranges in the io tree). Therefore this means some
* ranges can still be locked and eviction started because before
* submitting those bios, which are executed by a separate task (work
* queue kthread), inode references (inode->i_count) were not taken
@@ -7546,6 +7547,7 @@ unlock:
current->journal_info = outstanding_extents;
btrfs_free_reserved_data_space(inode, len);
+ set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
}
/*
@@ -7871,8 +7873,6 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
struct bio *dio_bio;
int ret;
- if (err)
- goto out_done;
again:
ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
&ordered_offset,
@@ -7895,7 +7895,6 @@ out_test:
ordered = NULL;
goto again;
}
-out_done:
dio_bio = dip->dio_bio;
kfree(dip);
@@ -8163,9 +8162,8 @@ out_err:
static void btrfs_submit_direct(int rw, struct bio *dio_bio,
struct inode *inode, loff_t file_offset)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_dio_private *dip;
- struct bio *io_bio;
+ struct btrfs_dio_private *dip = NULL;
+ struct bio *io_bio = NULL;
struct btrfs_io_bio *btrfs_bio;
int skip_sum;
int write = rw & REQ_WRITE;
@@ -8182,7 +8180,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
dip = kzalloc(sizeof(*dip), GFP_NOFS);
if (!dip) {
ret = -ENOMEM;
- goto free_io_bio;
+ goto free_ordered;
}
dip->private = dio_bio->bi_private;
@@ -8210,25 +8208,55 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
if (btrfs_bio->end_io)
btrfs_bio->end_io(btrfs_bio, ret);
-free_io_bio:
- bio_put(io_bio);
free_ordered:
/*
- * If this is a write, we need to clean up the reserved space and kill
- * the ordered extent.
+ * If we arrived here it means either we failed to submit the dip
+ * or we either failed to clone the dio_bio or failed to allocate the
+ * dip. If we cloned the dio_bio and allocated the dip, we can just
+ * call bio_endio against our io_bio so that we get proper resource
+ * cleanup if we fail to submit the dip, otherwise, we must do the
+ * same as btrfs_endio_direct_[write|read] because we can't call these
+ * callbacks - they require an allocated dip and a clone of dio_bio.
*/
- if (write) {
- struct btrfs_ordered_extent *ordered;
- ordered = btrfs_lookup_ordered_extent(inode, file_offset);
- if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
- !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
- btrfs_free_reserved_extent(root, ordered->start,
- ordered->disk_len, 1);
- btrfs_put_ordered_extent(ordered);
- btrfs_put_ordered_extent(ordered);
+ if (io_bio && dip) {
+ bio_endio(io_bio, ret);
+ /*
+ * The end io callbacks free our dip, do the final put on io_bio
+ * and all the cleanup and final put for dio_bio (through
+ * dio_end_io()).
+ */
+ dip = NULL;
+ io_bio = NULL;
+ } else {
+ if (write) {
+ struct btrfs_ordered_extent *ordered;
+
+ ordered = btrfs_lookup_ordered_extent(inode,
+ file_offset);
+ set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+ /*
+ * Decrements our ref on the ordered extent and removes
+ * the ordered extent from the inode's ordered tree,
+ * doing all the proper resource cleanup such as for the
+ * reserved space and waking up any waiters for this
+ * ordered extent (through btrfs_remove_ordered_extent).
+ */
+ btrfs_finish_ordered_io(ordered);
+ } else {
+ unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
+ file_offset + dio_bio->bi_iter.bi_size - 1);
+ }
+ clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+ /*
+ * Releases and cleans up our dio_bio, no need to bio_put()
+ * nor bio_endio()/bio_io_error() against dio_bio.
+ */
+ dio_end_io(dio_bio, ret);
}
- bio_endio(dio_bio, ret);
+ if (io_bio)
+ bio_put(io_bio);
+ kfree(dip);
}
static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
@@ -8330,9 +8358,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
btrfs_submit_direct, flags);
if (iov_iter_rw(iter) == WRITE) {
current->journal_info = NULL;
- if (ret < 0 && ret != -EIOCBQUEUED)
- btrfs_delalloc_release_space(inode, count);
- else if (ret >= 0 && (size_t)ret < count)
+ if (ret < 0 && ret != -EIOCBQUEUED) {
+ /*
+ * If the error comes from submitting stage,
+ * btrfs_get_blocsk_direct() has free'd data space,
+ * and metadata space will be handled by
+ * finish_ordered_fn, don't do that again to make
+ * sure bytes_may_use is correct.
+ */
+ if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
+ &BTRFS_I(inode)->runtime_flags))
+ btrfs_delalloc_release_space(inode, count);
+ } else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode,
count - (size_t)ret);
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c86b835da7a8..5d91776e12a2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 {
static int btrfs_clone(struct inode *src, struct inode *inode,
- u64 off, u64 olen, u64 olen_aligned, u64 destoff);
+ u64 off, u64 olen, u64 olen_aligned, u64 destoff,
+ int no_time_update);
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -2765,14 +2766,11 @@ out:
return ret;
}
-static struct page *extent_same_get_page(struct inode *inode, u64 off)
+static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
{
struct page *page;
- pgoff_t index;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
- index = off >> PAGE_CACHE_SHIFT;
-
page = grab_cache_page(inode->i_mapping, index);
if (!page)
return NULL;
@@ -2793,6 +2791,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off)
return page;
}
+static int gather_extent_pages(struct inode *inode, struct page **pages,
+ int num_pages, u64 off)
+{
+ int i;
+ pgoff_t index = off >> PAGE_CACHE_SHIFT;
+
+ for (i = 0; i < num_pages; i++) {
+ pages[i] = extent_same_get_page(inode, index + i);
+ if (!pages[i])
+ return -ENOMEM;
+ }
+ return 0;
+}
+
static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
{
/* do any pending delalloc/csum calc on src, one way or
@@ -2818,52 +2830,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
}
}
-static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
- struct inode *inode2, u64 loff2, u64 len)
+static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
{
- unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
- unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
-
mutex_unlock(&inode1->i_mutex);
mutex_unlock(&inode2->i_mutex);
}
-static void btrfs_double_lock(struct inode *inode1, u64 loff1,
- struct inode *inode2, u64 loff2, u64 len)
+static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
+{
+ if (inode1 < inode2)
+ swap(inode1, inode2);
+
+ mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+ if (inode1 != inode2)
+ mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+}
+
+static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
+{
+ unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+ unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
+static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
{
if (inode1 < inode2) {
swap(inode1, inode2);
swap(loff1, loff2);
}
-
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
lock_extent_range(inode1, loff1, len);
- if (inode1 != inode2) {
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ if (inode1 != inode2)
lock_extent_range(inode2, loff2, len);
+}
+
+struct cmp_pages {
+ int num_pages;
+ struct page **src_pages;
+ struct page **dst_pages;
+};
+
+static void btrfs_cmp_data_free(struct cmp_pages *cmp)
+{
+ int i;
+ struct page *pg;
+
+ for (i = 0; i < cmp->num_pages; i++) {
+ pg = cmp->src_pages[i];
+ if (pg)
+ page_cache_release(pg);
+ pg = cmp->dst_pages[i];
+ if (pg)
+ page_cache_release(pg);
+ }
+ kfree(cmp->src_pages);
+ kfree(cmp->dst_pages);
+}
+
+static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
+ struct inode *dst, u64 dst_loff,
+ u64 len, struct cmp_pages *cmp)
+{
+ int ret;
+ int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
+ struct page **src_pgarr, **dst_pgarr;
+
+ /*
+ * We must gather up all the pages before we initiate our
+ * extent locking. We use an array for the page pointers. Size
+ * of the array is bounded by len, which is in turn bounded by
+ * BTRFS_MAX_DEDUPE_LEN.
+ */
+ src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+ dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+ if (!src_pgarr || !dst_pgarr) {
+ kfree(src_pgarr);
+ kfree(dst_pgarr);
+ return -ENOMEM;
}
+ cmp->num_pages = num_pages;
+ cmp->src_pages = src_pgarr;
+ cmp->dst_pages = dst_pgarr;
+
+ ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
+ if (ret)
+ goto out;
+
+ ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);
+
+out:
+ if (ret)
+ btrfs_cmp_data_free(cmp);
+ return 0;
}
static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
- u64 dst_loff, u64 len)
+ u64 dst_loff, u64 len, struct cmp_pages *cmp)
{
int ret = 0;
+ int i;
struct page *src_page, *dst_page;
unsigned int cmp_len = PAGE_CACHE_SIZE;
void *addr, *dst_addr;
+ i = 0;
while (len) {
if (len < PAGE_CACHE_SIZE)
cmp_len = len;
- src_page = extent_same_get_page(src, loff);
- if (!src_page)
- return -EINVAL;
- dst_page = extent_same_get_page(dst, dst_loff);
- if (!dst_page) {
- page_cache_release(src_page);
- return -EINVAL;
- }
+ BUG_ON(i >= cmp->num_pages);
+
+ src_page = cmp->src_pages[i];
+ dst_page = cmp->dst_pages[i];
+
addr = kmap_atomic(src_page);
dst_addr = kmap_atomic(dst_page);
@@ -2875,15 +2955,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
kunmap_atomic(addr);
kunmap_atomic(dst_addr);
- page_cache_release(src_page);
- page_cache_release(dst_page);
if (ret)
break;
- loff += cmp_len;
- dst_loff += cmp_len;
len -= cmp_len;
+ i++;
}
return ret;
@@ -2914,27 +2991,62 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
{
int ret;
u64 len = olen;
+ struct cmp_pages cmp;
+ int same_inode = 0;
+ u64 same_lock_start = 0;
+ u64 same_lock_len = 0;
- /*
- * btrfs_clone() can't handle extents in the same file
- * yet. Once that works, we can drop this check and replace it
- * with a check for the same inode, but overlapping extents.
- */
if (src == dst)
- return -EINVAL;
+ same_inode = 1;
if (len == 0)
return 0;
- btrfs_double_lock(src, loff, dst, dst_loff, len);
+ if (same_inode) {
+ mutex_lock(&src->i_mutex);
- ret = extent_same_check_offsets(src, loff, &len, olen);
- if (ret)
- goto out_unlock;
+ ret = extent_same_check_offsets(src, loff, &len, olen);
+ if (ret)
+ goto out_unlock;
- ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
- if (ret)
- goto out_unlock;
+ /*
+ * Single inode case wants the same checks, except we
+ * don't want our length pushed out past i_size as
+ * comparing that data range makes no sense.
+ *
+ * extent_same_check_offsets() will do this for an
+ * unaligned length at i_size, so catch it here and
+ * reject the request.
+ *
+ * This effectively means we require aligned extents
+ * for the single-inode case, whereas the other cases
+ * allow an unaligned length so long as it ends at
+ * i_size.
+ */
+ if (len != olen) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* Check for overlapping ranges */
+ if (dst_loff + len > loff && dst_loff < loff + len) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ same_lock_start = min_t(u64, loff, dst_loff);
+ same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
+ } else {
+ btrfs_double_inode_lock(src, dst);
+
+ ret = extent_same_check_offsets(src, loff, &len, olen);
+ if (ret)
+ goto out_unlock;
+
+ ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
+ if (ret)
+ goto out_unlock;
+ }
/* don't make the dst file partly checksummed */
if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
@@ -2943,12 +3055,32 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
goto out_unlock;
}
- ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
+ ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+ if (ret)
+ goto out_unlock;
+
+ if (same_inode)
+ lock_extent_range(src, same_lock_start, same_lock_len);
+ else
+ btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+
+ /* pass original length for comparison so we stay within i_size */
+ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
if (ret == 0)
- ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
+ ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
+
+ if (same_inode)
+ unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
+ same_lock_start + same_lock_len - 1);
+ else
+ btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
+ btrfs_cmp_data_free(&cmp);
out_unlock:
- btrfs_double_unlock(src, loff, dst, dst_loff, len);
+ if (same_inode)
+ mutex_unlock(&src->i_mutex);
+ else
+ btrfs_double_inode_unlock(src, dst);
return ret;
}
@@ -3100,13 +3232,15 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
struct inode *inode,
u64 endoff,
const u64 destoff,
- const u64 olen)
+ const u64 olen,
+ int no_time_update)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
inode_inc_iversion(inode);
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ if (!no_time_update)
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
/*
* We round up to the block size at eof when determining which
* extents to clone above, but shouldn't round up the file size.
@@ -3191,13 +3325,13 @@ static void clone_update_extent_map(struct inode *inode,
* @inode: Inode to clone to
* @off: Offset within source to start clone from
* @olen: Original length, passed by user, of range to clone
- * @olen_aligned: Block-aligned value of olen, extent_same uses
- * identical values here
+ * @olen_aligned: Block-aligned value of olen
* @destoff: Offset within @inode to start clone
+ * @no_time_update: Whether to update mtime/ctime on the target inode
*/
static int btrfs_clone(struct inode *src, struct inode *inode,
const u64 off, const u64 olen, const u64 olen_aligned,
- const u64 destoff)
+ const u64 destoff, int no_time_update)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path = NULL;
@@ -3521,7 +3655,8 @@ process_slot:
root->sectorsize);
ret = clone_finish_inode_update(trans, inode,
last_dest_end,
- destoff, olen);
+ destoff, olen,
+ no_time_update);
if (ret)
goto out;
if (new_key.offset + datal >= destoff + len)
@@ -3559,7 +3694,7 @@ process_slot:
clone_update_extent_map(inode, trans, NULL, last_dest_end,
destoff + len - last_dest_end);
ret = clone_finish_inode_update(trans, inode, destoff + len,
- destoff, olen);
+ destoff, olen, no_time_update);
}
out:
@@ -3696,7 +3831,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
lock_extent_range(inode, destoff, len);
}
- ret = btrfs_clone(src, inode, off, olen, len, destoff);
+ ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
if (same_inode) {
u64 lock_start = min_t(u64, off, destoff);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 89656d799ff6..52170cf1757e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -552,6 +552,10 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
trace_btrfs_ordered_extent_put(entry->inode, entry);
if (atomic_dec_and_test(&entry->refs)) {
+ ASSERT(list_empty(&entry->log_list));
+ ASSERT(list_empty(&entry->trans_list));
+ ASSERT(list_empty(&entry->root_extent_list));
+ ASSERT(RB_EMPTY_NODE(&entry->rb_node));
if (entry->inode)
btrfs_add_delayed_iput(entry->inode);
while (!list_empty(&entry->list)) {
@@ -579,6 +583,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
spin_lock_irq(&tree->lock);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
+ RB_CLEAR_NODE(node);
if (tree->last == node)
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d5f1f033b7a0..e9ace099162c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1349,6 +1349,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
+ /* Sometimes we would want to clear the limit on this qgroup.
+ * To meet this requirement, we treat the -1 as a special value
+ * which tell kernel to clear the limit on this qgroup.
+ */
+ const u64 CLEAR_VALUE = -1;
mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
@@ -1364,14 +1369,42 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
}
spin_lock(&fs_info->qgroup_lock);
- if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
- qgroup->max_rfer = limit->max_rfer;
- if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
- qgroup->max_excl = limit->max_excl;
- if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
- qgroup->rsv_rfer = limit->rsv_rfer;
- if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
- qgroup->rsv_excl = limit->rsv_excl;
+ if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
+ if (limit->max_rfer == CLEAR_VALUE) {
+ qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+ limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+ qgroup->max_rfer = 0;
+ } else {
+ qgroup->max_rfer = limit->max_rfer;
+ }
+ }
+ if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
+ if (limit->max_excl == CLEAR_VALUE) {
+ qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+ limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+ qgroup->max_excl = 0;
+ } else {
+ qgroup->max_excl = limit->max_excl;
+ }
+ }
+ if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
+ if (limit->rsv_rfer == CLEAR_VALUE) {
+ qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+ limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+ qgroup->rsv_rfer = 0;
+ } else {
+ qgroup->rsv_rfer = limit->rsv_rfer;
+ }
+ }
+ if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
+ if (limit->rsv_excl == CLEAR_VALUE) {
+ qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+ limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+ qgroup->rsv_excl = 0;
+ } else {
+ qgroup->rsv_excl = limit->rsv_excl;
+ }
+ }
qgroup->lim_flags |= limit->flags;
spin_unlock(&fs_info->qgroup_lock);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 827951fbf7fc..88cbb5995667 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4049,7 +4049,7 @@ restart:
if (trans && progress && err == -ENOSPC) {
ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
rc->block_group->flags);
- if (ret == 0) {
+ if (ret == 1) {
err = 0;
progress = 0;
goto restart;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9f2feabe99f2..94db0fa5225a 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3571,7 +3571,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
int is_dev_replace)
{
- int ret = 0;
unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
int max_active = fs_info->thread_pool_size;
@@ -3584,34 +3583,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
fs_info->scrub_workers =
btrfs_alloc_workqueue("btrfs-scrub", flags,
max_active, 4);
- if (!fs_info->scrub_workers) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!fs_info->scrub_workers)
+ goto fail_scrub_workers;
+
fs_info->scrub_wr_completion_workers =
btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
max_active, 2);
- if (!fs_info->scrub_wr_completion_workers) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!fs_info->scrub_wr_completion_workers)
+ goto fail_scrub_wr_completion_workers;
+
fs_info->scrub_nocow_workers =
btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
- if (!fs_info->scrub_nocow_workers) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!fs_info->scrub_nocow_workers)
+ goto fail_scrub_nocow_workers;
fs_info->scrub_parity_workers =
btrfs_alloc_workqueue("btrfs-scrubparity", flags,
max_active, 2);
- if (!fs_info->scrub_parity_workers) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!fs_info->scrub_parity_workers)
+ goto fail_scrub_parity_workers;
}
++fs_info->scrub_workers_refcnt;
-out:
- return ret;
+ return 0;
+
+fail_scrub_parity_workers:
+ btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+fail_scrub_nocow_workers:
+ btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
+fail_scrub_wr_completion_workers:
+ btrfs_destroy_workqueue(fs_info->scrub_workers);
+fail_scrub_workers:
+ return -ENOMEM;
}
static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1ce80c1c4eb6..9c45431e69ab 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4117,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
return 0;
}
+/*
+ * At the moment we always log all xattrs. This is to figure out at log replay
+ * time which xattrs must have their deletion replayed. If a xattr is missing
+ * in the log tree and exists in the fs/subvol tree, we delete it. This is
+ * because if a xattr is deleted, the inode is fsynced and a power failure
+ * happens, causing the log to be replayed the next time the fs is mounted,
+ * we want the xattr to not exist anymore (same behaviour as other filesystems
+ * with a journal, ext3/4, xfs, f2fs, etc).
+ */
+static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode,
+ struct btrfs_path *path,
+ struct btrfs_path *dst_path)
+{
+ int ret;
+ struct btrfs_key key;
+ const u64 ino = btrfs_ino(inode);
+ int ins_nr = 0;
+ int start_slot = 0;
+
+ key.objectid = ino;
+ key.type = BTRFS_XATTR_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ while (true) {
+ int slot = path->slots[0];
+ struct extent_buffer *leaf = path->nodes[0];
+ int nritems = btrfs_header_nritems(leaf);
+
+ if (slot >= nritems) {
+ if (ins_nr > 0) {
+ u64 last_extent = 0;
+
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, start_slot,
+ ins_nr, 1, 0);
+ /* can't be 1, extent items aren't processed */
+ ASSERT(ret <= 0);
+ if (ret < 0)
+ return ret;
+ ins_nr = 0;
+ }
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY)
+ break;
+
+ if (ins_nr == 0)
+ start_slot = slot;
+ ins_nr++;
+ path->slots[0]++;
+ cond_resched();
+ }
+ if (ins_nr > 0) {
+ u64 last_extent = 0;
+
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, start_slot,
+ ins_nr, 1, 0);
+ /* can't be 1, extent items aren't processed */
+ ASSERT(ret <= 0);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * If the no holes feature is enabled we need to make sure any hole between the
+ * last extent and the i_size of our inode is explicitly marked in the log. This
+ * is to make sure that doing something like:
+ *
+ * 1) create file with 128Kb of data
+ * 2) truncate file to 64Kb
+ * 3) truncate file to 256Kb
+ * 4) fsync file
+ * 5) <crash/power failure>
+ * 6) mount fs and trigger log replay
+ *
+ * Will give us a file with a size of 256Kb, the first 64Kb of data match what
+ * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
+ * file correspond to a hole. The presence of explicit holes in a log tree is
+ * what guarantees that log replay will remove/adjust file extent items in the
+ * fs/subvol tree.
+ *
+ * Here we do not need to care about holes between extents, that is already done
+ * by copy_items(). We also only need to do this in the full sync path, where we
+ * lookup for extents from the fs/subvol tree only. In the fast path case, we
+ * lookup the list of modified extent maps and if any represents a hole, we
+ * insert a corresponding extent representing a hole in the log tree.
+ */
+static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode,
+ struct btrfs_path *path)
+{
+ int ret;
+ struct btrfs_key key;
+ u64 hole_start;
+ u64 hole_size;
+ struct extent_buffer *leaf;
+ struct btrfs_root *log = root->log_root;
+ const u64 ino = btrfs_ino(inode);
+ const u64 i_size = i_size_read(inode);
+
+ if (!btrfs_fs_incompat(root->fs_info, NO_HOLES))
+ return 0;
+
+ key.objectid = ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ASSERT(ret != 0);
+ if (ret < 0)
+ return ret;
+
+ ASSERT(path->slots[0] > 0);
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
+ /* inode does not have any extents */
+ hole_start = 0;
+ hole_size = i_size;
+ } else {
+ struct btrfs_file_extent_item *extent;
+ u64 len;
+
+ /*
+ * If there's an extent beyond i_size, an explicit hole was
+ * already inserted by copy_items().
+ */
+ if (key.offset >= i_size)
+ return 0;
+
+ extent = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_type(leaf, extent) ==
+ BTRFS_FILE_EXTENT_INLINE) {
+ len = btrfs_file_extent_inline_len(leaf,
+ path->slots[0],
+ extent);
+ ASSERT(len == i_size);
+ return 0;
+ }
+
+ len = btrfs_file_extent_num_bytes(leaf, extent);
+ /* Last extent goes beyond i_size, no need to log a hole. */
+ if (key.offset + len > i_size)
+ return 0;
+ hole_start = key.offset + len;
+ hole_size = i_size - hole_start;
+ }
+ btrfs_release_path(path);
+
+ /* Last extent ends at i_size. */
+ if (hole_size == 0)
+ return 0;
+
+ hole_size = ALIGN(hole_size, root->sectorsize);
+ ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
+ hole_size, 0, hole_size, 0, 0, 0);
+ return ret;
+}
+
/* log a single inode in the tree log.
* At least one parent directory for this inode must exist in the tree
* or be logged already.
@@ -4155,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
u64 ino = btrfs_ino(inode);
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
u64 logged_isize = 0;
+ bool need_log_inode_item = true;
path = btrfs_alloc_path();
if (!path)
@@ -4263,11 +4445,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
} else {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
- ret = log_inode_item(trans, log, dst_path, inode);
- if (ret) {
- err = ret;
- goto out_unlock;
- }
goto log_extents;
}
@@ -4290,6 +4467,28 @@ again:
if (min_key.type > max_key.type)
break;
+ if (min_key.type == BTRFS_INODE_ITEM_KEY)
+ need_log_inode_item = false;
+
+ /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
+ if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
+ if (ins_nr == 0)
+ goto next_slot;
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, ins_start_slot,
+ ins_nr, inode_only, logged_isize);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
+ ins_nr = 0;
+ if (ret) {
+ btrfs_release_path(path);
+ continue;
+ }
+ goto next_slot;
+ }
+
src = path->nodes[0];
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
ins_nr++;
@@ -4357,9 +4556,26 @@ next_slot:
ins_nr = 0;
}
+ btrfs_release_path(path);
+ btrfs_release_path(dst_path);
+ err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+ if (err)
+ goto out_unlock;
+ if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
+ btrfs_release_path(path);
+ btrfs_release_path(dst_path);
+ err = btrfs_log_trailing_hole(trans, root, inode, path);
+ if (err)
+ goto out_unlock;
+ }
log_extents:
btrfs_release_path(path);
btrfs_release_path(dst_path);
+ if (need_log_inode_item) {
+ err = log_inode_item(trans, log, dst_path, inode);
+ if (err)
+ goto out_unlock;
+ }
if (fast_search) {
/*
* Some ordered extents started by fsync might have completed
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4b438b4c8c91..fbe7c104531c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
root = root->fs_info->chunk_root;
extent_root = root->fs_info->extent_root;
+ /*
+ * Prevent races with automatic removal of unused block groups.
+ * After we relocate and before we remove the chunk with offset
+ * chunk_offset, automatic removal of the block group can kick in,
+ * resulting in a failure when calling btrfs_remove_chunk() below.
+ *
+ * Make sure to acquire this mutex before doing a tree search (dev
+ * or chunk trees) to find chunks. Otherwise the cleaner kthread might
+ * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
+ * we release the path used to search the chunk/dev tree and before
+ * the current task acquires this mutex and calls us.
+ */
+ ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex));
+
ret = btrfs_can_relocate(extent_root, chunk_offset);
if (ret)
return -ENOSPC;
@@ -2814,13 +2828,18 @@ again:
key.type = BTRFS_CHUNK_ITEM_KEY;
while (1) {
+ mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
goto error;
+ }
BUG_ON(ret == 0); /* Corruption */
ret = btrfs_previous_item(chunk_root, path, key.objectid,
key.type);
+ if (ret)
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
if (ret < 0)
goto error;
if (ret > 0)
@@ -2843,6 +2862,7 @@ again:
else
BUG_ON(ret);
}
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
if (found_key.offset == 0)
break;
@@ -3299,9 +3319,12 @@ again:
goto error;
}
+ mutex_lock(&fs_info->delete_unused_bgs_mutex);
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
goto error;
+ }
/*
* this shouldn't happen, it means the last relocate
@@ -3313,6 +3336,7 @@ again:
ret = btrfs_previous_item(chunk_root, path, 0,
BTRFS_CHUNK_ITEM_KEY);
if (ret) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
ret = 0;
break;
}
@@ -3321,8 +3345,10 @@ again:
slot = path->slots[0];
btrfs_item_key_to_cpu(leaf, &found_key, slot);
- if (found_key.objectid != key.objectid)
+ if (found_key.objectid != key.objectid) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
break;
+ }
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -3335,10 +3361,13 @@ again:
ret = should_balance_chunk(chunk_root, leaf, chunk,
found_key.offset);
btrfs_release_path(path);
- if (!ret)
+ if (!ret) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
goto loop;
+ }
if (counting) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
spin_lock(&fs_info->balance_lock);
bctl->stat.expected++;
spin_unlock(&fs_info->balance_lock);
@@ -3348,6 +3377,7 @@ again:
ret = btrfs_relocate_chunk(chunk_root,
found_key.objectid,
found_key.offset);
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
if (ret && ret != -ENOSPC)
goto error;
if (ret == -ENOSPC) {
@@ -4087,11 +4117,16 @@ again:
key.type = BTRFS_DEV_EXTENT_KEY;
do {
+ mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
goto done;
+ }
ret = btrfs_previous_item(root, path, 0, key.type);
+ if (ret)
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
if (ret < 0)
goto done;
if (ret) {
@@ -4105,6 +4140,7 @@ again:
btrfs_item_key_to_cpu(l, &key, path->slots[0]);
if (key.objectid != device->devid) {
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
btrfs_release_path(path);
break;
}
@@ -4113,6 +4149,7 @@ again:
length = btrfs_dev_extent_length(l, dev_extent);
if (key.offset + length <= new_size) {
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
btrfs_release_path(path);
break;
}
@@ -4122,6 +4159,7 @@ again:
btrfs_release_path(path);
ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
if (ret && ret != -ENOSPC)
goto done;
if (ret == -ENOSPC)
@@ -5715,7 +5753,6 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e
static void btrfs_end_bio(struct bio *bio, int err)
{
struct btrfs_bio *bbio = bio->bi_private;
- struct btrfs_device *dev = bbio->stripes[0].dev;
int is_orig_bio = 0;
if (err) {
@@ -5723,6 +5760,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
if (err == -EIO || err == -EREMOTEIO) {
unsigned int stripe_index =
btrfs_io_bio(bio)->stripe_index;
+ struct btrfs_device *dev;
BUG_ON(stripe_index >= bbio->num_stripes);
dev = bbio->stripes[stripe_index].dev;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6b8e2f091f5b..48851f6ea6ec 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -896,6 +896,7 @@ COMPATIBLE_IOCTL(FIGETBSZ)
/* 'X' - originally XFS but some now in the VFS */
COMPATIBLE_IOCTL(FIFREEZE)
COMPATIBLE_IOCTL(FITHAW)
+COMPATIBLE_IOCTL(FITRIM)
COMPATIBLE_IOCTL(KDGETKEYCODE)
COMPATIBLE_IOCTL(KDSETKEYCODE)
COMPATIBLE_IOCTL(KDGKBTYPE)
diff --git a/fs/dcache.c b/fs/dcache.c
index 7a3f3e5f9cea..5c8ea15e73a5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -642,7 +642,7 @@ static inline bool fast_dput(struct dentry *dentry)
/*
* If we have a d_op->d_delete() operation, we sould not
- * let the dentry count go to zero, so use "put__or_lock".
+ * let the dentry count go to zero, so use "put_or_lock".
*/
if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
return lockref_put_or_lock(&dentry->d_lockref);
@@ -697,7 +697,7 @@ static inline bool fast_dput(struct dentry *dentry)
*/
smp_rmb();
d_flags = ACCESS_ONCE(dentry->d_flags);
- d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST;
+ d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED;
/* Nothing to do? Dropping the reference was all we needed? */
if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
@@ -776,6 +776,9 @@ repeat:
if (unlikely(d_unhashed(dentry)))
goto kill_it;
+ if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+ goto kill_it;
+
if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
if (dentry->d_op->d_delete(dentry))
goto kill_it;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 72afcc629d7b..feef8a9c4de7 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -325,7 +325,6 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return rc;
switch (cmd) {
- case FITRIM:
case FS_IOC32_GETFLAGS:
case FS_IOC32_SETFLAGS:
case FS_IOC32_GETVERSION:
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index aadb72828834..2553aa8b608d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -504,7 +504,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
struct buffer_head *bh;
int err;
- bh = sb_getblk(inode->i_sb, pblk);
+ bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS);
if (unlikely(!bh))
return ERR_PTR(-ENOMEM);
@@ -1089,7 +1089,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
err = -EIO;
goto cleanup;
}
- bh = sb_getblk(inode->i_sb, newblock);
+ bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
if (unlikely(!bh)) {
err = -ENOMEM;
goto cleanup;
@@ -1283,7 +1283,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
if (newblock == 0)
return err;
- bh = sb_getblk(inode->i_sb, newblock);
+ bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
if (unlikely(!bh))
return -ENOMEM;
lock_buffer(bh);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 41f8e55afcd1..cecf9aa10811 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1323,7 +1323,7 @@ static void ext4_da_page_release_reservation(struct page *page,
unsigned int offset,
unsigned int length)
{
- int to_release = 0;
+ int to_release = 0, contiguous_blks = 0;
struct buffer_head *head, *bh;
unsigned int curr_off = 0;
struct inode *inode = page->mapping->host;
@@ -1344,14 +1344,23 @@ static void ext4_da_page_release_reservation(struct page *page,
if ((offset <= curr_off) && (buffer_delay(bh))) {
to_release++;
+ contiguous_blks++;
clear_buffer_delay(bh);
+ } else if (contiguous_blks) {
+ lblk = page->index <<
+ (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ lblk += (curr_off >> inode->i_blkbits) -
+ contiguous_blks;
+ ext4_es_remove_extent(inode, lblk, contiguous_blks);
+ contiguous_blks = 0;
}
curr_off = next_off;
} while ((bh = bh->b_this_page) != head);
- if (to_release) {
+ if (contiguous_blks) {
lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- ext4_es_remove_extent(inode, lblk, to_release);
+ lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
+ ext4_es_remove_extent(inode, lblk, contiguous_blks);
}
/* If we have released all the blocks belonging to a cluster, then we
@@ -4344,7 +4353,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb,
int inode_size = EXT4_INODE_SIZE(sb);
oi.orig_ino = orig_ino;
- ino = (orig_ino & ~(inodes_per_block - 1)) + 1;
+ /*
+ * Calculate the first inode in the inode table block. Inode
+ * numbers are one-based. That is, the first inode in a block
+ * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1).
+ */
+ ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
if (ino == orig_ino)
continue;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index cb8451246b30..1346cfa355d0 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -755,7 +755,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return err;
}
case EXT4_IOC_MOVE_EXT:
- case FITRIM:
case EXT4_IOC_RESIZE_FS:
case EXT4_IOC_PRECACHE_EXTENTS:
case EXT4_IOC_SET_ENCRYPTION_POLICY:
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f6aedf88da43..34b610ea5030 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4816,18 +4816,12 @@ do_more:
/*
* blocks being freed are metadata. these blocks shouldn't
* be used until this transaction is committed
+ *
+ * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
+ * to fail.
*/
- retry:
- new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
- if (!new_entry) {
- /*
- * We use a retry loop because
- * ext4_free_blocks() is not allowed to fail.
- */
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- goto retry;
- }
+ new_entry = kmem_cache_alloc(ext4_free_data_cachep,
+ GFP_NOFS|__GFP_NOFAIL);
new_entry->efd_start_cluster = bit;
new_entry->efd_group = block_group;
new_entry->efd_count = count_clusters;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b52374e42102..6163ad21cb0e 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode)
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_extent *ex;
unsigned int i, len;
+ ext4_lblk_t start, end;
ext4_fsblk_t blk;
handle_t *handle;
int ret;
@@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode)
EXT4_FEATURE_RO_COMPAT_BIGALLOC))
return -EOPNOTSUPP;
+ /*
+ * In order to get correct extent info, force all delayed allocation
+ * blocks to be allocated, otherwise delayed allocation blocks may not
+ * be reflected and bypass the checks on extent header.
+ */
+ if (test_opt(inode->i_sb, DELALLOC))
+ ext4_alloc_da_blocks(inode);
+
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -650,11 +659,13 @@ int ext4_ind_migrate(struct inode *inode)
goto errout;
}
if (eh->eh_entries == 0)
- blk = len = 0;
+ blk = len = start = end = 0;
else {
len = le16_to_cpu(ex->ee_len);
blk = ext4_ext_pblock(ex);
- if (len > EXT4_NDIR_BLOCKS) {
+ start = le32_to_cpu(ex->ee_block);
+ end = start + len - 1;
+ if (end >= EXT4_NDIR_BLOCKS) {
ret = -EOPNOTSUPP;
goto errout;
}
@@ -662,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode)
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
memset(ei->i_data, 0, sizeof(ei->i_data));
- for (i=0; i < len; i++)
+ for (i = start; i <= end; i++)
ei->i_data[i] = cpu_to_le32(blk++);
ext4_mark_inode_dirty(handle, inode);
errout:
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c
index f005046e1591..d6a4b55d2ab0 100644
--- a/fs/hpfs/alloc.c
+++ b/fs/hpfs/alloc.c
@@ -484,3 +484,98 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a
a->btree.first_free = cpu_to_le16(8);
return a;
}
+
+static unsigned find_run(__le32 *bmp, unsigned *idx)
+{
+ unsigned len;
+ while (tstbits(bmp, *idx, 1)) {
+ (*idx)++;
+ if (unlikely(*idx >= 0x4000))
+ return 0;
+ }
+ len = 1;
+ while (!tstbits(bmp, *idx + len, 1))
+ len++;
+ return len;
+}
+
+static int do_trim(struct super_block *s, secno start, unsigned len, secno limit_start, secno limit_end, unsigned minlen, unsigned *result)
+{
+ int err;
+ secno end;
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ end = start + len;
+ if (start < limit_start)
+ start = limit_start;
+ if (end > limit_end)
+ end = limit_end;
+ if (start >= end)
+ return 0;
+ if (end - start < minlen)
+ return 0;
+ err = sb_issue_discard(s, start, end - start, GFP_NOFS, 0);
+ if (err)
+ return err;
+ *result += end - start;
+ return 0;
+}
+
+int hpfs_trim_fs(struct super_block *s, u64 start, u64 end, u64 minlen, unsigned *result)
+{
+ int err = 0;
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+ unsigned idx, len, start_bmp, end_bmp;
+ __le32 *bmp;
+ struct quad_buffer_head qbh;
+
+ *result = 0;
+ if (!end || end > sbi->sb_fs_size)
+ end = sbi->sb_fs_size;
+ if (start >= sbi->sb_fs_size)
+ return 0;
+ if (minlen > 0x4000)
+ return 0;
+ if (start < sbi->sb_dirband_start + sbi->sb_dirband_size && end > sbi->sb_dirband_start) {
+ hpfs_lock(s);
+ if (s->s_flags & MS_RDONLY) {
+ err = -EROFS;
+ goto unlock_1;
+ }
+ if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
+ err = -EIO;
+ goto unlock_1;
+ }
+ idx = 0;
+ while ((len = find_run(bmp, &idx)) && !err) {
+ err = do_trim(s, sbi->sb_dirband_start + idx * 4, len * 4, start, end, minlen, result);
+ idx += len;
+ }
+ hpfs_brelse4(&qbh);
+unlock_1:
+ hpfs_unlock(s);
+ }
+ start_bmp = start >> 14;
+ end_bmp = (end + 0x3fff) >> 14;
+ while (start_bmp < end_bmp && !err) {
+ hpfs_lock(s);
+ if (s->s_flags & MS_RDONLY) {
+ err = -EROFS;
+ goto unlock_2;
+ }
+ if (!(bmp = hpfs_map_bitmap(s, start_bmp, &qbh, "trim"))) {
+ err = -EIO;
+ goto unlock_2;
+ }
+ idx = 0;
+ while ((len = find_run(bmp, &idx)) && !err) {
+ err = do_trim(s, (start_bmp << 14) + idx, len, start, end, minlen, result);
+ idx += len;
+ }
+ hpfs_brelse4(&qbh);
+unlock_2:
+ hpfs_unlock(s);
+ start_bmp++;
+ }
+ return err;
+}
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 2a8e07425de0..dc540bfcee1d 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -327,4 +327,5 @@ const struct file_operations hpfs_dir_ops =
.iterate = hpfs_readdir,
.release = hpfs_dir_release,
.fsync = hpfs_file_fsync,
+ .unlocked_ioctl = hpfs_ioctl,
};
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 6d8cfe9b52d6..7ca28d604bf7 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -203,6 +203,7 @@ const struct file_operations hpfs_file_ops =
.release = hpfs_file_release,
.fsync = hpfs_file_fsync,
.splice_read = generic_file_splice_read,
+ .unlocked_ioctl = hpfs_ioctl,
};
const struct inode_operations hpfs_file_iops =
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index bb04b58d1d69..c4867b5116dd 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -18,6 +18,8 @@
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/blkdev.h>
#include <asm/unaligned.h>
#include "hpfs.h"
@@ -200,6 +202,7 @@ void hpfs_free_dnode(struct super_block *, secno);
struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *);
struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **);
struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **);
+int hpfs_trim_fs(struct super_block *, u64, u64, u64, unsigned *);
/* anode.c */
@@ -318,6 +321,7 @@ __printf(2, 3)
void hpfs_error(struct super_block *, const char *, ...);
int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *);
unsigned hpfs_get_free_dnodes(struct super_block *);
+long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg);
/*
* local time (HPFS) to GMT (Unix)
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 7cd00d3a7c9b..68a9bed05628 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -52,17 +52,20 @@ static void unmark_dirty(struct super_block *s)
}
/* Filesystem error... */
-static char err_buf[1024];
-
void hpfs_error(struct super_block *s, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- vsnprintf(err_buf, sizeof(err_buf), fmt, args);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_err("filesystem error: %pV", &vaf);
+
va_end(args);
- pr_err("filesystem error: %s", err_buf);
if (!hpfs_sb(s)->sb_was_error) {
if (hpfs_sb(s)->sb_err == 2) {
pr_cont("; crashing the system because you wanted it\n");
@@ -196,12 +199,39 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
+
+long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case FITRIM: {
+ struct fstrim_range range;
+ secno n_trimmed;
+ int r;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range)))
+ return -EFAULT;
+ r = hpfs_trim_fs(file_inode(file)->i_sb, range.start >> 9, (range.start + range.len) >> 9, (range.minlen + 511) >> 9, &n_trimmed);
+ if (r)
+ return r;
+ range.len = (u64)n_trimmed << 9;
+ if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range)))
+ return -EFAULT;
+ return 0;
+ }
+ default: {
+ return -ENOIOCTLCMD;
+ }
+ }
+}
+
+
static struct kmem_cache * hpfs_inode_cachep;
static struct inode *hpfs_alloc_inode(struct super_block *sb)
{
struct hpfs_inode_info *ei;
- ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
+ ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
ei->vfs_inode.i_version = 1;
@@ -424,11 +454,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
int o;
struct hpfs_sb_info *sbi = hpfs_sb(s);
char *new_opts = kstrdup(data, GFP_KERNEL);
-
+
+ if (!new_opts)
+ return -ENOMEM;
+
sync_filesystem(s);
*flags |= MS_NOATIME;
-
+
hpfs_lock(s);
uid = sbi->sb_uid; gid = sbi->sb_gid;
umask = 0777 & ~sbi->sb_mode;
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 93a1232894f6..8db8b7d61e40 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -180,9 +180,6 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case JFS_IOC_SETFLAGS32:
cmd = JFS_IOC_SETFLAGS;
break;
- case FITRIM:
- cmd = FITRIM;
- break;
}
return jfs_ioctl(filp, cmd, arg);
}
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 9a20e513d7eb..aba43811d6ef 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -1369,7 +1369,6 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case NILFS_IOCTL_SYNC:
case NILFS_IOCTL_RESIZE:
case NILFS_IOCTL_SET_ALLOC_RANGE:
- case FITRIM:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 53e6c40ed4c6..3cb097ccce60 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -980,7 +980,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
case OCFS2_IOC_GROUP_EXTEND:
case OCFS2_IOC_GROUP_ADD:
case OCFS2_IOC_GROUP_ADD64:
- case FITRIM:
break;
case OCFS2_IOC_REFLINK:
if (copy_from_user(&args, argp, sizeof(args)))
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index f140e3dbfb7b..d9da5a4e9382 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -343,6 +343,9 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags)
struct path realpath;
enum ovl_path_type type;
+ if (d_is_dir(dentry))
+ return d_backing_inode(dentry);
+
type = ovl_path_real(dentry, &realpath);
if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
err = ovl_want_write(dentry);