38 files changed, 612 insertions, 409 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index cde698a07d21..a2ae42720a6a 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1802,6 +1802,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 	set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
 	inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
 	BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
+        BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
+
 	inode->i_version = btrfs_stack_inode_sequence(inode_item);
 	inode->i_rdev = 0;
 	*rdev = btrfs_stack_inode_rdev(inode_item);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1eef4ee01d1a..0ec8e228b89f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3178,8 +3178,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
 	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
 	btrfs_mark_buffer_dirty(leaf);
-	btrfs_release_path(path);
 fail:
+	btrfs_release_path(path);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
@@ -3305,8 +3305,7 @@ again:
 
 	spin_lock(&block_group->lock);
 	if (block_group->cached != BTRFS_CACHE_FINISHED ||
-	    !btrfs_test_opt(root, SPACE_CACHE) ||
-	    block_group->delalloc_bytes) {
+	    !btrfs_test_opt(root, SPACE_CACHE)) {
 		/*
 		 * don't bother trying to write stuff out _if_
 		 * a) we're not cached,
@@ -3408,17 +3407,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
 	int loops = 0;
 
 	spin_lock(&cur_trans->dirty_bgs_lock);
-	if (!list_empty(&cur_trans->dirty_bgs)) {
-		list_splice_init(&cur_trans->dirty_bgs, &dirty);
+	if (list_empty(&cur_trans->dirty_bgs)) {
+		spin_unlock(&cur_trans->dirty_bgs_lock);
+		return 0;
 	}
+	list_splice_init(&cur_trans->dirty_bgs, &dirty);
 	spin_unlock(&cur_trans->dirty_bgs_lock);
 
 again:
-	if (list_empty(&dirty)) {
-		btrfs_free_path(path);
-		return 0;
-	}
-
 	/*
 	 * make sure all the block groups on our dirty list actually
 	 * exist
@@ -3431,18 +3427,16 @@ again:
 			return -ENOMEM;
 	}
 
+	/*
+	 * cache_write_mutex is here only to save us from balance or automatic
+	 * removal of empty block groups deleting this block group while we are
+	 * writing out the cache
+	 */
+	mutex_lock(&trans->transaction->cache_write_mutex);
 	while (!list_empty(&dirty)) {
 		cache = list_first_entry(&dirty,
 					 struct btrfs_block_group_cache,
 					 dirty_list);
-
-		/*
-		 * cache_write_mutex is here only to save us from balance
-		 * deleting this block group while we are writing out the
-		 * cache
-		 */
-		mutex_lock(&trans->transaction->cache_write_mutex);
-
 		/*
 		 * this can happen if something re-dirties a block
 		 * group that is already under IO.  Just wait for it to
@@ -3495,7 +3489,6 @@ again:
 		}
 		if (!ret)
 			ret = write_one_cache_group(trans, root, path, cache);
-		mutex_unlock(&trans->transaction->cache_write_mutex);
 
 		/* if its not on the io list, we need to put the block group */
 		if (should_put)
@@ -3503,7 +3496,16 @@ again:
 
 		if (ret)
 			break;
+
+		/*
+		 * Avoid blocking other tasks for too long. It might even save
+		 * us from writing caches for block groups that are going to be
+		 * removed.
+		 */
+		mutex_unlock(&trans->transaction->cache_write_mutex);
+		mutex_lock(&trans->transaction->cache_write_mutex);
 	}
+	mutex_unlock(&trans->transaction->cache_write_mutex);
 
 	/*
 	 * go through delayed refs for all the stuff we've just kicked off
@@ -3514,8 +3516,15 @@ again:
 		loops++;
 		spin_lock(&cur_trans->dirty_bgs_lock);
 		list_splice_init(&cur_trans->dirty_bgs, &dirty);
+		/*
+		 * dirty_bgs_lock protects us from concurrent block group
+		 * deletes too (not just cache_write_mutex).
+		 */
+		if (!list_empty(&dirty)) {
+			spin_unlock(&cur_trans->dirty_bgs_lock);
+			goto again;
+		}
 		spin_unlock(&cur_trans->dirty_bgs_lock);
-		goto again;
 	}
 
 	btrfs_free_path(path);
@@ -7537,7 +7546,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
  * returns the key for the extent through ins, and a tree buffer for
  * the first block of the extent through buf.
  *
- * returns the tree buffer or NULL.
+ * returns the tree buffer or an ERR_PTR on error.
  */
 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 					struct btrfs_root *root,
@@ -7548,6 +7557,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 	struct btrfs_key ins;
 	struct btrfs_block_rsv *block_rsv;
 	struct extent_buffer *buf;
+	struct btrfs_delayed_extent_op *extent_op;
 	u64 flags = 0;
 	int ret;
 	u32 blocksize = root->nodesize;
@@ -7568,13 +7578,14 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 
 	ret = btrfs_reserve_extent(root, blocksize, blocksize,
 				   empty_size, hint, &ins, 0, 0);
-	if (ret) {
-		unuse_block_rsv(root->fs_info, block_rsv, blocksize);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto out_unuse;
 
 	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
-	BUG_ON(IS_ERR(buf)); /* -ENOMEM */
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		goto out_free_reserved;
+	}
 
 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
 		if (parent == 0)
@@ -7584,9 +7595,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 		BUG_ON(parent > 0);
 
 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
-		struct btrfs_delayed_extent_op *extent_op;
 		extent_op = btrfs_alloc_delayed_extent_op();
-		BUG_ON(!extent_op); /* -ENOMEM */
+		if (!extent_op) {
+			ret = -ENOMEM;
+			goto out_free_buf;
+		}
 		if (key)
 			memcpy(&extent_op->key, key, sizeof(extent_op->key));
 		else
@@ -7601,13 +7614,24 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 		extent_op->level = level;
 
 		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
-					ins.objectid,
-					ins.offset, parent, root_objectid,
-					level, BTRFS_ADD_DELAYED_EXTENT,
-					extent_op, 0);
-		BUG_ON(ret); /* -ENOMEM */
+						 ins.objectid, ins.offset,
+						 parent, root_objectid, level,
+						 BTRFS_ADD_DELAYED_EXTENT,
+						 extent_op, 0);
+		if (ret)
+			goto out_free_delayed;
 	}
 	return buf;
+
+out_free_delayed:
+	btrfs_free_delayed_extent_op(extent_op);
+out_free_buf:
+	free_extent_buffer(buf);
+out_free_reserved:
+	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
+out_unuse:
+	unuse_block_rsv(root->fs_info, block_rsv, blocksize);
+	return ERR_PTR(ret);
 }
 
 struct walk_control {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 782f3bc4651d..43af5a61ad25 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4560,36 +4560,37 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
 	do {
 		index--;
 		page = eb->pages[index];
-		if (page && mapped) {
+		if (!page)
+			continue;
+		if (mapped)
 			spin_lock(&page->mapping->private_lock);
+		/*
+		 * We do this since we'll remove the pages after we've
+		 * removed the eb from the radix tree, so we could race
+		 * and have this page now attached to the new eb.  So
+		 * only clear page_private if it's still connected to
+		 * this eb.
+		 */
+		if (PagePrivate(page) &&
+		    page->private == (unsigned long)eb) {
+			BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+			BUG_ON(PageDirty(page));
+			BUG_ON(PageWriteback(page));
 			/*
-			 * We do this since we'll remove the pages after we've
-			 * removed the eb from the radix tree, so we could race
-			 * and have this page now attached to the new eb.  So
-			 * only clear page_private if it's still connected to
-			 * this eb.
+			 * We need to make sure we haven't be attached
+			 * to a new eb.
 			 */
-			if (PagePrivate(page) &&
-			    page->private == (unsigned long)eb) {
-				BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-				BUG_ON(PageDirty(page));
-				BUG_ON(PageWriteback(page));
-				/*
-				 * We need to make sure we haven't be attached
-				 * to a new eb.
-				 */
-				ClearPagePrivate(page);
-				set_page_private(page, 0);
-				/* One for the page private */
-				page_cache_release(page);
-			}
-			spin_unlock(&page->mapping->private_lock);
-
-		}
-		if (page) {
-			/* One for when we alloced the page */
+			ClearPagePrivate(page);
+			set_page_private(page, 0);
+			/* One for the page private */
 			page_cache_release(page);
 		}
+
+		if (mapped)
+			spin_unlock(&page->mapping->private_lock);
+
+		/* One for when we alloced the page */
+		page_cache_release(page);
 	} while (index != 0);
 }
 
@@ -4870,6 +4871,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 				mark_extent_buffer_accessed(exists, p);
 				goto free_eb;
 			}
+			exists = NULL;
 
 			/*
 			 * Do this so attach doesn't complain and we need to
@@ -4933,12 +4935,12 @@ again:
 	return eb;
 
 free_eb:
+	WARN_ON(!atomic_dec_and_test(&eb->refs));
 	for (i = 0; i < num_pages; i++) {
 		if (eb->pages[i])
 			unlock_page(eb->pages[i]);
 	}
 
-	WARN_ON(!atomic_dec_and_test(&eb->refs));
 	btrfs_release_extent_buffer(eb);
 	return exists;
 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81fa75a8e1f3..5e020d76fd07 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -86,7 +86,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
 
 	mapping_set_gfp_mask(inode->i_mapping,
 			mapping_gfp_mask(inode->i_mapping) &
-			~(GFP_NOFS & ~__GFP_HIGHMEM));
+			~(__GFP_FS | __GFP_HIGHMEM));
 
 	return inode;
 }
@@ -1218,7 +1218,7 @@ out:
  *
  * This function writes out a free space cache struct to disk for quick recovery
  * on mount.  This will return 0 if it was successfull in writing the cache out,
- * and -1 if it was not.
+ * or an errno if it was not.
  */
 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 				   struct btrfs_free_space_ctl *ctl,
@@ -1235,12 +1235,12 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 	int must_iput = 0;
 
 	if (!i_size_read(inode))
-		return -1;
+		return -EIO;
 
 	WARN_ON(io_ctl->pages);
 	ret = io_ctl_init(io_ctl, inode, root, 1);
 	if (ret)
-		return -1;
+		return ret;
 
 	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
 		down_write(&block_group->data_rwsem);
@@ -1258,7 +1258,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 	}
 
 	/* Lock all pages first so we can lock the extent safely. */
-	io_ctl_prepare_pages(io_ctl, inode, 0);
+	ret = io_ctl_prepare_pages(io_ctl, inode, 0);
+	if (ret)
+		goto out;
 
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
 			 0, &cached_state);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ada4d24ed11b..8bb013672aee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3632,25 +3632,28 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
 	BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
 
+	inode->i_version = btrfs_inode_sequence(leaf, inode_item);
+	inode->i_generation = BTRFS_I(inode)->generation;
+	inode->i_rdev = 0;
+	rdev = btrfs_inode_rdev(leaf, inode_item);
+
+	BTRFS_I(inode)->index_cnt = (u64)-1;
+	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+
+cache_index:
 	/*
 	 * If we were modified in the current generation and evicted from memory
 	 * and then re-read we need to do a full sync since we don't have any
 	 * idea about which extents were modified before we were evicted from
 	 * cache.
+	 *
+	 * This is required for both inode re-read from disk and delayed inode
+	 * in delayed_nodes_tree.
 	 */
 	if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
 		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 			&BTRFS_I(inode)->runtime_flags);
 
-	inode->i_version = btrfs_inode_sequence(leaf, inode_item);
-	inode->i_generation = BTRFS_I(inode)->generation;
-	inode->i_rdev = 0;
-	rdev = btrfs_inode_rdev(leaf, inode_item);
-
-	BTRFS_I(inode)->index_cnt = (u64)-1;
-	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-
-cache_index:
 	path->slots[0]++;
 	if (inode->i_nlink != 1 ||
 	    path->slots[0] >= btrfs_header_nritems(leaf))
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b05653f182c2..1c22c6518504 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2410,7 +2410,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 			"Attempt to delete subvolume %llu during send",
 			dest->root_key.objectid);
 		err = -EPERM;
-		goto out_dput;
+		goto out_unlock_inode;
 	}
 
 	d_invalidate(dentry);
@@ -2505,6 +2505,7 @@ out_up_write:
 				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
 		spin_unlock(&dest->root_item_lock);
 	}
+out_unlock_inode:
 	mutex_unlock(&inode->i_mutex);
 	if (!err) {
 		shrink_dcache_sb(root->fs_info->sb);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8bcd2a007517..96aebf3bcd5b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1058,6 +1058,7 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
 	struct extent_map *em;
 	struct list_head *search_list = &trans->transaction->pending_chunks;
 	int ret = 0;
+	u64 physical_start = *start;
 
 again:
 	list_for_each_entry(em, search_list, list) {
@@ -1068,9 +1069,9 @@ again:
 		for (i = 0; i < map->num_stripes; i++) {
 			if (map->stripes[i].dev != device)
 				continue;
-			if (map->stripes[i].physical >= *start + len ||
+			if (map->stripes[i].physical >= physical_start + len ||
 			    map->stripes[i].physical + em->orig_block_len <=
-			    *start)
+			    physical_start)
 				continue;
 			*start = map->stripes[i].physical +
 				em->orig_block_len;
@@ -1193,8 +1194,14 @@ again:
 			 */
 			if (contains_pending_extent(trans, device,
 						    &search_start,
-						    hole_size))
-				hole_size = 0;
+						    hole_size)) {
+				if (key.offset >= search_start) {
+					hole_size = key.offset - search_start;
+				} else {
+					WARN_ON_ONCE(1);
+					hole_size = 0;
+				}
+			}
 
 			if (hole_size > max_hole_size) {
 				max_hole_start = search_start;
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index da94e41bdbf6..537356742091 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -173,5 +173,5 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION("0.0.2");
 MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration.");
 
-module_init(configfs_init);
+core_initcall(configfs_init);
 module_exit(configfs_exit);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 59fedbcf8798..86a2121828c3 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -121,7 +121,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
 	int len, i;
 	int err = -ENOMEM;
 
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
 		return err;
 
diff --git a/fs/exec.c b/fs/exec.c
index 49a1c61433b7..1977c2a553ac 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -659,6 +659,9 @@ int setup_arg_pages(struct linux_binprm *bprm,
 	if (stack_base > STACK_SIZE_MAX)
 		stack_base = STACK_SIZE_MAX;
 
+	/* Add space for stack randomization. */
+	stack_base += (STACK_RND_MASK << PAGE_SHIFT);
+
 	/* Make sure we didn't let the argument array grow too large. */
 	if (vma->vm_end - vma->vm_start > stack_base)
 		return -ENOMEM;
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 18228c201f7f..024f2284d3f6 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -64,8 +64,8 @@ config EXT4_FS_SECURITY
 	  If you are not using a security module that requires using
 	  extended attributes for file security labels, say N.
 
-config EXT4_FS_ENCRYPTION
-	bool "Ext4 Encryption"
+config EXT4_ENCRYPTION
+	tristate "Ext4 Encryption"
 	depends on EXT4_FS
 	select CRYPTO_AES
 	select CRYPTO_CBC
@@ -81,6 +81,11 @@ config EXT4_FS_ENCRYPTION
 	  efficient since it avoids caching the encrypted and
 	  decrypted pages in the page cache.
 
+config EXT4_FS_ENCRYPTION
+	bool
+	default y
+	depends on EXT4_ENCRYPTION
+
 config EXT4_DEBUG
 	bool "EXT4 debugging support"
 	depends on EXT4_FS
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index ca2f5948c1ac..fded02f72299 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -66,6 +66,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
 	int res = 0;
 	char iv[EXT4_CRYPTO_BLOCK_SIZE];
 	struct scatterlist sg[1];
+	int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
 	char *workbuf;
 
 	if (iname->len <= 0 || iname->len > ctx->lim)
@@ -73,6 +74,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
 
 	ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ?
 		EXT4_CRYPTO_BLOCK_SIZE : iname->len;
+	ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
 	ciphertext_len = (ciphertext_len > ctx->lim)
 			? ctx->lim : ciphertext_len;
 
@@ -101,7 +103,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
 	/* Create encryption request */
 	sg_init_table(sg, 1);
 	sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
-	ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
+	ablkcipher_request_set_crypt(req, sg, sg, ciphertext_len, iv);
 	res = crypto_ablkcipher_encrypt(req);
 	if (res == -EINPROGRESS || res == -EBUSY) {
 		BUG_ON(req->base.data != &ecr);
@@ -198,106 +200,57 @@ static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
 	return oname->len;
 }
 
+static const char *lookup_table =
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
 /**
  * ext4_fname_encode_digest() -
  *
  * Encodes the input digest using characters from the set [a-zA-Z0-9_+].
  * The encoded string is roughly 4/3 times the size of the input string.
  */
-int ext4_fname_encode_digest(char *dst, char *src, u32 len)
+static int digest_encode(const char *src, int len, char *dst)
 {
-	static const char *lookup_table =
-		"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_+";
-	u32 current_chunk, num_chunks, i;
-	char tmp_buf[3];
-	u32 c0, c1, c2, c3;
-
-	current_chunk = 0;
-	num_chunks = len/3;
-	for (i = 0; i < num_chunks; i++) {
-		c0 = src[3*i] & 0x3f;
-		c1 = (((src[3*i]>>6)&0x3) | ((src[3*i+1] & 0xf)<<2)) & 0x3f;
-		c2 = (((src[3*i+1]>>4)&0xf) | ((src[3*i+2] & 0x3)<<4)) & 0x3f;
-		c3 = (src[3*i+2]>>2) & 0x3f;
-		dst[4*i] = lookup_table[c0];
-		dst[4*i+1] = lookup_table[c1];
-		dst[4*i+2] = lookup_table[c2];
-		dst[4*i+3] = lookup_table[c3];
-	}
-	if (i*3 < len) {
-		memset(tmp_buf, 0, 3);
-		memcpy(tmp_buf, &src[3*i], len-3*i);
-		c0 = tmp_buf[0] & 0x3f;
-		c1 = (((tmp_buf[0]>>6)&0x3) | ((tmp_buf[1] & 0xf)<<2)) & 0x3f;
-		c2 = (((tmp_buf[1]>>4)&0xf) | ((tmp_buf[2] & 0x3)<<4)) & 0x3f;
-		c3 = (tmp_buf[2]>>2) & 0x3f;
-		dst[4*i] = lookup_table[c0];
-		dst[4*i+1] = lookup_table[c1];
-		dst[4*i+2] = lookup_table[c2];
-		dst[4*i+3] = lookup_table[c3];
+	int i = 0, bits = 0, ac = 0;
+	char *cp = dst;
+
+	while (i < len) {
+		ac += (((unsigned char) src[i]) << bits);
+		bits += 8;
+		do {
+			*cp++ = lookup_table[ac & 0x3f];
+			ac >>= 6;
+			bits -= 6;
+		} while (bits >= 6);
 		i++;
 	}
-	return (i * 4);
+	if (bits)
+		*cp++ = lookup_table[ac & 0x3f];
+	return cp - dst;
 }
 
-/**
- * ext4_fname_hash() -
- *
- * This function computes the hash of the input filename, and sets the output
- * buffer to the *encoded* digest.  It returns the length of the digest as its
- * return value.  Errors are returned as negative numbers.  We trust the caller
- * to allocate sufficient memory to oname string.
- */
-static int ext4_fname_hash(struct ext4_fname_crypto_ctx *ctx,
-			   const struct ext4_str *iname,
-			   struct ext4_str *oname)
+static int digest_decode(const char *src, int len, char *dst)
 {
-	struct scatterlist sg;
-	struct hash_desc desc = {
-		.tfm = (struct crypto_hash *)ctx->htfm,
-		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
-	};
-	int res = 0;
-
-	if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
-		res = ext4_fname_encode_digest(oname->name, iname->name,
-					       iname->len);
-		oname->len = res;
-		return res;
-	}
-
-	sg_init_one(&sg, iname->name, iname->len);
-	res = crypto_hash_init(&desc);
-	if (res) {
-		printk(KERN_ERR
-		       "%s: Error initializing crypto hash; res = [%d]\n",
-		       __func__, res);
-		goto out;
-	}
-	res = crypto_hash_update(&desc, &sg, iname->len);
-	if (res) {
-		printk(KERN_ERR
-		       "%s: Error updating crypto hash; res = [%d]\n",
-		       __func__, res);
-		goto out;
-	}
-	res = crypto_hash_final(&desc,
-		&oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE]);
-	if (res) {
-		printk(KERN_ERR
-		       "%s: Error finalizing crypto hash; res = [%d]\n",
-		       __func__, res);
-		goto out;
+	int i = 0, bits = 0, ac = 0;
+	const char *p;
+	char *cp = dst;
+
+	while (i < len) {
+		p = strchr(lookup_table, src[i]);
+		if (p == NULL || src[i] == 0)
+			return -2;
+		ac += (p - lookup_table) << bits;
+		bits += 6;
+		if (bits >= 8) {
+			*cp++ = ac & 0xff;
+			ac >>= 8;
+			bits -= 8;
+		}
+		i++;
 	}
-	/* Encode the digest as a printable string--this will increase the
-	 * size of the digest */
-	oname->name[0] = 'I';
-	res = ext4_fname_encode_digest(oname->name+1,
-		&oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE],
-		EXT4_FNAME_CRYPTO_DIGEST_SIZE) + 1;
-	oname->len = res;
-out:
-	return res;
+	if (ac)
+		return -1;
+	return cp - dst;
 }
 
 /**
@@ -405,6 +358,7 @@ struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(
 	if (IS_ERR(ctx))
 		return ctx;
 
+	ctx->flags = ei->i_crypt_policy_flags;
 	if (ctx->has_valid_key) {
 		if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) {
 			printk_once(KERN_WARNING
@@ -517,6 +471,7 @@ int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
 				      u32 namelen)
 {
 	u32 ciphertext_len;
+	int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
 
 	if (ctx == NULL)
 		return -EIO;
@@ -524,6 +479,7 @@ int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
 		return -EACCES;
 	ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ?
 		EXT4_CRYPTO_BLOCK_SIZE : namelen;
+	ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
 	ciphertext_len = (ciphertext_len > ctx->lim)
 			? ctx->lim : ciphertext_len;
 	return (int) ciphertext_len;
@@ -539,10 +495,13 @@ int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
 				   u32 ilen, struct ext4_str *crypto_str)
 {
 	unsigned int olen;
+	int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
 
 	if (!ctx)
 		return -EIO;
-	olen = ext4_fname_crypto_round_up(ilen, EXT4_CRYPTO_BLOCK_SIZE);
+	if (padding < EXT4_CRYPTO_BLOCK_SIZE)
+		padding = EXT4_CRYPTO_BLOCK_SIZE;
+	olen = ext4_fname_crypto_round_up(ilen, padding);
 	crypto_str->len = olen;
 	if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2)
 		olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2;
@@ -571,9 +530,13 @@ void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str)
  * ext4_fname_disk_to_usr() - converts a filename from disk space to user space
  */
 int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
-			   const struct ext4_str *iname,
-			   struct ext4_str *oname)
+			    struct dx_hash_info *hinfo,
+			    const struct ext4_str *iname,
+			    struct ext4_str *oname)
 {
+	char buf[24];
+	int ret;
+
 	if (ctx == NULL)
 		return -EIO;
 	if (iname->len < 3) {
@@ -587,18 +550,33 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
 	}
 	if (ctx->has_valid_key)
 		return ext4_fname_decrypt(ctx, iname, oname);
-	else
-		return ext4_fname_hash(ctx, iname, oname);
+
+	if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
+		ret = digest_encode(iname->name, iname->len, oname->name);
+		oname->len = ret;
+		return ret;
+	}
+	if (hinfo) {
+		memcpy(buf, &hinfo->hash, 4);
+		memcpy(buf+4, &hinfo->minor_hash, 4);
+	} else
+		memset(buf, 0, 8);
+	memcpy(buf + 8, iname->name + iname->len - 16, 16);
+	oname->name[0] = '_';
+	ret = digest_encode(buf, 24, oname->name+1);
+	oname->len = ret + 1;
+	return ret + 1;
 }
 
 int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+			   struct dx_hash_info *hinfo,
 			   const struct ext4_dir_entry_2 *de,
 			   struct ext4_str *oname)
 {
 	struct ext4_str iname = {.name = (unsigned char *) de->name,
 				 .len = de->name_len };
 
-	return _ext4_fname_disk_to_usr(ctx, &iname, oname);
+	return _ext4_fname_disk_to_usr(ctx, hinfo, &iname, oname);
 }
 
 
@@ -640,10 +618,11 @@ int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
 			    const struct qstr *iname,
 			    struct dx_hash_info *hinfo)
 {
-	struct ext4_str tmp, tmp2;
+	struct ext4_str tmp;
 	int ret = 0;
+	char buf[EXT4_FNAME_CRYPTO_DIGEST_SIZE+1];
 
-	if (!ctx || !ctx->has_valid_key ||
+	if (!ctx ||
 	    ((iname->name[0] == '.') &&
 	     ((iname->len == 1) ||
 	      ((iname->name[1] == '.') && (iname->len == 2))))) {
@@ -651,59 +630,90 @@ int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
 		return 0;
 	}
 
+	if (!ctx->has_valid_key && iname->name[0] == '_') {
+		if (iname->len != 33)
+			return -ENOENT;
+		ret = digest_decode(iname->name+1, iname->len, buf);
+		if (ret != 24)
+			return -ENOENT;
+		memcpy(&hinfo->hash, buf, 4);
+		memcpy(&hinfo->minor_hash, buf + 4, 4);
+		return 0;
+	}
+
+	if (!ctx->has_valid_key && iname->name[0] != '_') {
+		if (iname->len > 43)
+			return -ENOENT;
+		ret = digest_decode(iname->name, iname->len, buf);
+		ext4fs_dirhash(buf, ret, hinfo);
+		return 0;
+	}
+
 	/* First encrypt the plaintext name */
 	ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp);
 	if (ret < 0)
 		return ret;
 
 	ret = ext4_fname_encrypt(ctx, iname, &tmp);
-	if (ret < 0)
-		goto out;
-
-	tmp2.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
-	tmp2.name = kmalloc(tmp2.len + 1, GFP_KERNEL);
-	if (tmp2.name == NULL) {
-		ret = -ENOMEM;
-		goto out;
+	if (ret >= 0) {
+		ext4fs_dirhash(tmp.name, tmp.len, hinfo);
+		ret = 0;
 	}
 
-	ret = ext4_fname_hash(ctx, &tmp, &tmp2);
-	if (ret > 0)
-		ext4fs_dirhash(tmp2.name, tmp2.len, hinfo);
-	ext4_fname_crypto_free_buffer(&tmp2);
-out:
 	ext4_fname_crypto_free_buffer(&tmp);
 	return ret;
 }
 
-/**
- * ext4_fname_disk_to_htree() - converts a filename from disk space to htree-access string
- */
-int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
-			    const struct ext4_dir_entry_2 *de,
-			    struct dx_hash_info *hinfo)
+int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
+		     int len, const char * const name,
+		     struct ext4_dir_entry_2 *de)
 {
-	struct ext4_str iname = {.name = (unsigned char *) de->name,
-				 .len = de->name_len};
-	struct ext4_str tmp;
-	int ret;
+	int ret = -ENOENT;
+	int bigname = (*name == '_');
 
-	if (!ctx ||
-	    ((iname.name[0] == '.') &&
-	     ((iname.len == 1) ||
-	      ((iname.name[1] == '.') && (iname.len == 2))))) {
-		ext4fs_dirhash(iname.name, iname.len, hinfo);
-		return 0;
+	if (ctx->has_valid_key) {
+		if (cstr->name == NULL) {
+			struct qstr istr;
+
+			ret = ext4_fname_crypto_alloc_buffer(ctx, len, cstr);
+			if (ret < 0)
+				goto errout;
+			istr.name = name;
+			istr.len = len;
+			ret = ext4_fname_encrypt(ctx, &istr, cstr);
+			if (ret < 0)
+				goto errout;
+		}
+	} else {
+		if (cstr->name == NULL) {
+			cstr->name = kmalloc(32, GFP_KERNEL);
+			if (cstr->name == NULL)
+				return -ENOMEM;
+			if ((bigname && (len != 33)) ||
+			    (!bigname && (len > 43)))
+				goto errout;
+			ret = digest_decode(name+bigname, len-bigname,
+					    cstr->name);
+			if (ret < 0) {
+				ret = -ENOENT;
+				goto errout;
+			}
+			cstr->len = ret;
+		}
+		if (bigname) {
+			if (de->name_len < 16)
+				return 0;
+			ret = memcmp(de->name + de->name_len - 16,
+				     cstr->name + 8, 16);
+			return (ret == 0) ? 1 : 0;
+		}
 	}
-
-	tmp.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
-	tmp.name = kmalloc(tmp.len + 1, GFP_KERNEL);
-	if (tmp.name == NULL)
-		return -ENOMEM;
-
-	ret = ext4_fname_hash(ctx, &iname, &tmp);
-	if (ret > 0)
-		ext4fs_dirhash(tmp.name, tmp.len, hinfo);
-	ext4_fname_crypto_free_buffer(&tmp);
+	if (de->name_len != cstr->len)
+		return 0;
+	ret = memcmp(de->name, cstr->name, cstr->len);
+	return (ret == 0) ? 1 : 0;
+errout:
+	kfree(cstr->name);
+	cstr->name = NULL;
 	return ret;
 }
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index c8392af8abbb..52170d0b7c40 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -110,6 +110,7 @@ int ext4_generate_encryption_key(struct inode *inode)
 	}
 	res = 0;
 
+	ei->i_crypt_policy_flags = ctx.flags;
 	if (S_ISREG(inode->i_mode))
 		crypt_key->mode = ctx.contents_encryption_mode;
 	else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
index 30eaf9e9864a..a6d6291aea16 100644
--- a/fs/ext4/crypto_policy.c
+++ b/fs/ext4/crypto_policy.c
@@ -37,6 +37,8 @@ static int ext4_is_encryption_context_consistent_with_policy(
 		return 0;
 	return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
 			EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
+		(ctx.flags ==
+		 policy->flags) &&
 		(ctx.contents_encryption_mode ==
 		 policy->contents_encryption_mode) &&
 		(ctx.filenames_encryption_mode ==
@@ -56,25 +58,25 @@ static int ext4_create_encryption_context_from_policy(
 		printk(KERN_WARNING
 		       "%s: Invalid contents encryption mode %d\n", __func__,
 			policy->contents_encryption_mode);
-		res = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 	if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
 		printk(KERN_WARNING
 		       "%s: Invalid filenames encryption mode %d\n", __func__,
 			policy->filenames_encryption_mode);
-		res = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
+	if (policy->flags & ~EXT4_POLICY_FLAGS_VALID)
+		return -EINVAL;
 	ctx.contents_encryption_mode = policy->contents_encryption_mode;
 	ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
+	ctx.flags = policy->flags;
 	BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
 	get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
 
 	res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
 			     EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
 			     sizeof(ctx), 0);
-out:
 	if (!res)
 		ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
 	return res;
@@ -115,6 +117,7 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
 	policy->version = 0;
 	policy->contents_encryption_mode = ctx.contents_encryption_mode;
 	policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
+	policy->flags = ctx.flags;
 	memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
 	       EXT4_KEY_DESCRIPTOR_SIZE);
 	return 0;
@@ -176,6 +179,7 @@ int ext4_inherit_context(struct inode *parent, struct inode *child)
 				EXT4_ENCRYPTION_MODE_AES_256_XTS;
 			ctx.filenames_encryption_mode =
 				EXT4_ENCRYPTION_MODE_AES_256_CTS;
+			ctx.flags = 0;
 			memset(ctx.master_key_descriptor, 0x42,
 			       EXT4_KEY_DESCRIPTOR_SIZE);
 			res = 0;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 61db51a5ce4c..5665d82d2332 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -249,7 +249,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 				} else {
 					/* Directory is encrypted */
 					err = ext4_fname_disk_to_usr(enc_ctx,
-							de, &fname_crypto_str);
+						NULL, de, &fname_crypto_str);
 					if (err < 0)
 						goto errout;
 					if (!dir_emit(ctx,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ef267adce19a..009a0590b20f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -911,6 +911,7 @@ struct ext4_inode_info {
 
 	/* on-disk additional length */
 	__u16 i_extra_isize;
+	char i_crypt_policy_flags;
 
 	/* Indicate the inline data space. */
 	u16 i_inline_off;
@@ -1066,12 +1067,6 @@ extern void ext4_set_bits(void *bm, int cur, int len);
 /* Metadata checksum algorithm codes */
 #define EXT4_CRC32C_CHKSUM		1
 
-/* Encryption algorithms */
-#define EXT4_ENCRYPTION_MODE_INVALID		0
-#define EXT4_ENCRYPTION_MODE_AES_256_XTS	1
-#define EXT4_ENCRYPTION_MODE_AES_256_GCM	2
-#define EXT4_ENCRYPTION_MODE_AES_256_CBC	3
-
 /*
  * Structure of the super block
  */
@@ -2093,9 +2088,11 @@ u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
 int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
 				   u32 ilen, struct ext4_str *crypto_str);
 int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+			    struct dx_hash_info *hinfo,
 			    const struct ext4_str *iname,
 			    struct ext4_str *oname);
 int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+			   struct dx_hash_info *hinfo,
 			   const struct ext4_dir_entry_2 *de,
 			   struct ext4_str *oname);
 int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
@@ -2104,11 +2101,12 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
 int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
 			   const struct qstr *iname,
 			   struct dx_hash_info *hinfo);
-int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
-			    const struct ext4_dir_entry_2 *de,
-			    struct dx_hash_info *hinfo);
 int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
 				      u32 namelen);
+int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
+		     int len, const char * const name,
+		     struct ext4_dir_entry_2 *de);
+
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx);
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index c2ba35a914b6..d75159c101ce 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -20,12 +20,20 @@ struct ext4_encryption_policy {
 	char version;
 	char contents_encryption_mode;
 	char filenames_encryption_mode;
+	char flags;
 	char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
 } __attribute__((__packed__));
 
 #define EXT4_ENCRYPTION_CONTEXT_FORMAT_V1 1
 #define EXT4_KEY_DERIVATION_NONCE_SIZE 16
 
+#define EXT4_POLICY_FLAGS_PAD_4		0x00
+#define EXT4_POLICY_FLAGS_PAD_8		0x01
+#define EXT4_POLICY_FLAGS_PAD_16	0x02
+#define EXT4_POLICY_FLAGS_PAD_32	0x03
+#define EXT4_POLICY_FLAGS_PAD_MASK	0x03
+#define EXT4_POLICY_FLAGS_VALID		0x03
+
 /**
  * Encryption context for inode
  *
@@ -41,7 +49,7 @@ struct ext4_encryption_context {
 	char format;
 	char contents_encryption_mode;
 	char filenames_encryption_mode;
-	char reserved;
+	char flags;
 	char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
 	char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE];
 } __attribute__((__packed__));
@@ -120,6 +128,7 @@ struct ext4_fname_crypto_ctx {
 	struct crypto_hash *htfm;
 	struct page *workpage;
 	struct ext4_encryption_key key;
+	unsigned flags : 8;
 	unsigned has_valid_key : 1;
 	unsigned ctfm_key_is_ready : 1;
 };
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 973816bfe4a9..d74e08029643 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4927,13 +4927,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (ret)
 		return ret;
 
-	/*
-	 * currently supporting (pre)allocate mode for extent-based
-	 * files _only_
-	 */
-	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
-		return -EOPNOTSUPP;
-
 	if (mode & FALLOC_FL_COLLAPSE_RANGE)
 		return ext4_collapse_range(inode, offset, len);
 
@@ -4955,6 +4948,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 
 	mutex_lock(&inode->i_mutex);
 
+	/*
+	 * We only support preallocation for extent-based files only
+	 */
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
 	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
 	     offset + len > i_size_read(inode)) {
 		new_size = offset + len;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index d33d5a6852b9..26724aeece73 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -703,6 +703,14 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 
 	BUG_ON(end < lblk);
 
+	if ((status & EXTENT_STATUS_DELAYED) &&
+	    (status & EXTENT_STATUS_WRITTEN)) {
+		ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
+				" delayed and written which can potentially "
+				" cause data loss.\n", lblk, len);
+		WARN_ON(1);
+	}
+
 	newes.es_lblk = lblk;
 	newes.es_len = len;
 	ext4_es_store_pblock_status(&newes, pblk, status);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbd0654a2675..55b187c3bac1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -531,6 +531,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
 				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
 		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+		    !(status & EXTENT_STATUS_WRITTEN) &&
 		    ext4_find_delalloc_range(inode, map->m_lblk,
 					     map->m_lblk + map->m_len - 1))
 			status |= EXTENT_STATUS_DELAYED;
@@ -635,6 +636,7 @@ found:
 		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
 				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
 		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+		    !(status & EXTENT_STATUS_WRITTEN) &&
 		    ext4_find_delalloc_range(inode, map->m_lblk,
 					     map->m_lblk + map->m_len - 1))
 			status |= EXTENT_STATUS_DELAYED;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 7223b0b4bc38..814f3beb4369 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -640,7 +640,7 @@ static struct stats dx_show_leaf(struct inode *dir,
 						ext4_put_fname_crypto_ctx(&ctx);
 						ctx = NULL;
 					}
-					res = ext4_fname_disk_to_usr(ctx, de,
+					res = ext4_fname_disk_to_usr(ctx, NULL, de,
 							&fname_crypto_str);
 					if (res < 0) {
 						printk(KERN_WARNING "Error "
@@ -653,15 +653,8 @@ static struct stats dx_show_leaf(struct inode *dir,
 						name = fname_crypto_str.name;
 						len = fname_crypto_str.len;
 					}
-					res = ext4_fname_disk_to_hash(ctx, de,
-								      &h);
-					if (res < 0) {
-						printk(KERN_WARNING "Error "
-							"converting filename "
-							"from disk to htree"
-							"\n");
-						h.hash = 0xDEADBEEF;
-					}
+					ext4fs_dirhash(de->name, de->name_len,
+						       &h);
 					printk("%*.s:(E)%x.%u ", len, name,
 					       h.hash, (unsigned) ((char *) de
 								   - base));
@@ -1008,15 +1001,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 			/* silently ignore the rest of the block */
 			break;
 		}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-		err = ext4_fname_disk_to_hash(ctx, de, hinfo);
-		if (err < 0) {
-			count = err;
-			goto errout;
-		}
-#else
 		ext4fs_dirhash(de->name, de->name_len, hinfo);
-#endif
 		if ((hinfo->hash < start_hash) ||
 		    ((hinfo->hash == start_hash) &&
 		     (hinfo->minor_hash < start_minor_hash)))
@@ -1032,7 +1017,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 				   &tmp_str);
 		} else {
 			/* Directory is encrypted */
-			err = ext4_fname_disk_to_usr(ctx, de,
+			err = ext4_fname_disk_to_usr(ctx, hinfo, de,
 						     &fname_crypto_str);
 			if (err < 0) {
 				count = err;
@@ -1193,26 +1178,10 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
 	int count = 0;
 	char *base = (char *) de;
 	struct dx_hash_info h = *hinfo;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-	struct ext4_fname_crypto_ctx *ctx = NULL;
-	int err;
-
-	ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-#endif
 
 	while ((char *) de < base + blocksize) {
 		if (de->name_len && de->inode) {
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-			err = ext4_fname_disk_to_hash(ctx, de, &h);
-			if (err < 0) {
-				ext4_put_fname_crypto_ctx(&ctx);
-				return err;
-			}
-#else
 			ext4fs_dirhash(de->name, de->name_len, &h);
-#endif
 			map_tail--;
 			map_tail->hash = h.hash;
 			map_tail->offs = ((char *) de - base)>>2;
@@ -1223,9 +1192,6 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
 		/* XXX: do we need to check rec_len == 0 case? -Chris */
 		de = ext4_next_entry(de, blocksize);
 	}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-	ext4_put_fname_crypto_ctx(&ctx);
-#endif
 	return count;
 }
 
@@ -1287,16 +1253,8 @@ static inline int ext4_match(struct ext4_fname_crypto_ctx *ctx,
 		return 0;
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-	if (ctx) {
-		/* Directory is encrypted */
-		res = ext4_fname_disk_to_usr(ctx, de, fname_crypto_str);
-		if (res < 0)
-			return res;
-		if (len != res)
-			return 0;
-		res = memcmp(name, fname_crypto_str->name, len);
-		return (res == 0) ? 1 : 0;
-	}
+	if (ctx)
+		return ext4_fname_match(ctx, fname_crypto_str, len, name, de);
 #endif
 	if (len != de->name_len)
 		return 0;
@@ -1324,16 +1282,6 @@ int search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
 	if (IS_ERR(ctx))
 		return -1;
 
-	if (ctx != NULL) {
-		/* Allocate buffer to hold maximum name length */
-		res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
-						     &fname_crypto_str);
-		if (res < 0) {
-			ext4_put_fname_crypto_ctx(&ctx);
-			return -1;
-		}
-	}
-
 	de = (struct ext4_dir_entry_2 *)search_buf;
 	dlimit = search_buf + buf_size;
 	while ((char *) de < dlimit) {
@@ -1872,14 +1820,6 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
 			return res;
 		}
 		reclen = EXT4_DIR_REC_LEN(res);
-
-		/* Allocate buffer to hold maximum name length */
-		res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
-						     &fname_crypto_str);
-		if (res < 0) {
-			ext4_put_fname_crypto_ctx(&ctx);
-			return -1;
-		}
 	}
 
 	de = (struct ext4_dir_entry_2 *)buf;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 8a8ec6293b19..cf0c472047e3 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1432,12 +1432,15 @@ static int ext4_flex_group_add(struct super_block *sb,
 		goto exit;
 	/*
 	 * We will always be modifying at least the superblock and  GDT
-	 * block.  If we are adding a group past the last current GDT block,
+	 * blocks.  If we are adding a group past the last current GDT block,
 	 * we will also modify the inode and the dindirect block.  If we
 	 * are adding a group with superblock/GDT backups  we will also
 	 * modify each of the reserved GDT dindirect blocks.
 	 */
-	credit = flex_gd->count * 4 + reserved_gdb;
+	credit = 3;	/* sb, resize inode, resize inode dindirect */
+	/* GDT blocks */
+	credit += 1 + DIV_ROUND_UP(flex_gd->count, EXT4_DESC_PER_BLOCK(sb));
+	credit += reserved_gdb;	/* Reserved GDT dindirect blocks */
 	handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit);
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 19f78f20975e..187b78920314 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -74,7 +74,7 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		goto errout;
 	}
 	pstr.name = paddr;
-	res = _ext4_fname_disk_to_usr(ctx, &cstr, &pstr);
+	res = _ext4_fname_disk_to_usr(ctx, NULL, &cstr, &pstr);
 	if (res < 0)
 		goto errout;
 	/* Null-terminate the name */
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b91b0e10678e..1e1aae669fa8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1513,6 +1513,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 {
 	struct inode *inode = mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	bool locked = false;
 	int ret;
 	long diff;
 
@@ -1533,7 +1534,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 
 	diff = nr_pages_to_write(sbi, DATA, wbc);
 
+	if (!S_ISDIR(inode->i_mode)) {
+		mutex_lock(&sbi->writepages);
+		locked = true;
+	}
 	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
+	if (locked)
+		mutex_unlock(&sbi->writepages);
 
 	f2fs_submit_merged_bio(sbi, DATA, WRITE);
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d8921cf2ba9a..8de34ab6d5b1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -625,6 +625,7 @@ struct f2fs_sb_info {
 	struct mutex cp_mutex;			/* checkpoint procedure lock */
 	struct rw_semaphore cp_rwsem;		/* blocking FS operations */
 	struct rw_semaphore node_write;		/* locking node writes */
+	struct mutex writepages;		/* mutex for writepages() */
 	wait_queue_head_t cp_wait;
 
 	struct inode_management im[MAX_INO_ENTRY];      /* manage inode cache */
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 7e3794edae42..658e8079aaf9 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -298,16 +298,14 @@ fail:
 
 static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-	struct page *page;
+	struct page *page = page_follow_link_light(dentry, nd);
 
-	page = page_follow_link_light(dentry, nd);
-	if (IS_ERR(page))
+	if (IS_ERR_OR_NULL(page))
 		return page;
 
 	/* this is broken symlink case */
 	if (*nd_get_link(nd) == 0) {
-		kunmap(page);
-		page_cache_release(page);
+		page_put_link(dentry, nd, page);
 		return ERR_PTR(-ENOENT);
 	}
 	return page;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 160b88346b24..b2dd1b01f076 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1035,6 +1035,7 @@ try_onemore:
 	sbi->raw_super = raw_super;
 	sbi->raw_super_buf = raw_super_buf;
 	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	init_rwsem(&sbi->node_write);
 	clear_sbi_flag(sbi, SBI_POR_DOING);
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index f131fc23ffc4..fffca9517321 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -518,7 +518,14 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 	if (!kn)
 		goto err_out1;
 
-	ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
+	/*
+	 * If the ino of the sysfs entry created for a kmem cache gets
+	 * allocated from an ida layer, which is accounted to the memcg that
+	 * owns the cache, the memcg will get pinned forever. So do not account
+	 * ino ida allocations.
+	 */
+	ret = ida_simple_get(&root->ino_ida, 1, 0,
+			     GFP_KERNEL | __GFP_NOACCOUNT);
 	if (ret < 0)
 		goto err_out2;
 	kn->ino = ret;
diff --git a/fs/namei.c b/fs/namei.c
index 4a8d998b7274..fe30d3be43a8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1415,6 +1415,7 @@ static int lookup_fast(struct nameidata *nd,
 	 */
 	if (nd->flags & LOOKUP_RCU) {
 		unsigned seq;
+		bool negative;
 		dentry = __d_lookup_rcu(parent, &nd->last, &seq);
 		if (!dentry)
 			goto unlazy;
@@ -1424,8 +1425,11 @@ static int lookup_fast(struct nameidata *nd,
 		 * the dentry name information from lookup.
 		 */
 		*inode = dentry->d_inode;
+		negative = d_is_negative(dentry);
 		if (read_seqcount_retry(&dentry->d_seq, seq))
 			return -ECHILD;
+		if (negative)
+			return -ENOENT;
 
 		/*
 		 * This sequence count validates that the parent had no
@@ -1472,6 +1476,10 @@ unlazy:
 		goto need_lookup;
 	}
 
+	if (unlikely(d_is_negative(dentry))) {
+		dput(dentry);
+		return -ENOENT;
+	}
 	path->mnt = mnt;
 	path->dentry = dentry;
 	err = follow_managed(path, nd->flags);
@@ -1583,10 +1591,10 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
 			goto out_err;
 
 		inode = path->dentry->d_inode;
+		err = -ENOENT;
+		if (d_is_negative(path->dentry))
+			goto out_path_put;
 	}
-	err = -ENOENT;
-	if (d_is_negative(path->dentry))
-		goto out_path_put;
 
 	if (should_follow_link(path->dentry, follow)) {
 		if (nd->flags & LOOKUP_RCU) {
@@ -3036,14 +3044,13 @@ retry_lookup:
 
 	BUG_ON(nd->flags & LOOKUP_RCU);
 	inode = path->dentry->d_inode;
-finish_lookup:
-	/* we _can_ be in RCU mode here */
 	error = -ENOENT;
 	if (d_is_negative(path->dentry)) {
 		path_to_nameidata(path, nd);
 		goto out;
 	}
-
+finish_lookup:
+	/* we _can_ be in RCU mode here */
 	if (should_follow_link(path->dentry, !symlink_ok)) {
 		if (nd->flags & LOOKUP_RCU) {
 			if (unlikely(nd->path.mnt != path->mnt ||
@@ -3226,7 +3233,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
 
 	if (unlikely(file->f_flags & __O_TMPFILE)) {
 		error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened);
-		goto out;
+		goto out2;
 	}
 
 	error = path_init(dfd, pathname, flags, nd);
@@ -3256,6 +3263,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
 	}
 out:
 	path_cleanup(nd);
+out2:
 	if (!(opened & FILE_OPENED)) {
 		BUG_ON(!error);
 		put_filp(file);
diff --git a/fs/namespace.c b/fs/namespace.c
index 1f4f9dac6e5a..1b9e11167bae 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3179,6 +3179,12 @@ bool fs_fully_visible(struct file_system_type *type)
 		if (mnt->mnt.mnt_sb->s_type != type)
 			continue;
 
+		/* This mount is not fully visible if it's root directory
+		 * is not the root directory of the filesystem.
+		 */
+		if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
+			continue;
+
 		/* This mount is not fully visible if there are any child mounts
 		 * that cover anything except for empty directories.
 		 */
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 03d647bf195d..cdefaa331a07 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -181,6 +181,17 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
 }
 
 const struct nfsd4_layout_ops bl_layout_ops = {
+	/*
+	 * Pretend that we send notification to the client.  This is a blatant
+	 * lie to force recent Linux clients to cache our device IDs.
+	 * We rarely ever change the device ID, so the harm of leaking deviceids
+	 * for a while isn't too bad.  Unfortunately RFC5661 is a complete mess
+	 * in this regard, but I filed errata 4119 for this a while ago, and
+	 * hopefully the Linux client will eventually start caching deviceids
+	 * without this again.
+	 */
+	.notify_types		=
+			NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
 	.proc_getdeviceinfo	= nfsd4_block_proc_getdeviceinfo,
 	.encode_getdeviceinfo	= nfsd4_block_encode_getdeviceinfo,
 	.proc_layoutget		= nfsd4_block_proc_layoutget,
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 58277859a467..5694cfb7a47b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -224,7 +224,7 @@ static int nfs_cb_stat_to_errno(int status)
 }
 
 static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
-			       enum nfsstat4 *status)
+			       int *status)
 {
 	__be32 *p;
 	u32 op;
@@ -235,7 +235,7 @@ static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
 	op = be32_to_cpup(p++);
 	if (unlikely(op != expected))
 		goto out_unexpected;
-	*status = be32_to_cpup(p);
+	*status = nfs_cb_stat_to_errno(be32_to_cpup(p));
 	return 0;
 out_overflow:
 	print_overflow_msg(__func__, xdr);
@@ -446,22 +446,16 @@ out_overflow:
 static int decode_cb_sequence4res(struct xdr_stream *xdr,
 				  struct nfsd4_callback *cb)
 {
-	enum nfsstat4 nfserr;
 	int status;
 
 	if (cb->cb_minorversion == 0)
 		return 0;
 
-	status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
-	if (unlikely(status))
-		goto out;
-	if (unlikely(nfserr != NFS4_OK))
-		goto out_default;
-	status = decode_cb_sequence4resok(xdr, cb);
-out:
-	return status;
-out_default:
-	return nfs_cb_stat_to_errno(nfserr);
+	status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status);
+	if (unlikely(status || cb->cb_status))
+		return status;
+
+	return decode_cb_sequence4resok(xdr, cb);
 }
 
 /*
@@ -524,26 +518,19 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 				  struct nfsd4_callback *cb)
 {
 	struct nfs4_cb_compound_hdr hdr;
-	enum nfsstat4 nfserr;
 	int status;
 
 	status = decode_cb_compound4res(xdr, &hdr);
 	if (unlikely(status))
-		goto out;
+		return status;
 
 	if (cb != NULL) {
 		status = decode_cb_sequence4res(xdr, cb);
-		if (unlikely(status))
-			goto out;
+		if (unlikely(status || cb->cb_status))
+			return status;
 	}
 
-	status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
-	if (unlikely(status))
-		goto out;
-	if (unlikely(nfserr != NFS4_OK))
-		status = nfs_cb_stat_to_errno(nfserr);
-out:
-	return status;
+	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
 }
 
 #ifdef CONFIG_NFSD_PNFS
@@ -621,24 +608,18 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
 				  struct nfsd4_callback *cb)
 {
 	struct nfs4_cb_compound_hdr hdr;
-	enum nfsstat4 nfserr;
 	int status;
 
 	status = decode_cb_compound4res(xdr, &hdr);
 	if (unlikely(status))
-		goto out;
+		return status;
+
 	if (cb) {
 		status = decode_cb_sequence4res(xdr, cb);
-		if (unlikely(status))
-			goto out;
+		if (unlikely(status || cb->cb_status))
+			return status;
 	}
-	status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr);
-	if (unlikely(status))
-		goto out;
-	if (unlikely(nfserr != NFS4_OK))
-		status = nfs_cb_stat_to_errno(nfserr);
-out:
-	return status;
+	return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
 }
 #endif /* CONFIG_NFSD_PNFS */
 
@@ -898,13 +879,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
 		if (!nfsd41_cb_get_slot(clp, task))
 			return;
 	}
-	spin_lock(&clp->cl_lock);
-	if (list_empty(&cb->cb_per_client)) {
-		/* This is the first call, not a restart */
-		cb->cb_done = false;
-		list_add(&cb->cb_per_client, &clp->cl_callbacks);
-	}
-	spin_unlock(&clp->cl_lock);
 	rpc_call_start(task);
 }
 
@@ -918,22 +892,33 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 
 	if (clp->cl_minorversion) {
 		/* No need for lock, access serialized in nfsd4_cb_prepare */
-		++clp->cl_cb_session->se_cb_seq_nr;
+		if (!task->tk_status)
+			++clp->cl_cb_session->se_cb_seq_nr;
 		clear_bit(0, &clp->cl_cb_slot_busy);
 		rpc_wake_up_next(&clp->cl_cb_waitq);
 		dprintk("%s: freed slot, new seqid=%d\n", __func__,
 			clp->cl_cb_session->se_cb_seq_nr);
 	}
 
-	if (clp->cl_cb_client != task->tk_client) {
-		/* We're shutting down or changing cl_cb_client; leave
-		 * it to nfsd4_process_cb_update to restart the call if
-		 * necessary. */
+	/*
+	 * If the backchannel connection was shut down while this
+	 * task was queued, we need to resubmit it after setting up
+	 * a new backchannel connection.
+	 *
+	 * Note that if we lost our callback connection permanently
+	 * the submission code will error out, so we don't need to
+	 * handle that case here.
+	 */
+	if (task->tk_flags & RPC_TASK_KILLED) {
+		task->tk_status = 0;
+		cb->cb_need_restart = true;
 		return;
 	}
 
-	if (cb->cb_done)
-		return;
+	if (cb->cb_status) {
+		WARN_ON_ONCE(task->tk_status);
+		task->tk_status = cb->cb_status;
+	}
 
 	switch (cb->cb_ops->done(cb, task)) {
 	case 0:
@@ -949,21 +934,17 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 	default:
 		BUG();
 	}
-	cb->cb_done = true;
 }
 
 static void nfsd4_cb_release(void *calldata)
 {
 	struct nfsd4_callback *cb = calldata;
-	struct nfs4_client *clp = cb->cb_clp;
-
-	if (cb->cb_done) {
-		spin_lock(&clp->cl_lock);
-		list_del(&cb->cb_per_client);
-		spin_unlock(&clp->cl_lock);
 
+	if (cb->cb_need_restart)
+		nfsd4_run_cb(cb);
+	else
 		cb->cb_ops->release(cb);
-	}
+
 }
 
 static const struct rpc_call_ops nfsd4_cb_ops = {
@@ -1058,9 +1039,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
 		nfsd4_mark_cb_down(clp, err);
 		return;
 	}
-	/* Yay, the callback channel's back! Restart any callbacks: */
-	list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
-		queue_work(callback_wq, &cb->cb_work);
 }
 
 static void
@@ -1071,8 +1049,12 @@ nfsd4_run_cb_work(struct work_struct *work)
 	struct nfs4_client *clp = cb->cb_clp;
 	struct rpc_clnt *clnt;
 
-	if (cb->cb_ops && cb->cb_ops->prepare)
-		cb->cb_ops->prepare(cb);
+	if (cb->cb_need_restart) {
+		cb->cb_need_restart = false;
+	} else {
+		if (cb->cb_ops && cb->cb_ops->prepare)
+			cb->cb_ops->prepare(cb);
+	}
 
 	if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
 		nfsd4_process_cb_update(cb);
@@ -1084,6 +1066,15 @@ nfsd4_run_cb_work(struct work_struct *work)
 			cb->cb_ops->release(cb);
 		return;
 	}
+
+	/*
+	 * Don't send probe messages for 4.1 or later.
+	 */
+	if (!cb->cb_ops && clp->cl_minorversion) {
+		clp->cl_cb_state = NFSD4_CB_UP;
+		return;
+	}
+
 	cb->cb_msg.rpc_cred = clp->cl_cb_cred;
 	rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
 			cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
@@ -1098,8 +1089,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
 	cb->cb_msg.rpc_resp = cb;
 	cb->cb_ops = ops;
 	INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
-	INIT_LIST_HEAD(&cb->cb_per_client);
-	cb->cb_done = true;
+	cb->cb_status = 0;
+	cb->cb_need_restart = false;
 }
 
 void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 38f2d7abe3a7..039f9c8a95e8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -94,6 +94,7 @@ static struct kmem_cache *lockowner_slab;
 static struct kmem_cache *file_slab;
 static struct kmem_cache *stateid_slab;
 static struct kmem_cache *deleg_slab;
+static struct kmem_cache *odstate_slab;
 
 static void free_session(struct nfsd4_session *);
 
@@ -281,6 +282,7 @@ put_nfs4_file(struct nfs4_file *fi)
 	if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
 		hlist_del_rcu(&fi->fi_hash);
 		spin_unlock(&state_lock);
+		WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
 		WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
 		call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
 	}
@@ -471,6 +473,86 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
 		__nfs4_file_put_access(fp, O_RDONLY);
 }
 
+/*
+ * Allocate a new open/delegation state counter. This is needed for
+ * pNFS for proper return on close semantics.
+ *
+ * Note that we only allocate it for pNFS-enabled exports, otherwise
+ * all pointers to struct nfs4_clnt_odstate are always NULL.
+ */
+static struct nfs4_clnt_odstate *
+alloc_clnt_odstate(struct nfs4_client *clp)
+{
+	struct nfs4_clnt_odstate *co;
+
+	co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
+	if (co) {
+		co->co_client = clp;
+		atomic_set(&co->co_odcount, 1);
+	}
+	return co;
+}
+
+static void
+hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co)
+{
+	struct nfs4_file *fp = co->co_file;
+
+	lockdep_assert_held(&fp->fi_lock);
+	list_add(&co->co_perfile, &fp->fi_clnt_odstate);
+}
+
+static inline void
+get_clnt_odstate(struct nfs4_clnt_odstate *co)
+{
+	if (co)
+		atomic_inc(&co->co_odcount);
+}
+
+static void
+put_clnt_odstate(struct nfs4_clnt_odstate *co)
+{
+	struct nfs4_file *fp;
+
+	if (!co)
+		return;
+
+	fp = co->co_file;
+	if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
+		list_del(&co->co_perfile);
+		spin_unlock(&fp->fi_lock);
+
+		nfsd4_return_all_file_layouts(co->co_client, fp);
+		kmem_cache_free(odstate_slab, co);
+	}
+}
+
+static struct nfs4_clnt_odstate *
+find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new)
+{
+	struct nfs4_clnt_odstate *co;
+	struct nfs4_client *cl;
+
+	if (!new)
+		return NULL;
+
+	cl = new->co_client;
+
+	spin_lock(&fp->fi_lock);
+	list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) {
+		if (co->co_client == cl) {
+			get_clnt_odstate(co);
+			goto out;
+		}
+	}
+	co = new;
+	co->co_file = fp;
+	hash_clnt_odstate_locked(new);
+out:
+	spin_unlock(&fp->fi_lock);
+	return co;
+}
+
 struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
 					 struct kmem_cache *slab)
 {
@@ -606,7 +688,8 @@ static void block_delegations(struct knfsd_fh *fh)
 }
 
 static struct nfs4_delegation *
-alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
+alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
+		 struct nfs4_clnt_odstate *odstate)
 {
 	struct nfs4_delegation *dp;
 	long n;
@@ -631,6 +714,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
 	INIT_LIST_HEAD(&dp->dl_perfile);
 	INIT_LIST_HEAD(&dp->dl_perclnt);
 	INIT_LIST_HEAD(&dp->dl_recall_lru);
+	dp->dl_clnt_odstate = odstate;
+	get_clnt_odstate(odstate);
 	dp->dl_type = NFS4_OPEN_DELEGATE_READ;
 	dp->dl_retries = 1;
 	nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
@@ -714,6 +799,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
 	spin_lock(&state_lock);
 	unhash_delegation_locked(dp);
 	spin_unlock(&state_lock);
+	put_clnt_odstate(dp->dl_clnt_odstate);
 	nfs4_put_deleg_lease(dp->dl_stid.sc_file);
 	nfs4_put_stid(&dp->dl_stid);
 }
@@ -724,6 +810,7 @@ static void revoke_delegation(struct nfs4_delegation *dp)
 
 	WARN_ON(!list_empty(&dp->dl_recall_lru));
 
+	put_clnt_odstate(dp->dl_clnt_odstate);
 	nfs4_put_deleg_lease(dp->dl_stid.sc_file);
 
 	if (clp->cl_minorversion == 0)
@@ -933,6 +1020,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
 {
 	struct nfs4_ol_stateid *stp = openlockstateid(stid);
 
+	put_clnt_odstate(stp->st_clnt_odstate);
 	release_all_access(stp);
 	if (stp->st_stateowner)
 		nfs4_put_stateowner(stp->st_stateowner);
@@ -1538,7 +1626,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 	INIT_LIST_HEAD(&clp->cl_openowners);
 	INIT_LIST_HEAD(&clp->cl_delegations);
 	INIT_LIST_HEAD(&clp->cl_lru);
-	INIT_LIST_HEAD(&clp->cl_callbacks);
 	INIT_LIST_HEAD(&clp->cl_revoked);
 #ifdef CONFIG_NFSD_PNFS
 	INIT_LIST_HEAD(&clp->cl_lo_states);
@@ -1634,6 +1721,7 @@ __destroy_client(struct nfs4_client *clp)
 	while (!list_empty(&reaplist)) {
 		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
 		list_del_init(&dp->dl_recall_lru);
+		put_clnt_odstate(dp->dl_clnt_odstate);
 		nfs4_put_deleg_lease(dp->dl_stid.sc_file);
 		nfs4_put_stid(&dp->dl_stid);
 	}
@@ -3057,6 +3145,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
 	spin_lock_init(&fp->fi_lock);
 	INIT_LIST_HEAD(&fp->fi_stateids);
 	INIT_LIST_HEAD(&fp->fi_delegations);
+	INIT_LIST_HEAD(&fp->fi_clnt_odstate);
 	fh_copy_shallow(&fp->fi_fhandle, fh);
 	fp->fi_deleg_file = NULL;
 	fp->fi_had_conflict = false;
@@ -3073,6 +3162,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
 void
 nfsd4_free_slabs(void)
 {
+	kmem_cache_destroy(odstate_slab);
 	kmem_cache_destroy(openowner_slab);
 	kmem_cache_destroy(lockowner_slab);
 	kmem_cache_destroy(file_slab);
@@ -3103,8 +3193,14 @@ nfsd4_init_slabs(void)
 			sizeof(struct nfs4_delegation), 0, 0, NULL);
 	if (deleg_slab == NULL)
 		goto out_free_stateid_slab;
+	odstate_slab = kmem_cache_create("nfsd4_odstate",
+			sizeof(struct nfs4_clnt_odstate), 0, 0, NULL);
+	if (odstate_slab == NULL)
+		goto out_free_deleg_slab;
 	return 0;
 
+out_free_deleg_slab:
+	kmem_cache_destroy(deleg_slab);
 out_free_stateid_slab:
 	kmem_cache_destroy(stateid_slab);
 out_free_file_slab:
@@ -3581,6 +3677,14 @@ alloc_stateid:
 	open->op_stp = nfs4_alloc_open_stateid(clp);
 	if (!open->op_stp)
 		return nfserr_jukebox;
+
+	if (nfsd4_has_session(cstate) &&
+	    (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) {
+		open->op_odstate = alloc_clnt_odstate(clp);
+		if (!open->op_odstate)
+			return nfserr_jukebox;
+	}
+
 	return nfs_ok;
 }
 
@@ -3869,7 +3973,7 @@ out_fput:
 
 static struct nfs4_delegation *
 nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
-		    struct nfs4_file *fp)
+		    struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
 {
 	int status;
 	struct nfs4_delegation *dp;
@@ -3877,7 +3981,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 	if (fp->fi_had_conflict)
 		return ERR_PTR(-EAGAIN);
 
-	dp = alloc_init_deleg(clp, fh);
+	dp = alloc_init_deleg(clp, fh, odstate);
 	if (!dp)
 		return ERR_PTR(-ENOMEM);
 
@@ -3903,6 +4007,7 @@ out_unlock:
 	spin_unlock(&state_lock);
 out:
 	if (status) {
+		put_clnt_odstate(dp->dl_clnt_odstate);
 		nfs4_put_stid(&dp->dl_stid);
 		return ERR_PTR(status);
 	}
@@ -3980,7 +4085,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
 		default:
 			goto out_no_deleg;
 	}
-	dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file);
+	dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
 	if (IS_ERR(dp))
 		goto out_no_deleg;
 
@@ -4069,6 +4174,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 			release_open_stateid(stp);
 			goto out;
 		}
+
+		stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp,
+							open->op_odstate);
+		if (stp->st_clnt_odstate == open->op_odstate)
+			open->op_odstate = NULL;
 	}
 	update_stateid(&stp->st_stid.sc_stateid);
 	memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
@@ -4129,6 +4239,8 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
 		kmem_cache_free(file_slab, open->op_file);
 	if (open->op_stp)
 		nfs4_put_stid(&open->op_stp->st_stid);
+	if (open->op_odstate)
+		kmem_cache_free(odstate_slab, open->op_odstate);
 }
 
 __be32
@@ -4385,10 +4497,17 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s
 	return nfserr_old_stateid;
 }
 
+static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols)
+{
+	if (ols->st_stateowner->so_is_open_owner &&
+	    !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
+		return nfserr_bad_stateid;
+	return nfs_ok;
+}
+
 static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
 {
 	struct nfs4_stid *s;
-	struct nfs4_ol_stateid *ols;
 	__be32 status = nfserr_bad_stateid;
 
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
@@ -4418,13 +4537,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
 		break;
 	case NFS4_OPEN_STID:
 	case NFS4_LOCK_STID:
-		ols = openlockstateid(s);
-		if (ols->st_stateowner->so_is_open_owner
-	    			&& !(openowner(ols->st_stateowner)->oo_flags
-						& NFS4_OO_CONFIRMED))
-			status = nfserr_bad_stateid;
-		else
-			status = nfs_ok;
+		status = nfsd4_check_openowner_confirmed(openlockstateid(s));
 		break;
 	default:
 		printk("unknown stateid type %x\n", s->sc_type);
@@ -4516,8 +4629,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
 		status = nfs4_check_fh(current_fh, stp);
 		if (status)
 			goto out;
-		if (stp->st_stateowner->so_is_open_owner
-		    && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
+		status = nfsd4_check_openowner_confirmed(stp);
+		if (status)
 			goto out;
 		status = nfs4_check_openmode(stp, flags);
 		if (status)
@@ -4852,9 +4965,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	update_stateid(&stp->st_stid.sc_stateid);
 	memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
 
-	nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
-				      stp->st_stid.sc_file);
-
 	nfsd4_close_open_stateid(stp);
 
 	/* put reference from nfs4_preprocess_seqid_op */
@@ -6488,6 +6598,7 @@ nfs4_state_shutdown_net(struct net *net)
 	list_for_each_safe(pos, next, &reaplist) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		list_del_init(&dp->dl_recall_lru);
+		put_clnt_odstate(dp->dl_clnt_odstate);
 		nfs4_put_deleg_lease(dp->dl_stid.sc_file);
 		nfs4_put_stid(&dp->dl_stid);
 	}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4f3bfeb11766..dbc4f85a5008 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -63,12 +63,12 @@ typedef struct {
 
 struct nfsd4_callback {
 	struct nfs4_client *cb_clp;
-	struct list_head cb_per_client;
 	u32 cb_minorversion;
 	struct rpc_message cb_msg;
 	struct nfsd4_callback_ops *cb_ops;
 	struct work_struct cb_work;
-	bool cb_done;
+	int cb_status;
+	bool cb_need_restart;
 };
 
 struct nfsd4_callback_ops {
@@ -126,6 +126,7 @@ struct nfs4_delegation {
 	struct list_head	dl_perfile;
 	struct list_head	dl_perclnt;
 	struct list_head	dl_recall_lru;  /* delegation recalled */
+	struct nfs4_clnt_odstate *dl_clnt_odstate;
 	u32			dl_type;
 	time_t			dl_time;
 /* For recall: */
@@ -332,7 +333,6 @@ struct nfs4_client {
 	int			cl_cb_state;
 	struct nfsd4_callback	cl_cb_null;
 	struct nfsd4_session	*cl_cb_session;
-	struct list_head	cl_callbacks; /* list of in-progress callbacks */
 
 	/* for all client information that callback code might need: */
 	spinlock_t		cl_lock;
@@ -465,6 +465,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
 }
 
 /*
+ * Per-client state indicating no. of opens and outstanding delegations
+ * on a file from a particular client.'od' stands for 'open & delegation'
+ */
+struct nfs4_clnt_odstate {
+	struct nfs4_client	*co_client;
+	struct nfs4_file	*co_file;
+	struct list_head	co_perfile;
+	atomic_t		co_odcount;
+};
+
+/*
  * nfs4_file: a file opened by some number of (open) nfs4_stateowners.
  *
  * These objects are global. nfsd keeps one instance of a nfs4_file per
@@ -485,6 +496,7 @@ struct nfs4_file {
 		struct list_head	fi_delegations;
 		struct rcu_head		fi_rcu;
 	};
+	struct list_head	fi_clnt_odstate;
 	/* One each for O_RDONLY, O_WRONLY, O_RDWR: */
 	struct file *		fi_fds[3];
 	/*
@@ -526,6 +538,7 @@ struct nfs4_ol_stateid {
 	struct list_head              st_perstateowner;
 	struct list_head              st_locks;
 	struct nfs4_stateowner      * st_stateowner;
+	struct nfs4_clnt_odstate    * st_clnt_odstate;
 	unsigned char                 st_access_bmap;
 	unsigned char                 st_deny_bmap;
 	struct nfs4_ol_stateid         * st_openstp;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index f982ae84f0cd..2f8c092be2b3 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -247,6 +247,7 @@ struct nfsd4_open {
 	struct nfs4_openowner *op_openowner; /* used during processing */
 	struct nfs4_file *op_file;          /* used during processing */
 	struct nfs4_ol_stateid *op_stp;	    /* used during processing */
+	struct nfs4_clnt_odstate *op_odstate; /* used during processing */
 	struct nfs4_acl *op_acl;
 	struct xdr_netobj op_label;
 };
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 059f37137f9a..919fd5bb14a8 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -388,7 +388,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 	nchildren = nilfs_btree_node_get_nchildren(node);
 
 	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
-		     level > NILFS_BTREE_LEVEL_MAX ||
+		     level >= NILFS_BTREE_LEVEL_MAX ||
 		     nchildren < 0 ||
 		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
 		pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a6944b25fd5b..fdf4b41d0609 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -757,6 +757,19 @@ lookup:
 	if (tmpres) {
 		spin_unlock(&dlm->spinlock);
 		spin_lock(&tmpres->spinlock);
+
+		/*
+		 * Right after dlm spinlock was released, dlm_thread could have
+		 * purged the lockres. Check if lockres got unhashed. If so
+		 * start over.
+		 */
+		if (hlist_unhashed(&tmpres->hash_node)) {
+			spin_unlock(&tmpres->spinlock);
+			dlm_lockres_put(tmpres);
+			tmpres = NULL;
+			goto lookup;
+		}
+
 		/* Wait on the thread that is mastering the resource */
 		if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
 			__dlm_wait_on_lockres(tmpres);
diff --git a/fs/splice.c b/fs/splice.c
index 476024bb6546..bfe62ae40f40 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 	long ret, bytes;
 	umode_t i_mode;
 	size_t len;
-	int i, flags;
+	int i, flags, more;
 
 	/*
 	 * We require the input being a regular file, as we don't want to
@@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 	 * Don't block on output, we have to drain the direct pipe.
 	 */
 	sd->flags &= ~SPLICE_F_NONBLOCK;
+	more = sd->flags & SPLICE_F_MORE;
 
 	while (len) {
 		size_t read_len;
@@ -1217,6 +1218,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 		sd->total_len = read_len;
 
 		/*
+		 * If more data is pending, set SPLICE_F_MORE
+		 * If this is the last data and SPLICE_F_MORE was not set
+		 * initially, clears it.
+		 */
+		if (read_len < len)
+			sd->flags |= SPLICE_F_MORE;
+		else if (!more)
+			sd->flags &= ~SPLICE_F_MORE;
+		/*
 		 * NOTE: nonblocking mode only applies to the input. We
 		 * must not do the output in nonblocking mode as then we
 		 * could get stuck data in the internal pipe: