219 files changed, 10290 insertions, 5087 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 7b623e9fc1b0..8493a3f0c4b1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -264,6 +264,7 @@ source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/erofs/Kconfig"
+source "fs/vboxsf/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/Makefile b/fs/Makefile
index 98be354fdb61..96520ba7bd64 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -133,3 +133,4 @@ obj-$(CONFIG_CEPH_FS)		+= ceph/
 obj-$(CONFIG_PSTORE)		+= pstore/
 obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
 obj-$(CONFIG_EROFS_FS)		+= erofs/
+obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 7f8a9b3137bf..dda7a9a66848 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -38,13 +38,13 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int afs_show_devname(struct seq_file *m, struct dentry *root);
 static int afs_show_options(struct seq_file *m, struct dentry *root);
 static int afs_init_fs_context(struct fs_context *fc);
-static const struct fs_parameter_description afs_fs_parameters;
+static const struct fs_parameter_spec afs_fs_parameters[];
 
 struct file_system_type afs_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "afs",
 	.init_fs_context	= afs_init_fs_context,
-	.parameters		= &afs_fs_parameters,
+	.parameters		= afs_fs_parameters,
 	.kill_sb		= afs_kill_super,
 	.fs_flags		= FS_RENAME_DOES_D_MOVE,
 };
@@ -73,28 +73,22 @@ enum afs_param {
 	Opt_source,
 };
 
-static const struct fs_parameter_spec afs_param_specs[] = {
-	fsparam_flag  ("autocell",	Opt_autocell),
-	fsparam_flag  ("dyn",		Opt_dyn),
-	fsparam_enum  ("flock",		Opt_flock),
-	fsparam_string("source",	Opt_source),
+static const struct constant_table afs_param_flock[] = {
+	{"local",	afs_flock_mode_local },
+	{"openafs",	afs_flock_mode_openafs },
+	{"strict",	afs_flock_mode_strict },
+	{"write",	afs_flock_mode_write },
 	{}
 };
 
-static const struct fs_parameter_enum afs_param_enums[] = {
-	{ Opt_flock,	"local",	afs_flock_mode_local },
-	{ Opt_flock,	"openafs",	afs_flock_mode_openafs },
-	{ Opt_flock,	"strict",	afs_flock_mode_strict },
-	{ Opt_flock,	"write",	afs_flock_mode_write },
+static const struct fs_parameter_spec afs_fs_parameters[] = {
+	fsparam_flag  ("autocell",	Opt_autocell),
+	fsparam_flag  ("dyn",		Opt_dyn),
+	fsparam_enum  ("flock",		Opt_flock, afs_param_flock),
+	fsparam_string("source",	Opt_source),
 	{}
 };
 
-static const struct fs_parameter_description afs_fs_parameters = {
-	.name		= "kAFS",
-	.specs		= afs_param_specs,
-	.enums		= afs_param_enums,
-};
-
 /*
  * initialise the filesystem
  */
@@ -323,7 +317,7 @@ static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	struct afs_fs_context *ctx = fc->fs_private;
 	int opt;
 
-	opt = fs_parse(fc, &afs_fs_parameters, param, &result);
+	opt = fs_parse(fc, afs_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
 
diff --git a/fs/aio.c b/fs/aio.c
index a9fbad2ce5e6..5f3d3d814928 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1610,6 +1610,14 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
 	return 0;
 }
 
+static void aio_poll_put_work(struct work_struct *work)
+{
+	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+
+	iocb_put(iocb);
+}
+
 static void aio_poll_complete_work(struct work_struct *work)
 {
 	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1674,6 +1682,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	list_del_init(&req->wait.entry);
 
 	if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+		struct kioctx *ctx = iocb->ki_ctx;
+
 		/*
 		 * Try to complete the iocb inline if we can. Use
 		 * irqsave/irqrestore because not all filesystems (e.g. fuse)
@@ -1683,8 +1693,14 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		list_del(&iocb->ki_list);
 		iocb->ki_res.res = mangle_poll(mask);
 		req->done = true;
-		spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
-		iocb_put(iocb);
+		if (iocb->ki_eventfd && eventfd_signal_count()) {
+			iocb = NULL;
+			INIT_WORK(&req->work, aio_poll_put_work);
+			schedule_work(&req->work);
+		}
+		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+		if (iocb)
+			iocb_put(iocb);
 	} else {
 		schedule_work(&req->work);
 	}
diff --git a/fs/attr.c b/fs/attr.c
index df28035aa23e..b4bbdbd4c8ca 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -183,18 +183,12 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
 		inode->i_uid = attr->ia_uid;
 	if (ia_valid & ATTR_GID)
 		inode->i_gid = attr->ia_gid;
-	if (ia_valid & ATTR_ATIME) {
-		inode->i_atime = timestamp_truncate(attr->ia_atime,
-						  inode);
-	}
-	if (ia_valid & ATTR_MTIME) {
-		inode->i_mtime = timestamp_truncate(attr->ia_mtime,
-						  inode);
-	}
-	if (ia_valid & ATTR_CTIME) {
-		inode->i_ctime = timestamp_truncate(attr->ia_ctime,
-						  inode);
-	}
+	if (ia_valid & ATTR_ATIME)
+		inode->i_atime = attr->ia_atime;
+	if (ia_valid & ATTR_MTIME)
+		inode->i_mtime = attr->ia_mtime;
+	if (ia_valid & ATTR_CTIME)
+		inode->i_ctime = attr->ia_ctime;
 	if (ia_valid & ATTR_MODE) {
 		umode_t mode = attr->ia_mode;
 
@@ -268,8 +262,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
 	attr->ia_ctime = now;
 	if (!(ia_valid & ATTR_ATIME_SET))
 		attr->ia_atime = now;
+	else
+		attr->ia_atime = timestamp_truncate(attr->ia_atime, inode);
 	if (!(ia_valid & ATTR_MTIME_SET))
 		attr->ia_mtime = now;
+	else
+		attr->ia_mtime = timestamp_truncate(attr->ia_mtime, inode);
+
 	if (ia_valid & ATTR_KILL_PRIV) {
 		error = security_inode_need_killpriv(dentry);
 		if (error < 0)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 14851584e245..404e050ce8ee 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1191,7 +1191,6 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
 {
 	struct btrfs_space_info *sinfo = cache->space_info;
 	u64 num_bytes;
-	u64 sinfo_used;
 	int ret = -ENOSPC;
 
 	spin_lock(&sinfo->lock);
@@ -1205,19 +1204,38 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
 
 	num_bytes = cache->length - cache->reserved - cache->pinned -
 		    cache->bytes_super - cache->used;
-	sinfo_used = btrfs_space_info_used(sinfo, true);
 
 	/*
-	 * sinfo_used + num_bytes should always <= sinfo->total_bytes.
-	 *
-	 * Here we make sure if we mark this bg RO, we still have enough
-	 * free space as buffer.
+	 * Data never overcommits, even in mixed mode, so do just the straight
+	 * check of left over space in how much we have allocated.
 	 */
-	if (sinfo_used + num_bytes <= sinfo->total_bytes) {
+	if (force) {
+		ret = 0;
+	} else if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) {
+		u64 sinfo_used = btrfs_space_info_used(sinfo, true);
+
+		/*
+		 * Here we make sure if we mark this bg RO, we still have enough
+		 * free space as buffer.
+		 */
+		if (sinfo_used + num_bytes <= sinfo->total_bytes)
+			ret = 0;
+	} else {
+		/*
+		 * We overcommit metadata, so we need to do the
+		 * btrfs_can_overcommit check here, and we need to pass in
+		 * BTRFS_RESERVE_NO_FLUSH to give ourselves the most amount of
+		 * leeway to allow us to mark this block group as read only.
+		 */
+		if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes,
+					 BTRFS_RESERVE_NO_FLUSH))
+			ret = 0;
+	}
+
+	if (!ret) {
 		sinfo->bytes_readonly += num_bytes;
 		cache->ro++;
 		list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
-		ret = 0;
 	}
 out:
 	spin_unlock(&cache->lock);
@@ -1225,9 +1243,6 @@ out:
 	if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
 		btrfs_info(cache->fs_info,
 			"unable to make block group %llu ro", cache->start);
-		btrfs_info(cache->fs_info,
-			"sinfo_used=%llu bg_num_bytes=%llu",
-			sinfo_used, num_bytes);
 		btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
 	}
 	return ret;
@@ -2225,7 +2240,7 @@ again:
 		}
 	}
 
-	ret = inc_block_group_ro(cache, !do_chunk_alloc);
+	ret = inc_block_group_ro(cache, 0);
 	if (!do_chunk_alloc)
 		goto unlock_out;
 	if (!ret)
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 24658b5a5787..f2ec1a9bae28 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -326,12 +326,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			   struct seq_list *elem)
 {
 	write_lock(&fs_info->tree_mod_log_lock);
-	spin_lock(&fs_info->tree_mod_seq_lock);
 	if (!elem->seq) {
 		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
 		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
 	}
-	spin_unlock(&fs_info->tree_mod_seq_lock);
 	write_unlock(&fs_info->tree_mod_log_lock);
 
 	return elem->seq;
@@ -351,7 +349,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 	if (!seq_putting)
 		return;
 
-	spin_lock(&fs_info->tree_mod_seq_lock);
+	write_lock(&fs_info->tree_mod_log_lock);
 	list_del(&elem->list);
 	elem->seq = 0;
 
@@ -362,19 +360,17 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 				 * blocker with lower sequence number exists, we
 				 * cannot remove anything from the log
 				 */
-				spin_unlock(&fs_info->tree_mod_seq_lock);
+				write_unlock(&fs_info->tree_mod_log_lock);
 				return;
 			}
 			min_seq = cur_elem->seq;
 		}
 	}
-	spin_unlock(&fs_info->tree_mod_seq_lock);
 
 	/*
 	 * anything that's lower than the lowest existing (read: blocked)
 	 * sequence number can be removed from the tree.
 	 */
-	write_lock(&fs_info->tree_mod_log_lock);
 	tm_root = &fs_info->tree_mod_log;
 	for (node = rb_first(tm_root); node; node = next) {
 		next = rb_next(node);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f90b82050d2d..36df977b64d9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -714,14 +714,12 @@ struct btrfs_fs_info {
 	atomic_t nr_delayed_iputs;
 	wait_queue_head_t delayed_iputs_wait;
 
-	/* this protects tree_mod_seq_list */
-	spinlock_t tree_mod_seq_lock;
 	atomic64_t tree_mod_seq;
-	struct list_head tree_mod_seq_list;
 
-	/* this protects tree_mod_log */
+	/* this protects tree_mod_log and tree_mod_seq_list */
 	rwlock_t tree_mod_log_lock;
 	struct rb_root tree_mod_log;
+	struct list_head tree_mod_seq_list;
 
 	atomic_t async_delalloc_pages;
 
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index df3bd880061d..dfdb7d4f8406 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -492,7 +492,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
 	if (head->is_data)
 		return;
 
-	spin_lock(&fs_info->tree_mod_seq_lock);
+	read_lock(&fs_info->tree_mod_log_lock);
 	if (!list_empty(&fs_info->tree_mod_seq_list)) {
 		struct seq_list *elem;
 
@@ -500,7 +500,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
 					struct seq_list, list);
 		seq = elem->seq;
 	}
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+	read_unlock(&fs_info->tree_mod_log_lock);
 
 again:
 	for (node = rb_first_cached(&head->ref_tree); node;
@@ -518,7 +518,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
 	struct seq_list *elem;
 	int ret = 0;
 
-	spin_lock(&fs_info->tree_mod_seq_lock);
+	read_lock(&fs_info->tree_mod_log_lock);
 	if (!list_empty(&fs_info->tree_mod_seq_list)) {
 		elem = list_first_entry(&fs_info->tree_mod_seq_list,
 					struct seq_list, list);
@@ -531,7 +531,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
 		}
 	}
 
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+	read_unlock(&fs_info->tree_mod_log_lock);
 	return ret;
 }
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index aea48d6ddc0c..7fa9bb79ad08 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2697,7 +2697,6 @@ int __cold open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->fs_roots_radix_lock);
 	spin_lock_init(&fs_info->delayed_iput_lock);
 	spin_lock_init(&fs_info->defrag_inodes_lock);
-	spin_lock_init(&fs_info->tree_mod_seq_lock);
 	spin_lock_init(&fs_info->super_lock);
 	spin_lock_init(&fs_info->buffer_lock);
 	spin_lock_init(&fs_info->unused_bgs_lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e2d30287e2d5..c0f202741e09 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1593,21 +1593,25 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
 	/* Find first extent with bits cleared */
 	while (1) {
 		node = __etree_search(tree, start, &next, &prev, NULL, NULL);
-		if (!node) {
+		if (!node && !next && !prev) {
+			/*
+			 * Tree is completely empty, send full range and let
+			 * caller deal with it
+			 */
+			*start_ret = 0;
+			*end_ret = -1;
+			goto out;
+		} else if (!node && !next) {
+			/*
+			 * We are past the last allocated chunk, set start at
+			 * the end of the last extent.
+			 */
+			state = rb_entry(prev, struct extent_state, rb_node);
+			*start_ret = state->end + 1;
+			*end_ret = -1;
+			goto out;
+		} else if (!node) {
 			node = next;
-			if (!node) {
-				/*
-				 * We are past the last allocated chunk,
-				 * set start at the end of the last extent. The
-				 * device alloc tree should never be empty so
-				 * prev is always set.
-				 */
-				ASSERT(prev);
-				state = rb_entry(prev, struct extent_state, rb_node);
-				*start_ret = state->end + 1;
-				*end_ret = -1;
-				goto out;
-			}
 		}
 		/*
 		 * At this point 'node' either contains 'start' or start is
@@ -3438,11 +3442,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 	ret = btrfs_writepage_cow_fixup(page, start, page_end);
 	if (ret) {
 		/* Fixup worker will requeue */
-		if (ret == -EBUSY)
-			wbc->pages_skipped++;
-		else
-			redirty_page_for_writepage(wbc, page);
-
+		redirty_page_for_writepage(wbc, page);
 		update_nr_written(wbc, nr_written);
 		unlock_page(page);
 		return 1;
@@ -4166,7 +4166,16 @@ retry:
 		 */
 		scanned = 1;
 		index = 0;
-		goto retry;
+
+		/*
+		 * If we're looping we could run into a page that is locked by a
+		 * writer and that writer could be waiting on writeback for a
+		 * page in our current bio, and thus deadlock, so flush the
+		 * write bio here.
+		 */
+		ret = flush_write_bio(epd);
+		if (!ret)
+			goto retry;
 	}
 
 	if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6d2bb58d277a..5b3ec93ff911 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2189,6 +2189,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 /* see btrfs_writepage_start_hook for details on why this is required */
 struct btrfs_writepage_fixup {
 	struct page *page;
+	struct inode *inode;
 	struct btrfs_work work;
 };
 
@@ -2202,27 +2203,71 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 	struct inode *inode;
 	u64 page_start;
 	u64 page_end;
-	int ret;
+	int ret = 0;
+	bool free_delalloc_space = true;
 
 	fixup = container_of(work, struct btrfs_writepage_fixup, work);
 	page = fixup->page;
+	inode = fixup->inode;
+	page_start = page_offset(page);
+	page_end = page_offset(page) + PAGE_SIZE - 1;
+
+	/*
+	 * This is similar to page_mkwrite, we need to reserve the space before
+	 * we take the page lock.
+	 */
+	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
+					   PAGE_SIZE);
 again:
 	lock_page(page);
+
+	/*
+	 * Before we queued this fixup, we took a reference on the page.
+	 * page->mapping may go NULL, but it shouldn't be moved to a different
+	 * address space.
+	 */
 	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
-		ClearPageChecked(page);
+		/*
+		 * Unfortunately this is a little tricky, either
+		 *
+		 * 1) We got here and our page had already been dealt with and
+		 *    we reserved our space, thus ret == 0, so we need to just
+		 *    drop our space reservation and bail.  This can happen the
+		 *    first time we come into the fixup worker, or could happen
+		 *    while waiting for the ordered extent.
+		 * 2) Our page was already dealt with, but we happened to get an
+		 *    ENOSPC above from the btrfs_delalloc_reserve_space.  In
+		 *    this case we obviously don't have anything to release, but
+		 *    because the page was already dealt with we don't want to
+		 *    mark the page with an error, so make sure we're resetting
+		 *    ret to 0.  This is why we have this check _before_ the ret
+		 *    check, because we do not want to have a surprise ENOSPC
+		 *    when the page was already properly dealt with.
+		 */
+		if (!ret) {
+			btrfs_delalloc_release_extents(BTRFS_I(inode),
+						       PAGE_SIZE);
+			btrfs_delalloc_release_space(inode, data_reserved,
+						     page_start, PAGE_SIZE,
+						     true);
+		}
+		ret = 0;
 		goto out_page;
 	}
 
-	inode = page->mapping->host;
-	page_start = page_offset(page);
-	page_end = page_offset(page) + PAGE_SIZE - 1;
+	/*
+	 * We can't mess with the page state unless it is locked, so now that
+	 * it is locked bail if we failed to make our space reservation.
+	 */
+	if (ret)
+		goto out_page;
 
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
 			 &cached_state);
 
 	/* already ordered? We're done */
 	if (PagePrivate2(page))
-		goto out;
+		goto out_reserved;
 
 	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
 					PAGE_SIZE);
@@ -2235,39 +2280,49 @@ again:
 		goto again;
 	}
 
-	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
-					   PAGE_SIZE);
-	if (ret) {
-		mapping_set_error(page->mapping, ret);
-		end_extent_writepage(page, ret, page_start, page_end);
-		ClearPageChecked(page);
-		goto out;
-	 }
-
 	ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
 					&cached_state);
-	if (ret) {
-		mapping_set_error(page->mapping, ret);
-		end_extent_writepage(page, ret, page_start, page_end);
-		ClearPageChecked(page);
+	if (ret)
 		goto out_reserved;
-	}
 
-	ClearPageChecked(page);
-	set_page_dirty(page);
+	/*
+	 * Everything went as planned, we're now the owner of a dirty page with
+	 * delayed allocation bits set and space reserved for our COW
+	 * destination.
+	 *
+	 * The page was dirty when we started, nothing should have cleaned it.
+	 */
+	BUG_ON(!PageDirty(page));
+	free_delalloc_space = false;
 out_reserved:
 	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
-	if (ret)
+	if (free_delalloc_space)
 		btrfs_delalloc_release_space(inode, data_reserved, page_start,
 					     PAGE_SIZE, true);
-out:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
 			     &cached_state);
 out_page:
+	if (ret) {
+		/*
+		 * We hit ENOSPC or other errors.  Update the mapping and page
+		 * to reflect the errors and clean the page.
+		 */
+		mapping_set_error(page->mapping, ret);
+		end_extent_writepage(page, ret, page_start, page_end);
+		clear_page_dirty_for_io(page);
+		SetPageError(page);
+	}
+	ClearPageChecked(page);
 	unlock_page(page);
 	put_page(page);
 	kfree(fixup);
 	extent_changeset_free(data_reserved);
+	/*
+	 * As a precaution, do a delayed iput in case it would be the last iput
+	 * that could need flushing space. Recursing back to fixup worker would
+	 * deadlock.
+	 */
+	btrfs_add_delayed_iput(inode);
 }
 
 /*
@@ -2291,6 +2346,13 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
 	if (TestClearPagePrivate2(page))
 		return 0;
 
+	/*
+	 * PageChecked is set below when we create a fixup worker for this page,
+	 * don't try to create another one if we're already PageChecked()
+	 *
+	 * The extent_io writepage code will redirty the page if we send back
+	 * EAGAIN.
+	 */
 	if (PageChecked(page))
 		return -EAGAIN;
 
@@ -2298,12 +2360,21 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
 	if (!fixup)
 		return -EAGAIN;
 
+	/*
+	 * We are already holding a reference to this inode from
+	 * write_cache_pages.  We need to hold it because the space reservation
+	 * takes place outside of the page lock, and we can't trust
+	 * page->mapping outside of the page lock.
+	 */
+	ihold(inode);
 	SetPageChecked(page);
 	get_page(page);
 	btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
 	fixup->page = page;
+	fixup->inode = inode;
 	btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
-	return -EBUSY;
+
+	return -EAGAIN;
 }
 
 static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 091e5bc8c7ea..a055b657cb85 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1269,7 +1269,8 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
 		 * destination of the stream.
 		 */
 		if (ino == bctx->cur_objectid &&
-		    offset >= bctx->sctx->cur_inode_next_write_offset)
+		    offset + bctx->extent_len >
+		    bctx->sctx->cur_inode_next_write_offset)
 			return 0;
 	}
 
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 537bc310a673..01297c5b2666 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -159,9 +159,9 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
 	return (global->size << 1);
 }
 
-static int can_overcommit(struct btrfs_fs_info *fs_info,
-			  struct btrfs_space_info *space_info, u64 bytes,
-			  enum btrfs_reserve_flush_enum flush)
+int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+			 struct btrfs_space_info *space_info, u64 bytes,
+			 enum btrfs_reserve_flush_enum flush)
 {
 	u64 profile;
 	u64 avail;
@@ -226,7 +226,8 @@ again:
 
 		/* Check and see if our ticket can be satisified now. */
 		if ((used + ticket->bytes <= space_info->total_bytes) ||
-		    can_overcommit(fs_info, space_info, ticket->bytes, flush)) {
+		    btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
+					 flush)) {
 			btrfs_space_info_update_bytes_may_use(fs_info,
 							      space_info,
 							      ticket->bytes);
@@ -639,13 +640,14 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
 		return to_reclaim;
 
 	to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
-	if (can_overcommit(fs_info, space_info, to_reclaim,
-			   BTRFS_RESERVE_FLUSH_ALL))
+	if (btrfs_can_overcommit(fs_info, space_info, to_reclaim,
+				 BTRFS_RESERVE_FLUSH_ALL))
 		return 0;
 
 	used = btrfs_space_info_used(space_info, true);
 
-	if (can_overcommit(fs_info, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL))
+	if (btrfs_can_overcommit(fs_info, space_info, SZ_1M,
+				 BTRFS_RESERVE_FLUSH_ALL))
 		expected = div_factor_fine(space_info->total_bytes, 95);
 	else
 		expected = div_factor_fine(space_info->total_bytes, 90);
@@ -1004,7 +1006,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
 	 */
 	if (!pending_tickets &&
 	    ((used + orig_bytes <= space_info->total_bytes) ||
-	     can_overcommit(fs_info, space_info, orig_bytes, flush))) {
+	     btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
 		btrfs_space_info_update_bytes_may_use(fs_info, space_info,
 						      orig_bytes);
 		ret = 0;
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 1a349e3f9cc1..24514cd2c6c1 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -127,6 +127,9 @@ int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
 				 enum btrfs_reserve_flush_enum flush);
 void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
 				struct btrfs_space_info *space_info);
+int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+			 struct btrfs_space_info *space_info, u64 bytes,
+			 enum btrfs_reserve_flush_enum flush);
 
 static inline void btrfs_space_info_free_bytes_may_use(
 				struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a906315efd19..0616a5434793 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2135,7 +2135,15 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	 */
 	thresh = SZ_4M;
 
-	if (!mixed && total_free_meta - thresh < block_rsv->size)
+	/*
+	 * We only want to claim there's no available space if we can no longer
+	 * allocate chunks for our metadata profile and our global reserve will
+	 * not fit in the free metadata space.  If we aren't ->full then we
+	 * still can allocate chunks and thus are fine using the currently
+	 * calculated f_bavail.
+	 */
+	if (!mixed && block_rsv->space_info->full &&
+	    total_free_meta - thresh < block_rsv->size)
 		buf->f_bavail = 0;
 
 	buf->f_type = BTRFS_SUPER_MAGIC;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index c12b91ff5f56..84fb3fa940a6 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -142,7 +142,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
 	spin_lock_init(&fs_info->qgroup_lock);
 	spin_lock_init(&fs_info->super_lock);
 	spin_lock_init(&fs_info->fs_roots_radix_lock);
-	spin_lock_init(&fs_info->tree_mod_seq_lock);
 	mutex_init(&fs_info->qgroup_ioctl_lock);
 	mutex_init(&fs_info->qgroup_rescan_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 123d9a614357..df7ce874a74b 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -441,8 +441,17 @@ static int test_find_first_clear_extent_bit(void)
 	int ret = -EINVAL;
 
 	test_msg("running find_first_clear_extent_bit test");
+
 	extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL);
 
+	/* Test correct handling of empty tree */
+	find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED);
+	if (start != 0 || end != -1) {
+		test_err(
+	"error getting a range from completely empty tree: start %llu end %llu",
+			 start, end);
+		goto out;
+	}
 	/*
 	 * Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between
 	 * 4M-32M
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 44a3ce1e4ce4..1dc97f2d6201 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -396,7 +396,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 	struct cachefiles_object *object;
 	struct cachefiles_cache *cache;
 	struct inode *inode;
-	sector_t block0, block;
+	sector_t block;
 	unsigned shift;
 	int ret;
 
@@ -412,7 +412,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 
 	inode = d_backing_inode(object->backer);
 	ASSERT(S_ISREG(inode->i_mode));
-	ASSERT(inode->i_mapping->a_ops->bmap);
 	ASSERT(inode->i_mapping->a_ops->readpages);
 
 	/* calculate the shift required to use bmap */
@@ -428,12 +427,14 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 	 *   enough for this as it doesn't indicate errors, but it's all we've
 	 *   got for the moment
 	 */
-	block0 = page->index;
-	block0 <<= shift;
+	block = page->index;
+	block <<= shift;
+
+	ret = bmap(inode, &block);
+	ASSERT(ret < 0);
 
-	block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
 	_debug("%llx -> %llx",
-	       (unsigned long long) block0,
+	       (unsigned long long) (page->index << shift),
 	       (unsigned long long) block);
 
 	if (block) {
@@ -711,7 +712,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 
 	inode = d_backing_inode(object->backer);
 	ASSERT(S_ISREG(inode->i_mode));
-	ASSERT(inode->i_mapping->a_ops->bmap);
 	ASSERT(inode->i_mapping->a_ops->readpages);
 
 	/* calculate the shift required to use bmap */
@@ -728,7 +728,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 
 	ret = space ? -ENODATA : -ENOBUFS;
 	list_for_each_entry_safe(page, _n, pages, lru) {
-		sector_t block0, block;
+		sector_t block;
 
 		/* we assume the absence or presence of the first block is a
 		 * good enough indication for the page as a whole
@@ -736,13 +736,14 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 		 *   good enough for this as it doesn't indicate errors, but
 		 *   it's all we've got for the moment
 		 */
-		block0 = page->index;
-		block0 <<= shift;
+		block = page->index;
+		block <<= shift;
+
+		ret = bmap(inode, &block);
+		ASSERT(!ret);
 
-		block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
-						      block0);
 		_debug("%llx -> %llx",
-		       (unsigned long long) block0,
+		       (unsigned long long) (page->index << shift),
 		       (unsigned long long) block);
 
 		if (block) {
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index c1da294418d1..0a0823d378db 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_CEPH_FS) += ceph.o
 ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
 	export.o caps.o snap.o xattr.o quota.o io.o \
 	mds_client.o mdsmap.o strings.o ceph_frag.o \
-	debugfs.o
+	debugfs.o util.o
 
 ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
 ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index aa55f412a6e3..26be6520d3fb 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -222,8 +222,8 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
 		err = ceph_pagelist_reserve(pagelist, len + val_size2 + 8);
 		if (err)
 			goto out_err;
-		err = ceph_pagelist_encode_string(pagelist,
-						  XATTR_NAME_POSIX_ACL_DEFAULT, len);
+		ceph_pagelist_encode_string(pagelist,
+					  XATTR_NAME_POSIX_ACL_DEFAULT, len);
 		err = posix_acl_to_xattr(&init_user_ns, default_acl,
 					 tmp_buf, val_size2);
 		if (err < 0)
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 73f24f307a4a..270b769607a2 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -67,7 +67,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc)
 		if (uniq_len && memcmp(ent->uniquifier, fscache_uniq, uniq_len))
 			continue;
 
-		errorf(fc, "ceph: fscache cookie already registered for fsid %pU, use fsc=<uniquifier> option",
+		errorfc(fc, "fscache cookie already registered for fsid %pU, use fsc=<uniquifier> option",
 		       fsid);
 		err = -EBUSY;
 		goto out_unlock;
@@ -96,7 +96,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc)
 		list_add_tail(&ent->list, &ceph_fscache_list);
 	} else {
 		kfree(ent);
-		errorf(fc, "ceph: unable to register fscache cookie for fsid %pU",
+		errorfc(fc, "unable to register fscache cookie for fsid %pU",
 		       fsid);
 		/* all other fs ignore this error */
 	}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 9d09bb53c1ab..28ae0c134700 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -908,7 +908,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
 						       ci_node);
 					if (!__cap_is_valid(cap))
 						continue;
-					__touch_cap(cap);
+					if (cap->issued & mask)
+						__touch_cap(cap);
 				}
 			}
 			return 1;
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index c281f32b54f7..fb7cabd98e7b 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -33,7 +33,7 @@ static int mdsmap_show(struct seq_file *s, void *p)
 	seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds);
 	seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout);
 	seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose);
-	for (i = 0; i < mdsmap->m_num_mds; i++) {
+	for (i = 0; i < mdsmap->possible_max_rank; i++) {
 		struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
 		int state = mdsmap->m_info[i].state;
 		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 2e4764fd1872..d0cd0aba5843 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1186,7 +1186,7 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di)
 	struct dentry *dn = di->dentry;
 	struct ceph_mds_client *mdsc;
 
-	dout("dentry_dir_lease_touch %p %p '%pd' (offset %lld)\n",
+	dout("dentry_dir_lease_touch %p %p '%pd' (offset 0x%llx)\n",
 	     di, dn, dn, di->offset);
 
 	if (!list_empty(&di->lease_list)) {
@@ -1567,7 +1567,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
 		inode = d_inode(dentry);
 	}
 
-	dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry,
+	dout("d_revalidate %p '%pd' inode %p offset 0x%llx\n", dentry,
 	     dentry, inode, ceph_dentry(dentry)->offset);
 
 	/* always trust cached snapped dentries, snapdir dentry */
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 11929d2bb594..c3b8e8e0bf17 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1974,6 +1974,9 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
 	if (ceph_test_mount_opt(src_fsc, NOCOPYFROM))
 		return -EOPNOTSUPP;
 
+	if (!src_fsc->have_copy_from2)
+		return -EOPNOTSUPP;
+
 	/*
 	 * Striped file layouts require that we copy partial objects, but the
 	 * OSD copy-from operation only supports full-object copies.  Limit
@@ -2101,8 +2104,14 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
 			CEPH_OSD_OP_FLAG_FADVISE_NOCACHE,
 			&dst_oid, &dst_oloc,
 			CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
-			CEPH_OSD_OP_FLAG_FADVISE_DONTNEED, 0);
+			CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
+			dst_ci->i_truncate_seq, dst_ci->i_truncate_size,
+			CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
 		if (err) {
+			if (err == -EOPNOTSUPP) {
+				src_fsc->have_copy_from2 = false;
+				pr_notice("OSDs don't support copy-from2; disabling copy offload\n");
+			}
 			dout("ceph_osdc_copy_from returned %d\n", err);
 			if (!ret)
 				ret = err;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index c07407586ce8..d01710a16a4a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -55,11 +55,9 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
 	inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
-	if (inode->i_state & I_NEW) {
+	if (inode->i_state & I_NEW)
 		dout("get_inode created new inode %p %llx.%llx ino %llx\n",
 		     inode, ceph_vinop(inode), (u64)inode->i_ino);
-		unlock_new_inode(inode);
-	}
 
 	dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino,
 	     vino.snap, inode);
@@ -88,6 +86,10 @@ struct inode *ceph_get_snapdir(struct inode *parent)
 	inode->i_fop = &ceph_snapdir_fops;
 	ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
 	ci->i_rbytes = 0;
+
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
+
 	return inode;
 }
 
@@ -728,8 +730,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
 static int fill_inode(struct inode *inode, struct page *locked_page,
 		      struct ceph_mds_reply_info_in *iinfo,
 		      struct ceph_mds_reply_dirfrag *dirinfo,
-		      struct ceph_mds_session *session,
-		      unsigned long ttl_from, int cap_fmode,
+		      struct ceph_mds_session *session, int cap_fmode,
 		      struct ceph_cap_reservation *caps_reservation)
 {
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
@@ -754,8 +755,11 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 	info_caps = le32_to_cpu(info->cap.caps);
 
 	/* prealloc new cap struct */
-	if (info_caps && ceph_snap(inode) == CEPH_NOSNAP)
+	if (info_caps && ceph_snap(inode) == CEPH_NOSNAP) {
 		new_cap = ceph_get_cap(mdsc, caps_reservation);
+		if (!new_cap)
+			return -ENOMEM;
+	}
 
 	/*
 	 * prealloc xattr data, if it looks like we'll need it.  only
@@ -1237,7 +1241,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
 		if (dir) {
 			err = fill_inode(dir, NULL,
 					 &rinfo->diri, rinfo->dirfrag,
-					 session, req->r_request_started, -1,
+					 session, -1,
 					 &req->r_caps_reservation);
 			if (err < 0)
 				goto done;
@@ -1302,18 +1306,22 @@ retry_lookup:
 			err = PTR_ERR(in);
 			goto done;
 		}
-		req->r_target_inode = in;
 
 		err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
-				session, req->r_request_started,
+				session,
 				(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
-				rinfo->head->result == 0) ?  req->r_fmode : -1,
+				 rinfo->head->result == 0) ?  req->r_fmode : -1,
 				&req->r_caps_reservation);
 		if (err < 0) {
 			pr_err("fill_inode badness %p %llx.%llx\n",
 				in, ceph_vinop(in));
+			if (in->i_state & I_NEW)
+				discard_new_inode(in);
 			goto done;
 		}
+		req->r_target_inode = in;
+		if (in->i_state & I_NEW)
+			unlock_new_inode(in);
 	}
 
 	/*
@@ -1493,12 +1501,18 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
 			continue;
 		}
 		rc = fill_inode(in, NULL, &rde->inode, NULL, session,
-				req->r_request_started, -1,
-				&req->r_caps_reservation);
+				-1, &req->r_caps_reservation);
 		if (rc < 0) {
 			pr_err("fill_inode badness on %p got %d\n", in, rc);
 			err = rc;
+			if (in->i_state & I_NEW) {
+				ihold(in);
+				discard_new_inode(in);
+			}
+		} else if (in->i_state & I_NEW) {
+			unlock_new_inode(in);
 		}
+
 		/* avoid calling iput_final() in mds dispatch threads */
 		ceph_async_iput(in);
 	}
@@ -1694,19 +1708,24 @@ retry_lookup:
 		}
 
 		ret = fill_inode(in, NULL, &rde->inode, NULL, session,
-				 req->r_request_started, -1,
-				 &req->r_caps_reservation);
+				 -1, &req->r_caps_reservation);
 		if (ret < 0) {
 			pr_err("fill_inode badness on %p\n", in);
 			if (d_really_is_negative(dn)) {
 				/* avoid calling iput_final() in mds
 				 * dispatch threads */
+				if (in->i_state & I_NEW) {
+					ihold(in);
+					discard_new_inode(in);
+				}
 				ceph_async_iput(in);
 			}
 			d_drop(dn);
 			err = ret;
 			goto next_item;
 		}
+		if (in->i_state & I_NEW)
+			unlock_new_inode(in);
 
 		if (d_really_is_negative(dn)) {
 			if (ceph_security_xattr_deadlock(in)) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 145d46ba25ae..bbbbddf71326 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -9,6 +9,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/ratelimit.h>
+#include <linux/bits.h>
 
 #include "super.h"
 #include "mds_client.h"
@@ -530,6 +531,7 @@ const char *ceph_session_state_name(int s)
 	case CEPH_MDS_SESSION_OPEN: return "open";
 	case CEPH_MDS_SESSION_HUNG: return "hung";
 	case CEPH_MDS_SESSION_CLOSING: return "closing";
+	case CEPH_MDS_SESSION_CLOSED: return "closed";
 	case CEPH_MDS_SESSION_RESTARTING: return "restarting";
 	case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
 	case CEPH_MDS_SESSION_REJECTED: return "rejected";
@@ -537,7 +539,7 @@ const char *ceph_session_state_name(int s)
 	}
 }
 
-static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
+struct ceph_mds_session *ceph_get_mds_session(struct ceph_mds_session *s)
 {
 	if (refcount_inc_not_zero(&s->s_ref)) {
 		dout("mdsc get_session %p %d -> %d\n", s,
@@ -568,7 +570,7 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
 {
 	if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
 		return NULL;
-	return get_session(mdsc->sessions[mds]);
+	return ceph_get_mds_session(mdsc->sessions[mds]);
 }
 
 static bool __have_session(struct ceph_mds_client *mdsc, int mds)
@@ -597,7 +599,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 {
 	struct ceph_mds_session *s;
 
-	if (mds >= mdsc->mdsmap->m_num_mds)
+	if (mds >= mdsc->mdsmap->possible_max_rank)
 		return ERR_PTR(-EINVAL);
 
 	s = kzalloc(sizeof(*s), GFP_NOFS);
@@ -674,7 +676,6 @@ static void __unregister_session(struct ceph_mds_client *mdsc,
 	dout("__unregister_session mds%d %p\n", s->s_mds, s);
 	BUG_ON(mdsc->sessions[s->s_mds] != s);
 	mdsc->sessions[s->s_mds] = NULL;
-	s->s_state = 0;
 	ceph_con_close(&s->s_con);
 	ceph_put_mds_session(s);
 	atomic_dec(&mdsc->num_sessions);
@@ -878,7 +879,8 @@ static struct inode *get_nonsnap_parent(struct dentry *dentry)
  * Called under mdsc->mutex.
  */
 static int __choose_mds(struct ceph_mds_client *mdsc,
-			struct ceph_mds_request *req)
+			struct ceph_mds_request *req,
+			bool *random)
 {
 	struct inode *inode;
 	struct ceph_inode_info *ci;
@@ -888,6 +890,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
 	u32 hash = req->r_direct_hash;
 	bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
 
+	if (random)
+		*random = false;
+
 	/*
 	 * is there a specific mds we should try?  ignore hint if we have
 	 * no session and the mds is not up (active or recovering).
@@ -895,7 +900,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
 	if (req->r_resend_mds >= 0 &&
 	    (__have_session(mdsc, req->r_resend_mds) ||
 	     ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) {
-		dout("choose_mds using resend_mds mds%d\n",
+		dout("%s using resend_mds mds%d\n", __func__,
 		     req->r_resend_mds);
 		return req->r_resend_mds;
 	}
@@ -913,7 +918,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
 			rcu_read_lock();
 			inode = get_nonsnap_parent(req->r_dentry);
 			rcu_read_unlock();
-			dout("__choose_mds using snapdir's parent %p\n", inode);
+			dout("%s using snapdir's parent %p\n", __func__, inode);
 		}
 	} else if (req->r_dentry) {
 		/* ignore race with rename; old or new d_parent is okay */
@@ -933,7 +938,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
 			/* direct snapped/virtual snapdir requests
 			 * based on parent dir inode */
 			inode = get_nonsnap_parent(parent);
-			dout("__choose_mds using nonsnap parent %p\n", inode);
+			dout("%s using nonsnap parent %p\n", __func__, inode);
 		} else {
 			/* dentry target */
 			inode = d_inode(req->r_dentry);
@@ -949,8 +954,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
 		rcu_read_unlock();
 	}
 
-	dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
-	     (int)hash, mode);
+	dout("%s %p is_hash=%d (0x%x) mode %d\n", __func__, inode, (int)is_hash,
+	     hash, mode);
 	if (!inode)
 		goto random;
 	ci = ceph_inode(inode);
@@ -968,30 +973,33 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
 				get_random_bytes(&r, 1);
 				r %= frag.ndist;
 				mds = frag.dist[r];
-				dout("choose_mds %p %llx.%llx "
-				     "frag %u mds%d (%d/%d)\n",
-				     inode, ceph_vinop(inode),
-				     frag.frag, mds,
-				     (int)r, frag.ndist);
+				dout("%s %p %llx.%llx frag %u mds%d (%d/%d)\n",
+				     __func__, inode, ceph_vinop(inode),
+				     frag.frag, mds, (int)r, frag.ndist);
 				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
-				    CEPH_MDS_STATE_ACTIVE)
+				    CEPH_MDS_STATE_ACTIVE &&
+				    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
 					goto out;
 			}
 
 			/* since this file/dir wasn't known to be
 			 * replicated, then we want to look for the
 			 * authoritative mds. */
-			mode = USE_AUTH_MDS;
 			if (frag.mds >= 0) {
 				/* choose auth mds */
 				mds = frag.mds;
-				dout("choose_mds %p %llx.%llx "
-				     "frag %u mds%d (auth)\n",
-				     inode, ceph_vinop(inode), frag.frag, mds);
+				dout("%s %p %llx.%llx frag %u mds%d (auth)\n",
+				     __func__, inode, ceph_vinop(inode),
+				     frag.frag, mds);
 				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
-				    CEPH_MDS_STATE_ACTIVE)
-					goto out;
+				    CEPH_MDS_STATE_ACTIVE) {
+					if (mode == USE_ANY_MDS &&
+					    !ceph_mdsmap_is_laggy(mdsc->mdsmap,
+								  mds))
+						goto out;
+				}
 			}
+			mode = USE_AUTH_MDS;
 		}
 	}
 
@@ -1007,7 +1015,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
 		goto random;
 	}
 	mds = cap->session->s_mds;
-	dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
+	dout("%s %p %llx.%llx mds%d (%scap %p)\n", __func__,
 	     inode, ceph_vinop(inode), mds,
 	     cap == ci->i_auth_cap ? "auth " : "", cap);
 	spin_unlock(&ci->i_ceph_lock);
@@ -1018,8 +1026,11 @@ out:
 	return mds;
 
 random:
+	if (random)
+		*random = true;
+
 	mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap);
-	dout("choose_mds chose random mds%d\n", mds);
+	dout("%s chose random mds%d\n", __func__, mds);
 	return mds;
 }
 
@@ -1045,20 +1056,21 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq)
 	return msg;
 }
 
+static const unsigned char feature_bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED;
+#define FEATURE_BYTES(c) (DIV_ROUND_UP((size_t)feature_bits[c - 1] + 1, 64) * 8)
 static void encode_supported_features(void **p, void *end)
 {
-	static const unsigned char bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED;
-	static const size_t count = ARRAY_SIZE(bits);
+	static const size_t count = ARRAY_SIZE(feature_bits);
 
 	if (count > 0) {
 		size_t i;
-		size_t size = ((size_t)bits[count - 1] + 64) / 64 * 8;
+		size_t size = FEATURE_BYTES(count);
 
 		BUG_ON(*p + 4 + size > end);
 		ceph_encode_32(p, size);
 		memset(*p, 0, size);
 		for (i = 0; i < count; i++)
-			((unsigned char*)(*p))[i / 8] |= 1 << (bits[i] % 8);
+			((unsigned char*)(*p))[i / 8] |= BIT(feature_bits[i] % 8);
 		*p += size;
 	} else {
 		BUG_ON(*p + 4 > end);
@@ -1079,6 +1091,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	int metadata_key_count = 0;
 	struct ceph_options *opt = mdsc->fsc->client->options;
 	struct ceph_mount_options *fsopt = mdsc->fsc->mount_options;
+	size_t size, count;
 	void *p, *end;
 
 	const char* metadata[][2] = {
@@ -1096,8 +1109,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 			strlen(metadata[i][1]);
 		metadata_key_count++;
 	}
+
 	/* supported feature */
-	extra_bytes += 4 + 8;
+	size = 0;
+	count = ARRAY_SIZE(feature_bits);
+	if (count > 0)
+		size = FEATURE_BYTES(count);
+	extra_bytes += 4 + size;
 
 	/* Allocate the message */
 	msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
@@ -1117,7 +1135,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	 * Serialize client metadata into waiting buffer space, using
 	 * the format that userspace expects for map<string, string>
 	 *
-	 * ClientSession messages with metadata are v2
+	 * ClientSession messages with metadata are v3
 	 */
 	msg->hdr.version = cpu_to_le16(3);
 	msg->hdr.compat_version = cpu_to_le16(1);
@@ -1219,7 +1237,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
 	struct ceph_mds_session *ts;
 	int i, mds = session->s_mds;
 
-	if (mds >= mdsc->mdsmap->m_num_mds)
+	if (mds >= mdsc->mdsmap->possible_max_rank)
 		return;
 
 	mi = &mdsc->mdsmap->m_info[mds];
@@ -1967,7 +1985,7 @@ void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
 	if (mdsc->stopping)
 		return;
 
-	get_session(session);
+	ceph_get_mds_session(session);
 	if (queue_work(mdsc->fsc->cap_wq,
 		       &session->s_cap_release_work)) {
 		dout("cap release work queued\n");
@@ -2072,7 +2090,6 @@ struct ceph_mds_request *
 ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
 {
 	struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
-	struct timespec64 ts;
 
 	if (!req)
 		return ERR_PTR(-ENOMEM);
@@ -2091,8 +2108,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
 	init_completion(&req->r_safe_completion);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
 
-	ktime_get_coarse_real_ts64(&ts);
-	req->r_stamp = timespec64_trunc(ts, mdsc->fsc->sb->s_time_gran);
+	ktime_get_coarse_real_ts64(&req->r_stamp);
 
 	req->r_op = op;
 	req->r_direct_mode = mode;
@@ -2518,6 +2534,26 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 }
 
 /*
+ * called under mdsc->mutex
+ */
+static int __send_request(struct ceph_mds_client *mdsc,
+			  struct ceph_mds_session *session,
+			  struct ceph_mds_request *req,
+			  bool drop_cap_releases)
+{
+	int err;
+
+	err = __prepare_send_request(mdsc, req, session->s_mds,
+				     drop_cap_releases);
+	if (!err) {
+		ceph_msg_get(req->r_request);
+		ceph_con_send(&session->s_con, req->r_request);
+	}
+
+	return err;
+}
+
+/*
  * send request, or put it on the appropriate wait list.
  */
 static void __do_request(struct ceph_mds_client *mdsc,
@@ -2526,6 +2562,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
 	struct ceph_mds_session *session = NULL;
 	int mds = -1;
 	int err = 0;
+	bool random;
 
 	if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
 		if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
@@ -2558,15 +2595,14 @@ static void __do_request(struct ceph_mds_client *mdsc,
 		if (!(mdsc->fsc->mount_options->flags &
 		      CEPH_MOUNT_OPT_MOUNTWAIT) &&
 		    !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
-			err = -ENOENT;
-			pr_info("probably no mds server is up\n");
+			err = -EHOSTUNREACH;
 			goto finish;
 		}
 	}
 
 	put_request_session(req);
 
-	mds = __choose_mds(mdsc, req);
+	mds = __choose_mds(mdsc, req, &random);
 	if (mds < 0 ||
 	    ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
 		dout("do_request no mds or not active, waiting for map\n");
@@ -2583,7 +2619,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
 			goto finish;
 		}
 	}
-	req->r_session = get_session(session);
+	req->r_session = ceph_get_mds_session(session);
 
 	dout("do_request mds%d session %p state %s\n", mds, session,
 	     ceph_session_state_name(session->s_state));
@@ -2594,8 +2630,12 @@ static void __do_request(struct ceph_mds_client *mdsc,
 			goto out_session;
 		}
 		if (session->s_state == CEPH_MDS_SESSION_NEW ||
-		    session->s_state == CEPH_MDS_SESSION_CLOSING)
+		    session->s_state == CEPH_MDS_SESSION_CLOSING) {
 			__open_session(mdsc, session);
+			/* retry the same mds later */
+			if (random)
+				req->r_resend_mds = mds;
+		}
 		list_add(&req->r_wait, &session->s_waiting);
 		goto out_session;
 	}
@@ -2606,11 +2646,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
 	if (req->r_request_started == 0)   /* note request start time */
 		req->r_request_started = jiffies;
 
-	err = __prepare_send_request(mdsc, req, mds, false);
-	if (!err) {
-		ceph_msg_get(req->r_request);
-		ceph_con_send(&session->s_con, req->r_request);
-	}
+	err = __send_request(mdsc, session, req, false);
 
 out_session:
 	ceph_put_mds_session(session);
@@ -2863,7 +2899,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 			mutex_unlock(&mdsc->mutex);
 			goto out;
 		} else  {
-			int mds = __choose_mds(mdsc, req);
+			int mds = __choose_mds(mdsc, req, NULL);
 			if (mds >= 0 && mds != req->r_session->s_mds) {
 				dout("but auth changed, so resending\n");
 				__do_request(mdsc, req);
@@ -2879,6 +2915,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 		set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags);
 		__unregister_request(mdsc, req);
 
+		/* last request during umount? */
+		if (mdsc->stopping && !__get_oldest_req(mdsc))
+			complete_all(&mdsc->safe_umount_waiters);
+
 		if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
 			/*
 			 * We already handled the unsafe response, now do the
@@ -2889,9 +2929,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 			 */
 			dout("got safe reply %llu, mds%d\n", tid, mds);
 
-			/* last unsafe request during umount? */
-			if (mdsc->stopping && !__get_oldest_req(mdsc))
-				complete_all(&mdsc->safe_umount_waiters);
 			mutex_unlock(&mdsc->mutex);
 			goto out;
 		}
@@ -3106,7 +3143,7 @@ static void handle_session(struct ceph_mds_session *session,
 
 	mutex_lock(&mdsc->mutex);
 	if (op == CEPH_SESSION_CLOSE) {
-		get_session(session);
+		ceph_get_mds_session(session);
 		__unregister_session(mdsc, session);
 	}
 	/* FIXME: this ttl calculation is generous */
@@ -3144,6 +3181,7 @@ static void handle_session(struct ceph_mds_session *session,
 	case CEPH_SESSION_CLOSE:
 		if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
 			pr_info("mds%d reconnect denied\n", session->s_mds);
+		session->s_state = CEPH_MDS_SESSION_CLOSED;
 		cleanup_session_requests(mdsc, session);
 		remove_session_caps(session);
 		wake = 2; /* for good measure */
@@ -3211,7 +3249,6 @@ bad:
 	return;
 }
 
-
 /*
  * called under session->mutex.
  */
@@ -3220,18 +3257,12 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 {
 	struct ceph_mds_request *req, *nreq;
 	struct rb_node *p;
-	int err;
 
 	dout("replay_unsafe_requests mds%d\n", session->s_mds);
 
 	mutex_lock(&mdsc->mutex);
-	list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
-		err = __prepare_send_request(mdsc, req, session->s_mds, true);
-		if (!err) {
-			ceph_msg_get(req->r_request);
-			ceph_con_send(&session->s_con, req->r_request);
-		}
-	}
+	list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item)
+		__send_request(mdsc, session, req, true);
 
 	/*
 	 * also re-send old requests when MDS enters reconnect stage. So that MDS
@@ -3246,14 +3277,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 		if (req->r_attempts == 0)
 			continue; /* only old requests */
 		if (req->r_session &&
-		    req->r_session->s_mds == session->s_mds) {
-			err = __prepare_send_request(mdsc, req,
-						     session->s_mds, true);
-			if (!err) {
-				ceph_msg_get(req->r_request);
-				ceph_con_send(&session->s_con, req->r_request);
-			}
-		}
+		    req->r_session->s_mds == session->s_mds)
+			__send_request(mdsc, session, req, true);
 	}
 	mutex_unlock(&mdsc->mutex);
 }
@@ -3764,7 +3789,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 	dout("check_new_map new %u old %u\n",
 	     newmap->m_epoch, oldmap->m_epoch);
 
-	for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) {
+	for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) {
 		if (!mdsc->sessions[i])
 			continue;
 		s = mdsc->sessions[i];
@@ -3778,9 +3803,9 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 		     ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
 		     ceph_session_state_name(s->s_state));
 
-		if (i >= newmap->m_num_mds) {
+		if (i >= newmap->possible_max_rank) {
 			/* force close session for stopped mds */
-			get_session(s);
+			ceph_get_mds_session(s);
 			__unregister_session(mdsc, s);
 			__wake_requests(mdsc, &s->s_waiting);
 			mutex_unlock(&mdsc->mutex);
@@ -3835,7 +3860,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 		}
 	}
 
-	for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) {
+	for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) {
 		s = mdsc->sessions[i];
 		if (!s)
 			continue;
@@ -4381,7 +4406,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
 	mutex_lock(&mdsc->mutex);
 	for (i = 0; i < mdsc->max_sessions; i++) {
 		if (mdsc->sessions[i]) {
-			session = get_session(mdsc->sessions[i]);
+			session = ceph_get_mds_session(mdsc->sessions[i]);
 			__unregister_session(mdsc, session);
 			mutex_unlock(&mdsc->mutex);
 			mutex_lock(&session->s_mutex);
@@ -4609,11 +4634,8 @@ static struct ceph_connection *con_get(struct ceph_connection *con)
 {
 	struct ceph_mds_session *s = con->private;
 
-	if (get_session(s)) {
-		dout("mdsc con_get %p ok (%d)\n", s, refcount_read(&s->s_ref));
+	if (ceph_get_mds_session(s))
 		return con;
-	}
-	dout("mdsc con_get %p FAIL\n", s);
 	return NULL;
 }
 
@@ -4621,7 +4643,6 @@ static void con_put(struct ceph_connection *con)
 {
 	struct ceph_mds_session *s = con->private;
 
-	dout("mdsc con_put %p (%d)\n", s, refcount_read(&s->s_ref) - 1);
 	ceph_put_mds_session(s);
 }
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 14c7e8c49970..27a7446e10d3 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -17,22 +17,31 @@
 #include <linux/ceph/auth.h>
 
 /* The first 8 bits are reserved for old ceph releases */
-#define CEPHFS_FEATURE_MIMIC		8
-#define CEPHFS_FEATURE_REPLY_ENCODING	9
-#define CEPHFS_FEATURE_RECLAIM_CLIENT	10
-#define CEPHFS_FEATURE_LAZY_CAP_WANTED	11
-#define CEPHFS_FEATURE_MULTI_RECONNECT  12
+enum ceph_feature_type {
+	CEPHFS_FEATURE_MIMIC = 8,
+	CEPHFS_FEATURE_REPLY_ENCODING,
+	CEPHFS_FEATURE_RECLAIM_CLIENT,
+	CEPHFS_FEATURE_LAZY_CAP_WANTED,
+	CEPHFS_FEATURE_MULTI_RECONNECT,
+
+	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MULTI_RECONNECT,
+};
 
-#define CEPHFS_FEATURES_CLIENT_SUPPORTED { 	\
+/*
+ * This will always have the highest feature bit value
+ * as the last element of the array.
+ */
+#define CEPHFS_FEATURES_CLIENT_SUPPORTED {	\
 	0, 1, 2, 3, 4, 5, 6, 7,			\
 	CEPHFS_FEATURE_MIMIC,			\
 	CEPHFS_FEATURE_REPLY_ENCODING,		\
 	CEPHFS_FEATURE_LAZY_CAP_WANTED,		\
 	CEPHFS_FEATURE_MULTI_RECONNECT,		\
+						\
+	CEPHFS_FEATURE_MAX,			\
 }
 #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
 
-
 /*
  * Some lock dependencies:
  *
@@ -151,7 +160,8 @@ enum {
 	CEPH_MDS_SESSION_RESTARTING = 5,
 	CEPH_MDS_SESSION_RECONNECTING = 6,
 	CEPH_MDS_SESSION_CLOSING = 7,
-	CEPH_MDS_SESSION_REJECTED = 8,
+	CEPH_MDS_SESSION_CLOSED = 8,
+	CEPH_MDS_SESSION_REJECTED = 9,
 };
 
 struct ceph_mds_session {
@@ -174,6 +184,7 @@ struct ceph_mds_session {
 
 	/* protected by s_cap_lock */
 	spinlock_t        s_cap_lock;
+	refcount_t        s_ref;
 	struct list_head  s_caps;     /* all caps issued by this session */
 	struct ceph_cap  *s_cap_iterator;
 	int               s_nr_caps;
@@ -188,7 +199,6 @@ struct ceph_mds_session {
 	unsigned long     s_renew_requested; /* last time we sent a renew req */
 	u64               s_renew_seq;
 
-	refcount_t        s_ref;
 	struct list_head  s_waiting;  /* waiting requests */
 	struct list_head  s_unsafe;   /* unsafe requests */
 };
@@ -224,6 +234,7 @@ struct ceph_mds_request {
 	struct rb_node r_node;
 	struct ceph_mds_client *r_mdsc;
 
+	struct kref       r_kref;
 	int r_op;                    /* mds op code */
 
 	/* operation on what? */
@@ -294,7 +305,6 @@ struct ceph_mds_request {
 	int               r_resend_mds; /* mds to resend to next, if any*/
 	u32               r_sent_on_mseq; /* cap mseq request was sent at*/
 
-	struct kref       r_kref;
 	struct list_head  r_wait;
 	struct completion r_completion;
 	struct completion r_safe_completion;
@@ -451,15 +461,10 @@ extern const char *ceph_mds_op_name(int op);
 extern struct ceph_mds_session *
 __ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
 
-static inline struct ceph_mds_session *
-ceph_get_mds_session(struct ceph_mds_session *s)
-{
-	refcount_inc(&s->s_ref);
-	return s;
-}
-
 extern const char *ceph_session_state_name(int s);
 
+extern struct ceph_mds_session *
+ceph_get_mds_session(struct ceph_mds_session *s);
 extern void ceph_put_mds_session(struct ceph_mds_session *s);
 
 extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 471bac335fae..889627817e52 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -13,30 +13,25 @@
 
 #include "super.h"
 
+#define CEPH_MDS_IS_READY(i, ignore_laggy) \
+	(m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy)
 
-/*
- * choose a random mds that is "up" (i.e. has a state > 0), or -1.
- */
-int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
+static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
 {
 	int n = 0;
 	int i, j;
 
-	/* special case for one mds */
-	if (1 == m->m_num_mds && m->m_info[0].state > 0)
-		return 0;
-
 	/* count */
-	for (i = 0; i < m->m_num_mds; i++)
-		if (m->m_info[i].state > 0)
+	for (i = 0; i < m->possible_max_rank; i++)
+		if (CEPH_MDS_IS_READY(i, ignore_laggy))
 			n++;
 	if (n == 0)
 		return -1;
 
 	/* pick */
 	n = prandom_u32() % n;
-	for (j = 0, i = 0; i < m->m_num_mds; i++) {
-		if (m->m_info[i].state > 0)
+	for (j = 0, i = 0; i < m->possible_max_rank; i++) {
+		if (CEPH_MDS_IS_READY(i, ignore_laggy))
 			j++;
 		if (j > n)
 			break;
@@ -45,6 +40,20 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 	return i;
 }
 
+/*
+ * choose a random mds that is "up" (i.e. has a state > 0), or -1.
+ */
+int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
+{
+	int mds;
+
+	mds = __mdsmap_get_random_mds(m, false);
+	if (mds == m->possible_max_rank || mds == -1)
+		mds = __mdsmap_get_random_mds(m, true);
+
+	return mds == m->possible_max_rank ? -1 : mds;
+}
+
 #define __decode_and_drop_type(p, end, type, bad)		\
 	do {							\
 		if (*p + sizeof(type) > end)			\
@@ -138,14 +147,29 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	m->m_session_autoclose = ceph_decode_32(p);
 	m->m_max_file_size = ceph_decode_64(p);
 	m->m_max_mds = ceph_decode_32(p);
-	m->m_num_mds = m->m_max_mds;
 
-	m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
+	/*
+	 * pick out the active nodes as the m_num_active_mds, the
+	 * m_num_active_mds maybe larger than m_max_mds when decreasing
+	 * the max_mds in cluster side, in other case it should less
+	 * than or equal to m_max_mds.
+	 */
+	m->m_num_active_mds = n = ceph_decode_32(p);
+
+	/*
+	 * the possible max rank, it maybe larger than the m_num_active_mds,
+	 * for example if the mds_max == 2 in the cluster, when the MDS(0)
+	 * was laggy and being replaced by a new MDS, we will temporarily
+	 * receive a new mds map with n_num_mds == 1 and the active MDS(1),
+	 * and the mds rank >= m_num_active_mds.
+	 */
+	m->possible_max_rank = max(m->m_num_active_mds, m->m_max_mds);
+
+	m->m_info = kcalloc(m->possible_max_rank, sizeof(*m->m_info), GFP_NOFS);
 	if (!m->m_info)
 		goto nomem;
 
 	/* pick out active nodes from mds_info (state > 0) */
-	n = ceph_decode_32(p);
 	for (i = 0; i < n; i++) {
 		u64 global_id;
 		u32 namelen;
@@ -215,18 +239,15 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 		     ceph_mds_state_name(state),
 		     laggy ? "(laggy)" : "");
 
-		if (mds < 0 || state <= 0)
+		if (mds < 0 || mds >= m->possible_max_rank) {
+			pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds);
 			continue;
+		}
 
-		if (mds >= m->m_num_mds) {
-			int new_num = max(mds + 1, m->m_num_mds * 2);
-			void *new_m_info = krealloc(m->m_info,
-						new_num * sizeof(*m->m_info),
-						GFP_NOFS | __GFP_ZERO);
-			if (!new_m_info)
-				goto nomem;
-			m->m_info = new_m_info;
-			m->m_num_mds = new_num;
+		if (state <= 0) {
+			pr_warn("mdsmap_decode got incorrect state(%s)\n",
+				ceph_mds_state_name(state));
+			continue;
 		}
 
 		info = &m->m_info[mds];
@@ -247,14 +268,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 			info->export_targets = NULL;
 		}
 	}
-	if (m->m_num_mds > m->m_max_mds) {
-		/* find max up mds */
-		for (i = m->m_num_mds; i >= m->m_max_mds; i--) {
-			if (i == 0 || m->m_info[i-1].state > 0)
-				break;
-		}
-		m->m_num_mds = i;
-	}
 
 	/* pg_pools */
 	ceph_decode_32_safe(p, end, n, bad);
@@ -296,14 +309,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 
 		for (i = 0; i < n; i++) {
 			s32 mds = ceph_decode_32(p);
-			if (mds >= 0 && mds < m->m_num_mds) {
+			if (mds >= 0 && mds < m->possible_max_rank) {
 				if (m->m_info[mds].laggy)
 					num_laggy++;
 			}
 		}
 		m->m_num_laggy = num_laggy;
 
-		if (n > m->m_num_mds) {
+		if (n > m->possible_max_rank) {
 			void *new_m_info = krealloc(m->m_info,
 						    n * sizeof(*m->m_info),
 						    GFP_NOFS | __GFP_ZERO);
@@ -311,7 +324,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 				goto nomem;
 			m->m_info = new_m_info;
 		}
-		m->m_num_mds = n;
+		m->possible_max_rank = n;
 	}
 
 	/* inc */
@@ -382,7 +395,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
 {
 	int i;
 
-	for (i = 0; i < m->m_num_mds; i++)
+	for (i = 0; i < m->possible_max_rank; i++)
 		kfree(m->m_info[i].export_targets);
 	kfree(m->m_info);
 	kfree(m->m_data_pg_pools);
@@ -396,9 +409,9 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
 		return false;
 	if (m->m_damaged)
 		return false;
-	if (m->m_num_laggy > 0)
+	if (m->m_num_laggy == m->m_num_active_mds)
 		return false;
-	for (i = 0; i < m->m_num_mds; i++) {
+	for (i = 0; i < m->possible_max_rank; i++) {
 		if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
 			nr_active++;
 	}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 29a795f975df..1d9f083b8a11 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -107,7 +107,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-
 static int ceph_sync_fs(struct super_block *sb, int wait)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
@@ -163,13 +162,13 @@ enum ceph_recover_session_mode {
 	ceph_recover_session_clean
 };
 
-static const struct fs_parameter_enum ceph_mount_param_enums[] = {
-	{ Opt_recover_session,	"no",		ceph_recover_session_no },
-	{ Opt_recover_session,	"clean",	ceph_recover_session_clean },
+static const struct constant_table ceph_param_recover[] = {
+	{ "no",		ceph_recover_session_no },
+	{ "clean",	ceph_recover_session_clean },
 	{}
 };
 
-static const struct fs_parameter_spec ceph_mount_param_specs[] = {
+static const struct fs_parameter_spec ceph_mount_parameters[] = {
 	fsparam_flag_no ("acl",				Opt_acl),
 	fsparam_flag_no ("asyncreaddir",		Opt_asyncreaddir),
 	fsparam_s32	("caps_max",			Opt_caps_max),
@@ -179,8 +178,8 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = {
 	fsparam_flag_no ("copyfrom",			Opt_copyfrom),
 	fsparam_flag_no ("dcache",			Opt_dcache),
 	fsparam_flag_no ("dirstat",			Opt_dirstat),
-	__fsparam	(fs_param_is_string, "fsc",	Opt_fscache,
-			 fs_param_neg_with_no | fs_param_v_optional),
+	fsparam_flag_no	("fsc",				Opt_fscache), // fsc|nofsc
+	fsparam_string	("fsc",				Opt_fscache), // fsc=...
 	fsparam_flag_no ("ino32",			Opt_ino32),
 	fsparam_string	("mds_namespace",		Opt_mds_namespace),
 	fsparam_flag_no ("poolperm",			Opt_poolperm),
@@ -189,7 +188,7 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = {
 	fsparam_flag_no ("rbytes",			Opt_rbytes),
 	fsparam_u32	("readdir_max_bytes",		Opt_readdir_max_bytes),
 	fsparam_u32	("readdir_max_entries",		Opt_readdir_max_entries),
-	fsparam_enum	("recover_session",		Opt_recover_session),
+	fsparam_enum	("recover_session",		Opt_recover_session, ceph_param_recover),
 	fsparam_flag_no ("require_active_mds",		Opt_require_active_mds),
 	fsparam_u32	("rsize",			Opt_rsize),
 	fsparam_string	("snapdirname",			Opt_snapdirname),
@@ -198,12 +197,6 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = {
 	{}
 };
 
-static const struct fs_parameter_description ceph_mount_parameters = {
-	.name           = "ceph",
-	.specs          = ceph_mount_param_specs,
-	.enums		= ceph_mount_param_enums,
-};
-
 struct ceph_parse_opts_ctx {
 	struct ceph_options		*copts;
 	struct ceph_mount_options	*opts;
@@ -211,7 +204,6 @@ struct ceph_parse_opts_ctx {
 
 /*
  * Parse the source parameter.  Distinguish the server list from the path.
- * Internally we do not include the leading '/' in the path.
  *
  * The source will look like:
  *     <server_spec>[,<server_spec>...]:[<path>]
@@ -228,30 +220,33 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
 
 	dout("%s '%s'\n", __func__, dev_name);
 	if (!dev_name || !*dev_name)
-		return invalf(fc, "ceph: Empty source");
+		return invalfc(fc, "Empty source");
 
 	dev_name_end = strchr(dev_name, '/');
 	if (dev_name_end) {
-		if (strlen(dev_name_end) > 1) {
-			kfree(fsopt->server_path);
-			fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
-			if (!fsopt->server_path)
-				return -ENOMEM;
-		}
+		kfree(fsopt->server_path);
+
+		/*
+		 * The server_path will include the whole chars from userland
+		 * including the leading '/'.
+		 */
+		fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
+		if (!fsopt->server_path)
+			return -ENOMEM;
 	} else {
 		dev_name_end = dev_name + strlen(dev_name);
 	}
 
 	dev_name_end--;		/* back up to ':' separator */
 	if (dev_name_end < dev_name || *dev_name_end != ':')
-		return invalf(fc, "ceph: No path or : separator in source");
+		return invalfc(fc, "No path or : separator in source");
 
 	dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
 	if (fsopt->server_path)
 		dout("server path '%s'\n", fsopt->server_path);
 
 	ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name,
-				 pctx->copts, fc);
+				 pctx->copts, fc->log.log);
 	if (ret)
 		return ret;
 
@@ -269,11 +264,11 @@ static int ceph_parse_mount_param(struct fs_context *fc,
 	unsigned int mode;
 	int token, ret;
 
-	ret = ceph_parse_param(param, pctx->copts, fc);
+	ret = ceph_parse_param(param, pctx->copts, fc->log.log);
 	if (ret != -ENOPARAM)
 		return ret;
 
-	token = fs_parse(fc, &ceph_mount_parameters, param, &result);
+	token = fs_parse(fc, ceph_mount_parameters, param, &result);
 	dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
 	if (token < 0)
 		return token;
@@ -300,7 +295,7 @@ static int ceph_parse_mount_param(struct fs_context *fc,
 		break;
 	case Opt_source:
 		if (fc->source)
-			return invalf(fc, "ceph: Multiple sources specified");
+			return invalfc(fc, "Multiple sources specified");
 		return ceph_parse_source(param, fc);
 	case Opt_wsize:
 		if (result.uint_32 < PAGE_SIZE ||
@@ -391,7 +386,7 @@ static int ceph_parse_mount_param(struct fs_context *fc,
 		}
 		break;
 #else
-		return invalf(fc, "ceph: fscache support is disabled");
+		return invalfc(fc, "fscache support is disabled");
 #endif
 	case Opt_poolperm:
 		if (!result.negated)
@@ -422,7 +417,7 @@ static int ceph_parse_mount_param(struct fs_context *fc,
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 			fc->sb_flags |= SB_POSIXACL;
 #else
-			return invalf(fc, "ceph: POSIX ACL support is disabled");
+			return invalfc(fc, "POSIX ACL support is disabled");
 #endif
 		} else {
 			fc->sb_flags &= ~SB_POSIXACL;
@@ -434,7 +429,7 @@ static int ceph_parse_mount_param(struct fs_context *fc,
 	return 0;
 
 out_of_range:
-	return invalf(fc, "ceph: %s out of range", param->key);
+	return invalfc(fc, "%s out of range", param->key);
 }
 
 static void destroy_mount_options(struct ceph_mount_options *args)
@@ -461,6 +456,73 @@ static int strcmp_null(const char *s1, const char *s2)
 	return strcmp(s1, s2);
 }
 
+/**
+ * path_remove_extra_slash - Remove the extra slashes in the server path
+ * @server_path: the server path and could be NULL
+ *
+ * Return NULL if the path is NULL or only consists of "/", or a string
+ * without any extra slashes including the leading slash(es) and the
+ * slash(es) at the end of the server path, such as:
+ * "//dir1////dir2///" --> "dir1/dir2"
+ */
+static char *path_remove_extra_slash(const char *server_path)
+{
+	const char *path = server_path;
+	const char *cur, *end;
+	char *buf, *p;
+	int len;
+
+	/* if the server path is omitted */
+	if (!path)
+		return NULL;
+
+	/* remove all the leading slashes */
+	while (*path == '/')
+		path++;
+
+	/* if the server path only consists of slashes */
+	if (*path == '\0')
+		return NULL;
+
+	len = strlen(path);
+
+	buf = kmalloc(len + 1, GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	end = path + len;
+	p = buf;
+	do {
+		cur = strchr(path, '/');
+		if (!cur)
+			cur = end;
+
+		len = cur - path;
+
+		/* including one '/' */
+		if (cur != end)
+			len += 1;
+
+		memcpy(p, path, len);
+		p += len;
+
+		while (cur <= end && *cur == '/')
+			cur++;
+		path = cur;
+	} while (path < end);
+
+	*p = '\0';
+
+	/*
+	 * remove the last slash if there has and just to make sure that
+	 * we will get something like "dir1/dir2"
+	 */
+	if (*(--p) == '/')
+		*p = '\0';
+
+	return buf;
+}
+
 static int compare_mount_options(struct ceph_mount_options *new_fsopt,
 				 struct ceph_options *new_opt,
 				 struct ceph_fs_client *fsc)
@@ -468,6 +530,7 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
 	struct ceph_mount_options *fsopt1 = new_fsopt;
 	struct ceph_mount_options *fsopt2 = fsc->mount_options;
 	int ofs = offsetof(struct ceph_mount_options, snapdir_name);
+	char *p1, *p2;
 	int ret;
 
 	ret = memcmp(fsopt1, fsopt2, ofs);
@@ -480,9 +543,21 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
 	ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
 	if (ret)
 		return ret;
-	ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
+
+	p1 = path_remove_extra_slash(fsopt1->server_path);
+	if (IS_ERR(p1))
+		return PTR_ERR(p1);
+	p2 = path_remove_extra_slash(fsopt2->server_path);
+	if (IS_ERR(p2)) {
+		kfree(p1);
+		return PTR_ERR(p2);
+	}
+	ret = strcmp_null(p1, p2);
+	kfree(p1);
+	kfree(p2);
 	if (ret)
 		return ret;
+
 	ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
 	if (ret)
 		return ret;
@@ -637,6 +712,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	fsc->sb = NULL;
 	fsc->mount_state = CEPH_MOUNT_MOUNTING;
 	fsc->filp_gen = 1;
+	fsc->have_copy_from2 = true;
 
 	atomic_long_set(&fsc->writeback_count, 0);
 
@@ -788,7 +864,6 @@ static void destroy_caches(void)
 	ceph_fscache_unregister();
 }
 
-
 /*
  * ceph_umount_begin - initiate forced umount.  Tear down down the
  * mount, skipping steps that may hang while waiting for server(s).
@@ -868,9 +943,6 @@ out:
 	return root;
 }
 
-
-
-
 /*
  * mount: join the ceph cluster, and open root directory.
  */
@@ -885,7 +957,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 	mutex_lock(&fsc->client->mount_mutex);
 
 	if (!fsc->sb->s_root) {
-		const char *path;
+		const char *path, *p;
 		err = __ceph_open_session(fsc->client, started);
 		if (err < 0)
 			goto out;
@@ -897,17 +969,22 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 				goto out;
 		}
 
-		if (!fsc->mount_options->server_path) {
-			path = "";
-			dout("mount opening path \\t\n");
-		} else {
-			path = fsc->mount_options->server_path + 1;
-			dout("mount opening path %s\n", path);
+		p = path_remove_extra_slash(fsc->mount_options->server_path);
+		if (IS_ERR(p)) {
+			err = PTR_ERR(p);
+			goto out;
 		}
+		/* if the server path is omitted or just consists of '/' */
+		if (!p)
+			path = "";
+		else
+			path = p;
+		dout("mount opening path '%s'\n", path);
 
 		ceph_fs_debugfs_init(fsc);
 
 		root = open_root_dentry(fsc, path, started);
+		kfree(p);
 		if (IS_ERR(root)) {
 			err = PTR_ERR(root);
 			goto out;
@@ -1018,7 +1095,7 @@ static int ceph_get_tree(struct fs_context *fc)
 	dout("ceph_get_tree\n");
 
 	if (!fc->source)
-		return invalf(fc, "ceph: No source");
+		return invalfc(fc, "No source");
 
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	fc->sb_flags |= SB_POSIXACL;
@@ -1070,6 +1147,11 @@ static int ceph_get_tree(struct fs_context *fc)
 	return 0;
 
 out_splat:
+	if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
+		pr_info("No mds server is up or the cluster is laggy\n");
+		err = -EHOSTUNREACH;
+	}
+
 	ceph_mdsc_close_sessions(fsc->mdsc);
 	deactivate_locked_super(sb);
 	goto out_final;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3bf1a01cd536..1e456a9011bb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -106,6 +106,8 @@ struct ceph_fs_client {
 	unsigned long last_auto_reconnect;
 	bool blacklisted;
 
+	bool have_copy_from2;
+
 	u32 filp_gen;
 	loff_t max_file_size;
 
diff --git a/fs/ceph/util.c b/fs/ceph/util.c
new file mode 100644
index 000000000000..2c34875675bf
--- /dev/null
+++ b/fs/ceph/util.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Some non-inline ceph helpers
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+/*
+ * return true if @layout appears to be valid
+ */
+int ceph_file_layout_is_valid(const struct ceph_file_layout *layout)
+{
+	__u32 su = layout->stripe_unit;
+	__u32 sc = layout->stripe_count;
+	__u32 os = layout->object_size;
+
+	/* stripe unit, object size must be non-zero, 64k increment */
+	if (!su || (su & (CEPH_MIN_STRIPE_UNIT-1)))
+		return 0;
+	if (!os || (os & (CEPH_MIN_STRIPE_UNIT-1)))
+		return 0;
+	/* object size must be a multiple of stripe unit */
+	if (os < su || os % su)
+		return 0;
+	/* stripe count must be non-zero */
+	if (!sc)
+		return 0;
+	return 1;
+}
+
+void ceph_file_layout_from_legacy(struct ceph_file_layout *fl,
+				  struct ceph_file_layout_legacy *legacy)
+{
+	fl->stripe_unit = le32_to_cpu(legacy->fl_stripe_unit);
+	fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count);
+	fl->object_size = le32_to_cpu(legacy->fl_object_size);
+	fl->pool_id = le32_to_cpu(legacy->fl_pg_pool);
+	if (fl->pool_id == 0 && fl->stripe_unit == 0 &&
+	    fl->stripe_count == 0 && fl->object_size == 0)
+		fl->pool_id = -1;
+}
+
+void ceph_file_layout_to_legacy(struct ceph_file_layout *fl,
+				struct ceph_file_layout_legacy *legacy)
+{
+	legacy->fl_stripe_unit = cpu_to_le32(fl->stripe_unit);
+	legacy->fl_stripe_count = cpu_to_le32(fl->stripe_count);
+	legacy->fl_object_size = cpu_to_le32(fl->object_size);
+	if (fl->pool_id >= 0)
+		legacy->fl_pg_pool = cpu_to_le32(fl->pool_id);
+	else
+		legacy->fl_pg_pool = 0;
+}
+
+int ceph_flags_to_mode(int flags)
+{
+	int mode;
+
+#ifdef O_DIRECTORY  /* fixme */
+	if ((flags & O_DIRECTORY) == O_DIRECTORY)
+		return CEPH_FILE_MODE_PIN;
+#endif
+
+	switch (flags & O_ACCMODE) {
+	case O_WRONLY:
+		mode = CEPH_FILE_MODE_WR;
+		break;
+	case O_RDONLY:
+		mode = CEPH_FILE_MODE_RD;
+		break;
+	case O_RDWR:
+	case O_ACCMODE: /* this is what the VFS does */
+		mode = CEPH_FILE_MODE_RDWR;
+		break;
+	}
+#ifdef O_LAZY
+	if (flags & O_LAZY)
+		mode |= CEPH_FILE_MODE_LAZY;
+#endif
+
+	return mode;
+}
+
+int ceph_caps_for_mode(int mode)
+{
+	int caps = CEPH_CAP_PIN;
+
+	if (mode & CEPH_FILE_MODE_RD)
+		caps |= CEPH_CAP_FILE_SHARED |
+			CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE;
+	if (mode & CEPH_FILE_MODE_WR)
+		caps |= CEPH_CAP_FILE_EXCL |
+			CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |
+			CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL |
+			CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
+	if (mode & CEPH_FILE_MODE_LAZY)
+		caps |= CEPH_CAP_FILE_LAZYIO;
+
+	return caps;
+}
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index cb18ee637cb7..7b8a070a782d 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -655,7 +655,7 @@ static int __build_xattrs(struct inode *inode)
 	u32 len;
 	const char *name, *val;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	int xattr_version;
+	u64 xattr_version;
 	struct ceph_inode_xattr **xattrs = NULL;
 	int err = 0;
 	int i;
@@ -851,7 +851,7 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 	req_mask = __get_request_mask(inode);
 
 	spin_lock(&ci->i_ceph_lock);
-	dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
+	dout("getxattr %p name '%s' ver=%lld index_ver=%lld\n", inode, name,
 	     ci->i_xattrs.version, ci->i_xattrs.index_version);
 
 	if (ci->i_xattrs.version == 0 ||
@@ -1078,7 +1078,8 @@ retry:
 		}
 	}
 
-	dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
+	dout("setxattr %p name '%s' issued %s\n", inode, name,
+	     ceph_cap_string(issued));
 	__build_xattrs(inode);
 
 	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 19f6e592b941..276e4b5ea8e0 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -611,12 +611,12 @@ static int cifs_stats_proc_open(struct inode *inode, struct file *file)
 	return single_open(file, cifs_stats_proc_show, NULL);
 }
 
-static const struct file_operations cifs_stats_proc_fops = {
-	.open		= cifs_stats_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= cifs_stats_proc_write,
+static const struct proc_ops cifs_stats_proc_ops = {
+	.proc_open	= cifs_stats_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= cifs_stats_proc_write,
 };
 
 #ifdef CONFIG_CIFS_SMB_DIRECT
@@ -640,12 +640,12 @@ static int name##_open(struct inode *inode, struct file *file) \
 	return single_open(file, name##_proc_show, NULL); \
 } \
 \
-static const struct file_operations cifs_##name##_proc_fops = { \
-	.open		= name##_open, \
-	.read		= seq_read, \
-	.llseek		= seq_lseek, \
-	.release	= single_release, \
-	.write		= name##_write, \
+static const struct proc_ops cifs_##name##_proc_fops = { \
+	.proc_open	= name##_open, \
+	.proc_read	= seq_read, \
+	.proc_lseek	= seq_lseek, \
+	.proc_release	= single_release, \
+	.proc_write	= name##_write, \
 }
 
 PROC_FILE_DEFINE(rdma_readwrite_threshold);
@@ -659,11 +659,11 @@ PROC_FILE_DEFINE(smbd_receive_credit_max);
 #endif
 
 static struct proc_dir_entry *proc_fs_cifs;
-static const struct file_operations cifsFYI_proc_fops;
-static const struct file_operations cifs_lookup_cache_proc_fops;
-static const struct file_operations traceSMB_proc_fops;
-static const struct file_operations cifs_security_flags_proc_fops;
-static const struct file_operations cifs_linux_ext_proc_fops;
+static const struct proc_ops cifsFYI_proc_ops;
+static const struct proc_ops cifs_lookup_cache_proc_ops;
+static const struct proc_ops traceSMB_proc_ops;
+static const struct proc_ops cifs_security_flags_proc_ops;
+static const struct proc_ops cifs_linux_ext_proc_ops;
 
 void
 cifs_proc_init(void)
@@ -678,18 +678,18 @@ cifs_proc_init(void)
 	proc_create_single("open_files", 0400, proc_fs_cifs,
 			cifs_debug_files_proc_show);
 
-	proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops);
-	proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops);
-	proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops);
+	proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_ops);
+	proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_ops);
+	proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_ops);
 	proc_create("LinuxExtensionsEnabled", 0644, proc_fs_cifs,
-		    &cifs_linux_ext_proc_fops);
+		    &cifs_linux_ext_proc_ops);
 	proc_create("SecurityFlags", 0644, proc_fs_cifs,
-		    &cifs_security_flags_proc_fops);
+		    &cifs_security_flags_proc_ops);
 	proc_create("LookupCacheEnabled", 0644, proc_fs_cifs,
-		    &cifs_lookup_cache_proc_fops);
+		    &cifs_lookup_cache_proc_ops);
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
-	proc_create("dfscache", 0644, proc_fs_cifs, &dfscache_proc_fops);
+	proc_create("dfscache", 0644, proc_fs_cifs, &dfscache_proc_ops);
 #endif
 
 #ifdef CONFIG_CIFS_SMB_DIRECT
@@ -774,12 +774,12 @@ static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
 	return count;
 }
 
-static const struct file_operations cifsFYI_proc_fops = {
-	.open		= cifsFYI_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= cifsFYI_proc_write,
+static const struct proc_ops cifsFYI_proc_ops = {
+	.proc_open	= cifsFYI_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= cifsFYI_proc_write,
 };
 
 static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
@@ -805,12 +805,12 @@ static ssize_t cifs_linux_ext_proc_write(struct file *file,
 	return count;
 }
 
-static const struct file_operations cifs_linux_ext_proc_fops = {
-	.open		= cifs_linux_ext_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= cifs_linux_ext_proc_write,
+static const struct proc_ops cifs_linux_ext_proc_ops = {
+	.proc_open	= cifs_linux_ext_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= cifs_linux_ext_proc_write,
 };
 
 static int cifs_lookup_cache_proc_show(struct seq_file *m, void *v)
@@ -836,12 +836,12 @@ static ssize_t cifs_lookup_cache_proc_write(struct file *file,
 	return count;
 }
 
-static const struct file_operations cifs_lookup_cache_proc_fops = {
-	.open		= cifs_lookup_cache_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= cifs_lookup_cache_proc_write,
+static const struct proc_ops cifs_lookup_cache_proc_ops = {
+	.proc_open	= cifs_lookup_cache_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= cifs_lookup_cache_proc_write,
 };
 
 static int traceSMB_proc_show(struct seq_file *m, void *v)
@@ -867,12 +867,12 @@ static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
 	return count;
 }
 
-static const struct file_operations traceSMB_proc_fops = {
-	.open		= traceSMB_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= traceSMB_proc_write,
+static const struct proc_ops traceSMB_proc_ops = {
+	.proc_open	= traceSMB_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= traceSMB_proc_write,
 };
 
 static int cifs_security_flags_proc_show(struct seq_file *m, void *v)
@@ -978,12 +978,12 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
 	return count;
 }
 
-static const struct file_operations cifs_security_flags_proc_fops = {
-	.open		= cifs_security_flags_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= cifs_security_flags_proc_write,
+static const struct proc_ops cifs_security_flags_proc_ops = {
+	.proc_open	= cifs_security_flags_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= cifs_security_flags_proc_write,
 };
 #else
 inline void cifs_proc_init(void)
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 9a384d1e27b4..43c1b43a07ec 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -8,6 +8,7 @@
 #include <linux/jhash.h>
 #include <linux/ktime.h>
 #include <linux/slab.h>
+#include <linux/proc_fs.h>
 #include <linux/nls.h>
 #include <linux/workqueue.h>
 #include "cifsglob.h"
@@ -211,12 +212,12 @@ static int dfscache_proc_open(struct inode *inode, struct file *file)
 	return single_open(file, dfscache_proc_show, NULL);
 }
 
-const struct file_operations dfscache_proc_fops = {
-	.open		= dfscache_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= dfscache_proc_write,
+const struct proc_ops dfscache_proc_ops = {
+	.proc_open	= dfscache_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= dfscache_proc_write,
 };
 
 #ifdef CONFIG_CIFS_DEBUG2
diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h
index 76c732943f5f..99ee44f8ad07 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/cifs/dfs_cache.h
@@ -24,7 +24,7 @@ struct dfs_cache_tgt_iterator {
 
 extern int dfs_cache_init(void);
 extern void dfs_cache_destroy(void);
-extern const struct file_operations dfscache_proc_fops;
+extern const struct proc_ops dfscache_proc_ops;
 
 extern int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
 			  const struct nls_table *nls_codepage, int remap,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b1383c524b98..9ba623b601ec 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -113,6 +113,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
 	}
 
 	 /* revalidate if mtime or size have changed */
+	fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode);
 	if (timespec64_equal(&inode->i_mtime, &fattr->cf_mtime) &&
 	    cifs_i->server_eof == fattr->cf_eof) {
 		cifs_dbg(FYI, "%s: inode %llu is unchanged\n",
@@ -162,6 +163,9 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
 	cifs_revalidate_cache(inode, fattr);
 
 	spin_lock(&inode->i_lock);
+	fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode);
+	fattr->cf_atime = timestamp_truncate(fattr->cf_atime, inode);
+	fattr->cf_ctime = timestamp_truncate(fattr->cf_ctime, inode);
 	/* we do not want atime to be less than mtime, it broke some apps */
 	if (timespec64_compare(&fattr->cf_atime, &fattr->cf_mtime) < 0)
 		inode->i_atime = fattr->cf_mtime;
@@ -329,8 +333,7 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
 	fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
 	fattr->cf_uid = cifs_sb->mnt_uid;
 	fattr->cf_gid = cifs_sb->mnt_gid;
-	ktime_get_real_ts64(&fattr->cf_mtime);
-	fattr->cf_mtime = timespec64_trunc(fattr->cf_mtime, sb->s_time_gran);
+	ktime_get_coarse_real_ts64(&fattr->cf_mtime);
 	fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime;
 	fattr->cf_nlink = 2;
 	fattr->cf_flags = CIFS_FATTR_DFS_REFERRAL;
@@ -607,10 +610,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 
 	if (info->LastAccessTime)
 		fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
-	else {
-		ktime_get_real_ts64(&fattr->cf_atime);
-		fattr->cf_atime = timespec64_trunc(fattr->cf_atime, sb->s_time_gran);
-	}
+	else
+		ktime_get_coarse_real_ts64(&fattr->cf_atime);
 
 	fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
 	fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 680aba9c00d5..fd0b5dd68f9e 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -76,14 +76,11 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
 	if (ia_valid & ATTR_GID)
 		sd_iattr->ia_gid = iattr->ia_gid;
 	if (ia_valid & ATTR_ATIME)
-		sd_iattr->ia_atime = timestamp_truncate(iattr->ia_atime,
-						      inode);
+		sd_iattr->ia_atime = iattr->ia_atime;
 	if (ia_valid & ATTR_MTIME)
-		sd_iattr->ia_mtime = timestamp_truncate(iattr->ia_mtime,
-						      inode);
+		sd_iattr->ia_mtime = iattr->ia_mtime;
 	if (ia_valid & ATTR_CTIME)
-		sd_iattr->ia_ctime = timestamp_truncate(iattr->ia_ctime,
-						      inode);
+		sd_iattr->ia_ctime = iattr->ia_ctime;
 	if (ia_valid & ATTR_MODE) {
 		umode_t mode = iattr->ia_mode;
 
diff --git a/fs/coredump.c b/fs/coredump.c
index b1ea7dfbd149..f8296a82d01d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -517,7 +517,7 @@ static void wait_for_dump_helpers(struct file *file)
 	pipe_lock(pipe);
 	pipe->readers++;
 	pipe->writers--;
-	wake_up_interruptible_sync(&pipe->wait);
+	wake_up_interruptible_sync(&pipe->rd_wait);
 	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	pipe_unlock(pipe);
 
@@ -525,7 +525,7 @@ static void wait_for_dump_helpers(struct file *file)
 	 * We actually want wait_event_freezable() but then we need
 	 * to clear TIF_SIGPENDING and improve dump_interrupted().
 	 */
-	wait_event_interruptible(pipe->wait, pipe->readers == 1);
+	wait_event_interruptible(pipe->rd_wait, pipe->readers == 1);
 
 	pipe_lock(pipe);
 	pipe->readers--;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 2f04024c3588..912308600d39 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -534,7 +534,7 @@ static int cramfs_read_super(struct super_block *sb, struct fs_context *fc,
 		/* check for wrong endianness */
 		if (super->magic == CRAMFS_MAGIC_WEND) {
 			if (!silent)
-				errorf(fc, "cramfs: wrong endianness");
+				errorfc(fc, "wrong endianness");
 			return -EINVAL;
 		}
 
@@ -546,22 +546,22 @@ static int cramfs_read_super(struct super_block *sb, struct fs_context *fc,
 		mutex_unlock(&read_mutex);
 		if (super->magic != CRAMFS_MAGIC) {
 			if (super->magic == CRAMFS_MAGIC_WEND && !silent)
-				errorf(fc, "cramfs: wrong endianness");
+				errorfc(fc, "wrong endianness");
 			else if (!silent)
-				errorf(fc, "cramfs: wrong magic");
+				errorfc(fc, "wrong magic");
 			return -EINVAL;
 		}
 	}
 
 	/* get feature flags first */
 	if (super->flags & ~CRAMFS_SUPPORTED_FLAGS) {
-		errorf(fc, "cramfs: unsupported filesystem features");
+		errorfc(fc, "unsupported filesystem features");
 		return -EINVAL;
 	}
 
 	/* Check that the root inode is in a sane state */
 	if (!S_ISDIR(super->root.mode)) {
-		errorf(fc, "cramfs: root is not a directory");
+		errorfc(fc, "root is not a directory");
 		return -EINVAL;
 	}
 	/* correct strange, hard-coded permissions of mkcramfs */
@@ -580,12 +580,12 @@ static int cramfs_read_super(struct super_block *sb, struct fs_context *fc,
 	sbi->magic = super->magic;
 	sbi->flags = super->flags;
 	if (root_offset == 0)
-		infof(fc, "cramfs: empty filesystem");
+		infofc(fc, "empty filesystem");
 	else if (!(super->flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
 		 ((root_offset != sizeof(struct cramfs_super)) &&
 		  (root_offset != 512 + sizeof(struct cramfs_super))))
 	{
-		errorf(fc, "cramfs: bad root offset %lu", root_offset);
+		errorfc(fc, "bad root offset %lu", root_offset);
 		return -EINVAL;
 	}
 
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index dc6cffc4feba..e742dfc66933 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -332,7 +332,10 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 		parent = debugfs_mount->mnt_root;
 
 	inode_lock(d_inode(parent));
-	dentry = lookup_one_len(name, parent, strlen(name));
+	if (unlikely(IS_DEADDIR(d_inode(parent))))
+		dentry = ERR_PTR(-ENOENT);
+	else
+		dentry = lookup_one_len(name, parent, strlen(name));
 	if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
 		if (d_is_dir(dentry))
 			pr_err("Directory '%s' with parent '%s' already present!\n",
@@ -681,62 +684,15 @@ static void __debugfs_file_removed(struct dentry *dentry)
 		wait_for_completion(&fsd->active_users_drained);
 }
 
-static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
-{
-	int ret = 0;
-
-	if (simple_positive(dentry)) {
-		dget(dentry);
-		if (d_is_dir(dentry)) {
-			ret = simple_rmdir(d_inode(parent), dentry);
-			if (!ret)
-				fsnotify_rmdir(d_inode(parent), dentry);
-		} else {
-			simple_unlink(d_inode(parent), dentry);
-			fsnotify_unlink(d_inode(parent), dentry);
-		}
-		if (!ret)
-			d_delete(dentry);
-		if (d_is_reg(dentry))
-			__debugfs_file_removed(dentry);
-		dput(dentry);
-	}
-	return ret;
-}
-
-/**
- * debugfs_remove - removes a file or directory from the debugfs filesystem
- * @dentry: a pointer to a the dentry of the file or directory to be
- *          removed.  If this parameter is NULL or an error value, nothing
- *          will be done.
- *
- * This function removes a file or directory in debugfs that was previously
- * created with a call to another debugfs function (like
- * debugfs_create_file() or variants thereof.)
- *
- * This function is required to be called in order for the file to be
- * removed, no automatic cleanup of files will happen when a module is
- * removed, you are responsible here.
- */
-void debugfs_remove(struct dentry *dentry)
+static void remove_one(struct dentry *victim)
 {
-	struct dentry *parent;
-	int ret;
-
-	if (IS_ERR_OR_NULL(dentry))
-		return;
-
-	parent = dentry->d_parent;
-	inode_lock(d_inode(parent));
-	ret = __debugfs_remove(dentry, parent);
-	inode_unlock(d_inode(parent));
-	if (!ret)
-		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+        if (d_is_reg(victim))
+		__debugfs_file_removed(victim);
+	simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
-EXPORT_SYMBOL_GPL(debugfs_remove);
 
 /**
- * debugfs_remove_recursive - recursively removes a directory
+ * debugfs_remove - recursively removes a directory
  * @dentry: a pointer to a the dentry of the directory to be removed.  If this
  *          parameter is NULL or an error value, nothing will be done.
  *
@@ -748,65 +704,16 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
  * removed, no automatic cleanup of files will happen when a module is
  * removed, you are responsible here.
  */
-void debugfs_remove_recursive(struct dentry *dentry)
+void debugfs_remove(struct dentry *dentry)
 {
-	struct dentry *child, *parent;
-
 	if (IS_ERR_OR_NULL(dentry))
 		return;
 
-	parent = dentry;
- down:
-	inode_lock(d_inode(parent));
- loop:
-	/*
-	 * The parent->d_subdirs is protected by the d_lock. Outside that
-	 * lock, the child can be unlinked and set to be freed which can
-	 * use the d_u.d_child as the rcu head and corrupt this list.
-	 */
-	spin_lock(&parent->d_lock);
-	list_for_each_entry(child, &parent->d_subdirs, d_child) {
-		if (!simple_positive(child))
-			continue;
-
-		/* perhaps simple_empty(child) makes more sense */
-		if (!list_empty(&child->d_subdirs)) {
-			spin_unlock(&parent->d_lock);
-			inode_unlock(d_inode(parent));
-			parent = child;
-			goto down;
-		}
-
-		spin_unlock(&parent->d_lock);
-
-		if (!__debugfs_remove(child, parent))
-			simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-
-		/*
-		 * The parent->d_lock protects agaist child from unlinking
-		 * from d_subdirs. When releasing the parent->d_lock we can
-		 * no longer trust that the next pointer is valid.
-		 * Restart the loop. We'll skip this one with the
-		 * simple_positive() check.
-		 */
-		goto loop;
-	}
-	spin_unlock(&parent->d_lock);
-
-	inode_unlock(d_inode(parent));
-	child = parent;
-	parent = parent->d_parent;
-	inode_lock(d_inode(parent));
-
-	if (child != dentry)
-		/* go up */
-		goto loop;
-
-	if (!__debugfs_remove(child, parent))
-		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-	inode_unlock(d_inode(parent));
+	simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count);
+	simple_recursive_removal(dentry, remove_one);
+	simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
-EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
+EXPORT_SYMBOL_GPL(debugfs_remove);
 
 /**
  * debugfs_rename - rename a file/directory in the debugfs filesystem
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index cffa0c1ec829..019572c6b39a 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -524,16 +524,12 @@ out:
 
 static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
 {
-	int rc = 0;
-	struct inode *inode;
-	struct inode *lower_inode;
-
-	inode = (struct inode *)mapping->host;
-	lower_inode = ecryptfs_inode_to_lower(inode);
-	if (lower_inode->i_mapping->a_ops->bmap)
-		rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping,
-							 block);
-	return rc;
+	struct inode *lower_inode = ecryptfs_inode_to_lower(mapping->host);
+	int ret = bmap(lower_inode, &block);
+
+	if (ret)
+		return 0;
+	return block;
 }
 
 const struct address_space_operations ecryptfs_aops = {
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 8aa0ea8c55e8..78e41c7c3d05 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -24,6 +24,8 @@
 #include <linux/seq_file.h>
 #include <linux/idr.h>
 
+DEFINE_PER_CPU(int, eventfd_wake_count);
+
 static DEFINE_IDA(eventfd_ida);
 
 struct eventfd_ctx {
@@ -60,12 +62,25 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
 {
 	unsigned long flags;
 
+	/*
+	 * Deadlock or stack overflow issues can happen if we recurse here
+	 * through waitqueue wakeup handlers. If the caller users potentially
+	 * nested waitqueues with custom wakeup handlers, then it should
+	 * check eventfd_signal_count() before calling this function. If
+	 * it returns true, the eventfd_signal() call should be deferred to a
+	 * safe context.
+	 */
+	if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
+		return 0;
+
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
+	this_cpu_inc(eventfd_wake_count);
 	if (ULLONG_MAX - ctx->count < n)
 		n = ULLONG_MAX - ctx->count;
 	ctx->count += n;
 	if (waitqueue_active(&ctx->wqh))
 		wake_up_locked_poll(&ctx->wqh, EPOLLIN);
+	this_cpu_dec(eventfd_wake_count);
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 
 	return n;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 88b213bd32bc..8434217549b3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6043,7 +6043,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 		bh = ext4_bread(handle, inode, blk,
 				EXT4_GET_BLOCKS_CREATE |
 				EXT4_GET_BLOCKS_METADATA_NOFAIL);
-	} while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
+	} while (PTR_ERR(bh) == -ENOSPC &&
 		 ext4_should_retry_alloc(inode->i_sb, &retries));
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8bd9afa81c54..b27b72107911 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3666,12 +3666,16 @@ static int check_swap_activate(struct swap_info_struct *sis,
 			page_no < sis->max) {
 		unsigned block_in_page;
 		sector_t first_block;
+		sector_t block = 0;
+		int	 err = 0;
 
 		cond_resched();
 
-		first_block = bmap(inode, probe_block);
-		if (first_block == 0)
+		block = probe_block;
+		err = bmap(inode, &block);
+		if (err || !block)
 			goto bad_bmap;
+		first_block = block;
 
 		/*
 		 * It must be PAGE_SIZE aligned on-disk
@@ -3683,11 +3687,13 @@ static int check_swap_activate(struct swap_info_struct *sis,
 
 		for (block_in_page = 1; block_in_page < blocks_per_page;
 					block_in_page++) {
-			sector_t block;
 
-			block = bmap(inode, probe_block + block_in_page);
-			if (block == 0)
+			block = probe_block + block_in_page;
+			err = bmap(inode, &block);
+
+			if (err || !block)
 				goto bad_bmap;
+
 			if (block != first_block + block_in_page) {
 				/* Discontiguity */
 				probe_block++;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 86ddbb55d2b1..0d4da644df3b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -829,18 +829,12 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
 		inode->i_uid = attr->ia_uid;
 	if (ia_valid & ATTR_GID)
 		inode->i_gid = attr->ia_gid;
-	if (ia_valid & ATTR_ATIME) {
-		inode->i_atime = timestamp_truncate(attr->ia_atime,
-						  inode);
-	}
-	if (ia_valid & ATTR_MTIME) {
-		inode->i_mtime = timestamp_truncate(attr->ia_mtime,
-						  inode);
-	}
-	if (ia_valid & ATTR_CTIME) {
-		inode->i_ctime = timestamp_truncate(attr->ia_ctime,
-						  inode);
-	}
+	if (ia_valid & ATTR_ATIME)
+		inode->i_atime = attr->ia_atime;
+	if (ia_valid & ATTR_MTIME)
+		inode->i_mtime = attr->ia_mtime;
+	if (ia_valid & ATTR_CTIME)
+		inode->i_ctime = attr->ia_ctime;
 	if (ia_valid & ATTR_MODE) {
 		umode_t mode = attr->ia_mode;
 
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 3314a0f3405e..9d02cdcdbb07 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -875,7 +875,7 @@ static int truncate_dnode(struct dnode_of_data *dn)
 
 	/* get direct node */
 	page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
-	if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
+	if (PTR_ERR(page) == -ENOENT)
 		return 1;
 	else if (IS_ERR(page))
 		return PTR_ERR(page);
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 1e08bd54c5fb..f1b2a1fc2a6a 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -271,6 +271,14 @@ static inline struct timespec64 fat_timespec64_trunc_2secs(struct timespec64 ts)
 {
 	return (struct timespec64){ ts.tv_sec & ~1ULL, 0 };
 }
+
+static inline struct timespec64 fat_timespec64_trunc_10ms(struct timespec64 ts)
+{
+	if (ts.tv_nsec)
+		ts.tv_nsec -= ts.tv_nsec % 10000000UL;
+	return ts;
+}
+
 /*
  * truncate the various times with appropriate granularity:
  *   root inode:
@@ -308,7 +316,7 @@ int fat_truncate_time(struct inode *inode, struct timespec64 *now, int flags)
 	}
 	if (flags & S_CTIME) {
 		if (sbi->options.isvfat)
-			inode->i_ctime = timespec64_trunc(*now, 10000000);
+			inode->i_ctime = fat_timespec64_trunc_10ms(*now);
 		else
 			inode->i_ctime = fat_timespec64_trunc_2secs(*now);
 	}
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 9135646e41ac..77bf5f95362d 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -74,7 +74,8 @@ int register_filesystem(struct file_system_type * fs)
 	int res = 0;
 	struct file_system_type ** p;
 
-	if (fs->parameters && !fs_validate_description(fs->parameters))
+	if (fs->parameters &&
+	    !fs_validate_description(fs->name, fs->parameters))
 		return -EINVAL;
 
 	BUG_ON(strchr(fs->name, '.'));
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 138b5b4d621d..fc9f6ef93b55 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -45,6 +45,7 @@ static const struct constant_table common_set_sb_flag[] = {
 	{ "posixacl",	SB_POSIXACL },
 	{ "ro",		SB_RDONLY },
 	{ "sync",	SB_SYNCHRONOUS },
+	{ },
 };
 
 static const struct constant_table common_clear_sb_flag[] = {
@@ -53,6 +54,7 @@ static const struct constant_table common_clear_sb_flag[] = {
 	{ "nomand",	SB_MANDLOCK },
 	{ "rw",		SB_RDONLY },
 	{ "silent",	SB_SILENT },
+	{ },
 };
 
 static const char *const forbidden_sb_flag[] = {
@@ -175,14 +177,15 @@ int vfs_parse_fs_string(struct fs_context *fc, const char *key,
 
 	struct fs_parameter param = {
 		.key	= key,
-		.type	= fs_value_is_string,
+		.type	= fs_value_is_flag,
 		.size	= v_size,
 	};
 
-	if (v_size > 0) {
+	if (value) {
 		param.string = kmemdup_nul(value, v_size, GFP_KERNEL);
 		if (!param.string)
 			return -ENOMEM;
+		param.type = fs_value_is_string;
 	}
 
 	ret = vfs_parse_fs_param(fc, &param);
@@ -268,6 +271,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
 	fc->fs_type	= get_filesystem(fs_type);
 	fc->cred	= get_current_cred();
 	fc->net_ns	= get_net(current->nsproxy->net_ns);
+	fc->log.prefix	= fs_type->name;
 
 	mutex_init(&fc->uapi_mutex);
 
@@ -361,8 +365,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
 	get_net(fc->net_ns);
 	get_user_ns(fc->user_ns);
 	get_cred(fc->cred);
-	if (fc->log)
-		refcount_inc(&fc->log->usage);
+	if (fc->log.log)
+		refcount_inc(&fc->log.log->usage);
 
 	/* Can't call put until we've called ->dup */
 	ret = fc->ops->dup(fc, src_fc);
@@ -385,64 +389,33 @@ EXPORT_SYMBOL(vfs_dup_fs_context);
  * @fc: The filesystem context to log to.
  * @fmt: The format of the buffer.
  */
-void logfc(struct fs_context *fc, const char *fmt, ...)
+void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...)
 {
-	static const char store_failure[] = "OOM: Can't store error string";
-	struct fc_log *log = fc ? fc->log : NULL;
-	const char *p;
 	va_list va;
-	char *q;
-	u8 freeable;
+	struct va_format vaf = {.fmt = fmt, .va = &va};
 
 	va_start(va, fmt);
-	if (!strchr(fmt, '%')) {
-		p = fmt;
-		goto unformatted_string;
-	}
-	if (strcmp(fmt, "%s") == 0) {
-		p = va_arg(va, const char *);
-		goto unformatted_string;
-	}
-
-	q = kvasprintf(GFP_KERNEL, fmt, va);
-copied_string:
-	if (!q)
-		goto store_failure;
-	freeable = 1;
-	goto store_string;
-
-unformatted_string:
-	if ((unsigned long)p >= (unsigned long)__start_rodata &&
-	    (unsigned long)p <  (unsigned long)__end_rodata)
-		goto const_string;
-	if (log && within_module_core((unsigned long)p, log->owner))
-		goto const_string;
-	q = kstrdup(p, GFP_KERNEL);
-	goto copied_string;
-
-store_failure:
-	p = store_failure;
-const_string:
-	q = (char *)p;
-	freeable = 0;
-store_string:
 	if (!log) {
-		switch (fmt[0]) {
+		switch (level) {
 		case 'w':
-			printk(KERN_WARNING "%s\n", q + 2);
+			printk(KERN_WARNING "%s%s%pV\n", prefix ? prefix : "",
+						prefix ? ": " : "", &vaf);
 			break;
 		case 'e':
-			printk(KERN_ERR "%s\n", q + 2);
+			printk(KERN_ERR "%s%s%pV\n", prefix ? prefix : "",
+						prefix ? ": " : "", &vaf);
 			break;
 		default:
-			printk(KERN_NOTICE "%s\n", q + 2);
+			printk(KERN_NOTICE "%s%s%pV\n", prefix ? prefix : "",
+						prefix ? ": " : "", &vaf);
 			break;
 		}
-		if (freeable)
-			kfree(q);
 	} else {
 		unsigned int logsize = ARRAY_SIZE(log->buffer);
 		u8 index;
+		char *q = kasprintf(GFP_KERNEL, "%c %s%s%pV\n", level,
+						prefix ? prefix : "",
+						prefix ? ": " : "", &vaf);
 
 		index = log->head & (logsize - 1);
 		BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) ||
@@ -454,9 +427,11 @@ store_string:
 			log->tail++;
 		}
 
-		log->buffer[index] = q;
-		log->need_free &= ~(1 << index);
-		log->need_free |= freeable << index;
+		log->buffer[index] = q ? q : "OOM: Can't store error string";
+		if (q)
+			log->need_free |= 1 << index;
+		else
+			log->need_free &= ~(1 << index);
 		log->head++;
 	}
 	va_end(va);
@@ -468,12 +443,12 @@ EXPORT_SYMBOL(logfc);
  */
 static void put_fc_log(struct fs_context *fc)
 {
-	struct fc_log *log = fc->log;
+	struct fc_log *log = fc->log.log;
 	int i;
 
 	if (log) {
 		if (refcount_dec_and_test(&log->usage)) {
-			fc->log = NULL;
+			fc->log.log = NULL;
 			for (i = 0; i <= 7; i++)
 				if (log->need_free & (1 << i))
 					kfree(log->buffer[i]);
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index d1930adce68d..7e6fb43f9541 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -20,42 +20,66 @@ static const struct constant_table bool_names[] = {
 	{ "no",		false },
 	{ "true",	true },
 	{ "yes",	true },
+	{ },
 };
 
+static const struct constant_table *
+__lookup_constant(const struct constant_table *tbl, const char *name)
+{
+	for ( ; tbl->name; tbl++)
+		if (strcmp(name, tbl->name) == 0)
+			return tbl;
+	return NULL;
+}
+
 /**
  * lookup_constant - Look up a constant by name in an ordered table
  * @tbl: The table of constants to search.
- * @tbl_size: The size of the table.
  * @name: The name to look up.
  * @not_found: The value to return if the name is not found.
  */
-int __lookup_constant(const struct constant_table *tbl, size_t tbl_size,
-		      const char *name, int not_found)
+int lookup_constant(const struct constant_table *tbl, const char *name, int not_found)
 {
-	unsigned int i;
+	const struct constant_table *p = __lookup_constant(tbl, name);
 
-	for (i = 0; i < tbl_size; i++)
-		if (strcmp(name, tbl[i].name) == 0)
-			return tbl[i].value;
+	return p ? p->value : not_found;
+}
+EXPORT_SYMBOL(lookup_constant);
 
-	return not_found;
+static inline bool is_flag(const struct fs_parameter_spec *p)
+{
+	return p->type == NULL;
 }
-EXPORT_SYMBOL(__lookup_constant);
 
 static const struct fs_parameter_spec *fs_lookup_key(
-	const struct fs_parameter_description *desc,
-	const char *name)
+	const struct fs_parameter_spec *desc,
+	struct fs_parameter *param, bool *negated)
 {
-	const struct fs_parameter_spec *p;
-
-	if (!desc->specs)
-		return NULL;
-
-	for (p = desc->specs; p->name; p++)
-		if (strcmp(p->name, name) == 0)
+	const struct fs_parameter_spec *p, *other = NULL;
+	const char *name = param->key;
+	bool want_flag = param->type == fs_value_is_flag;
+
+	*negated = false;
+	for (p = desc; p->name; p++) {
+		if (strcmp(p->name, name) != 0)
+			continue;
+		if (likely(is_flag(p) == want_flag))
 			return p;
-
-	return NULL;
+		other = p;
+	}
+	if (want_flag) {
+		if (name[0] == 'n' && name[1] == 'o' && name[2]) {
+			for (p = desc; p->name; p++) {
+				if (strcmp(p->name, name + 2) != 0)
+					continue;
+				if (!(p->flags & fs_param_neg_with_no))
+					continue;
+				*negated = true;
+				return p;
+			}
+		}
+	}
+	return other;
 }
 
 /*
@@ -76,172 +100,38 @@ static const struct fs_parameter_spec *fs_lookup_key(
  * unknown parameters are okay and -EINVAL if there was a conversion issue or
  * the parameter wasn't recognised and unknowns aren't okay.
  */
-int fs_parse(struct fs_context *fc,
-	     const struct fs_parameter_description *desc,
+int __fs_parse(struct p_log *log,
+	     const struct fs_parameter_spec *desc,
 	     struct fs_parameter *param,
 	     struct fs_parse_result *result)
 {
 	const struct fs_parameter_spec *p;
-	const struct fs_parameter_enum *e;
-	int ret = -ENOPARAM, b;
 
-	result->has_value = !!param->string;
-	result->negated = false;
 	result->uint_64 = 0;
 
-	p = fs_lookup_key(desc, param->key);
-	if (!p) {
-		/* If we didn't find something that looks like "noxxx", see if
-		 * "xxx" takes the "no"-form negative - but only if there
-		 * wasn't an value.
-		 */
-		if (result->has_value)
-			goto unknown_parameter;
-		if (param->key[0] != 'n' || param->key[1] != 'o' || !param->key[2])
-			goto unknown_parameter;
-
-		p = fs_lookup_key(desc, param->key + 2);
-		if (!p)
-			goto unknown_parameter;
-		if (!(p->flags & fs_param_neg_with_no))
-			goto unknown_parameter;
-		result->boolean = false;
-		result->negated = true;
-	}
+	p = fs_lookup_key(desc, param, &result->negated);
+	if (!p)
+		return -ENOPARAM;
 
 	if (p->flags & fs_param_deprecated)
-		warnf(fc, "%s: Deprecated parameter '%s'",
-		      desc->name, param->key);
-
-	if (result->negated)
-		goto okay;
-
-	/* Certain parameter types only take a string and convert it. */
-	switch (p->type) {
-	case __fs_param_wasnt_defined:
-		return -EINVAL;
-	case fs_param_is_u32:
-	case fs_param_is_u32_octal:
-	case fs_param_is_u32_hex:
-	case fs_param_is_s32:
-	case fs_param_is_u64:
-	case fs_param_is_enum:
-	case fs_param_is_string:
-		if (param->type != fs_value_is_string)
-			goto bad_value;
-		if (!result->has_value) {
-			if (p->flags & fs_param_v_optional)
-				goto okay;
-			goto bad_value;
-		}
-		/* Fall through */
-	default:
-		break;
-	}
+		warn_plog(log, "Deprecated parameter '%s'", param->key);
 
 	/* Try to turn the type we were given into the type desired by the
 	 * parameter and give an error if we can't.
 	 */
-	switch (p->type) {
-	case fs_param_is_flag:
-		if (param->type != fs_value_is_flag &&
-		    (param->type != fs_value_is_string || result->has_value))
-			return invalf(fc, "%s: Unexpected value for '%s'",
-				      desc->name, param->key);
-		result->boolean = true;
-		goto okay;
-
-	case fs_param_is_bool:
-		switch (param->type) {
-		case fs_value_is_flag:
-			result->boolean = true;
-			goto okay;
-		case fs_value_is_string:
-			if (param->size == 0) {
-				result->boolean = true;
-				goto okay;
-			}
-			b = lookup_constant(bool_names, param->string, -1);
-			if (b == -1)
-				goto bad_value;
-			result->boolean = b;
-			goto okay;
-		default:
-			goto bad_value;
-		}
-
-	case fs_param_is_u32:
-		ret = kstrtouint(param->string, 0, &result->uint_32);
-		goto maybe_okay;
-	case fs_param_is_u32_octal:
-		ret = kstrtouint(param->string, 8, &result->uint_32);
-		goto maybe_okay;
-	case fs_param_is_u32_hex:
-		ret = kstrtouint(param->string, 16, &result->uint_32);
-		goto maybe_okay;
-	case fs_param_is_s32:
-		ret = kstrtoint(param->string, 0, &result->int_32);
-		goto maybe_okay;
-	case fs_param_is_u64:
-		ret = kstrtoull(param->string, 0, &result->uint_64);
-		goto maybe_okay;
-
-	case fs_param_is_enum:
-		for (e = desc->enums; e->name[0]; e++) {
-			if (e->opt == p->opt &&
-			    strcmp(e->name, param->string) == 0) {
-				result->uint_32 = e->value;
-				goto okay;
-			}
-		}
-		goto bad_value;
-
-	case fs_param_is_string:
-		goto okay;
-	case fs_param_is_blob:
-		if (param->type != fs_value_is_blob)
-			goto bad_value;
-		goto okay;
-
-	case fs_param_is_fd: {
-		switch (param->type) {
-		case fs_value_is_string:
-			if (!result->has_value)
-				goto bad_value;
-
-			ret = kstrtouint(param->string, 0, &result->uint_32);
-			break;
-		case fs_value_is_file:
-			result->uint_32 = param->dirfd;
-			ret = 0;
-		default:
-			goto bad_value;
-		}
-
-		if (result->uint_32 > INT_MAX)
-			goto bad_value;
-		goto maybe_okay;
-	}
-
-	case fs_param_is_blockdev:
-	case fs_param_is_path:
-		goto okay;
-	default:
-		BUG();
+	if (is_flag(p)) {
+		if (param->type != fs_value_is_flag)
+			return inval_plog(log, "Unexpected value for '%s'",
+				      param->key);
+		result->boolean = !result->negated;
+	} else  {
+		int ret = p->type(log, p, param, result);
+		if (ret)
+			return ret;
 	}
-
-maybe_okay:
-	if (ret < 0)
-		goto bad_value;
-okay:
 	return p->opt;
-
-bad_value:
-	return invalf(fc, "%s: Bad value for '%s'", desc->name, param->key);
-unknown_parameter:
-	return -ENOPARAM;
 }
-EXPORT_SYMBOL(fs_parse);
+EXPORT_SYMBOL(__fs_parse);
 
 /**
  * fs_lookup_param - Look up a path referred to by a parameter
@@ -267,9 +157,6 @@ int fs_lookup_param(struct fs_context *fc,
 			return PTR_ERR(f);
 		put_f = true;
 		break;
-	case fs_value_is_filename_empty:
-		flags = LOOKUP_EMPTY;
-		/* Fall through */
 	case fs_value_is_filename:
 		f = param->name;
 		put_f = false;
@@ -302,6 +189,124 @@ out:
 }
 EXPORT_SYMBOL(fs_lookup_param);
 
+int fs_param_bad_value(struct p_log *log, struct fs_parameter *param)
+{
+	return inval_plog(log, "Bad value for '%s'", param->key);
+}
+
+int fs_param_is_bool(struct p_log *log, const struct fs_parameter_spec *p,
+		     struct fs_parameter *param, struct fs_parse_result *result)
+{
+	int b;
+	if (param->type != fs_value_is_string)
+		return fs_param_bad_value(log, param);
+	b = lookup_constant(bool_names, param->string, -1);
+	if (b == -1)
+		return fs_param_bad_value(log, param);
+	result->boolean = b;
+	return 0;
+}
+EXPORT_SYMBOL(fs_param_is_bool);
+
+int fs_param_is_u32(struct p_log *log, const struct fs_parameter_spec *p,
+		    struct fs_parameter *param, struct fs_parse_result *result)
+{
+	int base = (unsigned long)p->data;
+	if (param->type != fs_value_is_string ||
+	    kstrtouint(param->string, base, &result->uint_32) < 0)
+		return fs_param_bad_value(log, param);
+	return 0;
+}
+EXPORT_SYMBOL(fs_param_is_u32);
+
+int fs_param_is_s32(struct p_log *log, const struct fs_parameter_spec *p,
+		    struct fs_parameter *param, struct fs_parse_result *result)
+{
+	if (param->type != fs_value_is_string ||
+	    kstrtoint(param->string, 0, &result->int_32) < 0)
+		return fs_param_bad_value(log, param);
+	return 0;
+}
+EXPORT_SYMBOL(fs_param_is_s32);
+
+int fs_param_is_u64(struct p_log *log, const struct fs_parameter_spec *p,
+		    struct fs_parameter *param, struct fs_parse_result *result)
+{
+	if (param->type != fs_value_is_string ||
+	    kstrtoull(param->string, 0, &result->uint_64) < 0)
+		return fs_param_bad_value(log, param);
+	return 0;
+}
+EXPORT_SYMBOL(fs_param_is_u64);
+
+int fs_param_is_enum(struct p_log *log, const struct fs_parameter_spec *p,
+		     struct fs_parameter *param, struct fs_parse_result *result)
+{
+	const struct constant_table *c;
+	if (param->type != fs_value_is_string)
+		return fs_param_bad_value(log, param);
+	c = __lookup_constant(p->data, param->string);
+	if (!c)
+		return fs_param_bad_value(log, param);
+	result->uint_32 = c->value;
+	return 0;
+}
+EXPORT_SYMBOL(fs_param_is_enum);
+
+int fs_param_is_string(struct p_log *log, const struct fs_parameter_spec *p,
+		       struct fs_parameter *param, struct fs_parse_result *result)
+{
+	if (param->type != fs_value_is_string || !*param->string)
+		return fs_param_bad_value(log, param);
+	return 0;
+}
+EXPORT_SYMBOL(fs_param_is_string);
+
+int fs_param_is_blob(struct p_log *log, const struct fs_parameter_spec *p,
+		     struct fs_parameter *param, struct fs_parse_result *result)
+{
+	if (param->type != fs_value_is_blob)
+		return fs_param_bad_value(log, param);
+	return 0;
+}
+EXPORT_SYMBOL(fs_param_is_blob);
+
+int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p,
+		  struct fs_parameter *param, struct fs_parse_result *result)
+{
+	switch (param->type) {
+	case fs_value_is_string:
+		if (kstrtouint(param->string, 0, &result->uint_32) < 0)
+			break;
+		if (result->uint_32 <= INT_MAX)
+			return 0;
+		break;
+	case fs_value_is_file:
+		result->uint_32 = param->dirfd;
+		if (result->uint_32 <= INT_MAX)
+			return 0;
+		break;
+	default:
+		break;
+	}
+	return fs_param_bad_value(log, param);
+}
+EXPORT_SYMBOL(fs_param_is_fd);
+
+int fs_param_is_blockdev(struct p_log *log, const struct fs_parameter_spec *p,
+		  struct fs_parameter *param, struct fs_parse_result *result)
+{
+	return 0;
+}
+EXPORT_SYMBOL(fs_param_is_blockdev);
+
+int fs_param_is_path(struct p_log *log, const struct fs_parameter_spec *p,
+		     struct fs_parameter *param, struct fs_parse_result *result)
+{
+	return 0;
+}
+EXPORT_SYMBOL(fs_param_is_path);
+
 #ifdef CONFIG_VALIDATE_FS_PARSER
 /**
  * validate_constant_table - Validate a constant table
@@ -357,102 +362,26 @@ bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
  * fs_validate_description - Validate a parameter description
  * @desc: The parameter description to validate.
  */
-bool fs_validate_description(const struct fs_parameter_description *desc)
+bool fs_validate_description(const char *name,
+	const struct fs_parameter_spec *desc)
 {
 	const struct fs_parameter_spec *param, *p2;
-	const struct fs_parameter_enum *e;
-	const char *name = desc->name;
-	unsigned int nr_params = 0;
-	bool good = true, enums = false;
+	bool good = true;
 
 	pr_notice("*** VALIDATE %s ***\n", name);
 
-	if (!name[0]) {
-		pr_err("VALIDATE Parser: No name\n");
-		name = "Unknown";
-		good = false;
-	}
-
-	if (desc->specs) {
-		for (param = desc->specs; param->name; param++) {
-			enum fs_parameter_type t = param->type;
-
-			/* Check that the type is in range */
-			if (t == __fs_param_wasnt_defined ||
-			    t >= nr__fs_parameter_type) {
-				pr_err("VALIDATE %s: PARAM[%s] Bad type %u\n",
-				       name, param->name, t);
-				good = false;
-			} else if (t == fs_param_is_enum) {
-				enums = true;
-			}
-
-			/* Check for duplicate parameter names */
-			for (p2 = desc->specs; p2 < param; p2++) {
-				if (strcmp(param->name, p2->name) == 0) {
-					pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n",
-					       name, param->name);
-					good = false;
-				}
-			}
-		}
-
-		nr_params = param - desc->specs;
-	}
-
-	if (desc->enums) {
-		if (!nr_params) {
-			pr_err("VALIDATE %s: Enum table but no parameters\n",
-			       name);
-			good = false;
-			goto no_enums;
-		}
-		if (!enums) {
-			pr_err("VALIDATE %s: Enum table but no enum-type values\n",
-			       name);
-			good = false;
-			goto no_enums;
-		}
-
-		for (e = desc->enums; e->name[0]; e++) {
-			/* Check that all entries in the enum table have at
-			 * least one parameter that uses them.
-			 */
-			for (param = desc->specs; param->name; param++) {
-				if (param->opt == e->opt &&
-				    param->type != fs_param_is_enum) {
-					pr_err("VALIDATE %s: e[%tu] enum val for %s\n",
-					       name, e - desc->enums, param->name);
-					good = false;
-				}
-			}
-		}
-
-		/* Check that all enum-type parameters have at least one enum
-		 * value in the enum table.
-		 */
-		for (param = desc->specs; param->name; param++) {
-			if (param->type != fs_param_is_enum)
-				continue;
-			for (e = desc->enums; e->name[0]; e++)
-				if (e->opt == param->opt)
-					break;
-			if (!e->name[0]) {
-				pr_err("VALIDATE %s: PARAM[%s] enum with no values\n",
+	for (param = desc; param->name; param++) {
+		/* Check for duplicate parameter names */
+		for (p2 = desc; p2 < param; p2++) {
+			if (strcmp(param->name, p2->name) == 0) {
+				if (is_flag(param) != is_flag(p2))
+					continue;
+				pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n",
 				       name, param->name);
 				good = false;
 			}
 		}
-	} else {
-		if (enums) {
-			pr_err("VALIDATE %s: enum-type values, but no enum table\n",
-			       name);
-			good = false;
-			goto no_enums;
-		}
 	}
-
-no_enums:
 	return good;
 }
 #endif /* CONFIG_VALIDATE_FS_PARSER */
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 9616af3768e1..08e91efbce53 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -111,7 +111,7 @@ extern void fscache_enqueue_object(struct fscache_object *);
  * object-list.c
  */
 #ifdef CONFIG_FSCACHE_OBJECT_LIST
-extern const struct file_operations fscache_objlist_fops;
+extern const struct proc_ops fscache_objlist_proc_ops;
 
 extern void fscache_objlist_add(struct fscache_object *);
 extern void fscache_objlist_remove(struct fscache_object *);
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 72ebfe578f40..e106a1a1600d 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -7,6 +7,7 @@
 
 #define FSCACHE_DEBUG_LEVEL COOKIE
 #include <linux/module.h>
+#include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/key.h>
@@ -405,9 +406,9 @@ static int fscache_objlist_release(struct inode *inode, struct file *file)
 	return seq_release(inode, file);
 }
 
-const struct file_operations fscache_objlist_fops = {
-	.open		= fscache_objlist_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= fscache_objlist_release,
+const struct proc_ops fscache_objlist_proc_ops = {
+	.proc_open	= fscache_objlist_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= fscache_objlist_release,
 };
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c
index 5523446e2952..90a7bc22f7e1 100644
--- a/fs/fscache/proc.c
+++ b/fs/fscache/proc.c
@@ -35,7 +35,7 @@ int __init fscache_proc_init(void)
 
 #ifdef CONFIG_FSCACHE_OBJECT_LIST
 	if (!proc_create("fs/fscache/objects", S_IFREG | 0444, NULL,
-			 &fscache_objlist_fops))
+			 &fscache_objlist_proc_ops))
 		goto error_objects;
 #endif
 
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 043ffa8dc263..2fa3f241b762 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -25,7 +25,7 @@ static ssize_t fscontext_read(struct file *file,
 			      char __user *_buf, size_t len, loff_t *pos)
 {
 	struct fs_context *fc = file->private_data;
-	struct fc_log *log = fc->log;
+	struct fc_log *log = fc->log.log;
 	unsigned int logsize = ARRAY_SIZE(log->buffer);
 	ssize_t ret;
 	char *p;
@@ -97,11 +97,11 @@ static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags)
 
 static int fscontext_alloc_log(struct fs_context *fc)
 {
-	fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL);
-	if (!fc->log)
+	fc->log.log = kzalloc(sizeof(*fc->log.log), GFP_KERNEL);
+	if (!fc->log.log)
 		return -ENOMEM;
-	refcount_set(&fc->log->usage, 1);
-	fc->log->owner = fc->fs_type->owner;
+	refcount_set(&fc->log.log->usage, 1);
+	fc->log.log->owner = fc->fs_type->owner;
 	return 0;
 }
 
@@ -321,6 +321,7 @@ SYSCALL_DEFINE5(fsconfig,
 	struct fs_context *fc;
 	struct fd f;
 	int ret;
+	int lookup_flags = 0;
 
 	struct fs_parameter param = {
 		.type	= fs_value_is_undefined,
@@ -409,19 +410,12 @@ SYSCALL_DEFINE5(fsconfig,
 			goto out_key;
 		}
 		break;
+	case FSCONFIG_SET_PATH_EMPTY:
+		lookup_flags = LOOKUP_EMPTY;
+		/* fallthru */
 	case FSCONFIG_SET_PATH:
 		param.type = fs_value_is_filename;
-		param.name = getname_flags(_value, 0, NULL);
-		if (IS_ERR(param.name)) {
-			ret = PTR_ERR(param.name);
-			goto out_key;
-		}
-		param.dirfd = aux;
-		param.size = strlen(param.name->name);
-		break;
-	case FSCONFIG_SET_PATH_EMPTY:
-		param.type = fs_value_is_filename_empty;
-		param.name = getname_flags(_value, LOOKUP_EMPTY, NULL);
+		param.name = getname_flags(_value, lookup_flags, NULL);
 		if (IS_ERR(param.name)) {
 			ret = PTR_ERR(param.name);
 			goto out_key;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 00015d851382..030f094910c3 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -451,8 +451,8 @@ static int cuse_send_init(struct cuse_conn *cc)
 	ap->args.out_args[0].size = sizeof(ia->out);
 	ap->args.out_args[0].value = &ia->out;
 	ap->args.out_args[1].size = CUSE_INIT_INFO_MAX;
-	ap->args.out_argvar = 1;
-	ap->args.out_pages = 1;
+	ap->args.out_argvar = true;
+	ap->args.out_pages = true;
 	ap->num_pages = 1;
 	ap->pages = &ia->page;
 	ap->descs = &ia->desc;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ee190119f45c..de1e2fde60bd 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -818,7 +818,7 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
 	struct fuse_conn *fc = get_fuse_conn(olddir);
 	int err;
 
-	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return -EINVAL;
 
 	if (flags) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ce715380143c..9d67b830fb7a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -803,6 +803,10 @@ static int fuse_do_readpage(struct file *file, struct page *page)
 
 	attr_ver = fuse_get_attr_version(fc);
 
+	/* Don't overflow end offset */
+	if (pos + (desc.length - 1) == LLONG_MAX)
+		desc.length--;
+
 	fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
 	res = fuse_simple_request(fc, &ia.ap.args);
 	if (res < 0)
@@ -888,6 +892,14 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
 	ap->args.out_pages = true;
 	ap->args.page_zeroing = true;
 	ap->args.page_replace = true;
+
+	/* Don't overflow end offset */
+	if (pos + (count - 1) == LLONG_MAX) {
+		count--;
+		ap->descs[ap->num_pages - 1].length--;
+	}
+	WARN_ON((loff_t) (pos + count) < 0);
+
 	fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
 	ia->read.attr_ver = fuse_get_attr_version(fc);
 	if (fc->async_read) {
@@ -1397,9 +1409,9 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 	}
 
 	if (write)
-		ap->args.in_pages = 1;
+		ap->args.in_pages = true;
 	else
-		ap->args.out_pages = 1;
+		ap->args.out_pages = true;
 
 	*nbytesp = nbytes;
 
@@ -1465,6 +1477,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 		}
 		ia = NULL;
 		if (nres < 0) {
+			iov_iter_revert(iter, nbytes);
 			err = nres;
 			break;
 		}
@@ -1473,8 +1486,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 		count -= nres;
 		res += nres;
 		pos += nres;
-		if (nres != nbytes)
+		if (nres != nbytes) {
+			iov_iter_revert(iter, nbytes - nres);
 			break;
+		}
 		if (count) {
 			max_pages = iov_iter_npages(iter, fc->max_pages);
 			ia = fuse_io_alloc(io, max_pages);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 16aec32f7f3d..95d712d44ca1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -448,7 +448,7 @@ enum {
 	OPT_ERR
 };
 
-static const struct fs_parameter_spec fuse_param_specs[] = {
+static const struct fs_parameter_spec fuse_fs_parameters[] = {
 	fsparam_string	("source",		OPT_SOURCE),
 	fsparam_u32	("fd",			OPT_FD),
 	fsparam_u32oct	("rootmode",		OPT_ROOTMODE),
@@ -462,68 +462,63 @@ static const struct fs_parameter_spec fuse_param_specs[] = {
 	{}
 };
 
-static const struct fs_parameter_description fuse_fs_parameters = {
-	.name		= "fuse",
-	.specs		= fuse_param_specs,
-};
-
 static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct fs_parse_result result;
 	struct fuse_fs_context *ctx = fc->fs_private;
 	int opt;
 
-	opt = fs_parse(fc, &fuse_fs_parameters, param, &result);
+	opt = fs_parse(fc, fuse_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
 
 	switch (opt) {
 	case OPT_SOURCE:
 		if (fc->source)
-			return invalf(fc, "fuse: Multiple sources specified");
+			return invalfc(fc, "Multiple sources specified");
 		fc->source = param->string;
 		param->string = NULL;
 		break;
 
 	case OPT_SUBTYPE:
 		if (ctx->subtype)
-			return invalf(fc, "fuse: Multiple subtypes specified");
+			return invalfc(fc, "Multiple subtypes specified");
 		ctx->subtype = param->string;
 		param->string = NULL;
 		return 0;
 
 	case OPT_FD:
 		ctx->fd = result.uint_32;
-		ctx->fd_present = 1;
+		ctx->fd_present = true;
 		break;
 
 	case OPT_ROOTMODE:
 		if (!fuse_valid_type(result.uint_32))
-			return invalf(fc, "fuse: Invalid rootmode");
+			return invalfc(fc, "Invalid rootmode");
 		ctx->rootmode = result.uint_32;
-		ctx->rootmode_present = 1;
+		ctx->rootmode_present = true;
 		break;
 
 	case OPT_USER_ID:
 		ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
 		if (!uid_valid(ctx->user_id))
-			return invalf(fc, "fuse: Invalid user_id");
-		ctx->user_id_present = 1;
+			return invalfc(fc, "Invalid user_id");
+		ctx->user_id_present = true;
 		break;
 
 	case OPT_GROUP_ID:
 		ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
 		if (!gid_valid(ctx->group_id))
-			return invalf(fc, "fuse: Invalid group_id");
-		ctx->group_id_present = 1;
+			return invalfc(fc, "Invalid group_id");
+		ctx->group_id_present = true;
 		break;
 
 	case OPT_DEFAULT_PERMISSIONS:
-		ctx->default_permissions = 1;
+		ctx->default_permissions = true;
 		break;
 
 	case OPT_ALLOW_OTHER:
-		ctx->allow_other = 1;
+		ctx->allow_other = true;
 		break;
 
 	case OPT_MAX_READ:
@@ -532,7 +527,7 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
 
 	case OPT_BLKSIZE:
 		if (!ctx->is_bdev)
-			return invalf(fc, "fuse: blksize only supported for fuseblk");
+			return invalfc(fc, "blksize only supported for fuseblk");
 		ctx->blksize = result.uint_32;
 		break;
 
@@ -997,7 +992,7 @@ void fuse_send_init(struct fuse_conn *fc)
 	/* Variable length argument used for backward compatibility
 	   with interface version < 7.5.  Rest of init_out is zeroed
 	   by do_get_request(), so a short reply is not a problem */
-	ia->args.out_argvar = 1;
+	ia->args.out_argvar = true;
 	ia->args.out_args[0].size = sizeof(ia->out);
 	ia->args.out_args[0].value = &ia->out;
 	ia->args.force = true;
@@ -1347,7 +1342,7 @@ static struct file_system_type fuse_fs_type = {
 	.name		= "fuse",
 	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
 	.init_fs_context = fuse_init_fs_context,
-	.parameters	= &fuse_fs_parameters,
+	.parameters	= fuse_fs_parameters,
 	.kill_sb	= fuse_kill_sb_anon,
 };
 MODULE_ALIAS_FS("fuse");
@@ -1363,7 +1358,7 @@ static struct file_system_type fuseblk_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuseblk",
 	.init_fs_context = fuse_init_fs_context,
-	.parameters	= &fuse_fs_parameters,
+	.parameters	= fuse_fs_parameters,
 	.kill_sb	= fuse_kill_sb_blk,
 	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
 };
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 6a40f75a0d25..90e3f01bd796 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -332,7 +332,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
 		return -ENOMEM;
 
 	plus = fuse_use_readdirplus(inode, ctx);
-	ap->args.out_pages = 1;
+	ap->args.out_pages = true;
 	ap->num_pages = 1;
 	ap->pages = &page;
 	ap->descs = &desc;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 9d58295ccf7a..cb26be6f4351 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -847,7 +847,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	struct gfs2_inode *ip = GFS2_I(inode);
-	ssize_t written = 0, ret;
+	ssize_t ret;
 
 	ret = gfs2_rsqa_alloc(ip);
 	if (ret)
@@ -867,68 +867,58 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	inode_lock(inode);
 	ret = generic_write_checks(iocb, from);
 	if (ret <= 0)
-		goto out;
-
-	/* We can write back this queue in page reclaim */
-	current->backing_dev_info = inode_to_bdi(inode);
+		goto out_unlock;
 
 	ret = file_remove_privs(file);
 	if (ret)
-		goto out2;
+		goto out_unlock;
 
 	ret = file_update_time(file);
 	if (ret)
-		goto out2;
+		goto out_unlock;
 
 	if (iocb->ki_flags & IOCB_DIRECT) {
 		struct address_space *mapping = file->f_mapping;
-		loff_t pos, endbyte;
-		ssize_t buffered;
+		ssize_t buffered, ret2;
 
-		written = gfs2_file_direct_write(iocb, from);
-		if (written < 0 || !iov_iter_count(from))
-			goto out2;
+		ret = gfs2_file_direct_write(iocb, from);
+		if (ret < 0 || !iov_iter_count(from))
+			goto out_unlock;
 
-		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
-		if (unlikely(ret < 0))
-			goto out2;
-		buffered = ret;
+		iocb->ki_flags |= IOCB_DSYNC;
+		current->backing_dev_info = inode_to_bdi(inode);
+		buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+		current->backing_dev_info = NULL;
+		if (unlikely(buffered <= 0))
+			goto out_unlock;
 
 		/*
 		 * We need to ensure that the page cache pages are written to
 		 * disk and invalidated to preserve the expected O_DIRECT
-		 * semantics.
+		 * semantics.  If the writeback or invalidate fails, only report
+		 * the direct I/O range as we don't know if the buffered pages
+		 * made it to disk.
 		 */
-		pos = iocb->ki_pos;
-		endbyte = pos + buffered - 1;
-		ret = filemap_write_and_wait_range(mapping, pos, endbyte);
-		if (!ret) {
-			iocb->ki_pos += buffered;
-			written += buffered;
-			invalidate_mapping_pages(mapping,
-						 pos >> PAGE_SHIFT,
-						 endbyte >> PAGE_SHIFT);
-		} else {
-			/*
-			 * We don't know how much we wrote, so just return
-			 * the number of bytes which were direct-written
-			 */
-		}
+		iocb->ki_pos += buffered;
+		ret2 = generic_write_sync(iocb, buffered);
+		invalidate_mapping_pages(mapping,
+				(iocb->ki_pos - buffered) >> PAGE_SHIFT,
+				(iocb->ki_pos - 1) >> PAGE_SHIFT);
+		if (!ret || ret2 > 0)
+			ret += ret2;
 	} else {
+		current->backing_dev_info = inode_to_bdi(inode);
 		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
-		if (likely(ret > 0))
+		current->backing_dev_info = NULL;
+		if (likely(ret > 0)) {
 			iocb->ki_pos += ret;
+			ret = generic_write_sync(iocb, ret);
+		}
 	}
 
-out2:
-	current->backing_dev_info = NULL;
-out:
+out_unlock:
 	inode_unlock(inode);
-	if (likely(ret > 0)) {
-		/* Handle various SYNC-type writes */
-		ret = generic_write_sync(iocb, ret);
-	}
-	return written ? written : ret;
+	return ret;
 }
 
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index d9431724b788..c090d5ad3f22 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -422,7 +422,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
 
 	for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
 		if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
-			if (lh.lh_sequence > head->lh_sequence)
+			if (lh.lh_sequence >= head->lh_sequence)
 				*head = lh;
 			else {
 				ret = true;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b3e904bcc02c..a1a8ef7ed3fd 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1252,6 +1252,7 @@ enum gfs2_param {
 	Opt_upgrade,
 	Opt_acl,
 	Opt_quota,
+	Opt_quota_flag,
 	Opt_suiddir,
 	Opt_data,
 	Opt_meta,
@@ -1266,17 +1267,11 @@ enum gfs2_param {
 	Opt_loccookie,
 };
 
-enum opt_quota {
-	Opt_quota_unset = 0,
-	Opt_quota_off,
-	Opt_quota_account,
-	Opt_quota_on,
-};
-
-static const unsigned int opt_quota_values[] = {
-	[Opt_quota_off]     = GFS2_QUOTA_OFF,
-	[Opt_quota_account] = GFS2_QUOTA_ACCOUNT,
-	[Opt_quota_on]      = GFS2_QUOTA_ON,
+static const struct constant_table gfs2_param_quota[] = {
+	{"off",        GFS2_QUOTA_OFF},
+	{"account",    GFS2_QUOTA_ACCOUNT},
+	{"on",         GFS2_QUOTA_ON},
+	{}
 };
 
 enum opt_data {
@@ -1284,12 +1279,24 @@ enum opt_data {
 	Opt_data_ordered   = GFS2_DATA_ORDERED,
 };
 
+static const struct constant_table gfs2_param_data[] = {
+	{"writeback",  Opt_data_writeback },
+	{"ordered",    Opt_data_ordered },
+	{}
+};
+
 enum opt_errors {
 	Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
 	Opt_errors_panic    = GFS2_ERRORS_PANIC,
 };
 
-static const struct fs_parameter_spec gfs2_param_specs[] = {
+static const struct constant_table gfs2_param_errors[] = {
+	{"withdraw",   Opt_errors_withdraw },
+	{"panic",      Opt_errors_panic },
+	{}
+};
+
+static const struct fs_parameter_spec gfs2_fs_parameters[] = {
 	fsparam_string ("lockproto",          Opt_lockproto),
 	fsparam_string ("locktable",          Opt_locktable),
 	fsparam_string ("hostdata",           Opt_hostdata),
@@ -1302,11 +1309,11 @@ static const struct fs_parameter_spec gfs2_param_specs[] = {
 	fsparam_flag   ("upgrade",            Opt_upgrade),
 	fsparam_flag_no("acl",                Opt_acl),
 	fsparam_flag_no("suiddir",            Opt_suiddir),
-	fsparam_enum   ("data",               Opt_data),
+	fsparam_enum   ("data",               Opt_data, gfs2_param_data),
 	fsparam_flag   ("meta",               Opt_meta),
 	fsparam_flag_no("discard",            Opt_discard),
 	fsparam_s32    ("commit",             Opt_commit),
-	fsparam_enum   ("errors",             Opt_errors),
+	fsparam_enum   ("errors",             Opt_errors, gfs2_param_errors),
 	fsparam_s32    ("statfs_quantum",     Opt_statfs_quantum),
 	fsparam_s32    ("statfs_percent",     Opt_statfs_percent),
 	fsparam_s32    ("quota_quantum",      Opt_quota_quantum),
@@ -1314,27 +1321,11 @@ static const struct fs_parameter_spec gfs2_param_specs[] = {
 	fsparam_flag_no("rgrplvb",            Opt_rgrplvb),
 	fsparam_flag_no("loccookie",          Opt_loccookie),
 	/* quota can be a flag or an enum so it gets special treatment */
-	__fsparam(fs_param_is_enum, "quota", Opt_quota, fs_param_neg_with_no|fs_param_v_optional),
-	{}
-};
-
-static const struct fs_parameter_enum gfs2_param_enums[] = {
-	{ Opt_quota,    "off",        Opt_quota_off },
-	{ Opt_quota,    "account",    Opt_quota_account },
-	{ Opt_quota,    "on",         Opt_quota_on },
-	{ Opt_data,     "writeback",  Opt_data_writeback },
-	{ Opt_data,     "ordered",    Opt_data_ordered },
-	{ Opt_errors,   "withdraw",   Opt_errors_withdraw },
-	{ Opt_errors,   "panic",      Opt_errors_panic },
+	fsparam_flag_no("quota",	      Opt_quota_flag),
+	fsparam_enum("quota",		      Opt_quota, gfs2_param_quota),
 	{}
 };
 
-static const struct fs_parameter_description gfs2_fs_parameters = {
-	.name = "gfs2",
-	.specs = gfs2_param_specs,
-	.enums = gfs2_param_enums,
-};
-
 /* Parse a single mount parameter */
 static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
@@ -1342,7 +1333,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	struct fs_parse_result result;
 	int o;
 
-	o = fs_parse(fc, &gfs2_fs_parameters, param, &result);
+	o = fs_parse(fc, gfs2_fs_parameters, param, &result);
 	if (o < 0)
 		return o;
 
@@ -1370,7 +1361,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		break;
 	case Opt_debug:
 		if (result.boolean && args->ar_errors == GFS2_ERRORS_PANIC)
-			return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+			return invalfc(fc, "-o debug and -o errors=panic are mutually exclusive");
 		args->ar_debug = result.boolean;
 		break;
 	case Opt_upgrade:
@@ -1379,17 +1370,11 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	case Opt_acl:
 		args->ar_posix_acl = result.boolean;
 		break;
+	case Opt_quota_flag:
+		args->ar_quota = result.negated ? GFS2_QUOTA_OFF : GFS2_QUOTA_ON;
+		break;
 	case Opt_quota:
-		/* The quota option can be a flag or an enum. A non-zero int_32
-		   result means that we have an enum index. Otherwise we have
-		   to rely on the 'negated' flag to tell us whether 'quota' or
-		   'noquota' was specified. */
-		if (result.negated)
-			args->ar_quota = GFS2_QUOTA_OFF;
-		else if (result.int_32 > 0)
-			args->ar_quota = opt_quota_values[result.int_32];
-		else
-			args->ar_quota = GFS2_QUOTA_ON;
+		args->ar_quota = result.int_32;
 		break;
 	case Opt_suiddir:
 		args->ar_suiddir = result.boolean;
@@ -1406,27 +1391,27 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		break;
 	case Opt_commit:
 		if (result.int_32 <= 0)
-			return invalf(fc, "gfs2: commit mount option requires a positive numeric argument");
+			return invalfc(fc, "commit mount option requires a positive numeric argument");
 		args->ar_commit = result.int_32;
 		break;
 	case Opt_statfs_quantum:
 		if (result.int_32 < 0)
-			return invalf(fc, "gfs2: statfs_quantum mount option requires a non-negative numeric argument");
+			return invalfc(fc, "statfs_quantum mount option requires a non-negative numeric argument");
 		args->ar_statfs_quantum = result.int_32;
 		break;
 	case Opt_quota_quantum:
 		if (result.int_32 <= 0)
-			return invalf(fc, "gfs2: quota_quantum mount option requires a positive numeric argument");
+			return invalfc(fc, "quota_quantum mount option requires a positive numeric argument");
 		args->ar_quota_quantum = result.int_32;
 		break;
 	case Opt_statfs_percent:
 		if (result.int_32 < 0 || result.int_32 > 100)
-			return invalf(fc, "gfs2: statfs_percent mount option requires a numeric argument between 0 and 100");
+			return invalfc(fc, "statfs_percent mount option requires a numeric argument between 0 and 100");
 		args->ar_statfs_percent = result.int_32;
 		break;
 	case Opt_errors:
 		if (args->ar_debug && result.uint_32 == GFS2_ERRORS_PANIC)
-			return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+			return invalfc(fc, "-o debug and -o errors=panic are mutually exclusive");
 		args->ar_errors = result.uint_32;
 		break;
 	case Opt_barrier:
@@ -1439,7 +1424,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		args->ar_loccookie = result.boolean;
 		break;
 	default:
-		return invalf(fc, "gfs2: invalid mount option: %s", param->key);
+		return invalfc(fc, "invalid mount option: %s", param->key);
 	}
 	return 0;
 }
@@ -1465,27 +1450,27 @@ static int gfs2_reconfigure(struct fs_context *fc)
 	spin_unlock(&gt->gt_spin);
 
 	if (strcmp(newargs->ar_lockproto, oldargs->ar_lockproto)) {
-		errorf(fc, "gfs2: reconfiguration of locking protocol not allowed");
+		errorfc(fc, "reconfiguration of locking protocol not allowed");
 		return -EINVAL;
 	}
 	if (strcmp(newargs->ar_locktable, oldargs->ar_locktable)) {
-		errorf(fc, "gfs2: reconfiguration of lock table not allowed");
+		errorfc(fc, "reconfiguration of lock table not allowed");
 		return -EINVAL;
 	}
 	if (strcmp(newargs->ar_hostdata, oldargs->ar_hostdata)) {
-		errorf(fc, "gfs2: reconfiguration of host data not allowed");
+		errorfc(fc, "reconfiguration of host data not allowed");
 		return -EINVAL;
 	}
 	if (newargs->ar_spectator != oldargs->ar_spectator) {
-		errorf(fc, "gfs2: reconfiguration of spectator mode not allowed");
+		errorfc(fc, "reconfiguration of spectator mode not allowed");
 		return -EINVAL;
 	}
 	if (newargs->ar_localflocks != oldargs->ar_localflocks) {
-		errorf(fc, "gfs2: reconfiguration of localflocks not allowed");
+		errorfc(fc, "reconfiguration of localflocks not allowed");
 		return -EINVAL;
 	}
 	if (newargs->ar_meta != oldargs->ar_meta) {
-		errorf(fc, "gfs2: switching between gfs2 and gfs2meta not allowed");
+		errorfc(fc, "switching between gfs2 and gfs2meta not allowed");
 		return -EINVAL;
 	}
 	if (oldargs->ar_spectator)
@@ -1495,11 +1480,11 @@ static int gfs2_reconfigure(struct fs_context *fc)
 		if (fc->sb_flags & SB_RDONLY) {
 			error = gfs2_make_fs_ro(sdp);
 			if (error)
-				errorf(fc, "gfs2: unable to remount read-only");
+				errorfc(fc, "unable to remount read-only");
 		} else {
 			error = gfs2_make_fs_rw(sdp);
 			if (error)
-				errorf(fc, "gfs2: unable to remount read-write");
+				errorfc(fc, "unable to remount read-write");
 		}
 	}
 	sdp->sd_args = *newargs;
@@ -1644,7 +1629,7 @@ struct file_system_type gfs2_fs_type = {
 	.name = "gfs2",
 	.fs_flags = FS_REQUIRES_DEV,
 	.init_fs_context = gfs2_init_fs_context,
-	.parameters = &gfs2_fs_parameters,
+	.parameters = gfs2_fs_parameters,
 	.kill_sb = gfs2_kill_sb,
 	.owner = THIS_MODULE,
 };
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a66e425884d1..aff8642f0c2e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -73,7 +73,7 @@ enum hugetlb_param {
 	Opt_uid,
 };
 
-static const struct fs_parameter_spec hugetlb_param_specs[] = {
+static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
 	fsparam_u32   ("gid",		Opt_gid),
 	fsparam_string("min_size",	Opt_min_size),
 	fsparam_u32   ("mode",		Opt_mode),
@@ -84,11 +84,6 @@ static const struct fs_parameter_spec hugetlb_param_specs[] = {
 	{}
 };
 
-static const struct fs_parameter_description hugetlb_fs_parameters = {
-	.name		= "hugetlbfs",
-	.specs		= hugetlb_param_specs,
-};
-
 #ifdef CONFIG_NUMA
 static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
 					struct inode *inode, pgoff_t index)
@@ -1171,7 +1166,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
 	unsigned long ps;
 	int opt;
 
-	opt = fs_parse(fc, &hugetlb_fs_parameters, param, &result);
+	opt = fs_parse(fc, hugetlb_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
 
@@ -1233,7 +1228,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
 	}
 
 bad_val:
-	return invalf(fc, "hugetlbfs: Bad value '%s' for mount option '%s'\n",
+	return invalfc(fc, "Bad value '%s' for mount option '%s'\n",
 		      param->string, param->key);
 }
 
@@ -1358,7 +1353,7 @@ static int hugetlbfs_init_fs_context(struct fs_context *fc)
 static struct file_system_type hugetlbfs_fs_type = {
 	.name			= "hugetlbfs",
 	.init_fs_context	= hugetlbfs_init_fs_context,
-	.parameters		= &hugetlb_fs_parameters,
+	.parameters		= hugetlb_fs_parameters,
 	.kill_sb		= kill_litter_super,
 };
 
diff --git a/fs/inode.c b/fs/inode.c
index ea15c6d9f274..7d57068b6b7a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1599,25 +1599,31 @@ retry:
 }
 EXPORT_SYMBOL(iput);
 
+#ifdef CONFIG_BLOCK
 /**
  *	bmap	- find a block number in a file
- *	@inode: inode of file
- *	@block: block to find
- *
- *	Returns the block number on the device holding the inode that
- *	is the disk block number for the block of the file requested.
- *	That is, asked for block 4 of inode 1 the function will return the
- *	disk block relative to the disk start that holds that block of the
- *	file.
+ *	@inode:  inode owning the block number being requested
+ *	@block: pointer containing the block to find
+ *
+ *	Replaces the value in *block with the block number on the device holding
+ *	corresponding to the requested block number in the file.
+ *	That is, asked for block 4 of inode 1 the function will replace the
+ *	4 in *block, with disk block relative to the disk start that holds that
+ *	block of the file.
+ *
+ *	Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
+ *	hole, returns 0 and *block is also set to 0.
  */
-sector_t bmap(struct inode *inode, sector_t block)
+int bmap(struct inode *inode, sector_t *block)
 {
-	sector_t res = 0;
-	if (inode->i_mapping->a_ops->bmap)
-		res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
-	return res;
+	if (!inode->i_mapping->a_ops->bmap)
+		return -EINVAL;
+
+	*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
+	return 0;
 }
 EXPORT_SYMBOL(bmap);
+#endif
 
 /*
  * With relative atime, only update atime if the previous atime is
@@ -1683,12 +1689,9 @@ EXPORT_SYMBOL(generic_update_time);
  */
 static int update_time(struct inode *inode, struct timespec64 *time, int flags)
 {
-	int (*update_time)(struct inode *, struct timespec64 *, int);
-
-	update_time = inode->i_op->update_time ? inode->i_op->update_time :
-		generic_update_time;
-
-	return update_time(inode, time, flags);
+	if (inode->i_op->update_time)
+		return inode->i_op->update_time(inode, time, flags);
+	return generic_update_time(inode, time, flags);
 }
 
 /**
@@ -2154,30 +2157,6 @@ void inode_nohighmem(struct inode *inode)
 EXPORT_SYMBOL(inode_nohighmem);
 
 /**
- * timespec64_trunc - Truncate timespec64 to a granularity
- * @t: Timespec64
- * @gran: Granularity in ns.
- *
- * Truncate a timespec64 to a granularity. Always rounds down. gran must
- * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
- */
-struct timespec64 timespec64_trunc(struct timespec64 t, unsigned gran)
-{
-	/* Avoid division in the common cases 1 ns and 1 s. */
-	if (gran == 1) {
-		/* nothing */
-	} else if (gran == NSEC_PER_SEC) {
-		t.tv_nsec = 0;
-	} else if (gran > 1 && gran < NSEC_PER_SEC) {
-		t.tv_nsec -= t.tv_nsec % gran;
-	} else {
-		WARN(1, "illegal file time granularity: %u", gran);
-	}
-	return t;
-}
-EXPORT_SYMBOL(timespec64_trunc);
-
-/**
  * timestamp_truncate - Truncate timespec to a granularity
  * @t: Timespec
  * @inode: inode being updated
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1806afddfea5..77f22c3da30f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -585,8 +585,7 @@ struct io_submit_state {
 	 * io_kiocb alloc cache
 	 */
 	void			*reqs[IO_IOPOLL_BATCH];
-	unsigned		int free_reqs;
-	unsigned		int cur_req;
+	unsigned int		free_reqs;
 
 	/*
 	 * File reference cache
@@ -754,6 +753,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				 struct io_uring_files_update *ip,
 				 unsigned nr_args);
 static int io_grab_files(struct io_kiocb *req);
+static void io_ring_file_ref_flush(struct fixed_file_data *data);
 
 static struct kmem_cache *req_cachep;
 
@@ -1020,21 +1020,28 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 
 static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
 {
+	if (!ctx->cq_ev_fd)
+		return false;
 	if (!ctx->eventfd_async)
 		return true;
 	return io_wq_current_is_worker() || in_interrupt();
 }
 
-static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+static void __io_cqring_ev_posted(struct io_ring_ctx *ctx, bool trigger_ev)
 {
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
 	if (waitqueue_active(&ctx->sqo_wait))
 		wake_up(&ctx->sqo_wait);
-	if (ctx->cq_ev_fd && io_should_trigger_evfd(ctx))
+	if (trigger_ev)
 		eventfd_signal(ctx->cq_ev_fd, 1);
 }
 
+static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+{
+	__io_cqring_ev_posted(ctx, io_should_trigger_evfd(ctx));
+}
+
 /* Returns true if there are no backlogged entries after the flush */
 static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 {
@@ -1183,12 +1190,10 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 			ret = 1;
 		}
 		state->free_reqs = ret - 1;
-		state->cur_req = 1;
-		req = state->reqs[0];
+		req = state->reqs[ret - 1];
 	} else {
-		req = state->reqs[state->cur_req];
 		state->free_reqs--;
-		state->cur_req++;
+		req = state->reqs[state->free_reqs];
 	}
 
 got_it:
@@ -1855,9 +1860,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	unsigned ioprio;
 	int ret;
 
-	if (!req->file)
-		return -EBADF;
-
 	if (S_ISREG(file_inode(req->file)->i_mode))
 		req->flags |= REQ_F_ISREG;
 
@@ -1866,8 +1868,11 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		req->flags |= REQ_F_CUR_POS;
 		kiocb->ki_pos = req->file->f_pos;
 	}
-	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
 	kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
+	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
+	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
+	if (unlikely(ret))
+		return ret;
 
 	ioprio = READ_ONCE(sqe->ioprio);
 	if (ioprio) {
@@ -1879,10 +1884,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	} else
 		kiocb->ki_ioprio = get_current_ioprio();
 
-	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
-	if (unlikely(ret))
-		return ret;
-
 	/* don't allow async punt if RWF_NOWAIT was requested */
 	if ((kiocb->ki_flags & IOCB_NOWAIT) ||
 	    (req->file->f_flags & O_NONBLOCK))
@@ -2164,10 +2165,12 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 {
 	if (!io_op_defs[req->opcode].async_ctx)
 		return 0;
-	if (!req->io && io_alloc_async_ctx(req))
-		return -ENOMEM;
+	if (!req->io) {
+		if (io_alloc_async_ctx(req))
+			return -ENOMEM;
 
-	io_req_map_rw(req, io_size, iovec, fast_iov, iter);
+		io_req_map_rw(req, io_size, iovec, fast_iov, iter);
+	}
 	req->work.func = io_rw_async;
 	return 0;
 }
@@ -2724,9 +2727,16 @@ static int io_fadvise(struct io_kiocb *req, struct io_kiocb **nxt,
 	struct io_fadvise *fa = &req->fadvise;
 	int ret;
 
-	/* DONTNEED may block, others _should_ not */
-	if (fa->advice == POSIX_FADV_DONTNEED && force_nonblock)
-		return -EAGAIN;
+	if (force_nonblock) {
+		switch (fa->advice) {
+		case POSIX_FADV_NORMAL:
+		case POSIX_FADV_RANDOM:
+		case POSIX_FADV_SEQUENTIAL:
+			break;
+		default:
+			return -EAGAIN;
+		}
+	}
 
 	ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
 	if (ret < 0)
@@ -2837,16 +2847,13 @@ static void io_close_finish(struct io_wq_work **workptr)
 		int ret;
 
 		ret = filp_close(req->close.put_file, req->work.files);
-		if (ret < 0) {
+		if (ret < 0)
 			req_set_fail_links(req);
-		}
 		io_cqring_add_event(req, ret);
 	}
 
 	fput(req->close.put_file);
 
-	/* we bypassed the re-issue, drop the submission reference */
-	io_put_req(req);
 	io_put_req_find_next(req, &nxt);
 	if (nxt)
 		io_wq_assign_next(workptr, nxt);
@@ -2888,7 +2895,13 @@ static int io_close(struct io_kiocb *req, struct io_kiocb **nxt,
 
 eagain:
 	req->work.func = io_close_finish;
-	return -EAGAIN;
+	/*
+	 * Do manual async queue here to avoid grabbing files - we don't
+	 * need the files, and it'll cause io_close_finish() to close
+	 * the file again and cause a double CQE entry for this request
+	 */
+	io_queue_async_work(req);
+	return 0;
 }
 
 static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -3083,7 +3096,8 @@ static int io_send(struct io_kiocb *req, struct io_kiocb **nxt,
 		else if (force_nonblock)
 			flags |= MSG_DONTWAIT;
 
-		ret = __sys_sendmsg_sock(sock, &msg, flags);
+		msg.msg_flags = flags;
+		ret = sock_sendmsg(sock, &msg);
 		if (force_nonblock && ret == -EAGAIN)
 			return -EAGAIN;
 		if (ret == -ERESTARTSYS)
@@ -3109,6 +3123,7 @@ static int io_recvmsg_prep(struct io_kiocb *req,
 
 	sr->msg_flags = READ_ONCE(sqe->msg_flags);
 	sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	sr->len = READ_ONCE(sqe->len);
 
 	if (!io || req->opcode == IORING_OP_RECV)
 		return 0;
@@ -3227,7 +3242,7 @@ static int io_recv(struct io_kiocb *req, struct io_kiocb **nxt,
 		else if (force_nonblock)
 			flags |= MSG_DONTWAIT;
 
-		ret = __sys_recvmsg_sock(sock, &msg, NULL, NULL, flags);
+		ret = sock_recvmsg(sock, &msg, flags);
 		if (force_nonblock && ret == -EAGAIN)
 			return -EAGAIN;
 		if (ret == -ERESTARTSYS)
@@ -3561,6 +3576,14 @@ static void io_poll_flush(struct io_wq_work **workptr)
 		__io_poll_flush(req->ctx, nodes);
 }
 
+static void io_poll_trigger_evfd(struct io_wq_work **workptr)
+{
+	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+
+	eventfd_signal(req->ctx->cq_ev_fd, 1);
+	io_put_req(req);
+}
+
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 			void *key)
 {
@@ -3586,14 +3609,22 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 
 		if (llist_empty(&ctx->poll_llist) &&
 		    spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+			bool trigger_ev;
+
 			hash_del(&req->hash_node);
 			io_poll_complete(req, mask, 0);
-			req->flags |= REQ_F_COMP_LOCKED;
-			io_put_req(req);
-			spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
-			io_cqring_ev_posted(ctx);
-			req = NULL;
+			trigger_ev = io_should_trigger_evfd(ctx);
+			if (trigger_ev && eventfd_signal_count()) {
+				trigger_ev = false;
+				req->work.func = io_poll_trigger_evfd;
+			} else {
+				req->flags |= REQ_F_COMP_LOCKED;
+				io_put_req(req);
+				req = NULL;
+			}
+			spin_unlock_irqrestore(&ctx->completion_lock, flags);
+			__io_cqring_ev_posted(ctx, trigger_ev);
 		} else {
 			req->result = mask;
 			req->llist_node.next = NULL;
@@ -4815,8 +4846,7 @@ static void io_submit_state_end(struct io_submit_state *state)
 	blk_finish_plug(&state->plug);
 	io_file_put(state);
 	if (state->free_reqs)
-		kmem_cache_free_bulk(req_cachep, state->free_reqs,
-					&state->reqs[state->cur_req]);
+		kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
 }
 
 /*
@@ -5041,7 +5071,8 @@ static int io_sq_thread(void *data)
 			 * reap events and wake us up.
 			 */
 			if (inflight ||
-			    (!time_after(jiffies, timeout) && ret != -EBUSY)) {
+			    (!time_after(jiffies, timeout) && ret != -EBUSY &&
+			    !percpu_ref_is_dying(&ctx->refs))) {
 				cond_resched();
 				continue;
 			}
@@ -5231,15 +5262,10 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 	if (!data)
 		return -ENXIO;
 
-	/* protect against inflight atomic switch, which drops the ref */
-	percpu_ref_get(&data->refs);
-	/* wait for existing switches */
-	flush_work(&data->ref_work);
 	percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill);
-	wait_for_completion(&data->done);
-	percpu_ref_put(&data->refs);
-	/* flush potential new switch */
 	flush_work(&data->ref_work);
+	wait_for_completion(&data->done);
+	io_ring_file_ref_flush(data);
 	percpu_ref_exit(&data->refs);
 
 	__io_sqe_files_unregister(ctx);
@@ -5477,14 +5503,11 @@ struct io_file_put {
 	struct completion *done;
 };
 
-static void io_ring_file_ref_switch(struct work_struct *work)
+static void io_ring_file_ref_flush(struct fixed_file_data *data)
 {
 	struct io_file_put *pfile, *tmp;
-	struct fixed_file_data *data;
 	struct llist_node *node;
 
-	data = container_of(work, struct fixed_file_data, ref_work);
-
 	while ((node = llist_del_all(&data->put_llist)) != NULL) {
 		llist_for_each_entry_safe(pfile, tmp, node, llist) {
 			io_ring_file_put(data->ctx, pfile->file);
@@ -5494,7 +5517,14 @@ static void io_ring_file_ref_switch(struct work_struct *work)
 				kfree(pfile);
 		}
 	}
+}
 
+static void io_ring_file_ref_switch(struct work_struct *work)
+{
+	struct fixed_file_data *data;
+
+	data = container_of(work, struct fixed_file_data, ref_work);
+	io_ring_file_ref_flush(data);
 	percpu_ref_get(&data->refs);
 	percpu_ref_switch_to_percpu(&data->refs);
 }
@@ -5505,8 +5535,14 @@ static void io_file_data_ref_zero(struct percpu_ref *ref)
 
 	data = container_of(ref, struct fixed_file_data, refs);
 
-	/* we can't safely switch from inside this context, punt to wq */
-	queue_work(system_wq, &data->ref_work);
+	/*
+	 * We can't safely switch from inside this context, punt to wq. If
+	 * the table ref is going away, the table is being unregistered.
+	 * Don't queue up the async work for that case, the caller will
+	 * handle it.
+	 */
+	if (!percpu_ref_is_dying(&data->refs))
+		queue_work(system_wq, &data->ref_work);
 }
 
 static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
@@ -6295,6 +6331,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	percpu_ref_kill(&ctx->refs);
 	mutex_unlock(&ctx->uring_lock);
 
+	/*
+	 * Wait for sq thread to idle, if we have one. It won't spin on new
+	 * work after we've killed the ctx ref above. This is important to do
+	 * before we cancel existing commands, as the thread could otherwise
+	 * be queueing new work post that. If that's work we need to cancel,
+	 * it could cause shutdown to hang.
+	 */
+	while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait))
+		cpu_relax();
+
 	io_kill_timeouts(ctx);
 	io_poll_remove_all(ctx);
 
@@ -6501,6 +6547,80 @@ out_fput:
 	return submitted ? submitted : ret;
 }
 
+static int io_uring_show_cred(int id, void *p, void *data)
+{
+	const struct cred *cred = p;
+	struct seq_file *m = data;
+	struct user_namespace *uns = seq_user_ns(m);
+	struct group_info *gi;
+	kernel_cap_t cap;
+	unsigned __capi;
+	int g;
+
+	seq_printf(m, "%5d\n", id);
+	seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid));
+	seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid));
+	seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid));
+	seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid));
+	seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid));
+	seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid));
+	seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid));
+	seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid));
+	seq_puts(m, "\n\tGroups:\t");
+	gi = cred->group_info;
+	for (g = 0; g < gi->ngroups; g++) {
+		seq_put_decimal_ull(m, g ? " " : "",
+					from_kgid_munged(uns, gi->gid[g]));
+	}
+	seq_puts(m, "\n\tCapEff:\t");
+	cap = cred->cap_effective;
+	CAP_FOR_EACH_U32(__capi)
+		seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
+{
+	int i;
+
+	mutex_lock(&ctx->uring_lock);
+	seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
+	for (i = 0; i < ctx->nr_user_files; i++) {
+		struct fixed_file_table *table;
+		struct file *f;
+
+		table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT];
+		f = table->files[i & IORING_FILE_TABLE_MASK];
+		if (f)
+			seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname);
+		else
+			seq_printf(m, "%5u: <none>\n", i);
+	}
+	seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
+	for (i = 0; i < ctx->nr_user_bufs; i++) {
+		struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
+
+		seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
+						(unsigned int) buf->len);
+	}
+	if (!idr_is_empty(&ctx->personality_idr)) {
+		seq_printf(m, "Personalities:\n");
+		idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
+	}
+	mutex_unlock(&ctx->uring_lock);
+}
+
+static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct io_ring_ctx *ctx = f->private_data;
+
+	if (percpu_ref_tryget(&ctx->refs)) {
+		__io_uring_show_fdinfo(ctx, m);
+		percpu_ref_put(&ctx->refs);
+	}
+}
+
 static const struct file_operations io_uring_fops = {
 	.release	= io_uring_release,
 	.flush		= io_uring_flush,
@@ -6511,6 +6631,7 @@ static const struct file_operations io_uring_fops = {
 #endif
 	.poll		= io_uring_poll,
 	.fasync		= io_uring_fasync,
+	.show_fdinfo	= io_uring_show_fdinfo,
 };
 
 static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
@@ -6963,6 +7084,39 @@ out_fput:
 
 static int __init io_uring_init(void)
 {
+#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \
+	BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \
+	BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \
+} while (0)
+
+#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \
+	__BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename)
+	BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64);
+	BUILD_BUG_SQE_ELEM(0,  __u8,   opcode);
+	BUILD_BUG_SQE_ELEM(1,  __u8,   flags);
+	BUILD_BUG_SQE_ELEM(2,  __u16,  ioprio);
+	BUILD_BUG_SQE_ELEM(4,  __s32,  fd);
+	BUILD_BUG_SQE_ELEM(8,  __u64,  off);
+	BUILD_BUG_SQE_ELEM(8,  __u64,  addr2);
+	BUILD_BUG_SQE_ELEM(16, __u64,  addr);
+	BUILD_BUG_SQE_ELEM(24, __u32,  len);
+	BUILD_BUG_SQE_ELEM(28,     __kernel_rwf_t, rw_flags);
+	BUILD_BUG_SQE_ELEM(28, /* compat */   int, rw_flags);
+	BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
+	BUILD_BUG_SQE_ELEM(28, __u32,  fsync_flags);
+	BUILD_BUG_SQE_ELEM(28, __u16,  poll_events);
+	BUILD_BUG_SQE_ELEM(28, __u32,  sync_range_flags);
+	BUILD_BUG_SQE_ELEM(28, __u32,  msg_flags);
+	BUILD_BUG_SQE_ELEM(28, __u32,  timeout_flags);
+	BUILD_BUG_SQE_ELEM(28, __u32,  accept_flags);
+	BUILD_BUG_SQE_ELEM(28, __u32,  cancel_flags);
+	BUILD_BUG_SQE_ELEM(28, __u32,  open_flags);
+	BUILD_BUG_SQE_ELEM(28, __u32,  statx_flags);
+	BUILD_BUG_SQE_ELEM(28, __u32,  fadvise_advice);
+	BUILD_BUG_SQE_ELEM(32, __u64,  user_data);
+	BUILD_BUG_SQE_ELEM(40, __u16,  buf_index);
+	BUILD_BUG_SQE_ELEM(42, __u16,  personality);
+
 	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
 	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
 	return 0;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 7c9a5df5a597..282d45be6f45 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -54,19 +54,32 @@ EXPORT_SYMBOL(vfs_ioctl);
 
 static int ioctl_fibmap(struct file *filp, int __user *p)
 {
-	struct address_space *mapping = filp->f_mapping;
-	int res, block;
+	struct inode *inode = file_inode(filp);
+	int error, ur_block;
+	sector_t block;
 
-	/* do we support this mess? */
-	if (!mapping->a_ops->bmap)
-		return -EINVAL;
 	if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
-	res = get_user(block, p);
-	if (res)
-		return res;
-	res = mapping->a_ops->bmap(mapping, block);
-	return put_user(res, p);
+
+	error = get_user(ur_block, p);
+	if (error)
+		return error;
+
+	if (ur_block < 0)
+		return -EINVAL;
+
+	block = ur_block;
+	error = bmap(inode, &block);
+
+	if (error)
+		ur_block = 0;
+	else
+		ur_block = block;
+
+	if (put_user(ur_block, p))
+		error = -EFAULT;
+
+	return error;
 }
 
 /**
@@ -523,13 +536,9 @@ static int compat_ioctl_preallocate(struct file *file, int mode,
 
 static int file_ioctl(struct file *filp, unsigned int cmd, int __user *p)
 {
-	struct inode *inode = file_inode(filp);
-
 	switch (cmd) {
 	case FIBMAP:
 		return ioctl_fibmap(filp, p);
-	case FIONREAD:
-		return put_user(i_size_read(inode) - filp->f_pos, p);
 	case FS_IOC_RESVSP:
 	case FS_IOC_RESVSP64:
 		return ioctl_preallocate(filp, 0, p);
@@ -721,6 +730,13 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
 	case FIDEDUPERANGE:
 		return ioctl_file_dedupe_range(filp, argp);
 
+	case FIONREAD:
+		if (!S_ISREG(inode->i_mode))
+			return vfs_ioctl(filp, cmd, arg);
+
+		return put_user(i_size_read(inode) - filp->f_pos,
+				(int __user *)argp);
+
 	default:
 		if (S_ISREG(inode->i_mode))
 			return file_ioctl(filp, cmd, argp);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 60bf8ff78913..a49d0e670ddf 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -794,18 +794,22 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
 {
 	int err = 0;
 	unsigned long long ret;
+	sector_t block = 0;
 
 	if (journal->j_inode) {
-		ret = bmap(journal->j_inode, blocknr);
-		if (ret)
-			*retp = ret;
-		else {
+		block = blocknr;
+		ret = bmap(journal->j_inode, &block);
+
+		if (ret || !block) {
 			printk(KERN_ALERT "%s: journal block not found "
 					"at offset %lu on %s\n",
 			       __func__, blocknr, journal->j_devname);
 			err = -EIO;
 			jbd2_journal_abort(journal, err);
+		} else {
+			*retp = block;
 		}
+
 	} else {
 		*retp = blocknr; /* +journal->j_blk_offset */
 	}
@@ -1074,12 +1078,11 @@ static int jbd2_seq_info_release(struct inode *inode, struct file *file)
 	return seq_release(inode, file);
 }
 
-static const struct file_operations jbd2_seq_info_fops = {
-	.owner		= THIS_MODULE,
-	.open           = jbd2_seq_info_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release        = jbd2_seq_info_release,
+static const struct proc_ops jbd2_info_proc_ops = {
+	.proc_open	= jbd2_seq_info_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= jbd2_seq_info_release,
 };
 
 static struct proc_dir_entry *proc_jbd2_stats;
@@ -1089,7 +1092,7 @@ static void jbd2_stats_proc_init(journal_t *journal)
 	journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
 	if (journal->j_proc_entry) {
 		proc_create_data("info", S_IRUGO, journal->j_proc_entry,
-				 &jbd2_seq_info_fops, journal);
+				 &jbd2_info_proc_ops, journal);
 	}
 }
 
@@ -1244,11 +1247,14 @@ journal_t *jbd2_journal_init_dev(struct block_device *bdev,
 journal_t *jbd2_journal_init_inode(struct inode *inode)
 {
 	journal_t *journal;
+	sector_t blocknr;
 	char *p;
-	unsigned long long blocknr;
+	int err = 0;
+
+	blocknr = 0;
+	err = bmap(inode, &blocknr);
 
-	blocknr = bmap(inode, 0);
-	if (!blocknr) {
+	if (err || !blocknr) {
 		pr_err("%s: Cannot locate journal superblock\n",
 			__func__);
 		return NULL;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 0e6406c4f362..05d7878dfad1 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -167,27 +167,21 @@ enum {
 	Opt_rp_size,
 };
 
-static const struct fs_parameter_spec jffs2_param_specs[] = {
-	fsparam_enum	("compr",	Opt_override_compr),
-	fsparam_u32	("rp_size",	Opt_rp_size),
-	{}
-};
-
-static const struct fs_parameter_enum jffs2_param_enums[] = {
-	{ Opt_override_compr,	"none",	JFFS2_COMPR_MODE_NONE },
+static const struct constant_table jffs2_param_compr[] = {
+	{"none",	JFFS2_COMPR_MODE_NONE },
 #ifdef CONFIG_JFFS2_LZO
-	{ Opt_override_compr,	"lzo",	JFFS2_COMPR_MODE_FORCELZO },
+	{"lzo",		JFFS2_COMPR_MODE_FORCELZO },
 #endif
 #ifdef CONFIG_JFFS2_ZLIB
-	{ Opt_override_compr,	"zlib",	JFFS2_COMPR_MODE_FORCEZLIB },
+	{"zlib",	JFFS2_COMPR_MODE_FORCEZLIB },
 #endif
 	{}
 };
 
-const struct fs_parameter_description jffs2_fs_parameters = {
-	.name		= "jffs2",
-	.specs		= jffs2_param_specs,
-	.enums		= jffs2_param_enums,
+static const struct fs_parameter_spec jffs2_fs_parameters[] = {
+	fsparam_enum	("compr",	Opt_override_compr, jffs2_param_compr),
+	fsparam_u32	("rp_size",	Opt_rp_size),
+	{}
 };
 
 static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
@@ -196,7 +190,7 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	struct jffs2_sb_info *c = fc->s_fs_info;
 	int opt;
 
-	opt = fs_parse(fc, &jffs2_fs_parameters, param, &result);
+	opt = fs_parse(fc, jffs2_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
 
@@ -339,7 +333,7 @@ static struct file_system_type jffs2_fs_type = {
 	.owner =	THIS_MODULE,
 	.name =		"jffs2",
 	.init_fs_context = jffs2_init_fs_context,
-	.parameters =	&jffs2_fs_parameters,
+	.parameters =	jffs2_fs_parameters,
 	.kill_sb =	jffs2_kill_sb,
 };
 MODULE_ALIAS_FS("jffs2");
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 888cdd685a1e..44b62b3c322e 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -43,12 +43,12 @@ static ssize_t jfs_loglevel_proc_write(struct file *file,
 	return count;
 }
 
-static const struct file_operations jfs_loglevel_proc_fops = {
-	.open		= jfs_loglevel_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= jfs_loglevel_proc_write,
+static const struct proc_ops jfs_loglevel_proc_ops = {
+	.proc_open	= jfs_loglevel_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= jfs_loglevel_proc_write,
 };
 #endif
 
@@ -68,7 +68,7 @@ void jfs_proc_init(void)
 #endif
 #ifdef CONFIG_JFS_DEBUG
 	proc_create_single("TxAnchor", 0, base, jfs_txanchor_proc_show);
-	proc_create("loglevel", 0, base, &jfs_loglevel_proc_fops);
+	proc_create("loglevel", 0, base, &jfs_loglevel_proc_ops);
 #endif
 }
 
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index caade185e568..7dfcab2a2da6 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -4027,7 +4027,6 @@ static int dbGetL2AGSize(s64 nblocks)
  */
 #define MAXL0PAGES	(1 + LPERCTL)
 #define MAXL1PAGES	(1 + LPERCTL * MAXL0PAGES)
-#define MAXL2PAGES	(1 + LPERCTL * MAXL1PAGES)
 
 /*
  * convert number of map pages to the zero origin top dmapctl level
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index eac277c63d42..d0f7a5abd9a9 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -160,9 +160,9 @@ static inline void set_inode_attr(struct inode *inode,
 {
 	inode->i_uid = attrs->ia_uid;
 	inode->i_gid = attrs->ia_gid;
-	inode->i_atime = timestamp_truncate(attrs->ia_atime, inode);
-	inode->i_mtime = timestamp_truncate(attrs->ia_mtime, inode);
-	inode->i_ctime = timestamp_truncate(attrs->ia_ctime, inode);
+	inode->i_atime = attrs->ia_atime;
+	inode->i_mtime = attrs->ia_mtime;
+	inode->i_ctime = attrs->ia_ctime;
 }
 
 static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
diff --git a/fs/libfs.c b/fs/libfs.c
index 1463b038ffc4..c686bd9caac6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -19,6 +19,7 @@
 #include <linux/buffer_head.h> /* sync_mapping_buffers */
 #include <linux/fs_context.h>
 #include <linux/pseudo_fs.h>
+#include <linux/fsnotify.h>
 
 #include <linux/uaccess.h>
 
@@ -239,6 +240,75 @@ const struct inode_operations simple_dir_inode_operations = {
 };
 EXPORT_SYMBOL(simple_dir_inode_operations);
 
+static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
+{
+	struct dentry *child = NULL;
+	struct list_head *p = prev ? &prev->d_child : &parent->d_subdirs;
+
+	spin_lock(&parent->d_lock);
+	while ((p = p->next) != &parent->d_subdirs) {
+		struct dentry *d = container_of(p, struct dentry, d_child);
+		if (simple_positive(d)) {
+			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+			if (simple_positive(d))
+				child = dget_dlock(d);
+			spin_unlock(&d->d_lock);
+			if (likely(child))
+				break;
+		}
+	}
+	spin_unlock(&parent->d_lock);
+	dput(prev);
+	return child;
+}
+
+void simple_recursive_removal(struct dentry *dentry,
+                              void (*callback)(struct dentry *))
+{
+	struct dentry *this = dget(dentry);
+	while (true) {
+		struct dentry *victim = NULL, *child;
+		struct inode *inode = this->d_inode;
+
+		inode_lock(inode);
+		if (d_is_dir(this))
+			inode->i_flags |= S_DEAD;
+		while ((child = find_next_child(this, victim)) == NULL) {
+			// kill and ascend
+			// update metadata while it's still locked
+			inode->i_ctime = current_time(inode);
+			clear_nlink(inode);
+			inode_unlock(inode);
+			victim = this;
+			this = this->d_parent;
+			inode = this->d_inode;
+			inode_lock(inode);
+			if (simple_positive(victim)) {
+				d_invalidate(victim);	// avoid lost mounts
+				if (d_is_dir(victim))
+					fsnotify_rmdir(inode, victim);
+				else
+					fsnotify_unlink(inode, victim);
+				if (callback)
+					callback(victim);
+				dput(victim);		// unpin it
+			}
+			if (victim == dentry) {
+				inode->i_ctime = inode->i_mtime =
+					current_time(inode);
+				if (d_is_dir(dentry))
+					drop_nlink(inode);
+				inode_unlock(inode);
+				dput(dentry);
+				return;
+			}
+		}
+		inode_unlock(inode);
+		this = child;
+	}
+}
+EXPORT_SYMBOL(simple_recursive_removal);
+
 static const struct super_operations simple_super_operations = {
 	.statfs		= simple_statfs,
 };
diff --git a/fs/lockd/procfs.c b/fs/lockd/procfs.c
index ca9228a56d65..a01f08c8c2f3 100644
--- a/fs/lockd/procfs.c
+++ b/fs/lockd/procfs.c
@@ -60,11 +60,11 @@ nlm_end_grace_read(struct file *file, char __user *buf, size_t size,
 	return simple_read_from_buffer(buf, size, pos, resp, sizeof(resp));
 }
 
-static const struct file_operations lockd_end_grace_operations = {
-	.write		= nlm_end_grace_write,
-	.read		= nlm_end_grace_read,
-	.llseek		= default_llseek,
-	.release	= simple_transaction_release,
+static const struct proc_ops lockd_end_grace_proc_ops = {
+	.proc_write	= nlm_end_grace_write,
+	.proc_read	= nlm_end_grace_read,
+	.proc_lseek	= default_llseek,
+	.proc_release	= simple_transaction_release,
 };
 
 int __init
@@ -76,7 +76,7 @@ lockd_create_procfs(void)
 	if (!entry)
 		return -ENOMEM;
 	entry = proc_create("nlm_end_grace", S_IRUGO|S_IWUSR, entry,
-				 &lockd_end_grace_operations);
+			    &lockd_end_grace_proc_ops);
 	if (!entry) {
 		remove_proc_entry("fs/lockd", NULL);
 		return -ENOMEM;
diff --git a/fs/namespace.c b/fs/namespace.c
index 5e1bf611a9eb..85b5f7bea82e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2979,39 +2979,10 @@ static void shrink_submounts(struct mount *mnt)
 	}
 }
 
-/*
- * Some copy_from_user() implementations do not return the exact number of
- * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
- * Note that this function differs from copy_from_user() in that it will oops
- * on bad values of `to', rather than returning a short copy.
- */
-static long exact_copy_from_user(void *to, const void __user * from,
-				 unsigned long n)
-{
-	char *t = to;
-	const char __user *f = from;
-	char c;
-
-	if (!access_ok(from, n))
-		return n;
-
-	while (n) {
-		if (__get_user(c, f)) {
-			memset(t, 0, n);
-			break;
-		}
-		*t++ = c;
-		f++;
-		n--;
-	}
-	return n;
-}
-
 void *copy_mount_options(const void __user * data)
 {
-	int i;
-	unsigned long size;
 	char *copy;
+	unsigned size;
 
 	if (!data)
 		return NULL;
@@ -3020,22 +2991,16 @@ void *copy_mount_options(const void __user * data)
 	if (!copy)
 		return ERR_PTR(-ENOMEM);
 
-	/* We only care that *some* data at the address the user
-	 * gave us is valid.  Just in case, we'll zero
-	 * the remainder of the page.
-	 */
-	/* copy_from_user cannot cross TASK_SIZE ! */
-	size = TASK_SIZE - (unsigned long)untagged_addr(data);
-	if (size > PAGE_SIZE)
-		size = PAGE_SIZE;
+	size = PAGE_SIZE - offset_in_page(data);
 
-	i = size - exact_copy_from_user(copy, data, size);
-	if (!i) {
+	if (copy_from_user(copy, data, size)) {
 		kfree(copy);
 		return ERR_PTR(-EFAULT);
 	}
-	if (i != PAGE_SIZE)
-		memset(copy + i, 0, PAGE_SIZE - i);
+	if (size != PAGE_SIZE) {
+		if (copy_from_user(copy + size, data + size, PAGE_SIZE - size))
+			memset(copy + size, 0, PAGE_SIZE - size);
+	}
 	return copy;
 }
 
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 295a7a21b774..40b6c5ac46c0 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -90,7 +90,7 @@ config NFS_V4
 config NFS_SWAP
 	bool "Provide swap over NFS support"
 	default n
-	depends on NFS_FS
+	depends on NFS_FS && SWAP
 	select SUNRPC_SWAP
 	help
 	  This option enables swapon to work on files located on NFS mounts.
@@ -196,3 +196,12 @@ config NFS_DEBUG
 	depends on NFS_FS && SUNRPC_DEBUG
 	select CRC32
 	default y
+
+config NFS_DISABLE_UDP_SUPPORT
+       bool "NFS: Disable NFS UDP protocol support"
+       depends on NFS_FS
+       default y
+       help
+	 Choose Y here to disable the use of NFS over UDP. NFS over UDP
+	 on modern networks (1Gb+) can lead to data corruption caused by
+	 fragmentation during high loads.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 34cdeaecccf6..2433c3e03cfa 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -9,7 +9,7 @@ CFLAGS_nfstrace.o += -I$(src)
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o \
 			   io.o direct.o pagelist.o read.o symlink.o unlink.o \
 			   write.o namespace.o mount_clnt.o nfstrace.o \
-			   export.o sysfs.o
+			   export.o sysfs.o fs_context.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_SYSCTL)	+= sysctl.o
 nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 03a20f5716c7..79ff172eb1c8 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -18,6 +18,7 @@
 #include "callback.h"
 #include "internal.h"
 #include "nfs4session.h"
+#include "nfs4trace.h"
 
 #define CB_OP_TAGLEN_MAXSZ		(512)
 #define CB_OP_HDR_RES_MAXSZ		(2 * 4) // opcode, status
@@ -946,9 +947,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 
 	if (hdr_arg.minorversion == 0) {
 		cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);
-		if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) {
-			if (cps.clp)
-				nfs_put_client(cps.clp);
+		if (!cps.clp) {
+			trace_nfs_cb_no_clp(rqstp->rq_xid, hdr_arg.cb_ident);
+			goto out_invalidcred;
+		}
+		if (!check_gss_callback_principal(cps.clp, rqstp)) {
+			trace_nfs_cb_badprinc(rqstp->rq_xid, hdr_arg.cb_ident);
+			nfs_put_client(cps.clp);
 			goto out_invalidcred;
 		}
 	}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 02110a30a49e..989c30c98511 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -474,6 +474,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
 			to->to_maxval = to->to_initval;
 		to->to_exponential = 0;
 		break;
+#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
 	case XPRT_TRANSPORT_UDP:
 		if (retrans == NFS_UNSPEC_RETRANS)
 			to->to_retries = NFS_DEF_UDP_RETRANS;
@@ -484,6 +485,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
 		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
 		to->to_exponential = 1;
 		break;
+#endif
 	default:
 		BUG();
 	}
@@ -580,8 +582,10 @@ static int nfs_start_lockd(struct nfs_server *server)
 		default:
 			nlm_init.protocol = IPPROTO_TCP;
 			break;
+#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
 		case XPRT_TRANSPORT_UDP:
 			nlm_init.protocol = IPPROTO_UDP;
+#endif
 	}
 
 	host = nlmclnt_init(&nlm_init);
@@ -658,28 +662,28 @@ EXPORT_SYMBOL_GPL(nfs_init_client);
  * Create a version 2 or 3 client
  */
 static int nfs_init_server(struct nfs_server *server,
-			   const struct nfs_parsed_mount_data *data,
-			   struct nfs_subversion *nfs_mod)
+			   const struct fs_context *fc)
 {
+	const struct nfs_fs_context *ctx = nfs_fc2context(fc);
 	struct rpc_timeout timeparms;
 	struct nfs_client_initdata cl_init = {
-		.hostname = data->nfs_server.hostname,
-		.addr = (const struct sockaddr *)&data->nfs_server.address,
-		.addrlen = data->nfs_server.addrlen,
-		.nfs_mod = nfs_mod,
-		.proto = data->nfs_server.protocol,
-		.net = data->net,
+		.hostname = ctx->nfs_server.hostname,
+		.addr = (const struct sockaddr *)&ctx->nfs_server.address,
+		.addrlen = ctx->nfs_server.addrlen,
+		.nfs_mod = ctx->nfs_mod,
+		.proto = ctx->nfs_server.protocol,
+		.net = fc->net_ns,
 		.timeparms = &timeparms,
 		.cred = server->cred,
-		.nconnect = data->nfs_server.nconnect,
+		.nconnect = ctx->nfs_server.nconnect,
 		.init_flags = (1UL << NFS_CS_REUSEPORT),
 	};
 	struct nfs_client *clp;
 	int error;
 
-	nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
-			data->timeo, data->retrans);
-	if (data->flags & NFS_MOUNT_NORESVPORT)
+	nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol,
+				ctx->timeo, ctx->retrans);
+	if (ctx->flags & NFS_MOUNT_NORESVPORT)
 		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 
 	/* Allocate or find a client reference we can use */
@@ -690,46 +694,46 @@ static int nfs_init_server(struct nfs_server *server,
 	server->nfs_client = clp;
 
 	/* Initialise the client representation from the mount data */
-	server->flags = data->flags;
-	server->options = data->options;
+	server->flags = ctx->flags;
+	server->options = ctx->options;
 	server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
 		NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
 		NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
 
-	if (data->rsize)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize)
-		server->wsize = nfs_block_size(data->wsize, NULL);
+	if (ctx->rsize)
+		server->rsize = nfs_block_size(ctx->rsize, NULL);
+	if (ctx->wsize)
+		server->wsize = nfs_block_size(ctx->wsize, NULL);
 
-	server->acregmin = data->acregmin * HZ;
-	server->acregmax = data->acregmax * HZ;
-	server->acdirmin = data->acdirmin * HZ;
-	server->acdirmax = data->acdirmax * HZ;
+	server->acregmin = ctx->acregmin * HZ;
+	server->acregmax = ctx->acregmax * HZ;
+	server->acdirmin = ctx->acdirmin * HZ;
+	server->acdirmax = ctx->acdirmax * HZ;
 
 	/* Start lockd here, before we might error out */
 	error = nfs_start_lockd(server);
 	if (error < 0)
 		goto error;
 
-	server->port = data->nfs_server.port;
-	server->auth_info = data->auth_info;
+	server->port = ctx->nfs_server.port;
+	server->auth_info = ctx->auth_info;
 
 	error = nfs_init_server_rpcclient(server, &timeparms,
-					  data->selected_flavor);
+					  ctx->selected_flavor);
 	if (error < 0)
 		goto error;
 
 	/* Preserve the values of mount_server-related mount options */
-	if (data->mount_server.addrlen) {
-		memcpy(&server->mountd_address, &data->mount_server.address,
-			data->mount_server.addrlen);
-		server->mountd_addrlen = data->mount_server.addrlen;
+	if (ctx->mount_server.addrlen) {
+		memcpy(&server->mountd_address, &ctx->mount_server.address,
+			ctx->mount_server.addrlen);
+		server->mountd_addrlen = ctx->mount_server.addrlen;
 	}
-	server->mountd_version = data->mount_server.version;
-	server->mountd_port = data->mount_server.port;
-	server->mountd_protocol = data->mount_server.protocol;
+	server->mountd_version = ctx->mount_server.version;
+	server->mountd_port = ctx->mount_server.port;
+	server->mountd_protocol = ctx->mount_server.protocol;
 
-	server->namelen  = data->namlen;
+	server->namelen  = ctx->namlen;
 	return 0;
 
 error:
@@ -951,9 +955,9 @@ EXPORT_SYMBOL_GPL(nfs_free_server);
  * Create a version 2 or 3 volume record
  * - keyed on server and FSID
  */
-struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
-				     struct nfs_subversion *nfs_mod)
+struct nfs_server *nfs_create_server(struct fs_context *fc)
 {
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
 	struct nfs_server *server;
 	struct nfs_fattr *fattr;
 	int error;
@@ -970,18 +974,18 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
 		goto error;
 
 	/* Get a client representation */
-	error = nfs_init_server(server, mount_info->parsed, nfs_mod);
+	error = nfs_init_server(server, fc);
 	if (error < 0)
 		goto error;
 
 	/* Probe the root fh to retrieve its FSID */
-	error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr);
+	error = nfs_probe_fsinfo(server, ctx->mntfh, fattr);
 	if (error < 0)
 		goto error;
 	if (server->nfs_client->rpc_ops->version == 3) {
 		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
 			server->namelen = NFS3_MAXNAMLEN;
-		if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS))
+		if (!(ctx->flags & NFS_MOUNT_NORDIRPLUS))
 			server->caps |= NFS_CAP_READDIRPLUS;
 	} else {
 		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
@@ -989,8 +993,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
 	}
 
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
-		error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh,
-				fattr, NULL, NULL);
+		error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
+						       fattr, NULL, NULL);
 		if (error < 0) {
 			dprintk("nfs_create_server: getattr error = %d\n", -error);
 			goto error;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index fe57b2b5314a..4a841071d8a7 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -25,13 +25,32 @@
 #include "internal.h"
 #include "nfs4trace.h"
 
-static void nfs_free_delegation(struct nfs_delegation *delegation)
+#define NFS_DEFAULT_DELEGATION_WATERMARK (5000U)
+
+static atomic_long_t nfs_active_delegations;
+static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK;
+
+static void __nfs_free_delegation(struct nfs_delegation *delegation)
 {
 	put_cred(delegation->cred);
 	delegation->cred = NULL;
 	kfree_rcu(delegation, rcu);
 }
 
+static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation)
+{
+	if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+		delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
+		atomic_long_dec(&nfs_active_delegations);
+	}
+}
+
+static void nfs_free_delegation(struct nfs_delegation *delegation)
+{
+	nfs_mark_delegation_revoked(delegation);
+	__nfs_free_delegation(delegation);
+}
+
 /**
  * nfs_mark_delegation_referenced - set delegation's REFERENCED flag
  * @delegation: delegation to process
@@ -343,7 +362,8 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
 		delegation->stateid.seqid = update->stateid.seqid;
 		smp_wmb();
 		delegation->type = update->type;
-		clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
+		if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+			atomic_long_inc(&nfs_active_delegations);
 	}
 }
 
@@ -423,6 +443,8 @@ add_new:
 	rcu_assign_pointer(nfsi->delegation, delegation);
 	delegation = NULL;
 
+	atomic_long_inc(&nfs_active_delegations);
+
 	trace_nfs4_set_delegation(inode, type);
 
 	spin_lock(&inode->i_lock);
@@ -432,7 +454,7 @@ add_new:
 out:
 	spin_unlock(&clp->cl_lock);
 	if (delegation != NULL)
-		nfs_free_delegation(delegation);
+		__nfs_free_delegation(delegation);
 	if (freeme != NULL) {
 		nfs_do_return_delegation(inode, freeme, 0);
 		nfs_free_delegation(freeme);
@@ -479,7 +501,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
 
 	if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
 		ret = true;
-	if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
+	else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) {
 		struct inode *inode;
 
 		spin_lock(&delegation->lock);
@@ -488,6 +510,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
 			ret = true;
 		spin_unlock(&delegation->lock);
 	}
+	if (ret)
+		clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
 	if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
 	    test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
 		ret = false;
@@ -607,6 +631,7 @@ void nfs_inode_evict_delegation(struct inode *inode)
 
 	delegation = nfs_inode_detach_delegation(inode);
 	if (delegation != NULL) {
+		set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
 		set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
 		nfs_do_return_delegation(inode, delegation, 1);
 		nfs_free_delegation(delegation);
@@ -637,6 +662,40 @@ int nfs4_inode_return_delegation(struct inode *inode)
 }
 
 /**
+ * nfs_inode_return_delegation_on_close - asynchronously return a delegation
+ * @inode: inode to process
+ *
+ * This routine is called on file close in order to determine if the
+ * inode delegation needs to be returned immediately.
+ */
+void nfs4_inode_return_delegation_on_close(struct inode *inode)
+{
+	struct nfs_delegation *delegation;
+	struct nfs_delegation *ret = NULL;
+
+	if (!inode)
+		return;
+	rcu_read_lock();
+	delegation = nfs4_get_valid_delegation(inode);
+	if (!delegation)
+		goto out;
+	if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) ||
+	    atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) {
+		spin_lock(&delegation->lock);
+		if (delegation->inode &&
+		    list_empty(&NFS_I(inode)->open_files) &&
+		    !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+			clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+			ret = delegation;
+		}
+		spin_unlock(&delegation->lock);
+	}
+out:
+	rcu_read_unlock();
+	nfs_end_delegation_return(inode, ret, 0);
+}
+
+/**
  * nfs4_inode_make_writeable
  * @inode: pointer to inode
  *
@@ -760,13 +819,6 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl
 	rcu_read_unlock();
 }
 
-static void nfs_mark_delegation_revoked(struct nfs_server *server,
-		struct nfs_delegation *delegation)
-{
-	set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
-	delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
-}
-
 static void nfs_revoke_delegation(struct inode *inode,
 		const nfs4_stateid *stateid)
 {
@@ -794,7 +846,7 @@ static void nfs_revoke_delegation(struct inode *inode,
 		}
 		spin_unlock(&delegation->lock);
 	}
-	nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
+	nfs_mark_delegation_revoked(delegation);
 	ret = true;
 out:
 	rcu_read_unlock();
@@ -833,7 +885,7 @@ void nfs_delegation_mark_returned(struct inode *inode,
 			delegation->stateid.seqid = stateid->seqid;
 	}
 
-	nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
+	nfs_mark_delegation_revoked(delegation);
 
 out_clear_returning:
 	clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
@@ -1317,3 +1369,5 @@ out:
 	rcu_read_unlock();
 	return ret;
 }
+
+module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 15d3484be028..31b84604d383 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -42,6 +42,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
 void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
 		fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
 int nfs4_inode_return_delegation(struct inode *inode);
+void nfs4_inode_return_delegation_on_close(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 void nfs_inode_evict_delegation(struct inode *inode);
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e180033e35cf..1320288ff9ec 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -58,7 +58,7 @@ static void nfs_readdir_clear_array(struct page*);
 const struct file_operations nfs_dir_operations = {
 	.llseek		= nfs_llseek_dir,
 	.read		= generic_read_dir,
-	.iterate	= nfs_readdir,
+	.iterate_shared	= nfs_readdir,
 	.open		= nfs_opendir,
 	.release	= nfs_closedir,
 	.fsync		= nfs_fsync_dir,
@@ -162,6 +162,17 @@ typedef struct {
 	bool eof;
 } nfs_readdir_descriptor_t;
 
+static
+void nfs_readdir_init_array(struct page *page)
+{
+	struct nfs_cache_array *array;
+
+	array = kmap_atomic(page);
+	memset(array, 0, sizeof(struct nfs_cache_array));
+	array->eof_index = -1;
+	kunmap_atomic(array);
+}
+
 /*
  * we are freeing strings created by nfs_add_to_readdir_array()
  */
@@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page *page)
 	array = kmap_atomic(page);
 	for (i = 0; i < array->size; i++)
 		kfree(array->array[i].string.name);
+	array->size = 0;
 	kunmap_atomic(array);
 }
 
@@ -186,7 +198,7 @@ static
 int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
 {
 	string->len = len;
-	string->name = kmemdup(name, len, GFP_KERNEL);
+	string->name = kmemdup_nul(name, len, GFP_KERNEL);
 	if (string->name == NULL)
 		return -ENOMEM;
 	/*
@@ -437,7 +449,8 @@ void nfs_force_use_readdirplus(struct inode *dir)
 	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
 	    !list_empty(&nfsi->open_files)) {
 		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
-		invalidate_mapping_pages(dir->i_mapping, 0, -1);
+		invalidate_mapping_pages(dir->i_mapping,
+			nfsi->page_index + 1, -1);
 	}
 }
 
@@ -610,6 +623,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	int status = -ENOMEM;
 	unsigned int array_size = ARRAY_SIZE(pages);
 
+	nfs_readdir_init_array(page);
+
 	entry.prev_cookie = 0;
 	entry.cookie = desc->last_cookie;
 	entry.eof = 0;
@@ -626,8 +641,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	}
 
 	array = kmap(page);
-	memset(array, 0, sizeof(struct nfs_cache_array));
-	array->eof_index = -1;
 
 	status = nfs_readdir_alloc_pages(pages, array_size);
 	if (status < 0)
@@ -682,6 +695,7 @@ int nfs_readdir_filler(void *data, struct page* page)
 	unlock_page(page);
 	return 0;
  error:
+	nfs_readdir_clear_array(page);
 	unlock_page(page);
 	return ret;
 }
@@ -689,8 +703,6 @@ int nfs_readdir_filler(void *data, struct page* page)
 static
 void cache_page_release(nfs_readdir_descriptor_t *desc)
 {
-	if (!desc->page->mapping)
-		nfs_readdir_clear_array(desc->page);
 	put_page(desc->page);
 	desc->page = NULL;
 }
@@ -704,19 +716,32 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 
 /*
  * Returns 0 if desc->dir_cookie was found on page desc->page_index
+ * and locks the page to prevent removal from the page cache.
  */
 static
-int find_cache_page(nfs_readdir_descriptor_t *desc)
+int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
 {
+	struct inode *inode = file_inode(desc->file);
+	struct nfs_inode *nfsi = NFS_I(inode);
 	int res;
 
 	desc->page = get_cache_page(desc);
 	if (IS_ERR(desc->page))
 		return PTR_ERR(desc->page);
-
-	res = nfs_readdir_search_array(desc);
+	res = lock_page_killable(desc->page);
 	if (res != 0)
-		cache_page_release(desc);
+		goto error;
+	res = -EAGAIN;
+	if (desc->page->mapping != NULL) {
+		res = nfs_readdir_search_array(desc);
+		if (res == 0) {
+			nfsi->page_index = desc->page_index;
+			return 0;
+		}
+	}
+	unlock_page(desc->page);
+error:
+	cache_page_release(desc);
 	return res;
 }
 
@@ -731,7 +756,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 		desc->last_cookie = 0;
 	}
 	do {
-		res = find_cache_page(desc);
+		res = find_and_lock_cache_page(desc);
 	} while (res == -EAGAIN);
 	return res;
 }
@@ -770,7 +795,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 		desc->eof = true;
 
 	kunmap(desc->page);
-	cache_page_release(desc);
 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
 			(unsigned long long)*desc->dir_cookie, res);
 	return res;
@@ -816,13 +840,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
 
 	status = nfs_do_filldir(desc);
 
+ out_release:
+	nfs_readdir_clear_array(desc->page);
+	cache_page_release(desc);
  out:
 	dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
 			__func__, status);
 	return status;
- out_release:
-	cache_page_release(desc);
-	goto out;
 }
 
 /* The file offset position represents the dirent entry number.  A
@@ -887,6 +911,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 			break;
 
 		res = nfs_do_filldir(desc);
+		unlock_page(desc->page);
+		cache_page_release(desc);
 		if (res < 0)
 			break;
 	} while (!desc->eof);
@@ -1142,10 +1168,17 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
 	if (fhandle == NULL || fattr == NULL || IS_ERR(label))
 		goto out;
 
-	ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+	ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
 	if (ret < 0) {
-		if (ret == -ESTALE || ret == -ENOENT)
+		switch (ret) {
+		case -ESTALE:
+		case -ENOENT:
 			ret = 0;
+			break;
+		case -ETIMEDOUT:
+			if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
+				ret = 1;
+		}
 		goto out;
 	}
 	ret = 0;
@@ -1408,7 +1441,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 		goto out;
 
 	trace_nfs_lookup_enter(dir, dentry, flags);
-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+	error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
 	if (error == -ENOENT)
 		goto no_entry;
 	if (error < 0) {
@@ -1683,7 +1716,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
 	d_drop(dentry);
 
 	if (fhandle->size == 0) {
-		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
+		error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, NULL);
 		if (error)
 			goto out_error;
 	}
@@ -2312,11 +2345,11 @@ static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, s
 		/* Found an entry, is our attribute cache valid? */
 		if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
 			break;
+		if (!retry)
+			break;
 		err = -ECHILD;
 		if (!may_block)
 			goto out;
-		if (!retry)
-			goto out_zap;
 		spin_unlock(&inode->i_lock);
 		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
 		if (err)
@@ -2353,7 +2386,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
 	lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
 	cache = list_entry(lh, struct nfs_access_entry, lru);
 	if (lh == &nfsi->access_cache_entry_lru ||
-	    cred != cache->cred)
+	    cred_fscmp(cred, cache->cred) != 0)
 		cache = NULL;
 	if (cache == NULL)
 		goto out;
@@ -2476,7 +2509,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
 {
 	struct nfs_access_entry cache;
 	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
-	int cache_mask;
+	int cache_mask = -1;
 	int status;
 
 	trace_nfs_access_enter(inode);
@@ -2515,7 +2548,7 @@ out_cached:
 	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
 		status = -EACCES;
 out:
-	trace_nfs_access_exit(inode, status);
+	trace_nfs_access_exit(inode, mask, cache_mask, status);
 	return status;
 }
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 040a50fd9bf3..b768a0b42e82 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -245,10 +245,10 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
 					 data->ds_commit_index);
 
 	/* verifier not set so always fail */
-	if (verfp->committed < 0)
+	if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE)
 		return 1;
 
-	return nfs_direct_cmp_verf(verfp, &data->verf);
+	return nfs_direct_cmp_verf(verfp, data->res.verf);
 }
 
 /**
@@ -824,7 +824,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 		/* fake unstable write to let common nfs resend pages */
 		hdr->verf.committed = NFS_UNSTABLE;
-		hdr->good_bytes = hdr->args.count;
+		hdr->good_bytes = hdr->args.offset + hdr->args.count -
+			hdr->io_start;
 	}
 	spin_unlock(&dreq->lock);
 }
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index aec769a500a1..89bd5581f317 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -93,7 +93,7 @@ static void nfs_dns_ent_init(struct cache_head *cnew,
 	key = container_of(ckey, struct nfs_dns_ent, h);
 
 	kfree(new->hostname);
-	new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL);
+	new->hostname = kmemdup_nul(key->hostname, key->namelen, GFP_KERNEL);
 	if (new->hostname) {
 		new->namelen = key->namelen;
 		nfs_dns_ent_update(cnew, ckey);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8eb731d9be3e..f96367a2463e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -204,44 +204,39 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
 static int
 nfs_file_fsync_commit(struct file *file, int datasync)
 {
-	struct nfs_open_context *ctx = nfs_file_open_context(file);
 	struct inode *inode = file_inode(file);
-	int do_resend, status;
-	int ret = 0;
+	int ret;
 
 	dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
 
 	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
-	do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
-	status = nfs_commit_inode(inode, FLUSH_SYNC);
-	if (status == 0)
-		status = file_check_and_advance_wb_err(file);
-	if (status < 0) {
-		ret = status;
-		goto out;
-	}
-	do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
-	if (do_resend)
-		ret = -EAGAIN;
-out:
-	return ret;
+	ret = nfs_commit_inode(inode, FLUSH_SYNC);
+	if (ret < 0)
+		return ret;
+	return file_check_and_advance_wb_err(file);
 }
 
 int
 nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	int ret;
+	struct nfs_open_context *ctx = nfs_file_open_context(file);
 	struct inode *inode = file_inode(file);
+	int ret;
 
 	trace_nfs_fsync_enter(inode);
 
-	do {
+	for (;;) {
 		ret = file_write_and_wait_range(file, start, end);
 		if (ret != 0)
 			break;
 		ret = nfs_file_fsync_commit(file, datasync);
-		if (!ret)
-			ret = pnfs_sync_inode(inode, !!datasync);
+		if (ret != 0)
+			break;
+		ret = pnfs_sync_inode(inode, !!datasync);
+		if (ret != 0)
+			break;
+		if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
+			break;
 		/*
 		 * If nfs_file_fsync_commit detected a server reboot, then
 		 * resend all dirty pages that might have been covered by
@@ -249,7 +244,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 		 */
 		start = 0;
 		end = LLONG_MAX;
-	} while (ret == -EAGAIN);
+	}
 
 	trace_nfs_fsync_exit(inode, ret);
 	return ret;
@@ -489,7 +484,19 @@ static int nfs_launder_page(struct page *page)
 static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 						sector_t *span)
 {
+	unsigned long blocks;
+	long long isize;
 	struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
+	struct inode *inode = file->f_mapping->host;
+
+	spin_lock(&inode->i_lock);
+	blocks = inode->i_blocks;
+	isize = inode->i_size;
+	spin_unlock(&inode->i_lock);
+	if (blocks*512 < isize) {
+		pr_warn("swap activate: swapfile has holes\n");
+		return -EINVAL;
+	}
 
 	*span = sis->pages;
 
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 5657b7f2611f..bb9148b83166 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1266,9 +1266,10 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
 
 static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
 					int idx, u64 offset, u64 length,
-					u32 status, int opnum, int error)
+					u32 *op_status, int opnum, int error)
 {
 	struct nfs4_ff_layout_mirror *mirror;
+	u32 status = *op_status;
 	int err;
 
 	if (status == 0) {
@@ -1286,10 +1287,10 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
 		case -ENOBUFS:
 		case -EPIPE:
 		case -EPERM:
-			status = NFS4ERR_NXIO;
+			*op_status = status = NFS4ERR_NXIO;
 			break;
 		case -EACCES:
-			status = NFS4ERR_ACCESS;
+			*op_status = status = NFS4ERR_ACCESS;
 			break;
 		default:
 			return;
@@ -1321,16 +1322,19 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
 	int new_idx = hdr->pgio_mirror_idx;
 	int err;
 
-	trace_nfs4_pnfs_read(hdr, task->tk_status);
-	if (task->tk_status < 0)
+	if (task->tk_status < 0) {
 		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
 					    hdr->args.offset, hdr->args.count,
-					    hdr->res.op_status, OP_READ,
+					    &hdr->res.op_status, OP_READ,
 					    task->tk_status);
+		trace_ff_layout_read_error(hdr);
+	}
+
 	err = ff_layout_async_handle_error(task, hdr->args.context->state,
 					   hdr->ds_clp, hdr->lseg,
 					   hdr->pgio_mirror_idx);
 
+	trace_nfs4_pnfs_read(hdr, err);
 	clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
 	clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
 	switch (err) {
@@ -1494,16 +1498,19 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
 	loff_t end_offs = 0;
 	int err;
 
-	trace_nfs4_pnfs_write(hdr, task->tk_status);
-	if (task->tk_status < 0)
+	if (task->tk_status < 0) {
 		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
 					    hdr->args.offset, hdr->args.count,
-					    hdr->res.op_status, OP_WRITE,
+					    &hdr->res.op_status, OP_WRITE,
 					    task->tk_status);
+		trace_ff_layout_write_error(hdr);
+	}
+
 	err = ff_layout_async_handle_error(task, hdr->args.context->state,
 					   hdr->ds_clp, hdr->lseg,
 					   hdr->pgio_mirror_idx);
 
+	trace_nfs4_pnfs_write(hdr, err);
 	clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
 	clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
 	switch (err) {
@@ -1537,15 +1544,18 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
 {
 	int err;
 
-	trace_nfs4_pnfs_commit_ds(data, task->tk_status);
-	if (task->tk_status < 0)
+	if (task->tk_status < 0) {
 		ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
 					    data->args.offset, data->args.count,
-					    data->res.op_status, OP_COMMIT,
+					    &data->res.op_status, OP_COMMIT,
 					    task->tk_status);
+		trace_ff_layout_commit_error(data);
+	}
+
 	err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
 					   data->lseg, data->ds_commit_index);
 
+	trace_nfs4_pnfs_commit_ds(data, err);
 	switch (err) {
 	case -NFS4ERR_RESET_TO_PNFS:
 		pnfs_generic_prepare_to_resend_writes(data);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
new file mode 100644
index 000000000000..e1b938457ab9
--- /dev/null
+++ b/fs/nfs/fs_context.c
@@ -0,0 +1,1440 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/fs/nfs/fs_context.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ * Conversion to new mount api Copyright (C) David Howells
+ *
+ * NFS mount handling.
+ *
+ * Split from fs/nfs/super.c by David Howells <dhowells@redhat.com>
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include "nfs.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY		NFSDBG_MOUNT
+
+#if IS_ENABLED(CONFIG_NFS_V3)
+#define NFS_DEFAULT_VERSION 3
+#else
+#define NFS_DEFAULT_VERSION 2
+#endif
+
+#define NFS_MAX_CONNECTIONS 16
+
+enum nfs_param {
+	Opt_ac,
+	Opt_acdirmax,
+	Opt_acdirmin,
+	Opt_acl,
+	Opt_acregmax,
+	Opt_acregmin,
+	Opt_actimeo,
+	Opt_addr,
+	Opt_bg,
+	Opt_bsize,
+	Opt_clientaddr,
+	Opt_cto,
+	Opt_fg,
+	Opt_fscache,
+	Opt_fscache_flag,
+	Opt_hard,
+	Opt_intr,
+	Opt_local_lock,
+	Opt_lock,
+	Opt_lookupcache,
+	Opt_migration,
+	Opt_minorversion,
+	Opt_mountaddr,
+	Opt_mounthost,
+	Opt_mountport,
+	Opt_mountproto,
+	Opt_mountvers,
+	Opt_namelen,
+	Opt_nconnect,
+	Opt_port,
+	Opt_posix,
+	Opt_proto,
+	Opt_rdirplus,
+	Opt_rdma,
+	Opt_resvport,
+	Opt_retrans,
+	Opt_retry,
+	Opt_rsize,
+	Opt_sec,
+	Opt_sharecache,
+	Opt_sloppy,
+	Opt_soft,
+	Opt_softerr,
+	Opt_softreval,
+	Opt_source,
+	Opt_tcp,
+	Opt_timeo,
+	Opt_udp,
+	Opt_v,
+	Opt_vers,
+	Opt_wsize,
+};
+
+enum {
+	Opt_local_lock_all,
+	Opt_local_lock_flock,
+	Opt_local_lock_none,
+	Opt_local_lock_posix,
+};
+
+static const struct constant_table nfs_param_enums_local_lock[] = {
+	{ "all",		Opt_local_lock_all },
+	{ "flock",	Opt_local_lock_flock },
+	{ "none",		Opt_local_lock_none },
+	{}
+};
+
+enum {
+	Opt_lookupcache_all,
+	Opt_lookupcache_none,
+	Opt_lookupcache_positive,
+};
+
+static const struct constant_table nfs_param_enums_lookupcache[] = {
+	{ "all",		Opt_lookupcache_all },
+	{ "none",		Opt_lookupcache_none },
+	{ "pos",		Opt_lookupcache_positive },
+	{ "positive",		Opt_lookupcache_positive },
+	{}
+};
+
+static const struct fs_parameter_spec nfs_fs_parameters[] = {
+	fsparam_flag_no("ac",		Opt_ac),
+	fsparam_u32   ("acdirmax",	Opt_acdirmax),
+	fsparam_u32   ("acdirmin",	Opt_acdirmin),
+	fsparam_flag_no("acl",		Opt_acl),
+	fsparam_u32   ("acregmax",	Opt_acregmax),
+	fsparam_u32   ("acregmin",	Opt_acregmin),
+	fsparam_u32   ("actimeo",	Opt_actimeo),
+	fsparam_string("addr",		Opt_addr),
+	fsparam_flag  ("bg",		Opt_bg),
+	fsparam_u32   ("bsize",		Opt_bsize),
+	fsparam_string("clientaddr",	Opt_clientaddr),
+	fsparam_flag_no("cto",		Opt_cto),
+	fsparam_flag  ("fg",		Opt_fg),
+	fsparam_flag_no("fsc",		Opt_fscache_flag),
+	fsparam_string("fsc",		Opt_fscache),
+	fsparam_flag  ("hard",		Opt_hard),
+	__fsparam(NULL, "intr",		Opt_intr,
+		  fs_param_neg_with_no|fs_param_deprecated, NULL),
+	fsparam_enum  ("local_lock",	Opt_local_lock, nfs_param_enums_local_lock),
+	fsparam_flag_no("lock",		Opt_lock),
+	fsparam_enum  ("lookupcache",	Opt_lookupcache, nfs_param_enums_lookupcache),
+	fsparam_flag_no("migration",	Opt_migration),
+	fsparam_u32   ("minorversion",	Opt_minorversion),
+	fsparam_string("mountaddr",	Opt_mountaddr),
+	fsparam_string("mounthost",	Opt_mounthost),
+	fsparam_u32   ("mountport",	Opt_mountport),
+	fsparam_string("mountproto",	Opt_mountproto),
+	fsparam_u32   ("mountvers",	Opt_mountvers),
+	fsparam_u32   ("namlen",	Opt_namelen),
+	fsparam_u32   ("nconnect",	Opt_nconnect),
+	fsparam_string("nfsvers",	Opt_vers),
+	fsparam_u32   ("port",		Opt_port),
+	fsparam_flag_no("posix",	Opt_posix),
+	fsparam_string("proto",		Opt_proto),
+	fsparam_flag_no("rdirplus",	Opt_rdirplus),
+	fsparam_flag  ("rdma",		Opt_rdma),
+	fsparam_flag_no("resvport",	Opt_resvport),
+	fsparam_u32   ("retrans",	Opt_retrans),
+	fsparam_string("retry",		Opt_retry),
+	fsparam_u32   ("rsize",		Opt_rsize),
+	fsparam_string("sec",		Opt_sec),
+	fsparam_flag_no("sharecache",	Opt_sharecache),
+	fsparam_flag  ("sloppy",	Opt_sloppy),
+	fsparam_flag  ("soft",		Opt_soft),
+	fsparam_flag  ("softerr",	Opt_softerr),
+	fsparam_flag  ("softreval",	Opt_softreval),
+	fsparam_string("source",	Opt_source),
+	fsparam_flag  ("tcp",		Opt_tcp),
+	fsparam_u32   ("timeo",		Opt_timeo),
+	fsparam_flag  ("udp",		Opt_udp),
+	fsparam_flag  ("v2",		Opt_v),
+	fsparam_flag  ("v3",		Opt_v),
+	fsparam_flag  ("v4",		Opt_v),
+	fsparam_flag  ("v4.0",		Opt_v),
+	fsparam_flag  ("v4.1",		Opt_v),
+	fsparam_flag  ("v4.2",		Opt_v),
+	fsparam_string("vers",		Opt_vers),
+	fsparam_u32   ("wsize",		Opt_wsize),
+	{}
+};
+
+enum {
+	Opt_vers_2,
+	Opt_vers_3,
+	Opt_vers_4,
+	Opt_vers_4_0,
+	Opt_vers_4_1,
+	Opt_vers_4_2,
+};
+
+static const struct constant_table nfs_vers_tokens[] = {
+	{ "2",		Opt_vers_2 },
+	{ "3",		Opt_vers_3 },
+	{ "4",		Opt_vers_4 },
+	{ "4.0",	Opt_vers_4_0 },
+	{ "4.1",	Opt_vers_4_1 },
+	{ "4.2",	Opt_vers_4_2 },
+};
+
+enum {
+	Opt_xprt_rdma,
+	Opt_xprt_rdma6,
+	Opt_xprt_tcp,
+	Opt_xprt_tcp6,
+	Opt_xprt_udp,
+	Opt_xprt_udp6,
+	nr__Opt_xprt
+};
+
+static const struct constant_table nfs_xprt_protocol_tokens[nr__Opt_xprt] = {
+	{ "rdma",	Opt_xprt_rdma },
+	{ "rdma6",	Opt_xprt_rdma6 },
+	{ "tcp",	Opt_xprt_tcp },
+	{ "tcp6",	Opt_xprt_tcp6 },
+	{ "udp",	Opt_xprt_udp },
+	{ "udp6",	Opt_xprt_udp6 },
+};
+
+enum {
+	Opt_sec_krb5,
+	Opt_sec_krb5i,
+	Opt_sec_krb5p,
+	Opt_sec_lkey,
+	Opt_sec_lkeyi,
+	Opt_sec_lkeyp,
+	Opt_sec_none,
+	Opt_sec_spkm,
+	Opt_sec_spkmi,
+	Opt_sec_spkmp,
+	Opt_sec_sys,
+	nr__Opt_sec
+};
+
+static const struct constant_table nfs_secflavor_tokens[] = {
+	{ "krb5",	Opt_sec_krb5 },
+	{ "krb5i",	Opt_sec_krb5i },
+	{ "krb5p",	Opt_sec_krb5p },
+	{ "lkey",	Opt_sec_lkey },
+	{ "lkeyi",	Opt_sec_lkeyi },
+	{ "lkeyp",	Opt_sec_lkeyp },
+	{ "none",	Opt_sec_none },
+	{ "null",	Opt_sec_none },
+	{ "spkm3",	Opt_sec_spkm },
+	{ "spkm3i",	Opt_sec_spkmi },
+	{ "spkm3p",	Opt_sec_spkmp },
+	{ "sys",	Opt_sec_sys },
+};
+
+/*
+ * Sanity-check a server address provided by the mount command.
+ *
+ * Address family must be initialized, and address must not be
+ * the ANY address for that family.
+ */
+static int nfs_verify_server_address(struct sockaddr *addr)
+{
+	switch (addr->sa_family) {
+	case AF_INET: {
+		struct sockaddr_in *sa = (struct sockaddr_in *)addr;
+		return sa->sin_addr.s_addr != htonl(INADDR_ANY);
+	}
+	case AF_INET6: {
+		struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
+		return !ipv6_addr_any(sa);
+	}
+	}
+
+	dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
+	return 0;
+}
+
+/*
+ * Sanity check the NFS transport protocol.
+ *
+ */
+static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
+{
+	switch (ctx->nfs_server.protocol) {
+	case XPRT_TRANSPORT_UDP:
+	case XPRT_TRANSPORT_TCP:
+	case XPRT_TRANSPORT_RDMA:
+		break;
+	default:
+		ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+	}
+}
+
+/*
+ * For text based NFSv2/v3 mounts, the mount protocol transport default
+ * settings should depend upon the specified NFS transport.
+ */
+static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx)
+{
+	nfs_validate_transport_protocol(ctx);
+
+	if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP ||
+	    ctx->mount_server.protocol == XPRT_TRANSPORT_TCP)
+			return;
+	switch (ctx->nfs_server.protocol) {
+	case XPRT_TRANSPORT_UDP:
+		ctx->mount_server.protocol = XPRT_TRANSPORT_UDP;
+		break;
+	case XPRT_TRANSPORT_TCP:
+	case XPRT_TRANSPORT_RDMA:
+		ctx->mount_server.protocol = XPRT_TRANSPORT_TCP;
+	}
+}
+
+/*
+ * Add 'flavor' to 'auth_info' if not already present.
+ * Returns true if 'flavor' ends up in the list, false otherwise
+ */
+static int nfs_auth_info_add(struct fs_context *fc,
+			     struct nfs_auth_info *auth_info,
+			     rpc_authflavor_t flavor)
+{
+	unsigned int i;
+	unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
+
+	/* make sure this flavor isn't already in the list */
+	for (i = 0; i < auth_info->flavor_len; i++) {
+		if (flavor == auth_info->flavors[i])
+			return 0;
+	}
+
+	if (auth_info->flavor_len + 1 >= max_flavor_len)
+		return nfs_invalf(fc, "NFS: too many sec= flavors");
+
+	auth_info->flavors[auth_info->flavor_len++] = flavor;
+	return 0;
+}
+
+/*
+ * Parse the value of the 'sec=' option.
+ */
+static int nfs_parse_security_flavors(struct fs_context *fc,
+				      struct fs_parameter *param)
+{
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	rpc_authflavor_t pseudoflavor;
+	char *string = param->string, *p;
+	int ret;
+
+	dfprintk(MOUNT, "NFS: parsing %s=%s option\n", param->key, param->string);
+
+	while ((p = strsep(&string, ":")) != NULL) {
+		if (!*p)
+			continue;
+		switch (lookup_constant(nfs_secflavor_tokens, p, -1)) {
+		case Opt_sec_none:
+			pseudoflavor = RPC_AUTH_NULL;
+			break;
+		case Opt_sec_sys:
+			pseudoflavor = RPC_AUTH_UNIX;
+			break;
+		case Opt_sec_krb5:
+			pseudoflavor = RPC_AUTH_GSS_KRB5;
+			break;
+		case Opt_sec_krb5i:
+			pseudoflavor = RPC_AUTH_GSS_KRB5I;
+			break;
+		case Opt_sec_krb5p:
+			pseudoflavor = RPC_AUTH_GSS_KRB5P;
+			break;
+		case Opt_sec_lkey:
+			pseudoflavor = RPC_AUTH_GSS_LKEY;
+			break;
+		case Opt_sec_lkeyi:
+			pseudoflavor = RPC_AUTH_GSS_LKEYI;
+			break;
+		case Opt_sec_lkeyp:
+			pseudoflavor = RPC_AUTH_GSS_LKEYP;
+			break;
+		case Opt_sec_spkm:
+			pseudoflavor = RPC_AUTH_GSS_SPKM;
+			break;
+		case Opt_sec_spkmi:
+			pseudoflavor = RPC_AUTH_GSS_SPKMI;
+			break;
+		case Opt_sec_spkmp:
+			pseudoflavor = RPC_AUTH_GSS_SPKMP;
+			break;
+		default:
+			return nfs_invalf(fc, "NFS: sec=%s option not recognized", p);
+		}
+
+		ret = nfs_auth_info_add(fc, &ctx->auth_info, pseudoflavor);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int nfs_parse_version_string(struct fs_context *fc,
+				    const char *string)
+{
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+
+	ctx->flags &= ~NFS_MOUNT_VER3;
+	switch (lookup_constant(nfs_vers_tokens, string, -1)) {
+	case Opt_vers_2:
+		ctx->version = 2;
+		break;
+	case Opt_vers_3:
+		ctx->flags |= NFS_MOUNT_VER3;
+		ctx->version = 3;
+		break;
+	case Opt_vers_4:
+		/* Backward compatibility option. In future,
+		 * the mount program should always supply
+		 * a NFSv4 minor version number.
+		 */
+		ctx->version = 4;
+		break;
+	case Opt_vers_4_0:
+		ctx->version = 4;
+		ctx->minorversion = 0;
+		break;
+	case Opt_vers_4_1:
+		ctx->version = 4;
+		ctx->minorversion = 1;
+		break;
+	case Opt_vers_4_2:
+		ctx->version = 4;
+		ctx->minorversion = 2;
+		break;
+	default:
+		return nfs_invalf(fc, "NFS: Unsupported NFS version");
+	}
+	return 0;
+}
+
+/*
+ * Parse a single mount parameter.
+ */
+static int nfs_fs_context_parse_param(struct fs_context *fc,
+				      struct fs_parameter *param)
+{
+	struct fs_parse_result result;
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	unsigned short protofamily, mountfamily;
+	unsigned int len;
+	int ret, opt;
+
+	dfprintk(MOUNT, "NFS:   parsing nfs mount option '%s'\n", param->key);
+
+	opt = fs_parse(fc, nfs_fs_parameters, param, &result);
+	if (opt < 0)
+		return ctx->sloppy ? 1 : opt;
+
+	switch (opt) {
+	case Opt_source:
+		if (fc->source)
+			return nfs_invalf(fc, "NFS: Multiple sources not supported");
+		fc->source = param->string;
+		param->string = NULL;
+		break;
+
+		/*
+		 * boolean options:  foo/nofoo
+		 */
+	case Opt_soft:
+		ctx->flags |= NFS_MOUNT_SOFT;
+		ctx->flags &= ~NFS_MOUNT_SOFTERR;
+		break;
+	case Opt_softerr:
+		ctx->flags |= NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL;
+		ctx->flags &= ~NFS_MOUNT_SOFT;
+		break;
+	case Opt_hard:
+		ctx->flags &= ~(NFS_MOUNT_SOFT |
+				NFS_MOUNT_SOFTERR |
+				NFS_MOUNT_SOFTREVAL);
+		break;
+	case Opt_softreval:
+		if (result.negated)
+			ctx->flags &= ~NFS_MOUNT_SOFTREVAL;
+		else
+			ctx->flags &= NFS_MOUNT_SOFTREVAL;
+		break;
+	case Opt_posix:
+		if (result.negated)
+			ctx->flags &= ~NFS_MOUNT_POSIX;
+		else
+			ctx->flags |= NFS_MOUNT_POSIX;
+		break;
+	case Opt_cto:
+		if (result.negated)
+			ctx->flags |= NFS_MOUNT_NOCTO;
+		else
+			ctx->flags &= ~NFS_MOUNT_NOCTO;
+		break;
+	case Opt_ac:
+		if (result.negated)
+			ctx->flags |= NFS_MOUNT_NOAC;
+		else
+			ctx->flags &= ~NFS_MOUNT_NOAC;
+		break;
+	case Opt_lock:
+		if (result.negated) {
+			ctx->flags |= NFS_MOUNT_NONLM;
+			ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+		} else {
+			ctx->flags &= ~NFS_MOUNT_NONLM;
+			ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+		}
+		break;
+	case Opt_udp:
+		ctx->flags &= ~NFS_MOUNT_TCP;
+		ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+		break;
+	case Opt_tcp:
+		ctx->flags |= NFS_MOUNT_TCP;
+		ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+		break;
+	case Opt_rdma:
+		ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */
+		ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
+		xprt_load_transport(param->key);
+		break;
+	case Opt_acl:
+		if (result.negated)
+			ctx->flags |= NFS_MOUNT_NOACL;
+		else
+			ctx->flags &= ~NFS_MOUNT_NOACL;
+		break;
+	case Opt_rdirplus:
+		if (result.negated)
+			ctx->flags |= NFS_MOUNT_NORDIRPLUS;
+		else
+			ctx->flags &= ~NFS_MOUNT_NORDIRPLUS;
+		break;
+	case Opt_sharecache:
+		if (result.negated)
+			ctx->flags |= NFS_MOUNT_UNSHARED;
+		else
+			ctx->flags &= ~NFS_MOUNT_UNSHARED;
+		break;
+	case Opt_resvport:
+		if (result.negated)
+			ctx->flags |= NFS_MOUNT_NORESVPORT;
+		else
+			ctx->flags &= ~NFS_MOUNT_NORESVPORT;
+		break;
+	case Opt_fscache_flag:
+		if (result.negated)
+			ctx->options &= ~NFS_OPTION_FSCACHE;
+		else
+			ctx->options |= NFS_OPTION_FSCACHE;
+		kfree(ctx->fscache_uniq);
+		ctx->fscache_uniq = NULL;
+		break;
+	case Opt_fscache:
+		ctx->options |= NFS_OPTION_FSCACHE;
+		kfree(ctx->fscache_uniq);
+		ctx->fscache_uniq = param->string;
+		param->string = NULL;
+		break;
+	case Opt_migration:
+		if (result.negated)
+			ctx->options &= ~NFS_OPTION_MIGRATION;
+		else
+			ctx->options |= NFS_OPTION_MIGRATION;
+		break;
+
+		/*
+		 * options that take numeric values
+		 */
+	case Opt_port:
+		if (result.uint_32 > USHRT_MAX)
+			goto out_of_bounds;
+		ctx->nfs_server.port = result.uint_32;
+		break;
+	case Opt_rsize:
+		ctx->rsize = result.uint_32;
+		break;
+	case Opt_wsize:
+		ctx->wsize = result.uint_32;
+		break;
+	case Opt_bsize:
+		ctx->bsize = result.uint_32;
+		break;
+	case Opt_timeo:
+		if (result.uint_32 < 1 || result.uint_32 > INT_MAX)
+			goto out_of_bounds;
+		ctx->timeo = result.uint_32;
+		break;
+	case Opt_retrans:
+		if (result.uint_32 > INT_MAX)
+			goto out_of_bounds;
+		ctx->retrans = result.uint_32;
+		break;
+	case Opt_acregmin:
+		ctx->acregmin = result.uint_32;
+		break;
+	case Opt_acregmax:
+		ctx->acregmax = result.uint_32;
+		break;
+	case Opt_acdirmin:
+		ctx->acdirmin = result.uint_32;
+		break;
+	case Opt_acdirmax:
+		ctx->acdirmax = result.uint_32;
+		break;
+	case Opt_actimeo:
+		ctx->acregmin = result.uint_32;
+		ctx->acregmax = result.uint_32;
+		ctx->acdirmin = result.uint_32;
+		ctx->acdirmax = result.uint_32;
+		break;
+	case Opt_namelen:
+		ctx->namlen = result.uint_32;
+		break;
+	case Opt_mountport:
+		if (result.uint_32 > USHRT_MAX)
+			goto out_of_bounds;
+		ctx->mount_server.port = result.uint_32;
+		break;
+	case Opt_mountvers:
+		if (result.uint_32 < NFS_MNT_VERSION ||
+		    result.uint_32 > NFS_MNT3_VERSION)
+			goto out_of_bounds;
+		ctx->mount_server.version = result.uint_32;
+		break;
+	case Opt_minorversion:
+		if (result.uint_32 > NFS4_MAX_MINOR_VERSION)
+			goto out_of_bounds;
+		ctx->minorversion = result.uint_32;
+		break;
+
+		/*
+		 * options that take text values
+		 */
+	case Opt_v:
+		ret = nfs_parse_version_string(fc, param->key + 1);
+		if (ret < 0)
+			return ret;
+		break;
+	case Opt_vers:
+		ret = nfs_parse_version_string(fc, param->string);
+		if (ret < 0)
+			return ret;
+		break;
+	case Opt_sec:
+		ret = nfs_parse_security_flavors(fc, param);
+		if (ret < 0)
+			return ret;
+		break;
+
+	case Opt_proto:
+		protofamily = AF_INET;
+		switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
+		case Opt_xprt_udp6:
+			protofamily = AF_INET6;
+			/* fall through */
+		case Opt_xprt_udp:
+			ctx->flags &= ~NFS_MOUNT_TCP;
+			ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+			break;
+		case Opt_xprt_tcp6:
+			protofamily = AF_INET6;
+			/* fall through */
+		case Opt_xprt_tcp:
+			ctx->flags |= NFS_MOUNT_TCP;
+			ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+			break;
+		case Opt_xprt_rdma6:
+			protofamily = AF_INET6;
+			/* fall through */
+		case Opt_xprt_rdma:
+			/* vector side protocols to TCP */
+			ctx->flags |= NFS_MOUNT_TCP;
+			ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
+			xprt_load_transport(param->string);
+			break;
+		default:
+			return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+		}
+
+		ctx->protofamily = protofamily;
+		break;
+
+	case Opt_mountproto:
+		mountfamily = AF_INET;
+		switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
+		case Opt_xprt_udp6:
+			mountfamily = AF_INET6;
+			/* fall through */
+		case Opt_xprt_udp:
+			ctx->mount_server.protocol = XPRT_TRANSPORT_UDP;
+			break;
+		case Opt_xprt_tcp6:
+			mountfamily = AF_INET6;
+			/* fall through */
+		case Opt_xprt_tcp:
+			ctx->mount_server.protocol = XPRT_TRANSPORT_TCP;
+			break;
+		case Opt_xprt_rdma: /* not used for side protocols */
+		default:
+			return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+		}
+		ctx->mountfamily = mountfamily;
+		break;
+
+	case Opt_addr:
+		len = rpc_pton(fc->net_ns, param->string, param->size,
+			       &ctx->nfs_server.address,
+			       sizeof(ctx->nfs_server._address));
+		if (len == 0)
+			goto out_invalid_address;
+		ctx->nfs_server.addrlen = len;
+		break;
+	case Opt_clientaddr:
+		kfree(ctx->client_address);
+		ctx->client_address = param->string;
+		param->string = NULL;
+		break;
+	case Opt_mounthost:
+		kfree(ctx->mount_server.hostname);
+		ctx->mount_server.hostname = param->string;
+		param->string = NULL;
+		break;
+	case Opt_mountaddr:
+		len = rpc_pton(fc->net_ns, param->string, param->size,
+			       &ctx->mount_server.address,
+			       sizeof(ctx->mount_server._address));
+		if (len == 0)
+			goto out_invalid_address;
+		ctx->mount_server.addrlen = len;
+		break;
+	case Opt_nconnect:
+		if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS)
+			goto out_of_bounds;
+		ctx->nfs_server.nconnect = result.uint_32;
+		break;
+	case Opt_lookupcache:
+		switch (result.uint_32) {
+		case Opt_lookupcache_all:
+			ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE);
+			break;
+		case Opt_lookupcache_positive:
+			ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE;
+			ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG;
+			break;
+		case Opt_lookupcache_none:
+			ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE;
+			break;
+		default:
+			goto out_invalid_value;
+		}
+		break;
+	case Opt_local_lock:
+		switch (result.uint_32) {
+		case Opt_local_lock_all:
+			ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK |
+				       NFS_MOUNT_LOCAL_FCNTL);
+			break;
+		case Opt_local_lock_flock:
+			ctx->flags |= NFS_MOUNT_LOCAL_FLOCK;
+			break;
+		case Opt_local_lock_posix:
+			ctx->flags |= NFS_MOUNT_LOCAL_FCNTL;
+			break;
+		case Opt_local_lock_none:
+			ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
+					NFS_MOUNT_LOCAL_FCNTL);
+			break;
+		default:
+			goto out_invalid_value;
+		}
+		break;
+
+		/*
+		 * Special options
+		 */
+	case Opt_sloppy:
+		ctx->sloppy = true;
+		dfprintk(MOUNT, "NFS:   relaxing parsing rules\n");
+		break;
+	}
+
+	return 0;
+
+out_invalid_value:
+	return nfs_invalf(fc, "NFS: Bad mount option value specified");
+out_invalid_address:
+	return nfs_invalf(fc, "NFS: Bad IP address specified");
+out_of_bounds:
+	return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key);
+}
+
+/*
+ * Split fc->source into "hostname:export_path".
+ *
+ * The leftmost colon demarks the split between the server's hostname
+ * and the export path.  If the hostname starts with a left square
+ * bracket, then it may contain colons.
+ *
+ * Note: caller frees hostname and export path, even on error.
+ */
+static int nfs_parse_source(struct fs_context *fc,
+			    size_t maxnamlen, size_t maxpathlen)
+{
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	const char *dev_name = fc->source;
+	size_t len;
+	const char *end;
+
+	if (unlikely(!dev_name || !*dev_name)) {
+		dfprintk(MOUNT, "NFS: device name not specified\n");
+		return -EINVAL;
+	}
+
+	/* Is the host name protected with square brakcets? */
+	if (*dev_name == '[') {
+		end = strchr(++dev_name, ']');
+		if (end == NULL || end[1] != ':')
+			goto out_bad_devname;
+
+		len = end - dev_name;
+		end++;
+	} else {
+		const char *comma;
+
+		end = strchr(dev_name, ':');
+		if (end == NULL)
+			goto out_bad_devname;
+		len = end - dev_name;
+
+		/* kill possible hostname list: not supported */
+		comma = memchr(dev_name, ',', len);
+		if (comma)
+			len = comma - dev_name;
+	}
+
+	if (len > maxnamlen)
+		goto out_hostname;
+
+	/* N.B. caller will free nfs_server.hostname in all cases */
+	ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL);
+	if (!ctx->nfs_server.hostname)
+		goto out_nomem;
+	len = strlen(++end);
+	if (len > maxpathlen)
+		goto out_path;
+	ctx->nfs_server.export_path = kmemdup_nul(end, len, GFP_KERNEL);
+	if (!ctx->nfs_server.export_path)
+		goto out_nomem;
+
+	dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", ctx->nfs_server.export_path);
+	return 0;
+
+out_bad_devname:
+	return nfs_invalf(fc, "NFS: device name not in host:path format");
+out_nomem:
+	nfs_errorf(fc, "NFS: not enough memory to parse device name");
+	return -ENOMEM;
+out_hostname:
+	nfs_errorf(fc, "NFS: server hostname too long");
+	return -ENAMETOOLONG;
+out_path:
+	nfs_errorf(fc, "NFS: export pathname too long");
+	return -ENAMETOOLONG;
+}
+
+static inline bool is_remount_fc(struct fs_context *fc)
+{
+	return fc->root != NULL;
+}
+
+/*
+ * Parse monolithic NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
+ *
+ * For option strings, user space handles the following behaviors:
+ *
+ * + DNS: mapping server host name to IP address ("addr=" option)
+ *
+ * + failure mode: how to behave if a mount request can't be handled
+ *   immediately ("fg/bg" option)
+ *
+ * + retry: how often to retry a mount request ("retry=" option)
+ *
+ * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
+ *   mountproto=tcp after mountproto=udp, and so on
+ */
+static int nfs23_parse_monolithic(struct fs_context *fc,
+				  struct nfs_mount_data *data)
+{
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	struct nfs_fh *mntfh = ctx->mntfh;
+	struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+	int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
+
+	if (data == NULL)
+		goto out_no_data;
+
+	ctx->version = NFS_DEFAULT_VERSION;
+	switch (data->version) {
+	case 1:
+		data->namlen = 0; /* fall through */
+	case 2:
+		data->bsize = 0; /* fall through */
+	case 3:
+		if (data->flags & NFS_MOUNT_VER3)
+			goto out_no_v3;
+		data->root.size = NFS2_FHSIZE;
+		memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+		/* Turn off security negotiation */
+		extra_flags |= NFS_MOUNT_SECFLAVOUR;
+		/* fall through */
+	case 4:
+		if (data->flags & NFS_MOUNT_SECFLAVOUR)
+			goto out_no_sec;
+		/* fall through */
+	case 5:
+		memset(data->context, 0, sizeof(data->context));
+		/* fall through */
+	case 6:
+		if (data->flags & NFS_MOUNT_VER3) {
+			if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
+				goto out_invalid_fh;
+			mntfh->size = data->root.size;
+			ctx->version = 3;
+		} else {
+			mntfh->size = NFS2_FHSIZE;
+			ctx->version = 2;
+		}
+
+
+		memcpy(mntfh->data, data->root.data, mntfh->size);
+		if (mntfh->size < sizeof(mntfh->data))
+			memset(mntfh->data + mntfh->size, 0,
+			       sizeof(mntfh->data) - mntfh->size);
+
+		/*
+		 * Translate to nfs_fs_context, which nfs_fill_super
+		 * can deal with.
+		 */
+		ctx->flags	= data->flags & NFS_MOUNT_FLAGMASK;
+		ctx->flags	|= extra_flags;
+		ctx->rsize	= data->rsize;
+		ctx->wsize	= data->wsize;
+		ctx->timeo	= data->timeo;
+		ctx->retrans	= data->retrans;
+		ctx->acregmin	= data->acregmin;
+		ctx->acregmax	= data->acregmax;
+		ctx->acdirmin	= data->acdirmin;
+		ctx->acdirmax	= data->acdirmax;
+		ctx->need_mount	= false;
+
+		memcpy(sap, &data->addr, sizeof(data->addr));
+		ctx->nfs_server.addrlen = sizeof(data->addr);
+		ctx->nfs_server.port = ntohs(data->addr.sin_port);
+		if (sap->sa_family != AF_INET ||
+		    !nfs_verify_server_address(sap))
+			goto out_no_address;
+
+		if (!(data->flags & NFS_MOUNT_TCP))
+			ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+		/* N.B. caller will free nfs_server.hostname in all cases */
+		ctx->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
+		if (!ctx->nfs_server.hostname)
+			goto out_nomem;
+
+		ctx->namlen		= data->namlen;
+		ctx->bsize		= data->bsize;
+
+		if (data->flags & NFS_MOUNT_SECFLAVOUR)
+			ctx->selected_flavor = data->pseudoflavor;
+		else
+			ctx->selected_flavor = RPC_AUTH_UNIX;
+
+		if (!(data->flags & NFS_MOUNT_NONLM))
+			ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
+					 NFS_MOUNT_LOCAL_FCNTL);
+		else
+			ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK|
+					NFS_MOUNT_LOCAL_FCNTL);
+
+		/*
+		 * The legacy version 6 binary mount data from userspace has a
+		 * field used only to transport selinux information into the
+		 * the kernel.  To continue to support that functionality we
+		 * have a touch of selinux knowledge here in the NFS code. The
+		 * userspace code converted context=blah to just blah so we are
+		 * converting back to the full string selinux understands.
+		 */
+		if (data->context[0]){
+#ifdef CONFIG_SECURITY_SELINUX
+			int ret;
+
+			data->context[NFS_MAX_CONTEXT_LEN] = '\0';
+			ret = vfs_parse_fs_string(fc, "context",
+						  data->context, strlen(data->context));
+			if (ret < 0)
+				return ret;
+#else
+			return -EINVAL;
+#endif
+		}
+
+		break;
+	default:
+		goto generic;
+	}
+
+	ctx->skip_reconfig_option_check = true;
+	return 0;
+
+generic:
+	return generic_parse_monolithic(fc, data);
+
+out_no_data:
+	if (is_remount_fc(fc)) {
+		ctx->skip_reconfig_option_check = true;
+		return 0;
+	}
+	return nfs_invalf(fc, "NFS: mount program didn't pass any mount data");
+
+out_no_v3:
+	return nfs_invalf(fc, "NFS: nfs_mount_data version does not support v3");
+
+out_no_sec:
+	return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS");
+
+out_nomem:
+	dfprintk(MOUNT, "NFS: not enough memory to handle mount options");
+	return -ENOMEM;
+
+out_no_address:
+	return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
+
+out_invalid_fh:
+	return nfs_invalf(fc, "NFS: invalid root filehandle");
+}
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * Validate NFSv4 mount options
+ */
+static int nfs4_parse_monolithic(struct fs_context *fc,
+				 struct nfs4_mount_data *data)
+{
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+	char *c;
+
+	if (data == NULL)
+		goto out_no_data;
+
+	ctx->version = 4;
+
+	switch (data->version) {
+	case 1:
+		if (data->host_addrlen > sizeof(ctx->nfs_server.address))
+			goto out_no_address;
+		if (data->host_addrlen == 0)
+			goto out_no_address;
+		ctx->nfs_server.addrlen = data->host_addrlen;
+		if (copy_from_user(sap, data->host_addr, data->host_addrlen))
+			return -EFAULT;
+		if (!nfs_verify_server_address(sap))
+			goto out_no_address;
+		ctx->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
+
+		if (data->auth_flavourlen) {
+			rpc_authflavor_t pseudoflavor;
+			if (data->auth_flavourlen > 1)
+				goto out_inval_auth;
+			if (copy_from_user(&pseudoflavor,
+					   data->auth_flavours,
+					   sizeof(pseudoflavor)))
+				return -EFAULT;
+			ctx->selected_flavor = pseudoflavor;
+		} else
+			ctx->selected_flavor = RPC_AUTH_UNIX;
+
+		c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
+		if (IS_ERR(c))
+			return PTR_ERR(c);
+		ctx->nfs_server.hostname = c;
+
+		c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
+		if (IS_ERR(c))
+			return PTR_ERR(c);
+		ctx->nfs_server.export_path = c;
+		dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
+
+		c = strndup_user(data->client_addr.data, 16);
+		if (IS_ERR(c))
+			return PTR_ERR(c);
+		ctx->client_address = c;
+
+		/*
+		 * Translate to nfs_fs_context, which nfs_fill_super
+		 * can deal with.
+		 */
+
+		ctx->flags	= data->flags & NFS4_MOUNT_FLAGMASK;
+		ctx->rsize	= data->rsize;
+		ctx->wsize	= data->wsize;
+		ctx->timeo	= data->timeo;
+		ctx->retrans	= data->retrans;
+		ctx->acregmin	= data->acregmin;
+		ctx->acregmax	= data->acregmax;
+		ctx->acdirmin	= data->acdirmin;
+		ctx->acdirmax	= data->acdirmax;
+		ctx->nfs_server.protocol = data->proto;
+		nfs_validate_transport_protocol(ctx);
+		if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+			goto out_invalid_transport_udp;
+
+		break;
+	default:
+		goto generic;
+	}
+
+	ctx->skip_reconfig_option_check = true;
+	return 0;
+
+generic:
+	return generic_parse_monolithic(fc, data);
+
+out_no_data:
+	if (is_remount_fc(fc)) {
+		ctx->skip_reconfig_option_check = true;
+		return 0;
+	}
+	return nfs_invalf(fc, "NFS4: mount program didn't pass any mount data");
+
+out_inval_auth:
+	return nfs_invalf(fc, "NFS4: Invalid number of RPC auth flavours %d",
+		      data->auth_flavourlen);
+
+out_no_address:
+	return nfs_invalf(fc, "NFS4: mount program didn't pass remote address");
+
+out_invalid_transport_udp:
+	return nfs_invalf(fc, "NFSv4: Unsupported transport protocol udp");
+}
+#endif
+
+/*
+ * Parse a monolithic block of data from sys_mount().
+ */
+static int nfs_fs_context_parse_monolithic(struct fs_context *fc,
+					   void *data)
+{
+	if (fc->fs_type == &nfs_fs_type)
+		return nfs23_parse_monolithic(fc, data);
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+	if (fc->fs_type == &nfs4_fs_type)
+		return nfs4_parse_monolithic(fc, data);
+#endif
+
+	return nfs_invalf(fc, "NFS: Unsupported monolithic data version");
+}
+
+/*
+ * Validate the preparsed information in the config.
+ */
+static int nfs_fs_context_validate(struct fs_context *fc)
+{
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	struct nfs_subversion *nfs_mod;
+	struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+	int max_namelen = PAGE_SIZE;
+	int max_pathlen = NFS_MAXPATHLEN;
+	int port = 0;
+	int ret;
+
+	if (!fc->source)
+		goto out_no_device_name;
+
+	/* Check for sanity first. */
+	if (ctx->minorversion && ctx->version != 4)
+		goto out_minorversion_mismatch;
+
+	if (ctx->options & NFS_OPTION_MIGRATION &&
+	    (ctx->version != 4 || ctx->minorversion != 0))
+		goto out_migration_misuse;
+
+	/* Verify that any proto=/mountproto= options match the address
+	 * families in the addr=/mountaddr= options.
+	 */
+	if (ctx->protofamily != AF_UNSPEC &&
+	    ctx->protofamily != ctx->nfs_server.address.sa_family)
+		goto out_proto_mismatch;
+
+	if (ctx->mountfamily != AF_UNSPEC) {
+		if (ctx->mount_server.addrlen) {
+			if (ctx->mountfamily != ctx->mount_server.address.sa_family)
+				goto out_mountproto_mismatch;
+		} else {
+			if (ctx->mountfamily != ctx->nfs_server.address.sa_family)
+				goto out_mountproto_mismatch;
+		}
+	}
+
+	if (!nfs_verify_server_address(sap))
+		goto out_no_address;
+
+	if (ctx->version == 4) {
+		if (IS_ENABLED(CONFIG_NFS_V4)) {
+			if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
+				port = NFS_RDMA_PORT;
+			else
+				port = NFS_PORT;
+			max_namelen = NFS4_MAXNAMLEN;
+			max_pathlen = NFS4_MAXPATHLEN;
+			nfs_validate_transport_protocol(ctx);
+			if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+				goto out_invalid_transport_udp;
+			ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL |
+					NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK |
+					NFS_MOUNT_LOCAL_FCNTL);
+		} else {
+			goto out_v4_not_compiled;
+		}
+	} else {
+		nfs_set_mount_transport_protocol(ctx);
+#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
+	       if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+		       goto out_invalid_transport_udp;
+#endif
+		if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
+			port = NFS_RDMA_PORT;
+	}
+
+	nfs_set_port(sap, &ctx->nfs_server.port, port);
+
+	ret = nfs_parse_source(fc, max_namelen, max_pathlen);
+	if (ret < 0)
+		return ret;
+
+	/* Load the NFS protocol module if we haven't done so yet */
+	if (!ctx->nfs_mod) {
+		nfs_mod = get_nfs_version(ctx->version);
+		if (IS_ERR(nfs_mod)) {
+			ret = PTR_ERR(nfs_mod);
+			goto out_version_unavailable;
+		}
+		ctx->nfs_mod = nfs_mod;
+	}
+	return 0;
+
+out_no_device_name:
+	return nfs_invalf(fc, "NFS: Device name not specified");
+out_v4_not_compiled:
+	nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel");
+	return -EPROTONOSUPPORT;
+out_invalid_transport_udp:
+	return nfs_invalf(fc, "NFSv4: Unsupported transport protocol udp");
+out_no_address:
+	return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
+out_mountproto_mismatch:
+	return nfs_invalf(fc, "NFS: Mount server address does not match mountproto= option");
+out_proto_mismatch:
+	return nfs_invalf(fc, "NFS: Server address does not match proto= option");
+out_minorversion_mismatch:
+	return nfs_invalf(fc, "NFS: Mount option vers=%u does not support minorversion=%u",
+			  ctx->version, ctx->minorversion);
+out_migration_misuse:
+	return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version");
+out_version_unavailable:
+	nfs_errorf(fc, "NFS: Version unavailable");
+	return ret;
+}
+
+/*
+ * Create an NFS superblock by the appropriate method.
+ */
+static int nfs_get_tree(struct fs_context *fc)
+{
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	int err = nfs_fs_context_validate(fc);
+
+	if (err)
+		return err;
+	if (!ctx->internal)
+		return ctx->nfs_mod->rpc_ops->try_get_tree(fc);
+	else
+		return nfs_get_tree_common(fc);
+}
+
+/*
+ * Handle duplication of a configuration.  The caller copied *src into *sc, but
+ * it can't deal with resource pointers in the filesystem context, so we have
+ * to do that.  We need to clear pointers, copy data or get extra refs as
+ * appropriate.
+ */
+static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
+{
+	struct nfs_fs_context *src = nfs_fc2context(src_fc), *ctx;
+
+	ctx = kmemdup(src, sizeof(struct nfs_fs_context), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->mntfh = nfs_alloc_fhandle();
+	if (!ctx->mntfh) {
+		kfree(ctx);
+		return -ENOMEM;
+	}
+	nfs_copy_fh(ctx->mntfh, src->mntfh);
+
+	__module_get(ctx->nfs_mod->owner);
+	ctx->client_address		= NULL;
+	ctx->mount_server.hostname	= NULL;
+	ctx->nfs_server.export_path	= NULL;
+	ctx->nfs_server.hostname	= NULL;
+	ctx->fscache_uniq		= NULL;
+	ctx->clone_data.fattr		= NULL;
+	fc->fs_private = ctx;
+	return 0;
+}
+
+static void nfs_fs_context_free(struct fs_context *fc)
+{
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+
+	if (ctx) {
+		if (ctx->server)
+			nfs_free_server(ctx->server);
+		if (ctx->nfs_mod)
+			put_nfs_version(ctx->nfs_mod);
+		kfree(ctx->client_address);
+		kfree(ctx->mount_server.hostname);
+		kfree(ctx->nfs_server.export_path);
+		kfree(ctx->nfs_server.hostname);
+		kfree(ctx->fscache_uniq);
+		nfs_free_fhandle(ctx->mntfh);
+		nfs_free_fattr(ctx->clone_data.fattr);
+		kfree(ctx);
+	}
+}
+
+static const struct fs_context_operations nfs_fs_context_ops = {
+	.free			= nfs_fs_context_free,
+	.dup			= nfs_fs_context_dup,
+	.parse_param		= nfs_fs_context_parse_param,
+	.parse_monolithic	= nfs_fs_context_parse_monolithic,
+	.get_tree		= nfs_get_tree,
+	.reconfigure		= nfs_reconfigure,
+};
+
+/*
+ * Prepare superblock configuration.  We use the namespaces attached to the
+ * context.  This may be the current process's namespaces, or it may be a
+ * container's namespaces.
+ */
+static int nfs_init_fs_context(struct fs_context *fc)
+{
+	struct nfs_fs_context *ctx;
+
+	ctx = kzalloc(sizeof(struct nfs_fs_context), GFP_KERNEL);
+	if (unlikely(!ctx))
+		return -ENOMEM;
+
+	ctx->mntfh = nfs_alloc_fhandle();
+	if (unlikely(!ctx->mntfh)) {
+		kfree(ctx);
+		return -ENOMEM;
+	}
+
+	ctx->protofamily	= AF_UNSPEC;
+	ctx->mountfamily	= AF_UNSPEC;
+	ctx->mount_server.port	= NFS_UNSPEC_PORT;
+
+	if (fc->root) {
+		/* reconfigure, start with the current config */
+		struct nfs_server *nfss = fc->root->d_sb->s_fs_info;
+		struct net *net = nfss->nfs_client->cl_net;
+
+		ctx->flags		= nfss->flags;
+		ctx->rsize		= nfss->rsize;
+		ctx->wsize		= nfss->wsize;
+		ctx->retrans		= nfss->client->cl_timeout->to_retries;
+		ctx->selected_flavor	= nfss->client->cl_auth->au_flavor;
+		ctx->acregmin		= nfss->acregmin / HZ;
+		ctx->acregmax		= nfss->acregmax / HZ;
+		ctx->acdirmin		= nfss->acdirmin / HZ;
+		ctx->acdirmax		= nfss->acdirmax / HZ;
+		ctx->timeo		= 10U * nfss->client->cl_timeout->to_initval / HZ;
+		ctx->nfs_server.port	= nfss->port;
+		ctx->nfs_server.addrlen	= nfss->nfs_client->cl_addrlen;
+		ctx->version		= nfss->nfs_client->rpc_ops->version;
+		ctx->minorversion	= nfss->nfs_client->cl_minorversion;
+
+		memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr,
+			ctx->nfs_server.addrlen);
+
+		if (fc->net_ns != net) {
+			put_net(fc->net_ns);
+			fc->net_ns = get_net(net);
+		}
+
+		ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod;
+		__module_get(ctx->nfs_mod->owner);
+	} else {
+		/* defaults */
+		ctx->timeo		= NFS_UNSPEC_TIMEO;
+		ctx->retrans		= NFS_UNSPEC_RETRANS;
+		ctx->acregmin		= NFS_DEF_ACREGMIN;
+		ctx->acregmax		= NFS_DEF_ACREGMAX;
+		ctx->acdirmin		= NFS_DEF_ACDIRMIN;
+		ctx->acdirmax		= NFS_DEF_ACDIRMAX;
+		ctx->nfs_server.port	= NFS_UNSPEC_PORT;
+		ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+		ctx->selected_flavor	= RPC_AUTH_MAXFLAVOR;
+		ctx->minorversion	= 0;
+		ctx->need_mount		= true;
+	}
+	fc->fs_private = ctx;
+	fc->ops = &nfs_fs_context_ops;
+	return 0;
+}
+
+struct file_system_type nfs_fs_type = {
+	.owner			= THIS_MODULE,
+	.name			= "nfs",
+	.init_fs_context	= nfs_init_fs_context,
+	.parameters		= nfs_fs_parameters,
+	.kill_sb		= nfs_kill_super,
+	.fs_flags		= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+};
+MODULE_ALIAS_FS("nfs");
+EXPORT_SYMBOL_GPL(nfs_fs_type);
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+struct file_system_type nfs4_fs_type = {
+	.owner			= THIS_MODULE,
+	.name			= "nfs4",
+	.init_fs_context	= nfs_init_fs_context,
+	.parameters		= nfs_fs_parameters,
+	.kill_sb		= nfs_kill_super,
+	.fs_flags		= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+};
+MODULE_ALIAS_FS("nfs4");
+MODULE_ALIAS("nfs4");
+EXPORT_SYMBOL_GPL(nfs4_fs_type);
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 7def925d3af5..52270bfac120 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -128,7 +128,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int
 		return;
 
 	key->nfs_client = nfss->nfs_client;
-	key->key.super.s_flags = sb->s_flags & NFS_MS_MASK;
+	key->key.super.s_flags = sb->s_flags & NFS_SB_MASK;
 	key->key.nfs_server.flags = nfss->flags;
 	key->key.nfs_server.rsize = nfss->rsize;
 	key->key.nfs_server.wsize = nfss->wsize;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 878c4c5982d9..b012c2668a1f 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -64,66 +64,71 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
 /*
  * get an NFS2/NFS3 root dentry from the root filehandle
  */
-struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
-			    const char *devname)
+int nfs_get_root(struct super_block *s, struct fs_context *fc)
 {
-	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	struct nfs_server *server = NFS_SB(s);
 	struct nfs_fsinfo fsinfo;
-	struct dentry *ret;
+	struct dentry *root;
 	struct inode *inode;
-	void *name = kstrdup(devname, GFP_KERNEL);
-	int error;
+	char *name;
+	int error = -ENOMEM;
 
+	name = kstrdup(fc->source, GFP_KERNEL);
 	if (!name)
-		return ERR_PTR(-ENOMEM);
+		goto out;
 
 	/* get the actual root for this mount */
 	fsinfo.fattr = nfs_alloc_fattr();
-	if (fsinfo.fattr == NULL) {
-		kfree(name);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (fsinfo.fattr == NULL)
+		goto out_name;
 
-	error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+	error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo);
 	if (error < 0) {
 		dprintk("nfs_get_root: getattr error = %d\n", -error);
-		ret = ERR_PTR(error);
-		goto out;
+		nfs_errorf(fc, "NFS: Couldn't getattr on root");
+		goto out_fattr;
 	}
 
-	inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL);
+	inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL);
 	if (IS_ERR(inode)) {
 		dprintk("nfs_get_root: get root inode failed\n");
-		ret = ERR_CAST(inode);
-		goto out;
+		error = PTR_ERR(inode);
+		nfs_errorf(fc, "NFS: Couldn't get root inode");
+		goto out_fattr;
 	}
 
-	error = nfs_superblock_set_dummy_root(sb, inode);
-	if (error != 0) {
-		ret = ERR_PTR(error);
-		goto out;
-	}
+	error = nfs_superblock_set_dummy_root(s, inode);
+	if (error != 0)
+		goto out_fattr;
 
 	/* root dentries normally start off anonymous and get spliced in later
 	 * if the dentry tree reaches them; however if the dentry already
 	 * exists, we'll pick it up at this point and use it as the root
 	 */
-	ret = d_obtain_root(inode);
-	if (IS_ERR(ret)) {
+	root = d_obtain_root(inode);
+	if (IS_ERR(root)) {
 		dprintk("nfs_get_root: get root dentry failed\n");
-		goto out;
+		error = PTR_ERR(root);
+		nfs_errorf(fc, "NFS: Couldn't get root dentry");
+		goto out_fattr;
 	}
 
-	security_d_instantiate(ret, inode);
-	spin_lock(&ret->d_lock);
-	if (IS_ROOT(ret) && !ret->d_fsdata &&
-	    !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
-		ret->d_fsdata = name;
+	security_d_instantiate(root, inode);
+	spin_lock(&root->d_lock);
+	if (IS_ROOT(root) && !root->d_fsdata &&
+	    !(root->d_flags & DCACHE_NFSFS_RENAMED)) {
+		root->d_fsdata = name;
 		name = NULL;
 	}
-	spin_unlock(&ret->d_lock);
-out:
-	kfree(name);
+	spin_unlock(&root->d_lock);
+	fc->root = root;
+	error = 0;
+
+out_fattr:
 	nfs_free_fattr(fsinfo.fattr);
-	return ret;
+out_name:
+	kfree(name);
+out:
+	return error;
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b0b4b9f303fd..1309e6f47f3d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1061,7 +1061,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(pos, &nfsi->open_files, list) {
-		if (cred != NULL && pos->cred != cred)
+		if (cred != NULL && cred_fscmp(pos->cred, cred) != 0)
 			continue;
 		if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
 			continue;
@@ -1156,7 +1156,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n",
 			 inode->i_sb->s_id,
 			 (unsigned long long)NFS_FILEID(inode), status);
-		if (status == -ESTALE) {
+		switch (status) {
+		case -ETIMEDOUT:
+			/* A soft timeout occurred. Use cached information? */
+			if (server->flags & NFS_MOUNT_SOFTREVAL)
+				status = 0;
+			break;
+		case -ESTALE:
 			nfs_zap_caches(inode);
 			if (!S_ISDIR(inode->i_mode))
 				set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 24a65da58aa9..f80c47d5ff27 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,17 +4,19 @@
  */
 
 #include "nfs4_fs.h"
-#include <linux/mount.h>
+#include <linux/fs_context.h>
 #include <linux/security.h>
 #include <linux/crc32.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/nfs_page.h>
 #include <linux/wait_bit.h>
 
-#define NFS_MS_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
+#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
 
 extern const struct export_operations nfs_export_ops;
 
 struct nfs_string;
+struct nfs_pageio_descriptor;
 
 static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
 {
@@ -31,17 +33,14 @@ static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr)
 	return 1;
 }
 
-struct nfs_clone_mount {
-	const struct super_block *sb;
-	const struct dentry *dentry;
-	struct nfs_fh *fh;
-	struct nfs_fattr *fattr;
-	char *hostname;
-	char *mnt_path;
-	struct sockaddr *addr;
-	size_t addrlen;
-	rpc_authflavor_t authflavor;
-};
+static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
+{
+	if (!(NFS_SB(dentry->d_sb)->flags & NFS_MOUNT_SOFTREVAL))
+		return false;
+	if (!d_is_positive(dentry) || !NFS_FH(d_inode(dentry))->size)
+		return false;
+	return true;
+}
 
 /*
  * Note: RFC 1813 doesn't limit the number of auth flavors that
@@ -82,12 +81,16 @@ struct nfs_client_initdata {
 /*
  * In-kernel mount arguments
  */
-struct nfs_parsed_mount_data {
-	int			flags;
+struct nfs_fs_context {
+	bool			internal;
+	bool			skip_reconfig_option_check;
+	bool			need_mount;
+	bool			sloppy;
+	unsigned int		flags;		/* NFS{,4}_MOUNT_* flags */
 	unsigned int		rsize, wsize;
 	unsigned int		timeo, retrans;
-	unsigned int		acregmin, acregmax,
-				acdirmin, acdirmax;
+	unsigned int		acregmin, acregmax;
+	unsigned int		acdirmin, acdirmax;
 	unsigned int		namlen;
 	unsigned int		options;
 	unsigned int		bsize;
@@ -97,10 +100,14 @@ struct nfs_parsed_mount_data {
 	unsigned int		version;
 	unsigned int		minorversion;
 	char			*fscache_uniq;
-	bool			need_mount;
+	unsigned short		protofamily;
+	unsigned short		mountfamily;
 
 	struct {
-		struct sockaddr_storage	address;
+		union {
+			struct sockaddr	address;
+			struct sockaddr_storage	_address;
+		};
 		size_t			addrlen;
 		char			*hostname;
 		u32			version;
@@ -109,19 +116,41 @@ struct nfs_parsed_mount_data {
 	} mount_server;
 
 	struct {
-		struct sockaddr_storage	address;
+		union {
+			struct sockaddr	address;
+			struct sockaddr_storage	_address;
+		};
 		size_t			addrlen;
 		char			*hostname;
 		char			*export_path;
 		int			port;
 		unsigned short		protocol;
 		unsigned short		nconnect;
+		unsigned short		export_path_len;
 	} nfs_server;
 
-	void			*lsm_opts;
-	struct net		*net;
+	struct nfs_fh		*mntfh;
+	struct nfs_server	*server;
+	struct nfs_subversion	*nfs_mod;
+
+	/* Information for a cloned mount. */
+	struct nfs_clone_mount {
+		struct super_block	*sb;
+		struct dentry		*dentry;
+		struct nfs_fattr	*fattr;
+		unsigned int		inherited_bsize;
+	} clone_data;
 };
 
+#define nfs_errorf(fc, fmt, ...) errorf(fc, fmt, ## __VA_ARGS__)
+#define nfs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__)
+#define nfs_warnf(fc, fmt, ...) warnf(fc, fmt, ## __VA_ARGS__)
+
+static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc)
+{
+	return fc->fs_private;
+}
+
 /* mount_clnt.c */
 struct nfs_mount_request {
 	struct sockaddr		*sap;
@@ -137,14 +166,6 @@ struct nfs_mount_request {
 	struct net		*net;
 };
 
-struct nfs_mount_info {
-	void (*fill_super)(struct super_block *, struct nfs_mount_info *);
-	int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
-	struct nfs_parsed_mount_data *parsed;
-	struct nfs_clone_mount *cloned;
-	struct nfs_fh *mntfh;
-};
-
 extern int nfs_mount(struct nfs_mount_request *info);
 extern void nfs_umount(const struct nfs_mount_request *info);
 
@@ -170,13 +191,9 @@ extern struct nfs_client *nfs4_find_client_ident(struct net *, int);
 extern struct nfs_client *
 nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
 				struct nfs4_sessionid *, u32);
-extern struct nfs_server *nfs_create_server(struct nfs_mount_info *,
-					struct nfs_subversion *);
-extern struct nfs_server *nfs4_create_server(
-					struct nfs_mount_info *,
-					struct nfs_subversion *);
-extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
-						      struct nfs_fh *);
+extern struct nfs_server *nfs_create_server(struct fs_context *);
+extern struct nfs_server *nfs4_create_server(struct fs_context *);
+extern struct nfs_server *nfs4_create_referral_server(struct fs_context *);
 extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
 					struct sockaddr *sap, size_t salen,
 					struct net *net);
@@ -227,7 +244,9 @@ static inline void nfs_fs_proc_exit(void)
 extern const struct svc_version nfs4_callback_version1;
 extern const struct svc_version nfs4_callback_version4;
 
-struct nfs_pageio_descriptor;
+/* fs_context.c */
+extern struct file_system_type nfs_fs_type;
+
 /* pagelist.c */
 extern int __init nfs_init_nfspagecache(void);
 extern void nfs_destroy_nfspagecache(void);
@@ -387,23 +406,10 @@ extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode);
 
 /* super.c */
 extern const struct super_operations nfs_sops;
-extern struct file_system_type nfs_fs_type;
-extern struct file_system_type nfs_xdev_fs_type;
-#if IS_ENABLED(CONFIG_NFS_V4)
-extern struct file_system_type nfs4_referral_fs_type;
-#endif
 bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
-struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
-			struct nfs_subversion *);
-int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
-int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
-struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *,
-				   struct nfs_mount_info *, struct nfs_subversion *);
-struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *);
-struct dentry * nfs_xdev_mount_common(struct file_system_type *, int,
-		const char *, struct nfs_mount_info *);
+int nfs_try_get_tree(struct fs_context *);
+int nfs_get_tree_common(struct fs_context *);
 void nfs_kill_super(struct super_block *);
-void nfs_fill_super(struct super_block *, struct nfs_mount_info *);
 
 extern struct rpc_stat nfs_rpcstat;
 
@@ -430,18 +436,12 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi)
 extern char *nfs_path(char **p, struct dentry *dentry,
 		      char *buffer, ssize_t buflen, unsigned flags);
 extern struct vfsmount *nfs_d_automount(struct path *path);
-struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
-			      struct nfs_fh *, struct nfs_fattr *);
-struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
-				 struct nfs_fattr *, rpc_authflavor_t);
+int nfs_submount(struct fs_context *, struct nfs_server *);
+int nfs_do_submount(struct fs_context *);
 
 /* getroot.c */
-extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
-				   const char *);
+extern int nfs_get_root(struct super_block *s, struct fs_context *fc);
 #if IS_ENABLED(CONFIG_NFS_V4)
-extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
-				    const char *);
-
 extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool);
 #endif
 
@@ -460,7 +460,7 @@ int  nfs_show_options(struct seq_file *, struct dentry *);
 int  nfs_show_devname(struct seq_file *, struct dentry *);
 int  nfs_show_path(struct seq_file *, struct dentry *);
 int  nfs_show_stats(struct seq_file *, struct dentry *);
-int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
+int  nfs_reconfigure(struct fs_context *);
 
 /* write.c */
 extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -706,9 +706,9 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
 }
 
 /*
- * Convert a struct timespec into a 64-bit change attribute
+ * Convert a struct timespec64 into a 64-bit change attribute
  *
- * This does approximately the same thing as timespec_to_ns(),
+ * This does approximately the same thing as timespec64_to_ns(),
  * but for calculation efficiency, we multiply the seconds by
  * 1024*1024*1024.
  */
@@ -777,3 +777,16 @@ static inline bool nfs_error_is_fatal_on_server(int err)
 	}
 	return nfs_error_is_fatal(err);
 }
+
+/*
+ * Select between a default port value and a user-specified port value.
+ * If a zero value is set, then autobind will be used.
+ */
+static inline void nfs_set_port(struct sockaddr *sap, int *port,
+				const unsigned short default_port)
+{
+	if (*port == NFS_UNSPEC_PORT)
+		*port = default_port;
+
+	rpc_set_port(sap, *port);
+}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index cb7c10e9721e..35c8cb2d7637 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -29,9 +29,7 @@
  */
 #define encode_dirpath_sz	(1 + XDR_QUADLEN(MNTPATHLEN))
 #define MNT_status_sz		(1)
-#define MNT_fhs_status_sz	(1)
 #define MNT_fhandle_sz		XDR_QUADLEN(NFS2_FHSIZE)
-#define MNT_fhandle3_sz		(1 + XDR_QUADLEN(NFS3_FHSIZE))
 #define MNT_authflav3_sz	(1 + NFS_MAX_SECFLAVORS)
 
 /*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 5e0e9d29f5c5..ad6077404947 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -19,6 +19,7 @@
 #include <linux/vfs.h>
 #include <linux/sunrpc/gss_api.h>
 #include "internal.h"
+#include "nfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
@@ -139,34 +140,65 @@ EXPORT_SYMBOL_GPL(nfs_path);
  */
 struct vfsmount *nfs_d_automount(struct path *path)
 {
-	struct vfsmount *mnt;
+	struct nfs_fs_context *ctx;
+	struct fs_context *fc;
+	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
 	struct nfs_server *server = NFS_SERVER(d_inode(path->dentry));
-	struct nfs_fh *fh = NULL;
-	struct nfs_fattr *fattr = NULL;
+	struct nfs_client *client = server->nfs_client;
+	int ret;
 
 	if (IS_ROOT(path->dentry))
 		return ERR_PTR(-ESTALE);
 
-	mnt = ERR_PTR(-ENOMEM);
-	fh = nfs_alloc_fhandle();
-	fattr = nfs_alloc_fattr();
-	if (fh == NULL || fattr == NULL)
-		goto out;
+	/* Open a new filesystem context, transferring parameters from the
+	 * parent superblock, including the network namespace.
+	 */
+	fc = fs_context_for_submount(&nfs_fs_type, path->dentry);
+	if (IS_ERR(fc))
+		return ERR_CAST(fc);
 
-	mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
+	ctx = nfs_fc2context(fc);
+	ctx->clone_data.dentry	= path->dentry;
+	ctx->clone_data.sb	= path->dentry->d_sb;
+	ctx->clone_data.fattr	= nfs_alloc_fattr();
+	if (!ctx->clone_data.fattr)
+		goto out_fc;
+
+	if (fc->net_ns != client->cl_net) {
+		put_net(fc->net_ns);
+		fc->net_ns = get_net(client->cl_net);
+	}
+
+	/* for submounts we want the same server; referrals will reassign */
+	memcpy(&ctx->nfs_server.address, &client->cl_addr, client->cl_addrlen);
+	ctx->nfs_server.addrlen	= client->cl_addrlen;
+	ctx->nfs_server.port	= server->port;
+
+	ctx->version		= client->rpc_ops->version;
+	ctx->minorversion	= client->cl_minorversion;
+	ctx->nfs_mod		= client->cl_nfs_mod;
+	__module_get(ctx->nfs_mod->owner);
+
+	ret = client->rpc_ops->submount(fc, server);
+	if (ret < 0) {
+		mnt = ERR_PTR(ret);
+		goto out_fc;
+	}
+
+	up_write(&fc->root->d_sb->s_umount);
+	mnt = vfs_create_mount(fc);
 	if (IS_ERR(mnt))
-		goto out;
+		goto out_fc;
 
 	if (nfs_mountpoint_expiry_timeout < 0)
-		goto out;
+		goto out_fc;
 
 	mntget(mnt); /* prevent immediate expiration */
 	mnt_set_expiry(mnt, &nfs_automount_list);
 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
 
-out:
-	nfs_free_fattr(fattr);
-	nfs_free_fhandle(fh);
+out_fc:
+	put_fs_context(fc);
 	return mnt;
 }
 
@@ -213,16 +245,6 @@ void nfs_release_automount_timer(void)
 		cancel_delayed_work(&nfs_automount_task);
 }
 
-/*
- * Clone a mountpoint of the appropriate type
- */
-static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
-					   const char *devname,
-					   struct nfs_clone_mount *mountdata)
-{
-	return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata);
-}
-
 /**
  * nfs_do_submount - set up mountpoint when crossing a filesystem boundary
  * @dentry: parent directory
@@ -231,46 +253,62 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
  * @authflavor: security flavor to use when performing the mount
  *
  */
-struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
-				 struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
+int nfs_do_submount(struct fs_context *fc)
 {
-	struct nfs_clone_mount mountdata = {
-		.sb = dentry->d_sb,
-		.dentry = dentry,
-		.fh = fh,
-		.fattr = fattr,
-		.authflavor = authflavor,
-	};
-	struct vfsmount *mnt;
-	char *page = (char *) __get_free_page(GFP_USER);
-	char *devname;
-
-	if (page == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	devname = nfs_devname(dentry, page, PAGE_SIZE);
-	if (IS_ERR(devname))
-		mnt = ERR_CAST(devname);
-	else
-		mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
-
-	free_page((unsigned long)page);
-	return mnt;
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	struct dentry *dentry = ctx->clone_data.dentry;
+	struct nfs_server *server;
+	char *buffer, *p;
+	int ret;
+
+	/* create a new volume representation */
+	server = ctx->nfs_mod->rpc_ops->clone_server(NFS_SB(ctx->clone_data.sb),
+						     ctx->mntfh,
+						     ctx->clone_data.fattr,
+						     ctx->selected_flavor);
+
+	if (IS_ERR(server))
+		return PTR_ERR(server);
+
+	ctx->server = server;
+
+	buffer = kmalloc(4096, GFP_USER);
+	if (!buffer)
+		return -ENOMEM;
+
+	ctx->internal		= true;
+	ctx->clone_data.inherited_bsize = ctx->clone_data.sb->s_blocksize_bits;
+
+	p = nfs_devname(dentry, buffer, 4096);
+	if (IS_ERR(p)) {
+		nfs_errorf(fc, "NFS: Couldn't determine submount pathname");
+		ret = PTR_ERR(p);
+	} else {
+		ret = vfs_parse_fs_string(fc, "source", p, buffer + 4096 - p);
+		if (!ret)
+			ret = vfs_get_tree(fc);
+	}
+	kfree(buffer);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nfs_do_submount);
 
-struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
-			      struct nfs_fh *fh, struct nfs_fattr *fattr)
+int nfs_submount(struct fs_context *fc, struct nfs_server *server)
 {
-	int err;
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	struct dentry *dentry = ctx->clone_data.dentry;
 	struct dentry *parent = dget_parent(dentry);
+	int err;
 
 	/* Look it up again to get its attributes */
-	err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, fh, fattr, NULL);
+	err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry,
+						  ctx->mntfh, ctx->clone_data.fattr,
+						  NULL);
 	dput(parent);
 	if (err != 0)
-		return ERR_PTR(err);
+		return err;
 
-	return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
+	ctx->selected_flavor = server->client->cl_auth->au_flavor;
+	return nfs_do_submount(fc);
 }
 EXPORT_SYMBOL_GPL(nfs_submount);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index d94c7abdf25a..f6676af37d5d 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -360,17 +360,17 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
 	else
 		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
 
-	if (attr->ia_valid & ATTR_ATIME_SET) {
+	if (attr->ia_valid & ATTR_ATIME_SET)
 		p = xdr_encode_time(p, &attr->ia_atime);
-	} else if (attr->ia_valid & ATTR_ATIME) {
+	else if (attr->ia_valid & ATTR_ATIME)
 		p = xdr_encode_current_server_time(p, &attr->ia_atime);
-	} else
+	else
 		p = xdr_time_not_set(p);
-	if (attr->ia_valid & ATTR_MTIME_SET) {
+	if (attr->ia_valid & ATTR_MTIME_SET)
 		xdr_encode_time(p, &attr->ia_mtime);
-	} else if (attr->ia_valid & ATTR_MTIME) {
+	else if (attr->ia_valid & ATTR_MTIME)
 		xdr_encode_current_server_time(p, &attr->ia_mtime);
-	} else
+	else
 		xdr_time_not_set(p);
 }
 
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h
index f82e11c4cb56..1b950b66b3bb 100644
--- a/fs/nfs/nfs3_fs.h
+++ b/fs/nfs/nfs3_fs.h
@@ -27,7 +27,7 @@ static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 #endif /* CONFIG_NFS_V3_ACL */
 
 /* nfs3client.c */
-struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *);
+struct nfs_server *nfs3_create_server(struct fs_context *);
 struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *,
 				     struct nfs_fattr *, rpc_authflavor_t);
 
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 223904bc40a7..5601e47360c2 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -46,10 +46,10 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server)
 }
 #endif
 
-struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info,
-				      struct nfs_subversion *nfs_mod)
+struct nfs_server *nfs3_create_server(struct fs_context *fc)
 {
-	struct nfs_server *server = nfs_create_server(mount_info, nfs_mod);
+	struct nfs_server *server = nfs_create_server(fc);
+
 	/* Create a client RPC handle for the NFS v3 ACL management interface */
 	if (!IS_ERR(server))
 		nfs_init_server_aclclient(server);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 9eb2f1a503ab..a46d1d5d16d8 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -110,10 +110,15 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_resp	= fattr,
 	};
 	int	status;
+	unsigned short task_flags = 0;
+
+	/* Is this is an attribute revalidation, subject to softreval? */
+	if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
+		task_flags |= RPC_TASK_TIMEOUT;
 
 	dprintk("NFS call  getattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = rpc_call_sync(server->client, &msg, task_flags);
 	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
@@ -140,23 +145,23 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0) {
+		nfs_setattr_update_inode(inode, sattr, fattr);
 		if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
 			nfs_zap_acl_cache(inode);
-		nfs_setattr_update_inode(inode, sattr, fattr);
 	}
 	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
 
 static int
-nfs3_proc_lookup(struct inode *dir, const struct qstr *name,
+nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
 		 struct nfs_fh *fhandle, struct nfs_fattr *fattr,
 		 struct nfs4_label *label)
 {
 	struct nfs3_diropargs	arg = {
 		.fh		= NFS_FH(dir),
-		.name		= name->name,
-		.len		= name->len
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len
 	};
 	struct nfs3_diropres	res = {
 		.fh		= fhandle,
@@ -168,20 +173,25 @@ nfs3_proc_lookup(struct inode *dir, const struct qstr *name,
 		.rpc_resp	= &res,
 	};
 	int			status;
+	unsigned short task_flags = 0;
 
-	dprintk("NFS call  lookup %s\n", name->name);
+	/* Is this is an attribute revalidation, subject to softreval? */
+	if (nfs_lookup_is_soft_revalidate(dentry))
+		task_flags |= RPC_TASK_TIMEOUT;
+
+	dprintk("NFS call  lookup %pd2\n", dentry);
 	res.dir_attr = nfs_alloc_fattr();
 	if (res.dir_attr == NULL)
 		return -ENOMEM;
 
 	nfs_fattr_init(fattr);
-	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
 	nfs_refresh_inode(dir, res.dir_attr);
 	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
 		msg.rpc_argp = fhandle;
 		msg.rpc_resp = fattr;
-		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+		status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
 	}
 	nfs_free_fattr(res.dir_attr);
 	dprintk("NFS reply lookup: %d\n", status);
@@ -990,7 +1000,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.nlmclnt_ops	= &nlmclnt_fl_close_lock_ops,
 	.getroot	= nfs3_proc_get_root,
 	.submount	= nfs_submount,
-	.try_mount	= nfs_try_mount,
+	.try_get_tree	= nfs_try_get_tree,
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
 	.lookup		= nfs3_proc_lookup,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 927eb680f161..69971f6c840d 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2334,6 +2334,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
 				   void *data)
 {
 	struct nfs_commitres *result = data;
+	struct nfs_writeverf *verf = result->verf;
 	enum nfs_stat status;
 	int error;
 
@@ -2346,7 +2347,9 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
 	result->op_status = status;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_writeverf3(xdr, &result->verf->verifier);
+	error = decode_writeverf3(xdr, &verf->verifier);
+	if (!error)
+		verf->committed = NFS_FILE_SYNC;
 out:
 	return error;
 out_status:
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 1fe83e0f663e..e2ae54b35dfe 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -61,8 +61,11 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 
 	status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
 			lock, FMODE_WRITE);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
+	}
 
 	res.falloc_fattr = nfs_alloc_fattr();
 	if (!res.falloc_fattr)
@@ -287,8 +290,11 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 	} else {
 		status = nfs4_set_rw_stateid(&args->src_stateid,
 				src_lock->open_context, src_lock, FMODE_READ);
-		if (status)
+		if (status) {
+			if (status == -EAGAIN)
+				status = -NFS4ERR_BAD_STATEID;
 			return status;
+		}
 	}
 	status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
 			pos_src, pos_src + (loff_t)count - 1);
@@ -297,8 +303,11 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 
 	status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context,
 				     dst_lock, FMODE_WRITE);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
+	}
 
 	status = nfs_sync_inode(dst_inode);
 	if (status)
@@ -334,14 +343,14 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 		status = handle_async_copy(res, dst_server, src_server, src,
 				dst, &args->src_stateid, restart);
 		if (status)
-			return status;
+			goto out;
 	}
 
 	if ((!res->synchronous || !args->sync) &&
 			res->write_res.verifier.committed != NFS_FILE_SYNC) {
 		status = process_copy_commit(dst, pos_dst, res);
 		if (status)
-			return status;
+			goto out;
 	}
 
 	truncate_pagecache_range(dst_inode, pos_dst,
@@ -546,8 +555,11 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
 	status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
 				     FMODE_READ);
 	nfs_put_lock_context(l_ctx);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
+	}
 
 	status = nfs4_call_sync(src_server->client, src_server, &msg,
 				&args->cna_seq_args, &res->cnr_seq_res, 0);
@@ -618,8 +630,11 @@ static loff_t _nfs42_proc_llseek(struct file *filep,
 
 	status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
 			lock, FMODE_READ);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
+	}
 
 	status = nfs_filemap_write_and_wait_range(inode->i_mapping,
 			offset, LLONG_MAX);
@@ -994,13 +1009,18 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
 
 	status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
 			src_lock, FMODE_READ);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
-
+	}
 	status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
 			dst_lock, FMODE_WRITE);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
+	}
 
 	res.dst_fattr = nfs_alloc_fattr();
 	if (!res.dst_fattr)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a7a73b1d1fec..8be1ba7c62bb 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -268,14 +268,13 @@ extern const struct dentry_operations nfs4_dentry_operations;
 int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
 		    unsigned, umode_t);
 
-/* super.c */
+/* fs_context.c */
 extern struct file_system_type nfs4_fs_type;
 
 /* nfs4namespace.c */
 struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *,
 					 const struct qstr *);
-struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
-			       struct nfs_fh *, struct nfs_fattr *);
+int nfs4_submount(struct fs_context *, struct nfs_server *);
 int nfs4_replace_transport(struct nfs_server *server,
 				const struct nfs4_fs_locations *locations);
 
@@ -303,8 +302,10 @@ extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struc
 extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
 		struct page *page, const struct cred *);
 extern int nfs4_proc_fsid_present(struct inode *, const struct cred *);
-extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, const struct qstr *,
-			    struct nfs_fh *, struct nfs_fattr *);
+extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *,
+						    struct dentry *,
+						    struct nfs_fh *,
+						    struct nfs_fattr *);
 extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
 extern const struct xattr_handler *nfs4_xattr_handlers[];
 extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
@@ -446,9 +447,7 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *);
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
 extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(struct work_struct *);
-extern void nfs4_set_lease_period(struct nfs_client *clp,
-		unsigned long lease,
-		unsigned long lastrenewed);
+extern void nfs4_set_lease_period(struct nfs_client *clp, unsigned long lease);
 
 
 /* nfs4state.c */
@@ -526,7 +525,6 @@ extern const nfs4_stateid invalid_stateid;
 /* nfs4super.c */
 struct nfs_mount_info;
 extern struct nfs_subversion nfs_v4;
-struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *);
 extern bool nfs4_disable_idmapping;
 extern unsigned short max_session_slots;
 extern unsigned short max_session_cb_slots;
@@ -536,6 +534,9 @@ extern bool recover_lost_locks;
 #define NFS4_CLIENT_ID_UNIQ_LEN		(64)
 extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];
 
+extern int nfs4_try_get_tree(struct fs_context *);
+extern int nfs4_get_referral_tree(struct fs_context *);
+
 /* nfs4sysctl.c */
 #ifdef CONFIG_SYSCTL
 int nfs4_register_sysctl(void);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 460d6251c405..0cd767e5c977 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1055,66 +1055,64 @@ out:
 /*
  * Create a version 4 volume record
  */
-static int nfs4_init_server(struct nfs_server *server,
-		struct nfs_parsed_mount_data *data)
+static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
 {
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
 	struct rpc_timeout timeparms;
 	int error;
 
-	nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
-			data->timeo, data->retrans);
+	nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol,
+				ctx->timeo, ctx->retrans);
 
 	/* Initialise the client representation from the mount data */
-	server->flags = data->flags;
-	server->options = data->options;
-	server->auth_info = data->auth_info;
+	server->flags = ctx->flags;
+	server->options = ctx->options;
+	server->auth_info = ctx->auth_info;
 
 	/* Use the first specified auth flavor. If this flavor isn't
 	 * allowed by the server, use the SECINFO path to try the
 	 * other specified flavors */
-	if (data->auth_info.flavor_len >= 1)
-		data->selected_flavor = data->auth_info.flavors[0];
+	if (ctx->auth_info.flavor_len >= 1)
+		ctx->selected_flavor = ctx->auth_info.flavors[0];
 	else
-		data->selected_flavor = RPC_AUTH_UNIX;
+		ctx->selected_flavor = RPC_AUTH_UNIX;
 
 	/* Get a client record */
 	error = nfs4_set_client(server,
-			data->nfs_server.hostname,
-			(const struct sockaddr *)&data->nfs_server.address,
-			data->nfs_server.addrlen,
-			data->client_address,
-			data->nfs_server.protocol,
-			&timeparms,
-			data->minorversion,
-			data->nfs_server.nconnect,
-			data->net);
+				ctx->nfs_server.hostname,
+				&ctx->nfs_server.address,
+				ctx->nfs_server.addrlen,
+				ctx->client_address,
+				ctx->nfs_server.protocol,
+				&timeparms,
+				ctx->minorversion,
+				ctx->nfs_server.nconnect,
+				fc->net_ns);
 	if (error < 0)
 		return error;
 
-	if (data->rsize)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize)
-		server->wsize = nfs_block_size(data->wsize, NULL);
+	if (ctx->rsize)
+		server->rsize = nfs_block_size(ctx->rsize, NULL);
+	if (ctx->wsize)
+		server->wsize = nfs_block_size(ctx->wsize, NULL);
 
-	server->acregmin = data->acregmin * HZ;
-	server->acregmax = data->acregmax * HZ;
-	server->acdirmin = data->acdirmin * HZ;
-	server->acdirmax = data->acdirmax * HZ;
-	server->port     = data->nfs_server.port;
+	server->acregmin = ctx->acregmin * HZ;
+	server->acregmax = ctx->acregmax * HZ;
+	server->acdirmin = ctx->acdirmin * HZ;
+	server->acdirmax = ctx->acdirmax * HZ;
+	server->port     = ctx->nfs_server.port;
 
 	return nfs_init_server_rpcclient(server, &timeparms,
-					 data->selected_flavor);
+					 ctx->selected_flavor);
 }
 
 /*
  * Create a version 4 volume record
  * - keyed on server and FSID
  */
-/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
-				      struct nfs_fh *mntfh)*/
-struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
-				      struct nfs_subversion *nfs_mod)
+struct nfs_server *nfs4_create_server(struct fs_context *fc)
 {
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
 	struct nfs_server *server;
 	bool auth_probe;
 	int error;
@@ -1125,14 +1123,14 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
 
 	server->cred = get_cred(current_cred());
 
-	auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
+	auth_probe = ctx->auth_info.flavor_len < 1;
 
 	/* set up the general RPC client */
-	error = nfs4_init_server(server, mount_info->parsed);
+	error = nfs4_init_server(server, fc);
 	if (error < 0)
 		goto error;
 
-	error = nfs4_server_common_setup(server, mount_info->mntfh, auth_probe);
+	error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe);
 	if (error < 0)
 		goto error;
 
@@ -1146,9 +1144,9 @@ error:
 /*
  * Create an NFS4 referral server record
  */
-struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
-					       struct nfs_fh *mntfh)
+struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
 {
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
 	struct nfs_client *parent_client;
 	struct nfs_server *server, *parent_server;
 	bool auth_probe;
@@ -1158,7 +1156,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
-	parent_server = NFS_SB(data->sb);
+	parent_server = NFS_SB(ctx->clone_data.sb);
 	parent_client = parent_server->nfs_client;
 
 	server->cred = get_cred(parent_server->cred);
@@ -1168,10 +1166,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 
 	/* Get a client representation */
 #if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
-	rpc_set_port(data->addr, NFS_RDMA_PORT);
-	error = nfs4_set_client(server, data->hostname,
-				data->addr,
-				data->addrlen,
+	rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT);
+	error = nfs4_set_client(server,
+				ctx->nfs_server.hostname,
+				&ctx->nfs_server.address,
+				ctx->nfs_server.addrlen,
 				parent_client->cl_ipaddr,
 				XPRT_TRANSPORT_RDMA,
 				parent_server->client->cl_timeout,
@@ -1182,10 +1181,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 		goto init_server;
 #endif	/* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */
 
-	rpc_set_port(data->addr, NFS_PORT);
-	error = nfs4_set_client(server, data->hostname,
-				data->addr,
-				data->addrlen,
+	rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
+	error = nfs4_set_client(server,
+				ctx->nfs_server.hostname,
+				&ctx->nfs_server.address,
+				ctx->nfs_server.addrlen,
 				parent_client->cl_ipaddr,
 				XPRT_TRANSPORT_TCP,
 				parent_server->client->cl_timeout,
@@ -1198,13 +1198,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 #if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
 init_server:
 #endif
-	error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
+	error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout,
+					  ctx->selected_flavor);
 	if (error < 0)
 		goto error;
 
 	auth_probe = parent_server->auth_info.flavor_len < 1;
 
-	error = nfs4_server_common_setup(server, mntfh, auth_probe);
+	error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe);
 	if (error < 0)
 		goto error;
 
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 620de905cba9..be4eb720d5b6 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -7,6 +7,7 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/falloc.h>
+#include <linux/mount.h>
 #include <linux/nfs_fs.h>
 #include "delegation.h"
 #include "internal.h"
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 2e460c33ae48..84026e7b8a5f 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -8,6 +8,7 @@
  * NFSv4 namespace
  */
 
+#include <linux/module.h>
 #include <linux/dcache.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
@@ -21,37 +22,64 @@
 #include <linux/inet.h>
 #include "internal.h"
 #include "nfs4_fs.h"
+#include "nfs.h"
 #include "dns_resolve.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
 /*
+ * Work out the length that an NFSv4 path would render to as a standard posix
+ * path, with a leading slash but no terminating slash.
+ */
+static ssize_t nfs4_pathname_len(const struct nfs4_pathname *pathname)
+{
+	ssize_t len = 0;
+	int i;
+
+	for (i = 0; i < pathname->ncomponents; i++) {
+		const struct nfs4_string *component = &pathname->components[i];
+
+		if (component->len > NAME_MAX)
+			goto too_long;
+		len += 1 + component->len; /* Adding "/foo" */
+		if (len > PATH_MAX)
+			goto too_long;
+	}
+	return len;
+
+too_long:
+	return -ENAMETOOLONG;
+}
+
+/*
  * Convert the NFSv4 pathname components into a standard posix path.
- *
- * Note that the resulting string will be placed at the end of the buffer
  */
-static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
-					 char *buffer, ssize_t buflen)
+static char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
+				  unsigned short *_len)
 {
-	char *end = buffer + buflen;
-	int n;
+	ssize_t len;
+	char *buf, *p;
+	int i;
+
+	len = nfs4_pathname_len(pathname);
+	if (len < 0)
+		return ERR_PTR(len);
+	*_len = len;
+
+	p = buf = kmalloc(len + 1, GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < pathname->ncomponents; i++) {
+		const struct nfs4_string *component = &pathname->components[i];
 
-	*--end = '\0';
-	buflen--;
-
-	n = pathname->ncomponents;
-	while (--n >= 0) {
-		const struct nfs4_string *component = &pathname->components[n];
-		buflen -= component->len + 1;
-		if (buflen < 0)
-			goto Elong;
-		end -= component->len;
-		memcpy(end, component->data, component->len);
-		*--end = '/';
+		*p++ = '/';
+		memcpy(p, component->data, component->len);
+		p += component->len;
 	}
-	return end;
-Elong:
-	return ERR_PTR(-ENAMETOOLONG);
+
+	*p = 0;
+	return buf;
 }
 
 /*
@@ -100,21 +128,36 @@ static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
  */
 static int nfs4_validate_fspath(struct dentry *dentry,
 				const struct nfs4_fs_locations *locations,
-				char *page, char *page2)
+				struct nfs_fs_context *ctx)
 {
-	const char *path, *fs_path;
+	const char *path;
+	char *fs_path;
+	unsigned short len;
+	char *buf;
+	int n;
 
-	path = nfs4_path(dentry, page, PAGE_SIZE);
-	if (IS_ERR(path))
+	buf = kmalloc(4096, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	path = nfs4_path(dentry, buf, 4096);
+	if (IS_ERR(path)) {
+		kfree(buf);
 		return PTR_ERR(path);
+	}
 
-	fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
-	if (IS_ERR(fs_path))
+	fs_path = nfs4_pathname_string(&locations->fs_path, &len);
+	if (IS_ERR(fs_path)) {
+		kfree(buf);
 		return PTR_ERR(fs_path);
+	}
 
-	if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
+	n = strncmp(path, fs_path, len);
+	kfree(buf);
+	kfree(fs_path);
+	if (n != 0) {
 		dprintk("%s: path %s does not begin with fsroot %s\n",
-			__func__, path, fs_path);
+			__func__, path, ctx->nfs_server.export_path);
 		return -ENOENT;
 	}
 
@@ -236,55 +279,77 @@ out:
 	return new;
 }
 
-static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
-				     char *page, char *page2,
-				     const struct nfs4_fs_location *location)
+static int try_location(struct fs_context *fc,
+			const struct nfs4_fs_location *location)
 {
-	const size_t addr_bufsize = sizeof(struct sockaddr_storage);
-	struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client);
-	struct vfsmount *mnt = ERR_PTR(-ENOENT);
-	char *mnt_path;
-	unsigned int maxbuflen;
-	unsigned int s;
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	unsigned int len, s;
+	char *export_path, *source, *p;
+	int ret = -ENOENT;
+
+	/* Allocate a buffer big enough to hold any of the hostnames plus a
+	 * terminating char and also a buffer big enough to hold the hostname
+	 * plus a colon plus the path.
+	 */
+	len = 0;
+	for (s = 0; s < location->nservers; s++) {
+		const struct nfs4_string *buf = &location->servers[s];
+		if (buf->len > len)
+			len = buf->len;
+	}
 
-	mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
-	if (IS_ERR(mnt_path))
-		return ERR_CAST(mnt_path);
-	mountdata->mnt_path = mnt_path;
-	maxbuflen = mnt_path - 1 - page2;
+	kfree(ctx->nfs_server.hostname);
+	ctx->nfs_server.hostname = kmalloc(len + 1, GFP_KERNEL);
+	if (!ctx->nfs_server.hostname)
+		return -ENOMEM;
 
-	mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL);
-	if (mountdata->addr == NULL)
-		return ERR_PTR(-ENOMEM);
+	export_path = nfs4_pathname_string(&location->rootpath,
+					   &ctx->nfs_server.export_path_len);
+	if (IS_ERR(export_path))
+		return PTR_ERR(export_path);
+
+	ctx->nfs_server.export_path = export_path;
+
+	source = kmalloc(len + 1 + ctx->nfs_server.export_path_len + 1,
+			 GFP_KERNEL);
+	if (!source)
+		return -ENOMEM;
 
+	kfree(fc->source);
+	fc->source = source;
 	for (s = 0; s < location->nservers; s++) {
 		const struct nfs4_string *buf = &location->servers[s];
 
-		if (buf->len <= 0 || buf->len >= maxbuflen)
-			continue;
-
 		if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len))
 			continue;
 
-		mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
-				mountdata->addr, addr_bufsize, net);
-		if (mountdata->addrlen == 0)
+		ctx->nfs_server.addrlen =
+			nfs_parse_server_name(buf->data, buf->len,
+					      &ctx->nfs_server.address,
+					      sizeof(ctx->nfs_server._address),
+					      fc->net_ns);
+		if (ctx->nfs_server.addrlen == 0)
 			continue;
 
-		memcpy(page2, buf->data, buf->len);
-		page2[buf->len] = '\0';
-		mountdata->hostname = page2;
+		rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
 
-		snprintf(page, PAGE_SIZE, "%s:%s",
-				mountdata->hostname,
-				mountdata->mnt_path);
+		memcpy(ctx->nfs_server.hostname, buf->data, buf->len);
+		ctx->nfs_server.hostname[buf->len] = '\0';
 
-		mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata);
-		if (!IS_ERR(mnt))
-			break;
+		p = source;
+		memcpy(p, buf->data, buf->len);
+		p += buf->len;
+		*p++ = ':';
+		memcpy(p, ctx->nfs_server.export_path, ctx->nfs_server.export_path_len);
+		p += ctx->nfs_server.export_path_len;
+		*p = 0;
+
+		ret = nfs4_get_referral_tree(fc);
+		if (ret == 0)
+			return 0;
 	}
-	kfree(mountdata->addr);
-	return mnt;
+
+	return ret;
 }
 
 /**
@@ -293,38 +358,23 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
  * @locations: array of NFSv4 server location information
  *
  */
-static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
-					    const struct nfs4_fs_locations *locations)
+static int nfs_follow_referral(struct fs_context *fc,
+			       const struct nfs4_fs_locations *locations)
 {
-	struct vfsmount *mnt = ERR_PTR(-ENOENT);
-	struct nfs_clone_mount mountdata = {
-		.sb = dentry->d_sb,
-		.dentry = dentry,
-		.authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor,
-	};
-	char *page = NULL, *page2 = NULL;
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
 	int loc, error;
 
 	if (locations == NULL || locations->nlocations <= 0)
-		goto out;
-
-	dprintk("%s: referral at %pd2\n", __func__, dentry);
-
-	page = (char *) __get_free_page(GFP_USER);
-	if (!page)
-		goto out;
+		return -ENOENT;
 
-	page2 = (char *) __get_free_page(GFP_USER);
-	if (!page2)
-		goto out;
+	dprintk("%s: referral at %pd2\n", __func__, ctx->clone_data.dentry);
 
 	/* Ensure fs path is a prefix of current dentry path */
-	error = nfs4_validate_fspath(dentry, locations, page, page2);
-	if (error < 0) {
-		mnt = ERR_PTR(error);
-		goto out;
-	}
+	error = nfs4_validate_fspath(ctx->clone_data.dentry, locations, ctx);
+	if (error < 0)
+		return error;
 
+	error = -ENOENT;
 	for (loc = 0; loc < locations->nlocations; loc++) {
 		const struct nfs4_fs_location *location = &locations->locations[loc];
 
@@ -332,15 +382,12 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
 		    location->rootpath.ncomponents == 0)
 			continue;
 
-		mnt = try_location(&mountdata, page, page2, location);
-		if (!IS_ERR(mnt))
-			break;
+		error = try_location(fc, location);
+		if (error == 0)
+			return 0;
 	}
 
-out:
-	free_page((unsigned long) page);
-	free_page((unsigned long) page2);
-	return mnt;
+	return error;
 }
 
 /*
@@ -348,71 +395,72 @@ out:
  * @dentry - dentry of referral
  *
  */
-static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
+static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client)
 {
-	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
-	struct dentry *parent;
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	struct dentry *dentry, *parent;
 	struct nfs4_fs_locations *fs_locations = NULL;
 	struct page *page;
-	int err;
+	int err = -ENOMEM;
 
 	/* BUG_ON(IS_ROOT(dentry)); */
 	page = alloc_page(GFP_KERNEL);
-	if (page == NULL)
-		return mnt;
+	if (!page)
+		return -ENOMEM;
 
 	fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
-	if (fs_locations == NULL)
+	if (!fs_locations)
 		goto out_free;
 
 	/* Get locations */
-	mnt = ERR_PTR(-ENOENT);
-
+	dentry = ctx->clone_data.dentry;
 	parent = dget_parent(dentry);
 	dprintk("%s: getting locations for %pd2\n",
 		__func__, dentry);
 
 	err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page);
 	dput(parent);
-	if (err != 0 ||
-	    fs_locations->nlocations <= 0 ||
+	if (err != 0)
+		goto out_free_2;
+
+	err = -ENOENT;
+	if (fs_locations->nlocations <= 0 ||
 	    fs_locations->fs_path.ncomponents <= 0)
-		goto out_free;
+		goto out_free_2;
 
-	mnt = nfs_follow_referral(dentry, fs_locations);
+	err = nfs_follow_referral(fc, fs_locations);
+out_free_2:
+	kfree(fs_locations);
 out_free:
 	__free_page(page);
-	kfree(fs_locations);
-	return mnt;
+	return err;
 }
 
-struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
-			       struct nfs_fh *fh, struct nfs_fattr *fattr)
+int nfs4_submount(struct fs_context *fc, struct nfs_server *server)
 {
-	rpc_authflavor_t flavor = server->client->cl_auth->au_flavor;
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	struct dentry *dentry = ctx->clone_data.dentry;
 	struct dentry *parent = dget_parent(dentry);
 	struct inode *dir = d_inode(parent);
-	const struct qstr *name = &dentry->d_name;
 	struct rpc_clnt *client;
-	struct vfsmount *mnt;
+	int ret;
 
 	/* Look it up again to get its attributes and sec flavor */
-	client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
+	client = nfs4_proc_lookup_mountpoint(dir, dentry, ctx->mntfh,
+					     ctx->clone_data.fattr);
 	dput(parent);
 	if (IS_ERR(client))
-		return ERR_CAST(client);
+		return PTR_ERR(client);
 
-	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
-		mnt = nfs_do_refmount(client, dentry);
-		goto out;
+	ctx->selected_flavor = client->cl_auth->au_flavor;
+	if (ctx->clone_data.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+		ret = nfs_do_refmount(fc, client);
+	} else {
+		ret = nfs_do_submount(fc);
 	}
 
-	if (client->cl_auth->au_flavor != flavor)
-		flavor = client->cl_auth->au_flavor;
-	mnt = nfs_do_submount(dentry, fh, fattr, flavor);
-out:
 	rpc_shutdown_client(client);
-	return mnt;
+	return ret;
 }
 
 /*
@@ -453,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
 		rpc_set_port(sap, NFS_PORT);
 
 		error = -ENOMEM;
-		hostname = kstrndup(buf->data, buf->len, GFP_KERNEL);
+		hostname = kmemdup_nul(buf->data, buf->len, GFP_KERNEL);
 		if (hostname == NULL)
 			break;
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 76d37161409a..95d07a3dc5d1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1097,11 +1097,12 @@ static int nfs4_call_sync_custom(struct rpc_task_setup *task_setup)
 	return ret;
 }
 
-static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
-				   struct nfs_server *server,
-				   struct rpc_message *msg,
-				   struct nfs4_sequence_args *args,
-				   struct nfs4_sequence_res *res)
+static int nfs4_do_call_sync(struct rpc_clnt *clnt,
+			     struct nfs_server *server,
+			     struct rpc_message *msg,
+			     struct nfs4_sequence_args *args,
+			     struct nfs4_sequence_res *res,
+			     unsigned short task_flags)
 {
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_call_sync_data data = {
@@ -1113,12 +1114,23 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
 		.rpc_client = clnt,
 		.rpc_message = msg,
 		.callback_ops = clp->cl_mvops->call_sync_ops,
-		.callback_data = &data
+		.callback_data = &data,
+		.flags = task_flags,
 	};
 
 	return nfs4_call_sync_custom(&task_setup);
 }
 
+static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
+				   struct nfs_server *server,
+				   struct rpc_message *msg,
+				   struct nfs4_sequence_args *args,
+				   struct nfs4_sequence_res *res)
+{
+	return nfs4_do_call_sync(clnt, server, msg, args, res, 0);
+}
+
+
 int nfs4_call_sync(struct rpc_clnt *clnt,
 		   struct nfs_server *server,
 		   struct rpc_message *msg,
@@ -3187,6 +3199,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
 			exception.retry = 1;
 			continue;
 		}
+		if (status == -NFS4ERR_EXPIRED) {
+			nfs4_schedule_lease_recovery(server->nfs_client);
+			exception.retry = 1;
+			continue;
+		}
 		if (status == -EAGAIN) {
 			/* We must have found a delegation */
 			exception.retry = 1;
@@ -3239,6 +3256,8 @@ static int _nfs4_do_setattr(struct inode *inode,
 		nfs_put_lock_context(l_ctx);
 		if (status == -EIO)
 			return -EBADF;
+		else if (status == -EAGAIN)
+			goto zero_stateid;
 	} else {
 zero_stateid:
 		nfs4_stateid_copy(&arg->stateid, &zero_stateid);
@@ -4064,11 +4083,18 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
+	unsigned short task_flags = 0;
+
+	/* Is this is an attribute revalidation, subject to softreval? */
+	if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
+		task_flags |= RPC_TASK_TIMEOUT;
 
 	nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
 
 	nfs_fattr_init(fattr);
-	return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+	nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+	return nfs4_do_call_sync(server->client, server, &msg,
+			&args.seq_args, &res.seq_res, task_flags);
 }
 
 int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -4156,7 +4182,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 }
 
 static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
-		const struct qstr *name, struct nfs_fh *fhandle,
+		struct dentry *dentry, struct nfs_fh *fhandle,
 		struct nfs_fattr *fattr, struct nfs4_label *label)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
@@ -4164,7 +4190,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
 	struct nfs4_lookup_arg args = {
 		.bitmask = server->attr_bitmask,
 		.dir_fh = NFS_FH(dir),
-		.name = name,
+		.name = &dentry->d_name,
 	};
 	struct nfs4_lookup_res res = {
 		.server = server,
@@ -4177,13 +4203,20 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
+	unsigned short task_flags = 0;
+
+	/* Is this is an attribute revalidation, subject to softreval? */
+	if (nfs_lookup_is_soft_revalidate(dentry))
+		task_flags |= RPC_TASK_TIMEOUT;
 
 	args.bitmask = nfs4_bitmask(server, label);
 
 	nfs_fattr_init(fattr);
 
-	dprintk("NFS call  lookup %s\n", name->name);
-	status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
+	dprintk("NFS call  lookup %pd2\n", dentry);
+	nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+	status = nfs4_do_call_sync(clnt, server, &msg,
+			&args.seq_args, &res.seq_res, task_flags);
 	dprintk("NFS reply lookup: %d\n", status);
 	return status;
 }
@@ -4197,16 +4230,17 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
 }
 
 static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
-				   const struct qstr *name, struct nfs_fh *fhandle,
+				   struct dentry *dentry, struct nfs_fh *fhandle,
 				   struct nfs_fattr *fattr, struct nfs4_label *label)
 {
 	struct nfs4_exception exception = {
 		.interruptible = true,
 	};
 	struct rpc_clnt *client = *clnt;
+	const struct qstr *name = &dentry->d_name;
 	int err;
 	do {
-		err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label);
+		err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr, label);
 		trace_nfs4_lookup(dir, name, err);
 		switch (err) {
 		case -NFS4ERR_BADNAME:
@@ -4241,14 +4275,14 @@ out:
 	return err;
 }
 
-static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name,
+static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry,
 			    struct nfs_fh *fhandle, struct nfs_fattr *fattr,
 			    struct nfs4_label *label)
 {
 	int status;
 	struct rpc_clnt *client = NFS_CLIENT(dir);
 
-	status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label);
+	status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, label);
 	if (client != NFS_CLIENT(dir)) {
 		rpc_shutdown_client(client);
 		nfs_fixup_secinfo_attributes(fattr);
@@ -4257,13 +4291,13 @@ static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name,
 }
 
 struct rpc_clnt *
-nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name,
+nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry,
 			    struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	struct rpc_clnt *client = NFS_CLIENT(dir);
 	int status;
 
-	status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL);
+	status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, NULL);
 	if (status < 0)
 		return ERR_PTR(status);
 	return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
@@ -5019,16 +5053,13 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str
 	struct nfs4_exception exception = {
 		.interruptible = true,
 	};
-	unsigned long now = jiffies;
 	int err;
 
 	do {
 		err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
 		trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
 		if (err == 0) {
-			nfs4_set_lease_period(server->nfs_client,
-					fsinfo->lease_time * HZ,
-					now);
+			nfs4_set_lease_period(server->nfs_client, fsinfo->lease_time * HZ);
 			break;
 		}
 		err = nfs4_handle_exception(server, err, &exception);
@@ -5582,10 +5613,9 @@ out:
  */
 static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
 {
-	struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, };
+	struct page **pages;
 	struct nfs_getaclargs args = {
 		.fh = NFS_FH(inode),
-		.acl_pages = pages,
 		.acl_len = buflen,
 	};
 	struct nfs_getaclres res = {
@@ -5596,11 +5626,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
-	unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
+	unsigned int npages;
 	int ret = -ENOMEM, i;
+	struct nfs_server *server = NFS_SERVER(inode);
 
-	if (npages > ARRAY_SIZE(pages))
-		return -ERANGE;
+	if (buflen == 0)
+		buflen = server->rsize;
+
+	npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
+	pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS);
+	if (!pages)
+		return -ENOMEM;
+
+	args.acl_pages = pages;
 
 	for (i = 0; i < npages; i++) {
 		pages[i] = alloc_page(GFP_KERNEL);
@@ -5646,6 +5684,7 @@ out_free:
 			__free_page(pages[i]);
 	if (res.acl_scratch)
 		__free_page(res.acl_scratch);
+	kfree(pages);
 	return ret;
 }
 
@@ -6084,6 +6123,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 		.callback_data = &setclientid,
 		.flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN,
 	};
+	unsigned long now = jiffies;
 	int status;
 
 	/* nfs_client_id4 */
@@ -6116,6 +6156,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 		clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
 		put_rpccred(setclientid.sc_cred);
 	}
+
+	if (status == 0)
+		do_renew_lease(clp, now);
 out:
 	trace_nfs4_setclientid(clp, status);
 	dprintk("NFS reply setclientid: %d\n", status);
@@ -6859,7 +6902,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_
 	case -NFS4ERR_STALE_STATEID:
 		lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
 		nfs4_schedule_lease_recovery(server->nfs_client);
-	};
+	}
 }
 
 static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type)
@@ -8203,6 +8246,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre
 	struct rpc_task *task;
 	struct nfs41_exchange_id_args *argp;
 	struct nfs41_exchange_id_res *resp;
+	unsigned long now = jiffies;
 	int status;
 
 	task = nfs4_run_exchange_id(clp, cred, sp4_how, NULL);
@@ -8223,6 +8267,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre
 	if (status != 0)
 		goto out;
 
+	do_renew_lease(clp, now);
+
 	clp->cl_clientid = resp->clientid;
 	clp->cl_exchange_flags = resp->flags;
 	clp->cl_seqid = resp->seqid;
@@ -8626,7 +8672,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp,
 	case -EACCES:
 	case -EAGAIN:
 		goto out;
-	};
+	}
 
 	clp->cl_seqid++;
 	if (!status) {
@@ -10001,7 +10047,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.file_ops	= &nfs4_file_operations,
 	.getroot	= nfs4_proc_get_root,
 	.submount	= nfs4_submount,
-	.try_mount	= nfs4_try_mount,
+	.try_get_tree	= nfs4_try_get_tree,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
 	.lookup		= nfs4_proc_lookup,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 6ea431b067dd..ff876dda7f06 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -138,15 +138,12 @@ nfs4_kill_renewd(struct nfs_client *clp)
  *
  * @clp: pointer to nfs_client
  * @lease: new value for lease period
- * @lastrenewed: time at which lease was last renewed
  */
 void nfs4_set_lease_period(struct nfs_client *clp,
-		unsigned long lease,
-		unsigned long lastrenewed)
+		unsigned long lease)
 {
 	spin_lock(&clp->cl_lock);
 	clp->cl_lease_time = lease;
-	clp->cl_last_renewal = lastrenewed;
 	spin_unlock(&clp->cl_lock);
 
 	/* Cap maximum reconnect timeout at 1/2 lease period */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 34552329233d..f7723d221945 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -92,17 +92,15 @@ static int nfs4_setup_state_renewal(struct nfs_client *clp)
 {
 	int status;
 	struct nfs_fsinfo fsinfo;
-	unsigned long now;
 
 	if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
 		nfs4_schedule_state_renewal(clp);
 		return 0;
 	}
 
-	now = jiffies;
 	status = nfs4_proc_get_lease_time(clp, &fsinfo);
 	if (status == 0) {
-		nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
+		nfs4_set_lease_period(clp, fsinfo.lease_time * HZ);
 		nfs4_schedule_state_renewal(clp);
 	}
 
@@ -766,6 +764,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
 	list_del(&state->open_states);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&owner->so_lock);
+	nfs4_inode_return_delegation_on_close(inode);
 	iput(inode);
 	nfs4_free_open_state(state);
 	nfs4_put_state_owner(owner);
@@ -1135,7 +1134,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 		case -NFS4ERR_MOVED:
 			/* Non-seqid mutating errors */
 			return;
-	};
+	}
 	/*
 	 * Note: no locking needed as we are guaranteed to be first
 	 * on the sequence list
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 2c9cbade561a..1475f932d7da 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -4,6 +4,7 @@
  */
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/mount.h>
 #include <linux/nfs4_mount.h>
 #include <linux/nfs_fs.h>
 #include "delegation.h"
@@ -18,36 +19,6 @@
 
 static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc);
 static void nfs4_evict_inode(struct inode *inode);
-static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data);
-static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data);
-static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data);
-
-static struct file_system_type nfs4_remote_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "nfs4",
-	.mount		= nfs4_remote_mount,
-	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-
-static struct file_system_type nfs4_remote_referral_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "nfs4",
-	.mount		= nfs4_remote_referral_mount,
-	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-
-struct file_system_type nfs4_referral_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "nfs4",
-	.mount		= nfs4_referral_mount,
-	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
 
 static const struct super_operations nfs4_sops = {
 	.alloc_inode	= nfs_alloc_inode,
@@ -61,16 +32,15 @@ static const struct super_operations nfs4_sops = {
 	.show_devname	= nfs_show_devname,
 	.show_path	= nfs_show_path,
 	.show_stats	= nfs_show_stats,
-	.remount_fs	= nfs_remount,
 };
 
 struct nfs_subversion nfs_v4 = {
-	.owner = THIS_MODULE,
-	.nfs_fs   = &nfs4_fs_type,
-	.rpc_vers = &nfs_version4,
-	.rpc_ops  = &nfs_v4_clientops,
-	.sops     = &nfs4_sops,
-	.xattr    = nfs4_xattr_handlers,
+	.owner		= THIS_MODULE,
+	.nfs_fs		= &nfs4_fs_type,
+	.rpc_vers	= &nfs_version4,
+	.rpc_ops	= &nfs_v4_clientops,
+	.sops		= &nfs4_sops,
+	.xattr		= nfs4_xattr_handlers,
 };
 
 static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -101,53 +71,6 @@ static void nfs4_evict_inode(struct inode *inode)
 	nfs_clear_inode(inode);
 }
 
-/*
- * Get the superblock for the NFS4 root partition
- */
-static struct dentry *
-nfs4_remote_mount(struct file_system_type *fs_type, int flags,
-		  const char *dev_name, void *info)
-{
-	struct nfs_mount_info *mount_info = info;
-	struct nfs_server *server;
-	struct dentry *mntroot = ERR_PTR(-ENOMEM);
-
-	mount_info->set_security = nfs_set_sb_security;
-
-	/* Get a volume representation */
-	server = nfs4_create_server(mount_info, &nfs_v4);
-	if (IS_ERR(server)) {
-		mntroot = ERR_CAST(server);
-		goto out;
-	}
-
-	mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4);
-
-out:
-	return mntroot;
-}
-
-static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
-		int flags, void *data, const char *hostname)
-{
-	struct vfsmount *root_mnt;
-	char *root_devname;
-	size_t len;
-
-	len = strlen(hostname) + 5;
-	root_devname = kmalloc(len, GFP_KERNEL);
-	if (root_devname == NULL)
-		return ERR_PTR(-ENOMEM);
-	/* Does hostname needs to be enclosed in brackets? */
-	if (strchr(hostname, ':'))
-		snprintf(root_devname, len, "[%s]:/", hostname);
-	else
-		snprintf(root_devname, len, "%s:/", hostname);
-	root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
-	kfree(root_devname);
-	return root_mnt;
-}
-
 struct nfs_referral_count {
 	struct list_head list;
 	const struct task_struct *task;
@@ -214,111 +137,125 @@ static void nfs_referral_loop_unprotect(void)
 	kfree(p);
 }
 
-static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
-		const char *export_path)
+static int do_nfs4_mount(struct nfs_server *server,
+			 struct fs_context *fc,
+			 const char *hostname,
+			 const char *export_path)
 {
+	struct nfs_fs_context *root_ctx;
+	struct fs_context *root_fc;
+	struct vfsmount *root_mnt;
 	struct dentry *dentry;
-	int err;
+	size_t len;
+	int ret;
 
-	if (IS_ERR(root_mnt))
-		return ERR_CAST(root_mnt);
+	struct fs_parameter param = {
+		.key	= "source",
+		.type	= fs_value_is_string,
+		.dirfd	= -1,
+	};
 
-	err = nfs_referral_loop_protect();
-	if (err) {
-		mntput(root_mnt);
-		return ERR_PTR(err);
+	if (IS_ERR(server))
+		return PTR_ERR(server);
+
+	root_fc = vfs_dup_fs_context(fc);
+	if (IS_ERR(root_fc)) {
+		nfs_free_server(server);
+		return PTR_ERR(root_fc);
 	}
+	kfree(root_fc->source);
+	root_fc->source = NULL;
 
-	dentry = mount_subtree(root_mnt, export_path);
-	nfs_referral_loop_unprotect();
+	root_ctx = nfs_fc2context(root_fc);
+	root_ctx->internal = true;
+	root_ctx->server = server;
+	/* We leave export_path unset as it's not used to find the root. */
 
-	return dentry;
-}
+	len = strlen(hostname) + 5;
+	param.string = kmalloc(len, GFP_KERNEL);
+	if (param.string == NULL) {
+		put_fs_context(root_fc);
+		return -ENOMEM;
+	}
 
-struct dentry *nfs4_try_mount(int flags, const char *dev_name,
-			      struct nfs_mount_info *mount_info,
-			      struct nfs_subversion *nfs_mod)
-{
-	char *export_path;
-	struct vfsmount *root_mnt;
-	struct dentry *res;
-	struct nfs_parsed_mount_data *data = mount_info->parsed;
+	/* Does hostname needs to be enclosed in brackets? */
+	if (strchr(hostname, ':'))
+		param.size = snprintf(param.string, len, "[%s]:/", hostname);
+	else
+		param.size = snprintf(param.string, len, "%s:/", hostname);
+	ret = vfs_parse_fs_param(root_fc, &param);
+	kfree(param.string);
+	if (ret < 0) {
+		put_fs_context(root_fc);
+		return ret;
+	}
+	root_mnt = fc_mount(root_fc);
+	put_fs_context(root_fc);
+
+	if (IS_ERR(root_mnt))
+		return PTR_ERR(root_mnt);
 
-	dfprintk(MOUNT, "--> nfs4_try_mount()\n");
+	ret = nfs_referral_loop_protect();
+	if (ret) {
+		mntput(root_mnt);
+		return ret;
+	}
 
-	export_path = data->nfs_server.export_path;
-	data->nfs_server.export_path = "/";
-	root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
-			data->nfs_server.hostname);
-	data->nfs_server.export_path = export_path;
+	dentry = mount_subtree(root_mnt, export_path);
+	nfs_referral_loop_unprotect();
 
-	res = nfs_follow_remote_path(root_mnt, export_path);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
 
-	dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n",
-		 PTR_ERR_OR_ZERO(res),
-		 IS_ERR(res) ? " [error]" : "");
-	return res;
+	fc->root = dentry;
+	return 0;
 }
 
-static struct dentry *
-nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
-			   const char *dev_name, void *raw_data)
+int nfs4_try_get_tree(struct fs_context *fc)
 {
-	struct nfs_mount_info mount_info = {
-		.fill_super = nfs_fill_super,
-		.set_security = nfs_clone_sb_security,
-		.cloned = raw_data,
-	};
-	struct nfs_server *server;
-	struct dentry *mntroot = ERR_PTR(-ENOMEM);
-
-	dprintk("--> nfs4_referral_get_sb()\n");
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	int err;
 
-	mount_info.mntfh = nfs_alloc_fhandle();
-	if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
-		goto out;
+	dfprintk(MOUNT, "--> nfs4_try_get_tree()\n");
 
-	/* create a new volume representation */
-	server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
-	if (IS_ERR(server)) {
-		mntroot = ERR_CAST(server);
-		goto out;
+	/* We create a mount for the server's root, walk to the requested
+	 * location and then create another mount for that.
+	 */
+	err= do_nfs4_mount(nfs4_create_server(fc),
+			   fc, ctx->nfs_server.hostname,
+			   ctx->nfs_server.export_path);
+	if (err) {
+		nfs_errorf(fc, "NFS4: Couldn't follow remote path");
+		dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err);
+	} else {
+		dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n");
 	}
-
-	mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4);
-out:
-	nfs_free_fhandle(mount_info.mntfh);
-	return mntroot;
+	return err;
 }
 
 /*
  * Create an NFS4 server record on referral traversal
  */
-static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data)
+int nfs4_get_referral_tree(struct fs_context *fc)
 {
-	struct nfs_clone_mount *data = raw_data;
-	char *export_path;
-	struct vfsmount *root_mnt;
-	struct dentry *res;
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	int err;
 
 	dprintk("--> nfs4_referral_mount()\n");
 
-	export_path = data->mnt_path;
-	data->mnt_path = "/";
-
-	root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type,
-			flags, data, data->hostname);
-	data->mnt_path = export_path;
-
-	res = nfs_follow_remote_path(root_mnt, export_path);
-	dprintk("<-- nfs4_referral_mount() = %d%s\n",
-		PTR_ERR_OR_ZERO(res),
-		IS_ERR(res) ? " [error]" : "");
-	return res;
+	/* create a new volume representation */
+	err = do_nfs4_mount(nfs4_create_referral_server(fc),
+			    fc, ctx->nfs_server.hostname,
+			    ctx->nfs_server.export_path);
+	if (err) {
+		nfs_errorf(fc, "NFS4: Couldn't follow remote path");
+		dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err);
+	} else {
+		dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n");
+	}
+	return err;
 }
 
-
 static int __init init_nfs_v4(void)
 {
 	int err;
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
index 1a8f376b3f73..d9ac556bebcf 100644
--- a/fs/nfs/nfs4trace.c
+++ b/fs/nfs/nfs4trace.c
@@ -24,4 +24,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done);
 EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done);
 EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist);
 EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error);
 #endif
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index e60b6fbd5ada..1e97e5e04cb4 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -155,6 +155,9 @@ TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
 TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
 TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
 
+TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
+TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
+
 #define show_nfsv4_errors(error) \
 	__print_symbolic(error, \
 		{ NFS4_OK, "OK" }, \
@@ -305,7 +308,10 @@ TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
 		{ NFS4ERR_WRONGSEC, "WRONGSEC" }, \
 		{ NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
 		{ NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
-		{ NFS4ERR_XDEV, "XDEV" })
+		{ NFS4ERR_XDEV, "XDEV" }, \
+		/* ***** Internal to Linux NFS client ***** */ \
+		{ NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
+		{ NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
 
 #define show_open_flags(flags) \
 	__print_flags(flags, "|", \
@@ -352,7 +358,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event,
 		),
 
 		TP_fast_assign(
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__assign_str(dstaddr, clp->cl_hostname);
 		),
 
@@ -432,7 +438,8 @@ TRACE_EVENT(nfs4_sequence_done,
 			__entry->target_highest_slotid =
 					res->sr_target_highest_slotid;
 			__entry->status_flags = res->sr_status_flags;
-			__entry->error = res->sr_status;
+			__entry->error = res->sr_status < 0 ?
+					-res->sr_status : 0;
 		),
 		TP_printk(
 			"error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
@@ -640,7 +647,7 @@ TRACE_EVENT(nfs4_state_mgr_failed,
 		),
 
 		TP_fast_assign(
-			__entry->error = status;
+			__entry->error = status < 0 ? -status : 0;
 			__entry->state = clp->cl_state;
 			__assign_str(hostname, clp->cl_hostname);
 			__assign_str(section, section);
@@ -659,7 +666,7 @@ TRACE_EVENT(nfs4_xdr_status,
 		TP_PROTO(
 			const struct xdr_stream *xdr,
 			u32 op,
-			int error
+			u32 error
 		),
 
 		TP_ARGS(xdr, op, error),
@@ -691,6 +698,41 @@ TRACE_EVENT(nfs4_xdr_status,
 		)
 );
 
+DECLARE_EVENT_CLASS(nfs4_cb_error_class,
+		TP_PROTO(
+			__be32 xid,
+			u32 cb_ident
+		),
+
+		TP_ARGS(xid, cb_ident),
+
+		TP_STRUCT__entry(
+			__field(u32, xid)
+			__field(u32, cbident)
+		),
+
+		TP_fast_assign(
+			__entry->xid = be32_to_cpu(xid);
+			__entry->cbident = cb_ident;
+		),
+
+		TP_printk(
+			"xid=0x%08x cb_ident=0x%08x",
+			__entry->xid, __entry->cbident
+		)
+);
+
+#define DEFINE_CB_ERROR_EVENT(name) \
+	DEFINE_EVENT(nfs4_cb_error_class, nfs_cb_##name, \
+			TP_PROTO( \
+				__be32 xid, \
+				u32 cb_ident \
+			), \
+			TP_ARGS(xid, cb_ident))
+
+DEFINE_CB_ERROR_EVENT(no_clp);
+DEFINE_CB_ERROR_EVENT(badprinc);
+
 DECLARE_EVENT_CLASS(nfs4_open_event,
 		TP_PROTO(
 			const struct nfs_open_context *ctx,
@@ -849,7 +891,7 @@ TRACE_EVENT(nfs4_close,
 			__entry->fileid = NFS_FILEID(inode);
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->fmode = (__force unsigned int)state->state;
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__entry->stateid_seq =
 				be32_to_cpu(args->stateid.seqid);
 			__entry->stateid_hash =
@@ -914,7 +956,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
 		TP_fast_assign(
 			const struct inode *inode = state->inode;
 
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__entry->cmd = cmd;
 			__entry->type = request->fl_type;
 			__entry->start = request->fl_start;
@@ -986,7 +1028,7 @@ TRACE_EVENT(nfs4_set_lock,
 		TP_fast_assign(
 			const struct inode *inode = state->inode;
 
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__entry->cmd = cmd;
 			__entry->type = request->fl_type;
 			__entry->start = request->fl_start;
@@ -1164,7 +1206,7 @@ TRACE_EVENT(nfs4_delegreturn_exit,
 		TP_fast_assign(
 			__entry->dev = res->server->s_dev;
 			__entry->fhandle = nfs_fhandle_hash(args->fhandle);
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__entry->stateid_seq =
 				be32_to_cpu(args->stateid->seqid);
 			__entry->stateid_hash =
@@ -1204,7 +1246,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
 		TP_fast_assign(
 			const struct inode *inode = state->inode;
 
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fileid = NFS_FILEID(inode);
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
@@ -1306,7 +1348,7 @@ TRACE_EVENT(nfs4_lookupp,
 		TP_fast_assign(
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->ino = NFS_FILEID(inode);
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 		),
 
 		TP_printk(
@@ -1342,7 +1384,7 @@ TRACE_EVENT(nfs4_rename,
 			__entry->dev = olddir->i_sb->s_dev;
 			__entry->olddir = NFS_FILEID(olddir);
 			__entry->newdir = NFS_FILEID(newdir);
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__assign_str(oldname, oldname->name);
 			__assign_str(newname, newname->name);
 		),
@@ -1433,7 +1475,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fileid = NFS_FILEID(inode);
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__entry->stateid_seq =
 				be32_to_cpu(stateid->seqid);
 			__entry->stateid_hash =
@@ -1489,7 +1531,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event,
 			__entry->valid = fattr->valid;
 			__entry->fhandle = nfs_fhandle_hash(fhandle);
 			__entry->fileid = (fattr->valid & NFS_ATTR_FATTR_FILEID) ? fattr->fileid : 0;
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 		),
 
 		TP_printk(
@@ -1536,7 +1578,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
 		),
 
 		TP_fast_assign(
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__entry->fhandle = nfs_fhandle_hash(fhandle);
 			if (!IS_ERR_OR_NULL(inode)) {
 				__entry->fileid = NFS_FILEID(inode);
@@ -1593,7 +1635,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
 		),
 
 		TP_fast_assign(
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__entry->fhandle = nfs_fhandle_hash(fhandle);
 			if (!IS_ERR_OR_NULL(inode)) {
 				__entry->fileid = NFS_FILEID(inode);
@@ -1694,7 +1736,8 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
 			__field(u32, fhandle)
 			__field(u64, fileid)
 			__field(loff_t, offset)
-			__field(size_t, count)
+			__field(u32, arg_count)
+			__field(u32, res_count)
 			__field(unsigned long, error)
 			__field(int, stateid_seq)
 			__field(u32, stateid_hash)
@@ -1702,13 +1745,18 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
 
 		TP_fast_assign(
 			const struct inode *inode = hdr->inode;
+			const struct nfs_inode *nfsi = NFS_I(inode);
+			const struct nfs_fh *fh = hdr->args.fh ?
+						  hdr->args.fh : &nfsi->fh;
 			const struct nfs4_state *state =
 				hdr->args.context->state;
+
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
-			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->fileid = nfsi->fileid;
+			__entry->fhandle = nfs_fhandle_hash(fh);
 			__entry->offset = hdr->args.offset;
-			__entry->count = hdr->args.count;
+			__entry->arg_count = hdr->args.count;
+			__entry->res_count = hdr->res.count;
 			__entry->error = error < 0 ? -error : 0;
 			__entry->stateid_seq =
 				be32_to_cpu(state->stateid.seqid);
@@ -1718,14 +1766,14 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
 
 		TP_printk(
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%lld count=%zu stateid=%d:0x%08x",
+			"offset=%lld count=%u res=%u stateid=%d:0x%08x",
 			-__entry->error,
 			show_nfsv4_errors(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
 			(long long)__entry->offset,
-			__entry->count,
+			__entry->arg_count, __entry->res_count,
 			__entry->stateid_seq, __entry->stateid_hash
 		)
 );
@@ -1754,7 +1802,8 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
 			__field(u32, fhandle)
 			__field(u64, fileid)
 			__field(loff_t, offset)
-			__field(size_t, count)
+			__field(u32, arg_count)
+			__field(u32, res_count)
 			__field(unsigned long, error)
 			__field(int, stateid_seq)
 			__field(u32, stateid_hash)
@@ -1762,13 +1811,18 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
 
 		TP_fast_assign(
 			const struct inode *inode = hdr->inode;
+			const struct nfs_inode *nfsi = NFS_I(inode);
+			const struct nfs_fh *fh = hdr->args.fh ?
+						  hdr->args.fh : &nfsi->fh;
 			const struct nfs4_state *state =
 				hdr->args.context->state;
+
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
-			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->fileid = nfsi->fileid;
+			__entry->fhandle = nfs_fhandle_hash(fh);
 			__entry->offset = hdr->args.offset;
-			__entry->count = hdr->args.count;
+			__entry->arg_count = hdr->args.count;
+			__entry->res_count = hdr->res.count;
 			__entry->error = error < 0 ? -error : 0;
 			__entry->stateid_seq =
 				be32_to_cpu(state->stateid.seqid);
@@ -1778,14 +1832,14 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
 
 		TP_printk(
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%lld count=%zu stateid=%d:0x%08x",
+			"offset=%lld count=%u res=%u stateid=%d:0x%08x",
 			-__entry->error,
 			show_nfsv4_errors(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
 			(long long)__entry->offset,
-			__entry->count,
+			__entry->arg_count, __entry->res_count,
 			__entry->stateid_seq, __entry->stateid_hash
 		)
 );
@@ -1814,24 +1868,28 @@ DECLARE_EVENT_CLASS(nfs4_commit_event,
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
-			__field(loff_t, offset)
-			__field(size_t, count)
 			__field(unsigned long, error)
+			__field(loff_t, offset)
+			__field(u32, count)
 		),
 
 		TP_fast_assign(
 			const struct inode *inode = data->inode;
+			const struct nfs_inode *nfsi = NFS_I(inode);
+			const struct nfs_fh *fh = data->args.fh ?
+						  data->args.fh : &nfsi->fh;
+
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
-			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->fileid = nfsi->fileid;
+			__entry->fhandle = nfs_fhandle_hash(fh);
 			__entry->offset = data->args.offset;
 			__entry->count = data->args.count;
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 		),
 
 		TP_printk(
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%lld count=%zu",
+			"offset=%lld count=%u",
 			-__entry->error,
 			show_nfsv4_errors(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -1896,7 +1954,7 @@ TRACE_EVENT(nfs4_layoutget,
 			__entry->iomode = args->iomode;
 			__entry->offset = args->offset;
 			__entry->count = args->length;
-			__entry->error = error;
+			__entry->error = error < 0 ? -error : 0;
 			__entry->stateid_seq =
 				be32_to_cpu(state->stateid.seqid);
 			__entry->stateid_hash =
@@ -2094,6 +2152,115 @@ DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done);
 DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist);
 DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist);
 
+DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
+		TP_PROTO(
+			const struct nfs_pgio_header *hdr
+		),
+
+		TP_ARGS(hdr),
+
+		TP_STRUCT__entry(
+			__field(unsigned long, error)
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(u32, count)
+			__field(int, stateid_seq)
+			__field(u32, stateid_hash)
+			__string(dstaddr, hdr->ds_clp ?
+				rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient,
+					RPC_DISPLAY_ADDR) : "unknown")
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = hdr->inode;
+
+			__entry->error = hdr->res.op_status;
+			__entry->fhandle = nfs_fhandle_hash(hdr->args.fh);
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->offset = hdr->args.offset;
+			__entry->count = hdr->args.count;
+			__entry->stateid_seq =
+				be32_to_cpu(hdr->args.stateid.seqid);
+			__entry->stateid_hash =
+				nfs_stateid_hash(&hdr->args.stateid);
+			__assign_str(dstaddr, hdr->ds_clp ?
+				rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient,
+					RPC_DISPLAY_ADDR) : "unknown");
+		),
+
+		TP_printk(
+			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
+			-__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			__entry->offset, __entry->count,
+			__entry->stateid_seq, __entry->stateid_hash,
+			__get_str(dstaddr)
+		)
+);
+
+#define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \
+	DEFINE_EVENT(nfs4_flexfiles_io_event, name, \
+			TP_PROTO( \
+				const struct nfs_pgio_header *hdr \
+			), \
+			TP_ARGS(hdr))
+DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error);
+DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error);
+
+TRACE_EVENT(ff_layout_commit_error,
+		TP_PROTO(
+			const struct nfs_commit_data *data
+		),
+
+		TP_ARGS(data),
+
+		TP_STRUCT__entry(
+			__field(unsigned long, error)
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(u32, count)
+			__string(dstaddr, data->ds_clp ?
+				rpc_peeraddr2str(data->ds_clp->cl_rpcclient,
+					RPC_DISPLAY_ADDR) : "unknown")
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = data->inode;
+
+			__entry->error = data->res.op_status;
+			__entry->fhandle = nfs_fhandle_hash(data->args.fh);
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->offset = data->args.offset;
+			__entry->count = data->args.count;
+			__assign_str(dstaddr, data->ds_clp ?
+				rpc_peeraddr2str(data->ds_clp->cl_rpcclient,
+					RPC_DISPLAY_ADDR) : "unknown");
+		),
+
+		TP_printk(
+			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"offset=%llu count=%u dstaddr=%s",
+			-__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			__entry->offset, __entry->count,
+			__get_str(dstaddr)
+		)
+);
+
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* _TRACE_NFS4_H */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 728d88b6a698..47817ef0aadb 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1061,7 +1061,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
 static __be32 *
 xdr_encode_nfstime4(__be32 *p, const struct timespec64 *t)
 {
-	p = xdr_encode_hyper(p, (__s64)t->tv_sec);
+	p = xdr_encode_hyper(p, t->tv_sec);
 	*p++ = cpu_to_be32(t->tv_nsec);
 	return p;
 }
@@ -4313,11 +4313,14 @@ static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifi
 
 static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
 {
+	struct nfs_writeverf *verf = res->verf;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_COMMIT);
 	if (!status)
-		status = decode_write_verifier(xdr, &res->verf->verifier);
+		status = decode_write_verifier(xdr, &verf->verifier);
+	if (!status)
+		verf->committed = NFS_FILE_SYNC;
 	return status;
 }
 
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 2a82dcce5fc1..a9588d19a5ae 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -198,7 +198,66 @@ DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit);
 DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
 DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit);
 DEFINE_NFS_INODE_EVENT(nfs_access_enter);
-DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit);
+
+TRACE_EVENT(nfs_access_exit,
+		TP_PROTO(
+			const struct inode *inode,
+			unsigned int mask,
+			unsigned int permitted,
+			int error
+		),
+
+		TP_ARGS(inode, mask, permitted, error),
+
+		TP_STRUCT__entry(
+			__field(unsigned long, error)
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(unsigned char, type)
+			__field(u64, fileid)
+			__field(u64, version)
+			__field(loff_t, size)
+			__field(unsigned long, nfsi_flags)
+			__field(unsigned long, cache_validity)
+			__field(unsigned int, mask)
+			__field(unsigned int, permitted)
+		),
+
+		TP_fast_assign(
+			const struct nfs_inode *nfsi = NFS_I(inode);
+			__entry->error = error < 0 ? -error : 0;
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = nfsi->fileid;
+			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+			__entry->type = nfs_umode_to_dtype(inode->i_mode);
+			__entry->version = inode_peek_iversion_raw(inode);
+			__entry->size = i_size_read(inode);
+			__entry->nfsi_flags = nfsi->flags;
+			__entry->cache_validity = nfsi->cache_validity;
+			__entry->mask = mask;
+			__entry->permitted = permitted;
+		),
+
+		TP_printk(
+			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"type=%u (%s) version=%llu size=%lld "
+			"cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) "
+			"mask=0x%x permitted=0x%x",
+			-__entry->error, nfs_show_status(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			__entry->type,
+			nfs_show_file_type(__entry->type),
+			(unsigned long long)__entry->version,
+			(long long)__entry->size,
+			__entry->cache_validity,
+			nfs_show_cache_validity(__entry->cache_validity),
+			__entry->nfsi_flags,
+			nfs_show_nfsi_flags(__entry->nfsi_flags),
+			__entry->mask, __entry->permitted
+		)
+);
 
 TRACE_DEFINE_ENUM(LOOKUP_FOLLOW);
 TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY);
@@ -818,75 +877,85 @@ TRACE_EVENT(nfs_sillyrename_unlink,
 
 TRACE_EVENT(nfs_initiate_read,
 		TP_PROTO(
-			const struct inode *inode,
-			loff_t offset, unsigned long count
+			const struct nfs_pgio_header *hdr
 		),
 
-		TP_ARGS(inode, offset, count),
+		TP_ARGS(hdr),
 
 		TP_STRUCT__entry(
-			__field(loff_t, offset)
-			__field(unsigned long, count)
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(u32, count)
 		),
 
 		TP_fast_assign(
+			const struct inode *inode = hdr->inode;
 			const struct nfs_inode *nfsi = NFS_I(inode);
+			const struct nfs_fh *fh = hdr->args.fh ?
+						  hdr->args.fh : &nfsi->fh;
 
-			__entry->offset = offset;
-			__entry->count = count;
+			__entry->offset = hdr->args.offset;
+			__entry->count = hdr->args.count;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fileid = nfsi->fileid;
-			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
 		TP_printk(
 			"fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%lld count=%lu",
+			"offset=%lld count=%u",
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
-			__entry->offset, __entry->count
+			(long long)__entry->offset, __entry->count
 		)
 );
 
 TRACE_EVENT(nfs_readpage_done,
 		TP_PROTO(
-			const struct inode *inode,
-			int status, loff_t offset, bool eof
+			const struct rpc_task *task,
+			const struct nfs_pgio_header *hdr
 		),
 
-		TP_ARGS(inode, status, offset, eof),
+		TP_ARGS(task, hdr),
 
 		TP_STRUCT__entry(
-			__field(int, status)
-			__field(loff_t, offset)
-			__field(bool, eof)
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(u32, arg_count)
+			__field(u32, res_count)
+			__field(bool, eof)
+			__field(int, status)
 		),
 
 		TP_fast_assign(
+			const struct inode *inode = hdr->inode;
 			const struct nfs_inode *nfsi = NFS_I(inode);
-
-			__entry->status = status;
-			__entry->offset = offset;
-			__entry->eof = eof;
+			const struct nfs_fh *fh = hdr->args.fh ?
+						  hdr->args.fh : &nfsi->fh;
+
+			__entry->status = task->tk_status;
+			__entry->offset = hdr->args.offset;
+			__entry->arg_count = hdr->args.count;
+			__entry->res_count = hdr->res.count;
+			__entry->eof = hdr->res.eof;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fileid = nfsi->fileid;
-			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
 		TP_printk(
 			"fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%lld status=%d%s",
+			"offset=%lld count=%u res=%u status=%d%s",
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
-			__entry->offset, __entry->status,
+			(long long)__entry->offset, __entry->arg_count,
+			__entry->res_count, __entry->status,
 			__entry->eof ? " eof" : ""
 		)
 );
@@ -903,90 +972,144 @@ TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
 
 TRACE_EVENT(nfs_initiate_write,
 		TP_PROTO(
-			const struct inode *inode,
-			loff_t offset, unsigned long count,
-			enum nfs3_stable_how stable
+			const struct nfs_pgio_header *hdr
 		),
 
-		TP_ARGS(inode, offset, count, stable),
+		TP_ARGS(hdr),
 
 		TP_STRUCT__entry(
-			__field(loff_t, offset)
-			__field(unsigned long, count)
-			__field(enum nfs3_stable_how, stable)
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(u32, count)
+			__field(enum nfs3_stable_how, stable)
 		),
 
 		TP_fast_assign(
+			const struct inode *inode = hdr->inode;
 			const struct nfs_inode *nfsi = NFS_I(inode);
+			const struct nfs_fh *fh = hdr->args.fh ?
+						  hdr->args.fh : &nfsi->fh;
 
-			__entry->offset = offset;
-			__entry->count = count;
-			__entry->stable = stable;
+			__entry->offset = hdr->args.offset;
+			__entry->count = hdr->args.count;
+			__entry->stable = hdr->args.stable;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fileid = nfsi->fileid;
-			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
 		TP_printk(
 			"fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%lld count=%lu stable=%s",
+			"offset=%lld count=%u stable=%s",
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
-			__entry->offset, __entry->count,
+			(long long)__entry->offset, __entry->count,
 			nfs_show_stable(__entry->stable)
 		)
 );
 
 TRACE_EVENT(nfs_writeback_done,
 		TP_PROTO(
-			const struct inode *inode,
-			int status,
-			loff_t offset,
-			struct nfs_writeverf *writeverf
+			const struct rpc_task *task,
+			const struct nfs_pgio_header *hdr
 		),
 
-		TP_ARGS(inode, status, offset, writeverf),
+		TP_ARGS(task, hdr),
 
 		TP_STRUCT__entry(
-			__field(int, status)
-			__field(loff_t, offset)
-			__field(enum nfs3_stable_how, stable)
-			__field(unsigned long long, verifier)
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(u32, arg_count)
+			__field(u32, res_count)
+			__field(int, status)
+			__field(enum nfs3_stable_how, stable)
+			__array(char, verifier, NFS4_VERIFIER_SIZE)
 		),
 
 		TP_fast_assign(
+			const struct inode *inode = hdr->inode;
 			const struct nfs_inode *nfsi = NFS_I(inode);
-
-			__entry->status = status;
-			__entry->offset = offset;
-			__entry->stable = writeverf->committed;
-			memcpy(&__entry->verifier, &writeverf->verifier,
-			       sizeof(__entry->verifier));
+			const struct nfs_fh *fh = hdr->args.fh ?
+						  hdr->args.fh : &nfsi->fh;
+			const struct nfs_writeverf *verf = hdr->res.verf;
+
+			__entry->status = task->tk_status;
+			__entry->offset = hdr->args.offset;
+			__entry->arg_count = hdr->args.count;
+			__entry->res_count = hdr->res.count;
+			__entry->stable = verf->committed;
+			memcpy(__entry->verifier,
+				&verf->verifier,
+				NFS4_VERIFIER_SIZE);
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fileid = nfsi->fileid;
-			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
 		TP_printk(
 			"fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%lld status=%d stable=%s "
-			"verifier 0x%016llx",
+			"offset=%lld count=%u res=%u status=%d stable=%s "
+			"verifier=%s",
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
-			__entry->offset, __entry->status,
+			(long long)__entry->offset, __entry->arg_count,
+			__entry->res_count, __entry->status,
 			nfs_show_stable(__entry->stable),
-			__entry->verifier
+			__print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
 		)
 );
 
+DECLARE_EVENT_CLASS(nfs_page_error_class,
+		TP_PROTO(
+			const struct nfs_page *req,
+			int error
+		),
+
+		TP_ARGS(req, error),
+
+		TP_STRUCT__entry(
+			__field(const void *, req)
+			__field(pgoff_t, index)
+			__field(unsigned int, offset)
+			__field(unsigned int, pgbase)
+			__field(unsigned int, bytes)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			__entry->req = req;
+			__entry->index = req->wb_index;
+			__entry->offset = req->wb_offset;
+			__entry->pgbase = req->wb_pgbase;
+			__entry->bytes = req->wb_bytes;
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"req=%p index=%lu offset=%u pgbase=%u bytes=%u error=%d",
+			__entry->req, __entry->index, __entry->offset,
+			__entry->pgbase, __entry->bytes, __entry->error
+		)
+);
+
+#define DEFINE_NFS_PAGEERR_EVENT(name) \
+	DEFINE_EVENT(nfs_page_error_class, name, \
+			TP_PROTO( \
+				const struct nfs_page *req, \
+				int error \
+			), \
+			TP_ARGS(req, error))
+
+DEFINE_NFS_PAGEERR_EVENT(nfs_write_error);
+DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error);
+DEFINE_NFS_PAGEERR_EVENT(nfs_commit_error);
+
 TRACE_EVENT(nfs_initiate_commit,
 		TP_PROTO(
 			const struct nfs_commit_data *data
@@ -995,71 +1118,81 @@ TRACE_EVENT(nfs_initiate_commit,
 		TP_ARGS(data),
 
 		TP_STRUCT__entry(
-			__field(loff_t, offset)
-			__field(unsigned long, count)
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(u32, count)
 		),
 
 		TP_fast_assign(
 			const struct inode *inode = data->inode;
 			const struct nfs_inode *nfsi = NFS_I(inode);
+			const struct nfs_fh *fh = data->args.fh ?
+						  data->args.fh : &nfsi->fh;
 
 			__entry->offset = data->args.offset;
 			__entry->count = data->args.count;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fileid = nfsi->fileid;
-			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
 		TP_printk(
 			"fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%lld count=%lu",
+			"offset=%lld count=%u",
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
-			__entry->offset, __entry->count
+			(long long)__entry->offset, __entry->count
 		)
 );
 
 TRACE_EVENT(nfs_commit_done,
 		TP_PROTO(
+			const struct rpc_task *task,
 			const struct nfs_commit_data *data
 		),
 
-		TP_ARGS(data),
+		TP_ARGS(task, data),
 
 		TP_STRUCT__entry(
-			__field(int, status)
-			__field(loff_t, offset)
-			__field(unsigned long long, verifier)
 			__field(dev_t, dev)
 			__field(u32, fhandle)
 			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(int, status)
+			__field(enum nfs3_stable_how, stable)
+			__array(char, verifier, NFS4_VERIFIER_SIZE)
 		),
 
 		TP_fast_assign(
 			const struct inode *inode = data->inode;
 			const struct nfs_inode *nfsi = NFS_I(inode);
+			const struct nfs_fh *fh = data->args.fh ?
+						  data->args.fh : &nfsi->fh;
+			const struct nfs_writeverf *verf = data->res.verf;
 
-			__entry->status = data->res.op_status;
+			__entry->status = task->tk_status;
 			__entry->offset = data->args.offset;
-			memcpy(&__entry->verifier, &data->verf.verifier,
-			       sizeof(__entry->verifier));
+			__entry->stable = verf->committed;
+			memcpy(__entry->verifier,
+				&verf->verifier,
+				NFS4_VERIFIER_SIZE);
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fileid = nfsi->fileid;
-			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
 		TP_printk(
 			"fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%lld status=%d verifier 0x%016llx",
+			"offset=%lld status=%d stable=%s verifier=%s",
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
-			__entry->offset, __entry->status,
-			__entry->verifier
+			(long long)__entry->offset, __entry->status,
+			nfs_show_stable(__entry->stable),
+			__print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
 		)
 );
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cec3070ab577..542ea8dfd1bc 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1425,7 +1425,7 @@ retry:
 	/* lo ref dropped in pnfs_roc_release() */
 	layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
 	/* If the creds don't match, we can't compound the layoutreturn */
-	if (!layoutreturn || cred != lo->plh_lc_cred)
+	if (!layoutreturn || cred_fscmp(cred, lo->plh_lc_cred) != 0)
 		goto out_noroc;
 
 	roc = layoutreturn;
@@ -1998,8 +1998,6 @@ lookup_again:
 			trace_pnfs_update_layout(ino, pos, count,
 					iomode, lo, lseg,
 					PNFS_UPDATE_LAYOUT_INVALID_OPEN);
-			if (status != -EAGAIN)
-				goto out_unlock;
 			spin_unlock(&ino->i_lock);
 			nfs4_schedule_stateid_recovery(server, ctx->state);
 			pnfs_clear_first_layoutget(lo);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f8a38065c7e4..0fafdadc9c8d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -79,6 +79,10 @@ enum pnfs_try_status {
 	PNFS_TRY_AGAIN     = 2,
 };
 
+/* error codes for internal use */
+#define NFS4ERR_RESET_TO_MDS   12001
+#define NFS4ERR_RESET_TO_PNFS  12002
+
 #ifdef CONFIG_NFS_V4_1
 
 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
@@ -91,10 +95,6 @@ enum pnfs_try_status {
 #define NFS4_DEF_DS_RETRANS 5
 #define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
 
-/* error codes for internal use */
-#define NFS4ERR_RESET_TO_MDS   12001
-#define NFS4ERR_RESET_TO_PNFS  12002
-
 enum {
 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */
 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 82af4809b869..8b37e7f8e789 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -31,12 +31,11 @@ EXPORT_SYMBOL_GPL(pnfs_generic_rw_release);
 /* Fake up some data that will cause nfs_commit_release to retry the writes. */
 void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data)
 {
-	struct nfs_page *first = nfs_list_entry(data->pages.next);
+	struct nfs_writeverf *verf = data->res.verf;
 
 	data->task.tk_status = 0;
-	memcpy(&data->verf.verifier, &first->wb_verf,
-	       sizeof(data->verf.verifier));
-	data->verf.verifier.data[0]++; /* ensure verifier mismatch */
+	memset(&verf->verifier, 0, sizeof(verf->verifier));
+	verf->committed = NFS_UNSTABLE;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);
 
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 0f7288b94633..15c865cc837f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -108,10 +108,15 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_resp	= fattr,
 	};
 	int	status;
+	unsigned short task_flags = 0;
+
+	/* Is this is an attribute revalidation, subject to softreval? */
+	if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
+		task_flags |= RPC_TASK_TIMEOUT;
 
 	dprintk("NFS call  getattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = rpc_call_sync(server->client, &msg, task_flags);
 	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
@@ -147,14 +152,14 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 }
 
 static int
-nfs_proc_lookup(struct inode *dir, const struct qstr *name,
+nfs_proc_lookup(struct inode *dir, struct dentry *dentry,
 		struct nfs_fh *fhandle, struct nfs_fattr *fattr,
 		struct nfs4_label *label)
 {
 	struct nfs_diropargs	arg = {
 		.fh		= NFS_FH(dir),
-		.name		= name->name,
-		.len		= name->len
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len
 	};
 	struct nfs_diropok	res = {
 		.fh		= fhandle,
@@ -166,10 +171,15 @@ nfs_proc_lookup(struct inode *dir, const struct qstr *name,
 		.rpc_resp	= &res,
 	};
 	int			status;
+	unsigned short task_flags = 0;
+
+	/* Is this is an attribute revalidation, subject to softreval? */
+	if (nfs_lookup_is_soft_revalidate(dentry))
+		task_flags |= RPC_TASK_TIMEOUT;
 
-	dprintk("NFS call  lookup %s\n", name->name);
+	dprintk("NFS call  lookup %pd2\n", dentry);
 	nfs_fattr_init(fattr);
-	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
 	dprintk("NFS reply lookup: %d\n", status);
 	return status;
 }
@@ -710,7 +720,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.file_ops	= &nfs_file_operations,
 	.getroot	= nfs_proc_get_root,
 	.submount	= nfs_submount,
-	.try_mount	= nfs_try_mount,
+	.try_get_tree	= nfs_try_get_tree,
 	.getattr	= nfs_proc_getattr,
 	.setattr	= nfs_proc_setattr,
 	.lookup		= nfs_proc_lookup,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index cfe0b586eadd..34bb9add2302 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -214,7 +214,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr,
 
 	task_setup_data->flags |= swap_flags;
 	rpc_ops->read_setup(hdr, msg);
-	trace_nfs_initiate_read(inode, hdr->io_start, hdr->good_bytes);
+	trace_nfs_initiate_read(hdr);
 }
 
 static void
@@ -247,8 +247,7 @@ static int nfs_readpage_done(struct rpc_task *task,
 		return status;
 
 	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
-	trace_nfs_readpage_done(inode, task->tk_status,
-				hdr->args.offset, hdr->res.eof);
+	trace_nfs_readpage_done(task, hdr);
 
 	if (task->tk_status == -ESTALE) {
 		set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
@@ -282,6 +281,8 @@ static void nfs_readpage_retry(struct rpc_task *task,
 	argp->offset += resp->count;
 	argp->pgbase += resp->count;
 	argp->count -= resp->count;
+	resp->count = 0;
+	resp->eof = 0;
 	rpc_restart_call_prepare(task);
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8d8d04bb9d64..dada09b391c6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -69,250 +69,6 @@
 #include "nfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
-#define NFS_TEXT_DATA		1
-
-#if IS_ENABLED(CONFIG_NFS_V3)
-#define NFS_DEFAULT_VERSION 3
-#else
-#define NFS_DEFAULT_VERSION 2
-#endif
-
-#define NFS_MAX_CONNECTIONS 16
-
-enum {
-	/* Mount options that take no arguments */
-	Opt_soft, Opt_softerr, Opt_hard,
-	Opt_posix, Opt_noposix,
-	Opt_cto, Opt_nocto,
-	Opt_ac, Opt_noac,
-	Opt_lock, Opt_nolock,
-	Opt_udp, Opt_tcp, Opt_rdma,
-	Opt_acl, Opt_noacl,
-	Opt_rdirplus, Opt_nordirplus,
-	Opt_sharecache, Opt_nosharecache,
-	Opt_resvport, Opt_noresvport,
-	Opt_fscache, Opt_nofscache,
-	Opt_migration, Opt_nomigration,
-
-	/* Mount options that take integer arguments */
-	Opt_port,
-	Opt_rsize, Opt_wsize, Opt_bsize,
-	Opt_timeo, Opt_retrans,
-	Opt_acregmin, Opt_acregmax,
-	Opt_acdirmin, Opt_acdirmax,
-	Opt_actimeo,
-	Opt_namelen,
-	Opt_mountport,
-	Opt_mountvers,
-	Opt_minorversion,
-
-	/* Mount options that take string arguments */
-	Opt_nfsvers,
-	Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
-	Opt_addr, Opt_mountaddr, Opt_clientaddr,
-	Opt_nconnect,
-	Opt_lookupcache,
-	Opt_fscache_uniq,
-	Opt_local_lock,
-
-	/* Special mount options */
-	Opt_userspace, Opt_deprecated, Opt_sloppy,
-
-	Opt_err
-};
-
-static const match_table_t nfs_mount_option_tokens = {
-	{ Opt_userspace, "bg" },
-	{ Opt_userspace, "fg" },
-	{ Opt_userspace, "retry=%s" },
-
-	{ Opt_sloppy, "sloppy" },
-
-	{ Opt_soft, "soft" },
-	{ Opt_softerr, "softerr" },
-	{ Opt_hard, "hard" },
-	{ Opt_deprecated, "intr" },
-	{ Opt_deprecated, "nointr" },
-	{ Opt_posix, "posix" },
-	{ Opt_noposix, "noposix" },
-	{ Opt_cto, "cto" },
-	{ Opt_nocto, "nocto" },
-	{ Opt_ac, "ac" },
-	{ Opt_noac, "noac" },
-	{ Opt_lock, "lock" },
-	{ Opt_nolock, "nolock" },
-	{ Opt_udp, "udp" },
-	{ Opt_tcp, "tcp" },
-	{ Opt_rdma, "rdma" },
-	{ Opt_acl, "acl" },
-	{ Opt_noacl, "noacl" },
-	{ Opt_rdirplus, "rdirplus" },
-	{ Opt_nordirplus, "nordirplus" },
-	{ Opt_sharecache, "sharecache" },
-	{ Opt_nosharecache, "nosharecache" },
-	{ Opt_resvport, "resvport" },
-	{ Opt_noresvport, "noresvport" },
-	{ Opt_fscache, "fsc" },
-	{ Opt_nofscache, "nofsc" },
-	{ Opt_migration, "migration" },
-	{ Opt_nomigration, "nomigration" },
-
-	{ Opt_port, "port=%s" },
-	{ Opt_rsize, "rsize=%s" },
-	{ Opt_wsize, "wsize=%s" },
-	{ Opt_bsize, "bsize=%s" },
-	{ Opt_timeo, "timeo=%s" },
-	{ Opt_retrans, "retrans=%s" },
-	{ Opt_acregmin, "acregmin=%s" },
-	{ Opt_acregmax, "acregmax=%s" },
-	{ Opt_acdirmin, "acdirmin=%s" },
-	{ Opt_acdirmax, "acdirmax=%s" },
-	{ Opt_actimeo, "actimeo=%s" },
-	{ Opt_namelen, "namlen=%s" },
-	{ Opt_mountport, "mountport=%s" },
-	{ Opt_mountvers, "mountvers=%s" },
-	{ Opt_minorversion, "minorversion=%s" },
-
-	{ Opt_nfsvers, "nfsvers=%s" },
-	{ Opt_nfsvers, "vers=%s" },
-
-	{ Opt_sec, "sec=%s" },
-	{ Opt_proto, "proto=%s" },
-	{ Opt_mountproto, "mountproto=%s" },
-	{ Opt_addr, "addr=%s" },
-	{ Opt_clientaddr, "clientaddr=%s" },
-	{ Opt_mounthost, "mounthost=%s" },
-	{ Opt_mountaddr, "mountaddr=%s" },
-
-	{ Opt_nconnect, "nconnect=%s" },
-
-	{ Opt_lookupcache, "lookupcache=%s" },
-	{ Opt_fscache_uniq, "fsc=%s" },
-	{ Opt_local_lock, "local_lock=%s" },
-
-	/* The following needs to be listed after all other options */
-	{ Opt_nfsvers, "v%s" },
-
-	{ Opt_err, NULL }
-};
-
-enum {
-	Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma,
-	Opt_xprt_rdma6,
-
-	Opt_xprt_err
-};
-
-static const match_table_t nfs_xprt_protocol_tokens = {
-	{ Opt_xprt_udp, "udp" },
-	{ Opt_xprt_udp6, "udp6" },
-	{ Opt_xprt_tcp, "tcp" },
-	{ Opt_xprt_tcp6, "tcp6" },
-	{ Opt_xprt_rdma, "rdma" },
-	{ Opt_xprt_rdma6, "rdma6" },
-
-	{ Opt_xprt_err, NULL }
-};
-
-enum {
-	Opt_sec_none, Opt_sec_sys,
-	Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
-	Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
-	Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
-
-	Opt_sec_err
-};
-
-static const match_table_t nfs_secflavor_tokens = {
-	{ Opt_sec_none, "none" },
-	{ Opt_sec_none, "null" },
-	{ Opt_sec_sys, "sys" },
-
-	{ Opt_sec_krb5, "krb5" },
-	{ Opt_sec_krb5i, "krb5i" },
-	{ Opt_sec_krb5p, "krb5p" },
-
-	{ Opt_sec_lkey, "lkey" },
-	{ Opt_sec_lkeyi, "lkeyi" },
-	{ Opt_sec_lkeyp, "lkeyp" },
-
-	{ Opt_sec_spkm, "spkm3" },
-	{ Opt_sec_spkmi, "spkm3i" },
-	{ Opt_sec_spkmp, "spkm3p" },
-
-	{ Opt_sec_err, NULL }
-};
-
-enum {
-	Opt_lookupcache_all, Opt_lookupcache_positive,
-	Opt_lookupcache_none,
-
-	Opt_lookupcache_err
-};
-
-static match_table_t nfs_lookupcache_tokens = {
-	{ Opt_lookupcache_all, "all" },
-	{ Opt_lookupcache_positive, "pos" },
-	{ Opt_lookupcache_positive, "positive" },
-	{ Opt_lookupcache_none, "none" },
-
-	{ Opt_lookupcache_err, NULL }
-};
-
-enum {
-	Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix,
-	Opt_local_lock_none,
-
-	Opt_local_lock_err
-};
-
-static match_table_t nfs_local_lock_tokens = {
-	{ Opt_local_lock_all, "all" },
-	{ Opt_local_lock_flock, "flock" },
-	{ Opt_local_lock_posix, "posix" },
-	{ Opt_local_lock_none, "none" },
-
-	{ Opt_local_lock_err, NULL }
-};
-
-enum {
-	Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0,
-	Opt_vers_4_1, Opt_vers_4_2,
-
-	Opt_vers_err
-};
-
-static match_table_t nfs_vers_tokens = {
-	{ Opt_vers_2, "2" },
-	{ Opt_vers_3, "3" },
-	{ Opt_vers_4, "4" },
-	{ Opt_vers_4_0, "4.0" },
-	{ Opt_vers_4_1, "4.1" },
-	{ Opt_vers_4_2, "4.2" },
-
-	{ Opt_vers_err, NULL }
-};
-
-static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data);
-
-struct file_system_type nfs_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "nfs",
-	.mount		= nfs_fs_mount,
-	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-MODULE_ALIAS_FS("nfs");
-EXPORT_SYMBOL_GPL(nfs_fs_type);
-
-struct file_system_type nfs_xdev_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "nfs",
-	.mount		= nfs_xdev_mount,
-	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
 
 const struct super_operations nfs_sops = {
 	.alloc_inode	= nfs_alloc_inode,
@@ -326,26 +82,10 @@ const struct super_operations nfs_sops = {
 	.show_devname	= nfs_show_devname,
 	.show_path	= nfs_show_path,
 	.show_stats	= nfs_show_stats,
-	.remount_fs	= nfs_remount,
 };
 EXPORT_SYMBOL_GPL(nfs_sops);
 
 #if IS_ENABLED(CONFIG_NFS_V4)
-static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
-static int nfs4_validate_mount_data(void *options,
-	struct nfs_parsed_mount_data *args, const char *dev_name);
-
-struct file_system_type nfs4_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "nfs4",
-	.mount		= nfs_fs_mount,
-	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-MODULE_ALIAS_FS("nfs4");
-MODULE_ALIAS("nfs4");
-EXPORT_SYMBOL_GPL(nfs4_fs_type);
-
 static int __init register_nfs4_fs(void)
 {
 	return register_filesystem(&nfs4_fs_type);
@@ -635,6 +375,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", "" },
 		{ NFS_MOUNT_SOFTERR, ",softerr", "" },
+		{ NFS_MOUNT_SOFTREVAL, ",softreval", "" },
 		{ NFS_MOUNT_POSIX, ",posix", "" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
@@ -931,141 +672,6 @@ void nfs_umount_begin(struct super_block *sb)
 }
 EXPORT_SYMBOL_GPL(nfs_umount_begin);
 
-static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
-{
-	struct nfs_parsed_mount_data *data;
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (data) {
-		data->timeo		= NFS_UNSPEC_TIMEO;
-		data->retrans		= NFS_UNSPEC_RETRANS;
-		data->acregmin		= NFS_DEF_ACREGMIN;
-		data->acregmax		= NFS_DEF_ACREGMAX;
-		data->acdirmin		= NFS_DEF_ACDIRMIN;
-		data->acdirmax		= NFS_DEF_ACDIRMAX;
-		data->mount_server.port	= NFS_UNSPEC_PORT;
-		data->nfs_server.port	= NFS_UNSPEC_PORT;
-		data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-		data->selected_flavor	= RPC_AUTH_MAXFLAVOR;
-		data->minorversion	= 0;
-		data->need_mount	= true;
-		data->net		= current->nsproxy->net_ns;
-		data->lsm_opts		= NULL;
-	}
-	return data;
-}
-
-static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data)
-{
-	if (data) {
-		kfree(data->client_address);
-		kfree(data->mount_server.hostname);
-		kfree(data->nfs_server.export_path);
-		kfree(data->nfs_server.hostname);
-		kfree(data->fscache_uniq);
-		security_free_mnt_opts(&data->lsm_opts);
-		kfree(data);
-	}
-}
-
-/*
- * Sanity-check a server address provided by the mount command.
- *
- * Address family must be initialized, and address must not be
- * the ANY address for that family.
- */
-static int nfs_verify_server_address(struct sockaddr *addr)
-{
-	switch (addr->sa_family) {
-	case AF_INET: {
-		struct sockaddr_in *sa = (struct sockaddr_in *)addr;
-		return sa->sin_addr.s_addr != htonl(INADDR_ANY);
-	}
-	case AF_INET6: {
-		struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
-		return !ipv6_addr_any(sa);
-	}
-	}
-
-	dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
-	return 0;
-}
-
-/*
- * Select between a default port value and a user-specified port value.
- * If a zero value is set, then autobind will be used.
- */
-static void nfs_set_port(struct sockaddr *sap, int *port,
-				 const unsigned short default_port)
-{
-	if (*port == NFS_UNSPEC_PORT)
-		*port = default_port;
-
-	rpc_set_port(sap, *port);
-}
-
-/*
- * Sanity check the NFS transport protocol.
- *
- */
-static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt)
-{
-	switch (mnt->nfs_server.protocol) {
-	case XPRT_TRANSPORT_UDP:
-	case XPRT_TRANSPORT_TCP:
-	case XPRT_TRANSPORT_RDMA:
-		break;
-	default:
-		mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-	}
-}
-
-/*
- * For text based NFSv2/v3 mounts, the mount protocol transport default
- * settings should depend upon the specified NFS transport.
- */
-static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
-{
-	nfs_validate_transport_protocol(mnt);
-
-	if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP ||
-	    mnt->mount_server.protocol == XPRT_TRANSPORT_TCP)
-			return;
-	switch (mnt->nfs_server.protocol) {
-	case XPRT_TRANSPORT_UDP:
-		mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
-		break;
-	case XPRT_TRANSPORT_TCP:
-	case XPRT_TRANSPORT_RDMA:
-		mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
-	}
-}
-
-/*
- * Add 'flavor' to 'auth_info' if not already present.
- * Returns true if 'flavor' ends up in the list, false otherwise
- */
-static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
-			      rpc_authflavor_t flavor)
-{
-	unsigned int i;
-	unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
-
-	/* make sure this flavor isn't already in the list */
-	for (i = 0; i < auth_info->flavor_len; i++) {
-		if (flavor == auth_info->flavors[i])
-			return true;
-	}
-
-	if (auth_info->flavor_len + 1 >= max_flavor_len) {
-		dfprintk(MOUNT, "NFS: too many sec= flavors\n");
-		return false;
-	}
-
-	auth_info->flavors[auth_info->flavor_len++] = flavor;
-	return true;
-}
-
 /*
  * Return true if 'match' is in auth_info or auth_info is empty.
  * Return false otherwise.
@@ -1087,633 +693,13 @@ bool nfs_auth_info_match(const struct nfs_auth_info *auth_info,
 EXPORT_SYMBOL_GPL(nfs_auth_info_match);
 
 /*
- * Parse the value of the 'sec=' option.
- */
-static int nfs_parse_security_flavors(char *value,
-				      struct nfs_parsed_mount_data *mnt)
-{
-	substring_t args[MAX_OPT_ARGS];
-	rpc_authflavor_t pseudoflavor;
-	char *p;
-
-	dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value);
-
-	while ((p = strsep(&value, ":")) != NULL) {
-		switch (match_token(p, nfs_secflavor_tokens, args)) {
-		case Opt_sec_none:
-			pseudoflavor = RPC_AUTH_NULL;
-			break;
-		case Opt_sec_sys:
-			pseudoflavor = RPC_AUTH_UNIX;
-			break;
-		case Opt_sec_krb5:
-			pseudoflavor = RPC_AUTH_GSS_KRB5;
-			break;
-		case Opt_sec_krb5i:
-			pseudoflavor = RPC_AUTH_GSS_KRB5I;
-			break;
-		case Opt_sec_krb5p:
-			pseudoflavor = RPC_AUTH_GSS_KRB5P;
-			break;
-		case Opt_sec_lkey:
-			pseudoflavor = RPC_AUTH_GSS_LKEY;
-			break;
-		case Opt_sec_lkeyi:
-			pseudoflavor = RPC_AUTH_GSS_LKEYI;
-			break;
-		case Opt_sec_lkeyp:
-			pseudoflavor = RPC_AUTH_GSS_LKEYP;
-			break;
-		case Opt_sec_spkm:
-			pseudoflavor = RPC_AUTH_GSS_SPKM;
-			break;
-		case Opt_sec_spkmi:
-			pseudoflavor = RPC_AUTH_GSS_SPKMI;
-			break;
-		case Opt_sec_spkmp:
-			pseudoflavor = RPC_AUTH_GSS_SPKMP;
-			break;
-		default:
-			dfprintk(MOUNT,
-				 "NFS: sec= option '%s' not recognized\n", p);
-			return 0;
-		}
-
-		if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor))
-			return 0;
-	}
-
-	return 1;
-}
-
-static int nfs_parse_version_string(char *string,
-		struct nfs_parsed_mount_data *mnt,
-		substring_t *args)
-{
-	mnt->flags &= ~NFS_MOUNT_VER3;
-	switch (match_token(string, nfs_vers_tokens, args)) {
-	case Opt_vers_2:
-		mnt->version = 2;
-		break;
-	case Opt_vers_3:
-		mnt->flags |= NFS_MOUNT_VER3;
-		mnt->version = 3;
-		break;
-	case Opt_vers_4:
-		/* Backward compatibility option. In future,
-		 * the mount program should always supply
-		 * a NFSv4 minor version number.
-		 */
-		mnt->version = 4;
-		break;
-	case Opt_vers_4_0:
-		mnt->version = 4;
-		mnt->minorversion = 0;
-		break;
-	case Opt_vers_4_1:
-		mnt->version = 4;
-		mnt->minorversion = 1;
-		break;
-	case Opt_vers_4_2:
-		mnt->version = 4;
-		mnt->minorversion = 2;
-		break;
-	default:
-		return 0;
-	}
-	return 1;
-}
-
-static int nfs_get_option_str(substring_t args[], char **option)
-{
-	kfree(*option);
-	*option = match_strdup(args);
-	return !*option;
-}
-
-static int nfs_get_option_ul(substring_t args[], unsigned long *option)
-{
-	int rc;
-	char *string;
-
-	string = match_strdup(args);
-	if (string == NULL)
-		return -ENOMEM;
-	rc = kstrtoul(string, 10, option);
-	kfree(string);
-
-	return rc;
-}
-
-static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option,
-		unsigned long l_bound, unsigned long u_bound)
-{
-	int ret;
-
-	ret = nfs_get_option_ul(args, option);
-	if (ret != 0)
-		return ret;
-	if (*option < l_bound || *option > u_bound)
-		return -ERANGE;
-	return 0;
-}
-
-/*
- * Error-check and convert a string of mount options from user space into
- * a data structure.  The whole mount string is processed; bad options are
- * skipped as they are encountered.  If there were no errors, return 1;
- * otherwise return 0 (zero).
- */
-static int nfs_parse_mount_options(char *raw,
-				   struct nfs_parsed_mount_data *mnt)
-{
-	char *p, *string;
-	int rc, sloppy = 0, invalid_option = 0;
-	unsigned short protofamily = AF_UNSPEC;
-	unsigned short mountfamily = AF_UNSPEC;
-
-	if (!raw) {
-		dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
-		return 1;
-	}
-	dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
-
-	rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts);
-	if (rc)
-		goto out_security_failure;
-
-	while ((p = strsep(&raw, ",")) != NULL) {
-		substring_t args[MAX_OPT_ARGS];
-		unsigned long option;
-		int token;
-
-		if (!*p)
-			continue;
-
-		dfprintk(MOUNT, "NFS:   parsing nfs mount option '%s'\n", p);
-
-		token = match_token(p, nfs_mount_option_tokens, args);
-		switch (token) {
-
-		/*
-		 * boolean options:  foo/nofoo
-		 */
-		case Opt_soft:
-			mnt->flags |= NFS_MOUNT_SOFT;
-			mnt->flags &= ~NFS_MOUNT_SOFTERR;
-			break;
-		case Opt_softerr:
-			mnt->flags |= NFS_MOUNT_SOFTERR;
-			mnt->flags &= ~NFS_MOUNT_SOFT;
-			break;
-		case Opt_hard:
-			mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR);
-			break;
-		case Opt_posix:
-			mnt->flags |= NFS_MOUNT_POSIX;
-			break;
-		case Opt_noposix:
-			mnt->flags &= ~NFS_MOUNT_POSIX;
-			break;
-		case Opt_cto:
-			mnt->flags &= ~NFS_MOUNT_NOCTO;
-			break;
-		case Opt_nocto:
-			mnt->flags |= NFS_MOUNT_NOCTO;
-			break;
-		case Opt_ac:
-			mnt->flags &= ~NFS_MOUNT_NOAC;
-			break;
-		case Opt_noac:
-			mnt->flags |= NFS_MOUNT_NOAC;
-			break;
-		case Opt_lock:
-			mnt->flags &= ~NFS_MOUNT_NONLM;
-			mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
-					NFS_MOUNT_LOCAL_FCNTL);
-			break;
-		case Opt_nolock:
-			mnt->flags |= NFS_MOUNT_NONLM;
-			mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
-				       NFS_MOUNT_LOCAL_FCNTL);
-			break;
-		case Opt_udp:
-			mnt->flags &= ~NFS_MOUNT_TCP;
-			mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
-			break;
-		case Opt_tcp:
-			mnt->flags |= NFS_MOUNT_TCP;
-			mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-			break;
-		case Opt_rdma:
-			mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
-			mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
-			xprt_load_transport(p);
-			break;
-		case Opt_acl:
-			mnt->flags &= ~NFS_MOUNT_NOACL;
-			break;
-		case Opt_noacl:
-			mnt->flags |= NFS_MOUNT_NOACL;
-			break;
-		case Opt_rdirplus:
-			mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
-			break;
-		case Opt_nordirplus:
-			mnt->flags |= NFS_MOUNT_NORDIRPLUS;
-			break;
-		case Opt_sharecache:
-			mnt->flags &= ~NFS_MOUNT_UNSHARED;
-			break;
-		case Opt_nosharecache:
-			mnt->flags |= NFS_MOUNT_UNSHARED;
-			break;
-		case Opt_resvport:
-			mnt->flags &= ~NFS_MOUNT_NORESVPORT;
-			break;
-		case Opt_noresvport:
-			mnt->flags |= NFS_MOUNT_NORESVPORT;
-			break;
-		case Opt_fscache:
-			mnt->options |= NFS_OPTION_FSCACHE;
-			kfree(mnt->fscache_uniq);
-			mnt->fscache_uniq = NULL;
-			break;
-		case Opt_nofscache:
-			mnt->options &= ~NFS_OPTION_FSCACHE;
-			kfree(mnt->fscache_uniq);
-			mnt->fscache_uniq = NULL;
-			break;
-		case Opt_migration:
-			mnt->options |= NFS_OPTION_MIGRATION;
-			break;
-		case Opt_nomigration:
-			mnt->options &= ~NFS_OPTION_MIGRATION;
-			break;
-
-		/*
-		 * options that take numeric values
-		 */
-		case Opt_port:
-			if (nfs_get_option_ul(args, &option) ||
-			    option > USHRT_MAX)
-				goto out_invalid_value;
-			mnt->nfs_server.port = option;
-			break;
-		case Opt_rsize:
-			if (nfs_get_option_ul(args, &option))
-				goto out_invalid_value;
-			mnt->rsize = option;
-			break;
-		case Opt_wsize:
-			if (nfs_get_option_ul(args, &option))
-				goto out_invalid_value;
-			mnt->wsize = option;
-			break;
-		case Opt_bsize:
-			if (nfs_get_option_ul(args, &option))
-				goto out_invalid_value;
-			mnt->bsize = option;
-			break;
-		case Opt_timeo:
-			if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX))
-				goto out_invalid_value;
-			mnt->timeo = option;
-			break;
-		case Opt_retrans:
-			if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX))
-				goto out_invalid_value;
-			mnt->retrans = option;
-			break;
-		case Opt_acregmin:
-			if (nfs_get_option_ul(args, &option))
-				goto out_invalid_value;
-			mnt->acregmin = option;
-			break;
-		case Opt_acregmax:
-			if (nfs_get_option_ul(args, &option))
-				goto out_invalid_value;
-			mnt->acregmax = option;
-			break;
-		case Opt_acdirmin:
-			if (nfs_get_option_ul(args, &option))
-				goto out_invalid_value;
-			mnt->acdirmin = option;
-			break;
-		case Opt_acdirmax:
-			if (nfs_get_option_ul(args, &option))
-				goto out_invalid_value;
-			mnt->acdirmax = option;
-			break;
-		case Opt_actimeo:
-			if (nfs_get_option_ul(args, &option))
-				goto out_invalid_value;
-			mnt->acregmin = mnt->acregmax =
-			mnt->acdirmin = mnt->acdirmax = option;
-			break;
-		case Opt_namelen:
-			if (nfs_get_option_ul(args, &option))
-				goto out_invalid_value;
-			mnt->namlen = option;
-			break;
-		case Opt_mountport:
-			if (nfs_get_option_ul(args, &option) ||
-			    option > USHRT_MAX)
-				goto out_invalid_value;
-			mnt->mount_server.port = option;
-			break;
-		case Opt_mountvers:
-			if (nfs_get_option_ul(args, &option) ||
-			    option < NFS_MNT_VERSION ||
-			    option > NFS_MNT3_VERSION)
-				goto out_invalid_value;
-			mnt->mount_server.version = option;
-			break;
-		case Opt_minorversion:
-			if (nfs_get_option_ul(args, &option))
-				goto out_invalid_value;
-			if (option > NFS4_MAX_MINOR_VERSION)
-				goto out_invalid_value;
-			mnt->minorversion = option;
-			break;
-
-		/*
-		 * options that take text values
-		 */
-		case Opt_nfsvers:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-			rc = nfs_parse_version_string(string, mnt, args);
-			kfree(string);
-			if (!rc)
-				goto out_invalid_value;
-			break;
-		case Opt_sec:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-			rc = nfs_parse_security_flavors(string, mnt);
-			kfree(string);
-			if (!rc) {
-				dfprintk(MOUNT, "NFS:   unrecognized "
-						"security flavor\n");
-				return 0;
-			}
-			break;
-		case Opt_proto:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-			token = match_token(string,
-					    nfs_xprt_protocol_tokens, args);
-
-			protofamily = AF_INET;
-			switch (token) {
-			case Opt_xprt_udp6:
-				protofamily = AF_INET6;
-				/* fall through */
-			case Opt_xprt_udp:
-				mnt->flags &= ~NFS_MOUNT_TCP;
-				mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
-				break;
-			case Opt_xprt_tcp6:
-				protofamily = AF_INET6;
-				/* fall through */
-			case Opt_xprt_tcp:
-				mnt->flags |= NFS_MOUNT_TCP;
-				mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-				break;
-			case Opt_xprt_rdma6:
-				protofamily = AF_INET6;
-				/* fall through */
-			case Opt_xprt_rdma:
-				/* vector side protocols to TCP */
-				mnt->flags |= NFS_MOUNT_TCP;
-				mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
-				xprt_load_transport(string);
-				break;
-			default:
-				dfprintk(MOUNT, "NFS:   unrecognized "
-						"transport protocol\n");
-				kfree(string);
-				return 0;
-			}
-			kfree(string);
-			break;
-		case Opt_mountproto:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-			token = match_token(string,
-					    nfs_xprt_protocol_tokens, args);
-			kfree(string);
-
-			mountfamily = AF_INET;
-			switch (token) {
-			case Opt_xprt_udp6:
-				mountfamily = AF_INET6;
-				/* fall through */
-			case Opt_xprt_udp:
-				mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
-				break;
-			case Opt_xprt_tcp6:
-				mountfamily = AF_INET6;
-				/* fall through */
-			case Opt_xprt_tcp:
-				mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
-				break;
-			case Opt_xprt_rdma: /* not used for side protocols */
-			default:
-				dfprintk(MOUNT, "NFS:   unrecognized "
-						"transport protocol\n");
-				return 0;
-			}
-			break;
-		case Opt_addr:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-			mnt->nfs_server.addrlen =
-				rpc_pton(mnt->net, string, strlen(string),
-					(struct sockaddr *)
-					&mnt->nfs_server.address,
-					sizeof(mnt->nfs_server.address));
-			kfree(string);
-			if (mnt->nfs_server.addrlen == 0)
-				goto out_invalid_address;
-			break;
-		case Opt_clientaddr:
-			if (nfs_get_option_str(args, &mnt->client_address))
-				goto out_nomem;
-			break;
-		case Opt_mounthost:
-			if (nfs_get_option_str(args,
-					       &mnt->mount_server.hostname))
-				goto out_nomem;
-			break;
-		case Opt_mountaddr:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-			mnt->mount_server.addrlen =
-				rpc_pton(mnt->net, string, strlen(string),
-					(struct sockaddr *)
-					&mnt->mount_server.address,
-					sizeof(mnt->mount_server.address));
-			kfree(string);
-			if (mnt->mount_server.addrlen == 0)
-				goto out_invalid_address;
-			break;
-		case Opt_nconnect:
-			if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS))
-				goto out_invalid_value;
-			mnt->nfs_server.nconnect = option;
-			break;
-		case Opt_lookupcache:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-			token = match_token(string,
-					nfs_lookupcache_tokens, args);
-			kfree(string);
-			switch (token) {
-				case Opt_lookupcache_all:
-					mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE);
-					break;
-				case Opt_lookupcache_positive:
-					mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE;
-					mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG;
-					break;
-				case Opt_lookupcache_none:
-					mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE;
-					break;
-				default:
-					dfprintk(MOUNT, "NFS:   invalid "
-							"lookupcache argument\n");
-					return 0;
-			}
-			break;
-		case Opt_fscache_uniq:
-			if (nfs_get_option_str(args, &mnt->fscache_uniq))
-				goto out_nomem;
-			mnt->options |= NFS_OPTION_FSCACHE;
-			break;
-		case Opt_local_lock:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-			token = match_token(string, nfs_local_lock_tokens,
-					args);
-			kfree(string);
-			switch (token) {
-			case Opt_local_lock_all:
-				mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
-					       NFS_MOUNT_LOCAL_FCNTL);
-				break;
-			case Opt_local_lock_flock:
-				mnt->flags |= NFS_MOUNT_LOCAL_FLOCK;
-				break;
-			case Opt_local_lock_posix:
-				mnt->flags |= NFS_MOUNT_LOCAL_FCNTL;
-				break;
-			case Opt_local_lock_none:
-				mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
-						NFS_MOUNT_LOCAL_FCNTL);
-				break;
-			default:
-				dfprintk(MOUNT, "NFS:	invalid	"
-						"local_lock argument\n");
-				return 0;
-			}
-			break;
-
-		/*
-		 * Special options
-		 */
-		case Opt_sloppy:
-			sloppy = 1;
-			dfprintk(MOUNT, "NFS:   relaxing parsing rules\n");
-			break;
-		case Opt_userspace:
-		case Opt_deprecated:
-			dfprintk(MOUNT, "NFS:   ignoring mount option "
-					"'%s'\n", p);
-			break;
-
-		default:
-			invalid_option = 1;
-			dfprintk(MOUNT, "NFS:   unrecognized mount option "
-					"'%s'\n", p);
-		}
-	}
-
-	if (!sloppy && invalid_option)
-		return 0;
-
-	if (mnt->minorversion && mnt->version != 4)
-		goto out_minorversion_mismatch;
-
-	if (mnt->options & NFS_OPTION_MIGRATION &&
-	    (mnt->version != 4 || mnt->minorversion != 0))
-		goto out_migration_misuse;
-
-	/*
-	 * verify that any proto=/mountproto= options match the address
-	 * families in the addr=/mountaddr= options.
-	 */
-	if (protofamily != AF_UNSPEC &&
-	    protofamily != mnt->nfs_server.address.ss_family)
-		goto out_proto_mismatch;
-
-	if (mountfamily != AF_UNSPEC) {
-		if (mnt->mount_server.addrlen) {
-			if (mountfamily != mnt->mount_server.address.ss_family)
-				goto out_mountproto_mismatch;
-		} else {
-			if (mountfamily != mnt->nfs_server.address.ss_family)
-				goto out_mountproto_mismatch;
-		}
-	}
-
-	return 1;
-
-out_mountproto_mismatch:
-	printk(KERN_INFO "NFS: mount server address does not match mountproto= "
-			 "option\n");
-	return 0;
-out_proto_mismatch:
-	printk(KERN_INFO "NFS: server address does not match proto= option\n");
-	return 0;
-out_invalid_address:
-	printk(KERN_INFO "NFS: bad IP address specified: %s\n", p);
-	return 0;
-out_invalid_value:
-	printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p);
-	return 0;
-out_minorversion_mismatch:
-	printk(KERN_INFO "NFS: mount option vers=%u does not support "
-			 "minorversion=%u\n", mnt->version, mnt->minorversion);
-	return 0;
-out_migration_misuse:
-	printk(KERN_INFO
-		"NFS: 'migration' not supported for this NFS version\n");
-	return 0;
-out_nomem:
-	printk(KERN_INFO "NFS: not enough memory to parse option\n");
-	return 0;
-out_security_failure:
-	printk(KERN_INFO "NFS: security options invalid: %d\n", rc);
-	return 0;
-}
-
-/*
- * Ensure that a specified authtype in args->auth_info is supported by
- * the server. Returns 0 and sets args->selected_flavor if it's ok, and
+ * Ensure that a specified authtype in ctx->auth_info is supported by
+ * the server. Returns 0 and sets ctx->selected_flavor if it's ok, and
  * -EACCES if not.
  */
-static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
-			rpc_authflavor_t *server_authlist, unsigned int count)
+static int nfs_verify_authflavors(struct nfs_fs_context *ctx,
+				  rpc_authflavor_t *server_authlist,
+				  unsigned int count)
 {
 	rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
 	bool found_auth_null = false;
@@ -1734,7 +720,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
 	for (i = 0; i < count; i++) {
 		flavor = server_authlist[i];
 
-		if (nfs_auth_info_match(&args->auth_info, flavor))
+		if (nfs_auth_info_match(&ctx->auth_info, flavor))
 			goto out;
 
 		if (flavor == RPC_AUTH_NULL)
@@ -1742,7 +728,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
 	}
 
 	if (found_auth_null) {
-		flavor = args->auth_info.flavors[0];
+		flavor = ctx->auth_info.flavors[0];
 		goto out;
 	}
 
@@ -1751,8 +737,8 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
 	return -EACCES;
 
 out:
-	args->selected_flavor = flavor;
-	dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor);
+	ctx->selected_flavor = flavor;
+	dfprintk(MOUNT, "NFS: using auth flavor %u\n", ctx->selected_flavor);
 	return 0;
 }
 
@@ -1760,50 +746,51 @@ out:
  * Use the remote server's MOUNT service to request the NFS file handle
  * corresponding to the provided path.
  */
-static int nfs_request_mount(struct nfs_parsed_mount_data *args,
+static int nfs_request_mount(struct fs_context *fc,
 			     struct nfs_fh *root_fh,
 			     rpc_authflavor_t *server_authlist,
 			     unsigned int *server_authlist_len)
 {
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
 	struct nfs_mount_request request = {
 		.sap		= (struct sockaddr *)
-						&args->mount_server.address,
-		.dirpath	= args->nfs_server.export_path,
-		.protocol	= args->mount_server.protocol,
+						&ctx->mount_server.address,
+		.dirpath	= ctx->nfs_server.export_path,
+		.protocol	= ctx->mount_server.protocol,
 		.fh		= root_fh,
-		.noresvport	= args->flags & NFS_MOUNT_NORESVPORT,
+		.noresvport	= ctx->flags & NFS_MOUNT_NORESVPORT,
 		.auth_flav_len	= server_authlist_len,
 		.auth_flavs	= server_authlist,
-		.net		= args->net,
+		.net		= fc->net_ns,
 	};
 	int status;
 
-	if (args->mount_server.version == 0) {
-		switch (args->version) {
+	if (ctx->mount_server.version == 0) {
+		switch (ctx->version) {
 			default:
-				args->mount_server.version = NFS_MNT3_VERSION;
+				ctx->mount_server.version = NFS_MNT3_VERSION;
 				break;
 			case 2:
-				args->mount_server.version = NFS_MNT_VERSION;
+				ctx->mount_server.version = NFS_MNT_VERSION;
 		}
 	}
-	request.version = args->mount_server.version;
+	request.version = ctx->mount_server.version;
 
-	if (args->mount_server.hostname)
-		request.hostname = args->mount_server.hostname;
+	if (ctx->mount_server.hostname)
+		request.hostname = ctx->mount_server.hostname;
 	else
-		request.hostname = args->nfs_server.hostname;
+		request.hostname = ctx->nfs_server.hostname;
 
 	/*
 	 * Construct the mount server's address.
 	 */
-	if (args->mount_server.address.ss_family == AF_UNSPEC) {
-		memcpy(request.sap, &args->nfs_server.address,
-		       args->nfs_server.addrlen);
-		args->mount_server.addrlen = args->nfs_server.addrlen;
+	if (ctx->mount_server.address.sa_family == AF_UNSPEC) {
+		memcpy(request.sap, &ctx->nfs_server.address,
+		       ctx->nfs_server.addrlen);
+		ctx->mount_server.addrlen = ctx->nfs_server.addrlen;
 	}
-	request.salen = args->mount_server.addrlen;
-	nfs_set_port(request.sap, &args->mount_server.port, 0);
+	request.salen = ctx->mount_server.addrlen;
+	nfs_set_port(request.sap, &ctx->mount_server.port, 0);
 
 	/*
 	 * Now ask the mount server to map our export path
@@ -1819,20 +806,18 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args,
 	return 0;
 }
 
-static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info,
-					struct nfs_subversion *nfs_mod)
+static struct nfs_server *nfs_try_mount_request(struct fs_context *fc)
 {
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
 	int status;
 	unsigned int i;
 	bool tried_auth_unix = false;
 	bool auth_null_in_list = false;
 	struct nfs_server *server = ERR_PTR(-EACCES);
-	struct nfs_parsed_mount_data *args = mount_info->parsed;
 	rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS];
 	unsigned int authlist_len = ARRAY_SIZE(authlist);
 
-	status = nfs_request_mount(args, mount_info->mntfh, authlist,
-					&authlist_len);
+	status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len);
 	if (status)
 		return ERR_PTR(status);
 
@@ -1840,13 +825,13 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
 	 * Was a sec= authflavor specified in the options? First, verify
 	 * whether the server supports it, and then just try to use it if so.
 	 */
-	if (args->auth_info.flavor_len > 0) {
-		status = nfs_verify_authflavors(args, authlist, authlist_len);
+	if (ctx->auth_info.flavor_len > 0) {
+		status = nfs_verify_authflavors(ctx, authlist, authlist_len);
 		dfprintk(MOUNT, "NFS: using auth flavor %u\n",
-			 args->selected_flavor);
+			 ctx->selected_flavor);
 		if (status)
 			return ERR_PTR(status);
-		return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+		return ctx->nfs_mod->rpc_ops->create_server(fc);
 	}
 
 	/*
@@ -1872,8 +857,8 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
 			/* Fallthrough */
 		}
 		dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor);
-		args->selected_flavor = flavor;
-		server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+		ctx->selected_flavor = flavor;
+		server = ctx->nfs_mod->rpc_ops->create_server(fc);
 		if (!IS_ERR(server))
 			return server;
 	}
@@ -1888,348 +873,23 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
 
 	/* Last chance! Try AUTH_UNIX */
 	dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX);
-	args->selected_flavor = RPC_AUTH_UNIX;
-	return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+	ctx->selected_flavor = RPC_AUTH_UNIX;
+	return ctx->nfs_mod->rpc_ops->create_server(fc);
 }
 
-struct dentry *nfs_try_mount(int flags, const char *dev_name,
-			     struct nfs_mount_info *mount_info,
-			     struct nfs_subversion *nfs_mod)
+int nfs_try_get_tree(struct fs_context *fc)
 {
-	struct nfs_server *server;
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
 
-	if (mount_info->parsed->need_mount)
-		server = nfs_try_mount_request(mount_info, nfs_mod);
+	if (ctx->need_mount)
+		ctx->server = nfs_try_mount_request(fc);
 	else
-		server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
-
-	if (IS_ERR(server))
-		return ERR_CAST(server);
-
-	return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod);
-}
-EXPORT_SYMBOL_GPL(nfs_try_mount);
-
-/*
- * Split "dev_name" into "hostname:export_path".
- *
- * The leftmost colon demarks the split between the server's hostname
- * and the export path.  If the hostname starts with a left square
- * bracket, then it may contain colons.
- *
- * Note: caller frees hostname and export path, even on error.
- */
-static int nfs_parse_devname(const char *dev_name,
-			     char **hostname, size_t maxnamlen,
-			     char **export_path, size_t maxpathlen)
-{
-	size_t len;
-	char *end;
-
-	if (unlikely(!dev_name || !*dev_name)) {
-		dfprintk(MOUNT, "NFS: device name not specified\n");
-		return -EINVAL;
-	}
-
-	/* Is the host name protected with square brakcets? */
-	if (*dev_name == '[') {
-		end = strchr(++dev_name, ']');
-		if (end == NULL || end[1] != ':')
-			goto out_bad_devname;
-
-		len = end - dev_name;
-		end++;
-	} else {
-		char *comma;
-
-		end = strchr(dev_name, ':');
-		if (end == NULL)
-			goto out_bad_devname;
-		len = end - dev_name;
-
-		/* kill possible hostname list: not supported */
-		comma = strchr(dev_name, ',');
-		if (comma != NULL && comma < end)
-			len = comma - dev_name;
-	}
-
-	if (len > maxnamlen)
-		goto out_hostname;
-
-	/* N.B. caller will free nfs_server.hostname in all cases */
-	*hostname = kstrndup(dev_name, len, GFP_KERNEL);
-	if (*hostname == NULL)
-		goto out_nomem;
-	len = strlen(++end);
-	if (len > maxpathlen)
-		goto out_path;
-	*export_path = kstrndup(end, len, GFP_KERNEL);
-	if (!*export_path)
-		goto out_nomem;
-
-	dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
-	return 0;
-
-out_bad_devname:
-	dfprintk(MOUNT, "NFS: device name not in host:path format\n");
-	return -EINVAL;
+		ctx->server = ctx->nfs_mod->rpc_ops->create_server(fc);
 
-out_nomem:
-	dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
-	return -ENOMEM;
-
-out_hostname:
-	dfprintk(MOUNT, "NFS: server hostname too long\n");
-	return -ENAMETOOLONG;
-
-out_path:
-	dfprintk(MOUNT, "NFS: export pathname too long\n");
-	return -ENAMETOOLONG;
+	return nfs_get_tree_common(fc);
 }
+EXPORT_SYMBOL_GPL(nfs_try_get_tree);
 
-/*
- * Validate the NFS2/NFS3 mount data
- * - fills in the mount root filehandle
- *
- * For option strings, user space handles the following behaviors:
- *
- * + DNS: mapping server host name to IP address ("addr=" option)
- *
- * + failure mode: how to behave if a mount request can't be handled
- *   immediately ("fg/bg" option)
- *
- * + retry: how often to retry a mount request ("retry=" option)
- *
- * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
- *   mountproto=tcp after mountproto=udp, and so on
- */
-static int nfs23_validate_mount_data(void *options,
-				     struct nfs_parsed_mount_data *args,
-				     struct nfs_fh *mntfh,
-				     const char *dev_name)
-{
-	struct nfs_mount_data *data = (struct nfs_mount_data *)options;
-	struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
-	int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
-
-	if (data == NULL)
-		goto out_no_data;
-
-	args->version = NFS_DEFAULT_VERSION;
-	switch (data->version) {
-	case 1:
-		data->namlen = 0; /* fall through */
-	case 2:
-		data->bsize = 0; /* fall through */
-	case 3:
-		if (data->flags & NFS_MOUNT_VER3)
-			goto out_no_v3;
-		data->root.size = NFS2_FHSIZE;
-		memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
-		/* Turn off security negotiation */
-		extra_flags |= NFS_MOUNT_SECFLAVOUR;
-		/* fall through */
-	case 4:
-		if (data->flags & NFS_MOUNT_SECFLAVOUR)
-			goto out_no_sec;
-		/* fall through */
-	case 5:
-		memset(data->context, 0, sizeof(data->context));
-		/* fall through */
-	case 6:
-		if (data->flags & NFS_MOUNT_VER3) {
-			if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
-				goto out_invalid_fh;
-			mntfh->size = data->root.size;
-			args->version = 3;
-		} else {
-			mntfh->size = NFS2_FHSIZE;
-			args->version = 2;
-		}
-
-
-		memcpy(mntfh->data, data->root.data, mntfh->size);
-		if (mntfh->size < sizeof(mntfh->data))
-			memset(mntfh->data + mntfh->size, 0,
-			       sizeof(mntfh->data) - mntfh->size);
-
-		/*
-		 * Translate to nfs_parsed_mount_data, which nfs_fill_super
-		 * can deal with.
-		 */
-		args->flags		= data->flags & NFS_MOUNT_FLAGMASK;
-		args->flags		|= extra_flags;
-		args->rsize		= data->rsize;
-		args->wsize		= data->wsize;
-		args->timeo		= data->timeo;
-		args->retrans		= data->retrans;
-		args->acregmin		= data->acregmin;
-		args->acregmax		= data->acregmax;
-		args->acdirmin		= data->acdirmin;
-		args->acdirmax		= data->acdirmax;
-		args->need_mount	= false;
-
-		memcpy(sap, &data->addr, sizeof(data->addr));
-		args->nfs_server.addrlen = sizeof(data->addr);
-		args->nfs_server.port = ntohs(data->addr.sin_port);
-		if (sap->sa_family != AF_INET ||
-		    !nfs_verify_server_address(sap))
-			goto out_no_address;
-
-		if (!(data->flags & NFS_MOUNT_TCP))
-			args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
-		/* N.B. caller will free nfs_server.hostname in all cases */
-		args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
-		args->namlen		= data->namlen;
-		args->bsize		= data->bsize;
-
-		if (data->flags & NFS_MOUNT_SECFLAVOUR)
-			args->selected_flavor = data->pseudoflavor;
-		else
-			args->selected_flavor = RPC_AUTH_UNIX;
-		if (!args->nfs_server.hostname)
-			goto out_nomem;
-
-		if (!(data->flags & NFS_MOUNT_NONLM))
-			args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
-					 NFS_MOUNT_LOCAL_FCNTL);
-		else
-			args->flags |= (NFS_MOUNT_LOCAL_FLOCK|
-					NFS_MOUNT_LOCAL_FCNTL);
-		/*
-		 * The legacy version 6 binary mount data from userspace has a
-		 * field used only to transport selinux information into the
-		 * the kernel.  To continue to support that functionality we
-		 * have a touch of selinux knowledge here in the NFS code. The
-		 * userspace code converted context=blah to just blah so we are
-		 * converting back to the full string selinux understands.
-		 */
-		if (data->context[0]){
-#ifdef CONFIG_SECURITY_SELINUX
-			int rc;
-			data->context[NFS_MAX_CONTEXT_LEN] = '\0';
-			rc = security_add_mnt_opt("context", data->context,
-					strlen(data->context), &args->lsm_opts);
-			if (rc)
-				return rc;
-#else
-			return -EINVAL;
-#endif
-		}
-
-		break;
-	default:
-		return NFS_TEXT_DATA;
-	}
-
-	return 0;
-
-out_no_data:
-	dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
-	return -EINVAL;
-
-out_no_v3:
-	dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
-		 data->version);
-	return -EINVAL;
-
-out_no_sec:
-	dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
-	return -EINVAL;
-
-out_nomem:
-	dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
-	return -ENOMEM;
-
-out_no_address:
-	dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
-	return -EINVAL;
-
-out_invalid_fh:
-	dfprintk(MOUNT, "NFS: invalid root filehandle\n");
-	return -EINVAL;
-}
-
-#if IS_ENABLED(CONFIG_NFS_V4)
-static int nfs_validate_mount_data(struct file_system_type *fs_type,
-				   void *options,
-				   struct nfs_parsed_mount_data *args,
-				   struct nfs_fh *mntfh,
-				   const char *dev_name)
-{
-	if (fs_type == &nfs_fs_type)
-		return nfs23_validate_mount_data(options, args, mntfh, dev_name);
-	return nfs4_validate_mount_data(options, args, dev_name);
-}
-#else
-static int nfs_validate_mount_data(struct file_system_type *fs_type,
-				   void *options,
-				   struct nfs_parsed_mount_data *args,
-				   struct nfs_fh *mntfh,
-				   const char *dev_name)
-{
-	return nfs23_validate_mount_data(options, args, mntfh, dev_name);
-}
-#endif
-
-static int nfs_validate_text_mount_data(void *options,
-					struct nfs_parsed_mount_data *args,
-					const char *dev_name)
-{
-	int port = 0;
-	int max_namelen = PAGE_SIZE;
-	int max_pathlen = NFS_MAXPATHLEN;
-	struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
-
-	if (nfs_parse_mount_options((char *)options, args) == 0)
-		return -EINVAL;
-
-	if (!nfs_verify_server_address(sap))
-		goto out_no_address;
-
-	if (args->version == 4) {
-#if IS_ENABLED(CONFIG_NFS_V4)
-		if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
-			port = NFS_RDMA_PORT;
-		else
-			port = NFS_PORT;
-		max_namelen = NFS4_MAXNAMLEN;
-		max_pathlen = NFS4_MAXPATHLEN;
-		nfs_validate_transport_protocol(args);
-		if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP)
-			goto out_invalid_transport_udp;
-		nfs4_validate_mount_flags(args);
-#else
-		goto out_v4_not_compiled;
-#endif /* CONFIG_NFS_V4 */
-	} else {
-		nfs_set_mount_transport_protocol(args);
-		if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
-			port = NFS_RDMA_PORT;
-	}
-
-	nfs_set_port(sap, &args->nfs_server.port, port);
-
-	return nfs_parse_devname(dev_name,
-				   &args->nfs_server.hostname,
-				   max_namelen,
-				   &args->nfs_server.export_path,
-				   max_pathlen);
-
-#if !IS_ENABLED(CONFIG_NFS_V4)
-out_v4_not_compiled:
-	dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
-	return -EPROTONOSUPPORT;
-#else
-out_invalid_transport_udp:
-	dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n");
-	return -EINVAL;
-#endif /* !CONFIG_NFS_V4 */
-
-out_no_address:
-	dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
-	return -EINVAL;
-}
 
 #define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
 		| NFS_MOUNT_SECURE \
@@ -2246,39 +906,35 @@ out_no_address:
 
 static int
 nfs_compare_remount_data(struct nfs_server *nfss,
-			 struct nfs_parsed_mount_data *data)
-{
-	if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
-	    data->rsize != nfss->rsize ||
-	    data->wsize != nfss->wsize ||
-	    data->version != nfss->nfs_client->rpc_ops->version ||
-	    data->minorversion != nfss->nfs_client->cl_minorversion ||
-	    data->retrans != nfss->client->cl_timeout->to_retries ||
-	    !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) ||
-	    data->acregmin != nfss->acregmin / HZ ||
-	    data->acregmax != nfss->acregmax / HZ ||
-	    data->acdirmin != nfss->acdirmin / HZ ||
-	    data->acdirmax != nfss->acdirmax / HZ ||
-	    data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) ||
-	    (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) ||
-	    data->nfs_server.port != nfss->port ||
-	    data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen ||
-	    !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address,
+			 struct nfs_fs_context *ctx)
+{
+	if ((ctx->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
+	    ctx->rsize != nfss->rsize ||
+	    ctx->wsize != nfss->wsize ||
+	    ctx->version != nfss->nfs_client->rpc_ops->version ||
+	    ctx->minorversion != nfss->nfs_client->cl_minorversion ||
+	    ctx->retrans != nfss->client->cl_timeout->to_retries ||
+	    !nfs_auth_info_match(&ctx->auth_info, nfss->client->cl_auth->au_flavor) ||
+	    ctx->acregmin != nfss->acregmin / HZ ||
+	    ctx->acregmax != nfss->acregmax / HZ ||
+	    ctx->acdirmin != nfss->acdirmin / HZ ||
+	    ctx->acdirmax != nfss->acdirmax / HZ ||
+	    ctx->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) ||
+	    (ctx->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) ||
+	    ctx->nfs_server.port != nfss->port ||
+	    ctx->nfs_server.addrlen != nfss->nfs_client->cl_addrlen ||
+	    !rpc_cmp_addr((struct sockaddr *)&ctx->nfs_server.address,
 			  (struct sockaddr *)&nfss->nfs_client->cl_addr))
 		return -EINVAL;
 
 	return 0;
 }
 
-int
-nfs_remount(struct super_block *sb, int *flags, char *raw_data)
+int nfs_reconfigure(struct fs_context *fc)
 {
-	int error;
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
+	struct super_block *sb = fc->root->d_sb;
 	struct nfs_server *nfss = sb->s_fs_info;
-	struct nfs_parsed_mount_data *data;
-	struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data;
-	struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data;
-	u32 nfsvers = nfss->nfs_client->rpc_ops->version;
 
 	sync_filesystem(sb);
 
@@ -2288,92 +944,38 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	 * ones were explicitly specified. Fall back to legacy behavior and
 	 * just return success.
 	 */
-	if ((nfsvers == 4 && (!options4 || options4->version == 1)) ||
-	    (nfsvers <= 3 && (!options || (options->version >= 1 &&
-					   options->version <= 6))))
+	if (ctx->skip_reconfig_option_check)
 		return 0;
 
-	data = nfs_alloc_parsed_mount_data();
-	if (data == NULL)
-		return -ENOMEM;
-
-	/* fill out struct with values from existing mount */
-	data->flags = nfss->flags;
-	data->rsize = nfss->rsize;
-	data->wsize = nfss->wsize;
-	data->retrans = nfss->client->cl_timeout->to_retries;
-	data->selected_flavor = nfss->client->cl_auth->au_flavor;
-	data->acregmin = nfss->acregmin / HZ;
-	data->acregmax = nfss->acregmax / HZ;
-	data->acdirmin = nfss->acdirmin / HZ;
-	data->acdirmax = nfss->acdirmax / HZ;
-	data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ;
-	data->nfs_server.port = nfss->port;
-	data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
-	data->version = nfsvers;
-	data->minorversion = nfss->nfs_client->cl_minorversion;
-	data->net = current->nsproxy->net_ns;
-	memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
-		data->nfs_server.addrlen);
-
-	/* overwrite those values with any that were specified */
-	error = -EINVAL;
-	if (!nfs_parse_mount_options((char *)options, data))
-		goto out;
-
 	/*
 	 * noac is a special case. It implies -o sync, but that's not
-	 * necessarily reflected in the mtab options. do_remount_sb
+	 * necessarily reflected in the mtab options. reconfigure_super
 	 * will clear SB_SYNCHRONOUS if -o sync wasn't specified in the
 	 * remount options, so we have to explicitly reset it.
 	 */
-	if (data->flags & NFS_MOUNT_NOAC)
-		*flags |= SB_SYNCHRONOUS;
+	if (ctx->flags & NFS_MOUNT_NOAC) {
+		fc->sb_flags |= SB_SYNCHRONOUS;
+		fc->sb_flags_mask |= SB_SYNCHRONOUS;
+	}
 
 	/* compare new mount options with old ones */
-	error = nfs_compare_remount_data(nfss, data);
-	if (!error)
-		error = security_sb_remount(sb, data->lsm_opts);
-out:
-	nfs_free_parsed_mount_data(data);
-	return error;
-}
-EXPORT_SYMBOL_GPL(nfs_remount);
-
-/*
- * Initialise the common bits of the superblock
- */
-static void nfs_initialise_sb(struct super_block *sb)
-{
-	struct nfs_server *server = NFS_SB(sb);
-
-	sb->s_magic = NFS_SUPER_MAGIC;
-
-	/* We probably want something more informative here */
-	snprintf(sb->s_id, sizeof(sb->s_id),
-		 "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev));
-
-	if (sb->s_blocksize == 0)
-		sb->s_blocksize = nfs_block_bits(server->wsize,
-						 &sb->s_blocksize_bits);
-
-	nfs_super_set_maxbytes(sb, server->maxfilesize);
+	return nfs_compare_remount_data(nfss, ctx);
 }
+EXPORT_SYMBOL_GPL(nfs_reconfigure);
 
 /*
- * Finish setting up an NFS2/3 superblock
+ * Finish setting up an NFS superblock
  */
-void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
+static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
 {
-	struct nfs_parsed_mount_data *data = mount_info->parsed;
 	struct nfs_server *server = NFS_SB(sb);
 
 	sb->s_blocksize_bits = 0;
 	sb->s_blocksize = 0;
 	sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr;
 	sb->s_op = server->nfs_client->cl_nfs_mod->sops;
-	if (data && data->bsize)
-		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
+	if (ctx && ctx->bsize)
+		sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits);
 
 	if (server->nfs_client->rpc_ops->version != 2) {
 		/* The VFS shouldn't apply the umask to mode bits. We will do
@@ -2393,53 +995,27 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
 		sb->s_time_max = S64_MAX;
 	}
 
- 	nfs_initialise_sb(sb);
-}
-EXPORT_SYMBOL_GPL(nfs_fill_super);
-
-/*
- * Finish setting up a cloned NFS2/3/4 superblock
- */
-static void nfs_clone_super(struct super_block *sb,
-			    struct nfs_mount_info *mount_info)
-{
-	const struct super_block *old_sb = mount_info->cloned->sb;
-	struct nfs_server *server = NFS_SB(sb);
-
-	sb->s_blocksize_bits = old_sb->s_blocksize_bits;
-	sb->s_blocksize = old_sb->s_blocksize;
-	sb->s_maxbytes = old_sb->s_maxbytes;
-	sb->s_xattr = old_sb->s_xattr;
-	sb->s_op = old_sb->s_op;
-	sb->s_export_op = old_sb->s_export_op;
+	sb->s_magic = NFS_SUPER_MAGIC;
 
-	if (server->nfs_client->rpc_ops->version != 2) {
-		/* The VFS shouldn't apply the umask to mode bits. We will do
-		 * so ourselves when necessary.
-		 */
-		sb->s_flags |= SB_POSIXACL;
-		sb->s_time_gran = 1;
-	} else
-		sb->s_time_gran = 1000;
+	/* We probably want something more informative here */
+	snprintf(sb->s_id, sizeof(sb->s_id),
+		 "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev));
 
-	if (server->nfs_client->rpc_ops->version != 4) {
-		sb->s_time_min = 0;
-		sb->s_time_max = U32_MAX;
-	} else {
-		sb->s_time_min = S64_MIN;
-		sb->s_time_max = S64_MAX;
-	}
+	if (sb->s_blocksize == 0)
+		sb->s_blocksize = nfs_block_bits(server->wsize,
+						 &sb->s_blocksize_bits);
 
- 	nfs_initialise_sb(sb);
+	nfs_super_set_maxbytes(sb, server->maxfilesize);
 }
 
-static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
+static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b,
+				     const struct fs_context *fc)
 {
 	const struct nfs_server *a = s->s_fs_info;
 	const struct rpc_clnt *clnt_a = a->client;
 	const struct rpc_clnt *clnt_b = b->client;
 
-	if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
+	if ((s->s_flags & NFS_SB_MASK) != (fc->sb_flags & NFS_SB_MASK))
 		goto Ebusy;
 	if (a->nfs_client != b->nfs_client)
 		goto Ebusy;
@@ -2464,19 +1040,11 @@ Ebusy:
 	return 0;
 }
 
-struct nfs_sb_mountdata {
-	struct nfs_server *server;
-	int mntflags;
-};
-
-static int nfs_set_super(struct super_block *s, void *data)
+static int nfs_set_super(struct super_block *s, struct fs_context *fc)
 {
-	struct nfs_sb_mountdata *sb_mntdata = data;
-	struct nfs_server *server = sb_mntdata->server;
+	struct nfs_server *server = fc->s_fs_info;
 	int ret;
 
-	s->s_flags = sb_mntdata->mntflags;
-	s->s_fs_info = server;
 	s->s_d_op = server->nfs_client->rpc_ops->dentry_ops;
 	ret = set_anon_super(s, server);
 	if (ret == 0)
@@ -2541,11 +1109,9 @@ static int nfs_compare_userns(const struct nfs_server *old,
 	return 1;
 }
 
-static int nfs_compare_super(struct super_block *sb, void *data)
+static int nfs_compare_super(struct super_block *sb, struct fs_context *fc)
 {
-	struct nfs_sb_mountdata *sb_mntdata = data;
-	struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
-	int mntflags = sb_mntdata->mntflags;
+	struct nfs_server *server = fc->s_fs_info, *old = NFS_SB(sb);
 
 	if (!nfs_compare_super_address(old, server))
 		return 0;
@@ -2556,13 +1122,12 @@ static int nfs_compare_super(struct super_block *sb, void *data)
 		return 0;
 	if (!nfs_compare_userns(old, server))
 		return 0;
-	return nfs_compare_mount_options(sb, server, mntflags);
+	return nfs_compare_mount_options(sb, server, fc);
 }
 
 #ifdef CONFIG_NFS_FSCACHE
 static void nfs_get_cache_cookie(struct super_block *sb,
-				 struct nfs_parsed_mount_data *parsed,
-				 struct nfs_clone_mount *cloned)
+				 struct nfs_fs_context *ctx)
 {
 	struct nfs_server *nfss = NFS_SB(sb);
 	char *uniq = NULL;
@@ -2571,80 +1136,36 @@ static void nfs_get_cache_cookie(struct super_block *sb,
 	nfss->fscache_key = NULL;
 	nfss->fscache = NULL;
 
-	if (parsed) {
-		if (!(parsed->options & NFS_OPTION_FSCACHE))
-			return;
-		if (parsed->fscache_uniq) {
-			uniq = parsed->fscache_uniq;
-			ulen = strlen(parsed->fscache_uniq);
-		}
-	} else if (cloned) {
-		struct nfs_server *mnt_s = NFS_SB(cloned->sb);
+	if (!ctx)
+		return;
+
+	if (ctx->clone_data.sb) {
+		struct nfs_server *mnt_s = NFS_SB(ctx->clone_data.sb);
 		if (!(mnt_s->options & NFS_OPTION_FSCACHE))
 			return;
 		if (mnt_s->fscache_key) {
 			uniq = mnt_s->fscache_key->key.uniquifier;
 			ulen = mnt_s->fscache_key->key.uniq_len;
 		}
-	} else
+	} else {
+		if (!(ctx->options & NFS_OPTION_FSCACHE))
+			return;
+		if (ctx->fscache_uniq) {
+			uniq = ctx->fscache_uniq;
+			ulen = strlen(ctx->fscache_uniq);
+		}
 		return;
+	}
 
 	nfs_fscache_get_super_cookie(sb, uniq, ulen);
 }
 #else
 static void nfs_get_cache_cookie(struct super_block *sb,
-				 struct nfs_parsed_mount_data *parsed,
-				 struct nfs_clone_mount *cloned)
+				 struct nfs_fs_context *ctx)
 {
 }
 #endif
 
-int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
-			struct nfs_mount_info *mount_info)
-{
-	int error;
-	unsigned long kflags = 0, kflags_out = 0;
-	if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
-		kflags |= SECURITY_LSM_NATIVE_LABELS;
-
-	error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts,
-						kflags, &kflags_out);
-	if (error)
-		goto err;
-
-	if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
-		!(kflags_out & SECURITY_LSM_NATIVE_LABELS))
-		NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
-err:
-	return error;
-}
-EXPORT_SYMBOL_GPL(nfs_set_sb_security);
-
-int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
-			  struct nfs_mount_info *mount_info)
-{
-	int error;
-	unsigned long kflags = 0, kflags_out = 0;
-
-	/* clone any lsm security options from the parent to the new sb */
-	if (d_inode(mntroot)->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
-		return -ESTALE;
-
-	if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
-		kflags |= SECURITY_LSM_NATIVE_LABELS;
-
-	error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags,
-			&kflags_out);
-	if (error)
-		return error;
-
-	if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
-		!(kflags_out & SECURITY_LSM_NATIVE_LABELS))
-		NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nfs_clone_sb_security);
-
 static void nfs_set_readahead(struct backing_dev_info *bdi,
 			      unsigned long iomax_pages)
 {
@@ -2652,35 +1173,40 @@ static void nfs_set_readahead(struct backing_dev_info *bdi,
 	bdi->io_pages = iomax_pages;
 }
 
-struct dentry *nfs_fs_mount_common(struct nfs_server *server,
-				   int flags, const char *dev_name,
-				   struct nfs_mount_info *mount_info,
-				   struct nfs_subversion *nfs_mod)
+int nfs_get_tree_common(struct fs_context *fc)
 {
+	struct nfs_fs_context *ctx = nfs_fc2context(fc);
 	struct super_block *s;
-	struct dentry *mntroot = ERR_PTR(-ENOMEM);
-	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
-	struct nfs_sb_mountdata sb_mntdata = {
-		.mntflags = flags,
-		.server = server,
-	};
+	int (*compare_super)(struct super_block *, struct fs_context *) = nfs_compare_super;
+	struct nfs_server *server = ctx->server;
+	unsigned long kflags = 0, kflags_out = 0;
 	int error;
 
+	ctx->server = NULL;
+	if (IS_ERR(server))
+		return PTR_ERR(server);
+
 	if (server->flags & NFS_MOUNT_UNSHARED)
 		compare_super = NULL;
 
 	/* -o noac implies -o sync */
 	if (server->flags & NFS_MOUNT_NOAC)
-		sb_mntdata.mntflags |= SB_SYNCHRONOUS;
+		fc->sb_flags |= SB_SYNCHRONOUS;
 
-	if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL)
-		if (mount_info->cloned->sb->s_flags & SB_SYNCHRONOUS)
-			sb_mntdata.mntflags |= SB_SYNCHRONOUS;
+	if (ctx->clone_data.sb)
+		if (ctx->clone_data.sb->s_flags & SB_SYNCHRONOUS)
+			fc->sb_flags |= SB_SYNCHRONOUS;
+
+	if (server->caps & NFS_CAP_SECURITY_LABEL)
+		fc->lsm_flags |= SECURITY_LSM_NATIVE_LABELS;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata);
+	fc->s_fs_info = server;
+	s = sget_fc(fc, compare_super, nfs_set_super);
+	fc->s_fs_info = NULL;
 	if (IS_ERR(s)) {
-		mntroot = ERR_CAST(s);
+		error = PTR_ERR(s);
+		nfs_errorf(fc, "NFS: Couldn't get superblock");
 		goto out_err_nosb;
 	}
 
@@ -2690,88 +1216,66 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
 	} else {
 		error = super_setup_bdi_name(s, "%u:%u", MAJOR(server->s_dev),
 					     MINOR(server->s_dev));
-		if (error) {
-			mntroot = ERR_PTR(error);
+		if (error)
 			goto error_splat_super;
-		}
 		nfs_set_readahead(s->s_bdi, server->rpages);
 		server->super = s;
 	}
 
 	if (!s->s_root) {
+		unsigned bsize = ctx->clone_data.inherited_bsize;
 		/* initial superblock/root creation */
-		mount_info->fill_super(s, mount_info);
-		nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
-		if (!(server->flags & NFS_MOUNT_UNSHARED))
-			s->s_iflags |= SB_I_MULTIROOT;
+		nfs_fill_super(s, ctx);
+		if (bsize) {
+			s->s_blocksize_bits = bsize;
+			s->s_blocksize = 1U << bsize;
+		}
+		nfs_get_cache_cookie(s, ctx);
 	}
 
-	mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
-	if (IS_ERR(mntroot))
+	error = nfs_get_root(s, fc);
+	if (error < 0) {
+		nfs_errorf(fc, "NFS: Couldn't get root dentry");
 		goto error_splat_super;
+	}
 
-	error = mount_info->set_security(s, mntroot, mount_info);
+	if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
+		kflags |= SECURITY_LSM_NATIVE_LABELS;
+	if (ctx->clone_data.sb) {
+		if (d_inode(fc->root)->i_fop != &nfs_dir_operations) {
+			error = -ESTALE;
+			goto error_splat_root;
+		}
+		/* clone any lsm security options from the parent to the new sb */
+		error = security_sb_clone_mnt_opts(ctx->clone_data.sb, s, kflags,
+				&kflags_out);
+	} else {
+		error = security_sb_set_mnt_opts(s, fc->security,
+							kflags, &kflags_out);
+	}
 	if (error)
 		goto error_splat_root;
+	if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
+		!(kflags_out & SECURITY_LSM_NATIVE_LABELS))
+		NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
 
 	s->s_flags |= SB_ACTIVE;
+	error = 0;
 
 out:
-	return mntroot;
+	return error;
 
 out_err_nosb:
 	nfs_free_server(server);
 	goto out;
 
 error_splat_root:
-	dput(mntroot);
-	mntroot = ERR_PTR(error);
+	dput(fc->root);
+	fc->root = NULL;
 error_splat_super:
 	deactivate_locked_super(s);
 	goto out;
 }
-EXPORT_SYMBOL_GPL(nfs_fs_mount_common);
-
-struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data)
-{
-	struct nfs_mount_info mount_info = {
-		.fill_super = nfs_fill_super,
-		.set_security = nfs_set_sb_security,
-	};
-	struct dentry *mntroot = ERR_PTR(-ENOMEM);
-	struct nfs_subversion *nfs_mod;
-	int error;
-
-	mount_info.parsed = nfs_alloc_parsed_mount_data();
-	mount_info.mntfh = nfs_alloc_fhandle();
-	if (mount_info.parsed == NULL || mount_info.mntfh == NULL)
-		goto out;
-
-	/* Validate the mount data */
-	error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name);
-	if (error == NFS_TEXT_DATA)
-		error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name);
-	if (error < 0) {
-		mntroot = ERR_PTR(error);
-		goto out;
-	}
-
-	nfs_mod = get_nfs_version(mount_info.parsed->version);
-	if (IS_ERR(nfs_mod)) {
-		mntroot = ERR_CAST(nfs_mod);
-		goto out;
-	}
-
-	mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod);
-
-	put_nfs_version(nfs_mod);
-out:
-	nfs_free_parsed_mount_data(mount_info.parsed);
-	nfs_free_fhandle(mount_info.mntfh);
-	return mntroot;
-}
-EXPORT_SYMBOL_GPL(nfs_fs_mount);
 
 /*
  * Destroy an NFS2/3 superblock
@@ -2790,150 +1294,8 @@ void nfs_kill_super(struct super_block *s)
 }
 EXPORT_SYMBOL_GPL(nfs_kill_super);
 
-/*
- * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
- */
-static struct dentry *
-nfs_xdev_mount(struct file_system_type *fs_type, int flags,
-		const char *dev_name, void *raw_data)
-{
-	struct nfs_clone_mount *data = raw_data;
-	struct nfs_mount_info mount_info = {
-		.fill_super = nfs_clone_super,
-		.set_security = nfs_clone_sb_security,
-		.cloned = data,
-	};
-	struct nfs_server *server;
-	struct dentry *mntroot = ERR_PTR(-ENOMEM);
-	struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
-
-	dprintk("--> nfs_xdev_mount()\n");
-
-	mount_info.mntfh = mount_info.cloned->fh;
-
-	/* create a new volume representation */
-	server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
-
-	if (IS_ERR(server))
-		mntroot = ERR_CAST(server);
-	else
-		mntroot = nfs_fs_mount_common(server, flags,
-				dev_name, &mount_info, nfs_mod);
-
-	dprintk("<-- nfs_xdev_mount() = %ld\n",
-			IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
-	return mntroot;
-}
-
 #if IS_ENABLED(CONFIG_NFS_V4)
 
-static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
-{
-	args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3|
-			 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
-}
-
-/*
- * Validate NFSv4 mount options
- */
-static int nfs4_validate_mount_data(void *options,
-				    struct nfs_parsed_mount_data *args,
-				    const char *dev_name)
-{
-	struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
-	struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
-	char *c;
-
-	if (data == NULL)
-		goto out_no_data;
-
-	args->version = 4;
-
-	switch (data->version) {
-	case 1:
-		if (data->host_addrlen > sizeof(args->nfs_server.address))
-			goto out_no_address;
-		if (data->host_addrlen == 0)
-			goto out_no_address;
-		args->nfs_server.addrlen = data->host_addrlen;
-		if (copy_from_user(sap, data->host_addr, data->host_addrlen))
-			return -EFAULT;
-		if (!nfs_verify_server_address(sap))
-			goto out_no_address;
-		args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
-
-		if (data->auth_flavourlen) {
-			rpc_authflavor_t pseudoflavor;
-			if (data->auth_flavourlen > 1)
-				goto out_inval_auth;
-			if (copy_from_user(&pseudoflavor,
-					   data->auth_flavours,
-					   sizeof(pseudoflavor)))
-				return -EFAULT;
-			args->selected_flavor = pseudoflavor;
-		} else
-			args->selected_flavor = RPC_AUTH_UNIX;
-
-		c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
-		if (IS_ERR(c))
-			return PTR_ERR(c);
-		args->nfs_server.hostname = c;
-
-		c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
-		if (IS_ERR(c))
-			return PTR_ERR(c);
-		args->nfs_server.export_path = c;
-		dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
-
-		c = strndup_user(data->client_addr.data, 16);
-		if (IS_ERR(c))
-			return PTR_ERR(c);
-		args->client_address = c;
-
-		/*
-		 * Translate to nfs_parsed_mount_data, which nfs4_fill_super
-		 * can deal with.
-		 */
-
-		args->flags	= data->flags & NFS4_MOUNT_FLAGMASK;
-		args->rsize	= data->rsize;
-		args->wsize	= data->wsize;
-		args->timeo	= data->timeo;
-		args->retrans	= data->retrans;
-		args->acregmin	= data->acregmin;
-		args->acregmax	= data->acregmax;
-		args->acdirmin	= data->acdirmin;
-		args->acdirmax	= data->acdirmax;
-		args->nfs_server.protocol = data->proto;
-		nfs_validate_transport_protocol(args);
-		if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP)
-			goto out_invalid_transport_udp;
-
-		break;
-	default:
-		return NFS_TEXT_DATA;
-	}
-
-	return 0;
-
-out_no_data:
-	dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
-	return -EINVAL;
-
-out_inval_auth:
-	dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
-		 data->auth_flavourlen);
-	return -EINVAL;
-
-out_no_address:
-	dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
-	return -EINVAL;
-
-out_invalid_transport_udp:
-	dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n");
-	return -EINVAL;
-}
-
 /*
  * NFS v4 module parameters need to stay in the
  * NFS client for backwards compatibility
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 52cab65f91cf..c478b772cc49 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -243,13 +243,24 @@ out:
 /* A writeback failed: mark the page as bad, and invalidate the page cache */
 static void nfs_set_pageerror(struct address_space *mapping)
 {
+	struct inode *inode = mapping->host;
+
 	nfs_zap_mapping(mapping->host, mapping);
+	/* Force file size revalidation */
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED |
+					NFS_INO_REVAL_PAGECACHE |
+					NFS_INO_INVALID_SIZE;
+	spin_unlock(&inode->i_lock);
 }
 
 static void nfs_mapping_set_error(struct page *page, int error)
 {
+	struct address_space *mapping = page_file_mapping(page);
+
 	SetPageError(page);
-	mapping_set_error(page_file_mapping(page), error);
+	mapping_set_error(mapping, error);
+	nfs_set_pageerror(mapping);
 }
 
 /*
@@ -592,7 +603,7 @@ release_request:
 
 static void nfs_write_error(struct nfs_page *req, int error)
 {
-	nfs_set_pageerror(page_file_mapping(req->wb_page));
+	trace_nfs_write_error(req, error);
 	nfs_mapping_set_error(req->wb_page, error);
 	nfs_inode_remove_request(req);
 	nfs_end_page_writeback(req);
@@ -998,7 +1009,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 		nfs_list_remove_request(req);
 		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
 		    (hdr->good_bytes < bytes)) {
-			nfs_set_pageerror(page_file_mapping(req->wb_page));
+			trace_nfs_comp_error(req, hdr->error);
 			nfs_mapping_set_error(req->wb_page, hdr->error);
 			goto remove_req;
 		}
@@ -1403,8 +1414,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
 
 	task_setup_data->priority = priority;
 	rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
-	trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes,
-				 hdr->args.stable);
+	trace_nfs_initiate_write(hdr);
 }
 
 /* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1568,8 +1578,7 @@ static int nfs_writeback_done(struct rpc_task *task,
 		return status;
 
 	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
-	trace_nfs_writeback_done(inode, task->tk_status,
-				 hdr->args.offset, hdr->res.verf);
+	trace_nfs_writeback_done(task, hdr);
 
 	if (hdr->res.verf->committed < hdr->args.stable &&
 	    task->tk_status >= 0) {
@@ -1649,6 +1658,8 @@ static void nfs_writeback_result(struct rpc_task *task,
 			 */
 			argp->stable = NFS_FILE_SYNC;
 		}
+		resp->count = 0;
+		resp->verf->committed = 0;
 		rpc_restart_call_prepare(task);
 	}
 }
@@ -1824,11 +1835,12 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 
 	/* Call the NFS version-specific code */
 	NFS_PROTO(data->inode)->commit_done(task, data);
-	trace_nfs_commit_done(data);
+	trace_nfs_commit_done(task, data);
 }
 
 static void nfs_commit_release_pages(struct nfs_commit_data *data)
 {
+	const struct nfs_writeverf *verf = data->res.verf;
 	struct nfs_page	*req;
 	int status = data->task.tk_status;
 	struct nfs_commit_info cinfo;
@@ -1847,6 +1859,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 			(long long)req_offset(req));
 		if (status < 0) {
 			if (req->wb_page) {
+				trace_nfs_commit_error(req, status);
 				nfs_mapping_set_error(req->wb_page, status);
 				nfs_inode_remove_request(req);
 			}
@@ -1856,7 +1869,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 
 		/* Okay, COMMIT succeeded, apparently. Check the verifier
 		 * returned by the server against all stored verfs. */
-		if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
+		if (verf->committed > NFS_UNSTABLE &&
+		    !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) {
 			/* We have a match */
 			if (req->wb_page)
 				nfs_inode_remove_request(req);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index f2f81561ebb6..f368f3215f88 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -134,6 +134,16 @@ config NFSD_FLEXFILELAYOUT
 
 	  If unsure, say N.
 
+config NFSD_V4_2_INTER_SSC
+	bool "NFSv4.2 inter server to server COPY"
+	depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2
+	help
+	  This option enables support for NFSv4.2 inter server to
+	  server copy where the destination server calls the NFSv4.2
+	  client to read the data to copy from the source server.
+
+	  If unsure, say N.
+
 config NFSD_V4_SECURITY_LABEL
 	bool "Provide Security Label support for NFSv4 server"
 	depends on NFSD_V4 && SECURITY
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 32a9bf22ac08..22e77ede9f14 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -27,7 +27,6 @@
 #define NFSD_FILE_HASH_SIZE                  (1 << NFSD_FILE_HASH_BITS)
 #define NFSD_LAUNDRETTE_DELAY		     (2 * HZ)
 
-#define NFSD_FILE_LRU_RESCAN		     (0)
 #define NFSD_FILE_SHUTDOWN		     (1)
 #define NFSD_FILE_LRU_THRESHOLD		     (4096UL)
 #define NFSD_FILE_LRU_LIMIT		     (NFSD_FILE_LRU_THRESHOLD << 2)
@@ -44,6 +43,17 @@ struct nfsd_fcache_bucket {
 
 static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
 
+struct nfsd_fcache_disposal {
+	struct list_head list;
+	struct work_struct work;
+	struct net *net;
+	spinlock_t lock;
+	struct list_head freeme;
+	struct rcu_head rcu;
+};
+
+static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
+
 static struct kmem_cache		*nfsd_file_slab;
 static struct kmem_cache		*nfsd_file_mark_slab;
 static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
@@ -52,32 +62,21 @@ static long				nfsd_file_lru_flags;
 static struct fsnotify_group		*nfsd_file_fsnotify_group;
 static atomic_long_t			nfsd_filecache_count;
 static struct delayed_work		nfsd_filecache_laundrette;
+static DEFINE_SPINLOCK(laundrette_lock);
+static LIST_HEAD(laundrettes);
 
-enum nfsd_file_laundrette_ctl {
-	NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
-	NFSD_FILE_LAUNDRETTE_MAY_FLUSH
-};
+static void nfsd_file_gc(void);
 
 static void
-nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
+nfsd_file_schedule_laundrette(void)
 {
 	long count = atomic_long_read(&nfsd_filecache_count);
 
 	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
 		return;
 
-	/* Be more aggressive about scanning if over the threshold */
-	if (count > NFSD_FILE_LRU_THRESHOLD)
-		mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
-	else
-		schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
-
-	if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
-		return;
-
-	/* ...and don't delay flushing if we're out of control */
-	if (count >= NFSD_FILE_LRU_LIMIT)
-		flush_delayed_work(&nfsd_filecache_laundrette);
+	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
+			NFSD_LAUNDRETTE_DELAY);
 }
 
 static void
@@ -101,7 +100,7 @@ nfsd_file_mark_free(struct fsnotify_mark *mark)
 static struct nfsd_file_mark *
 nfsd_file_mark_get(struct nfsd_file_mark *nfm)
 {
-	if (!atomic_inc_not_zero(&nfm->nfm_ref))
+	if (!refcount_inc_not_zero(&nfm->nfm_ref))
 		return NULL;
 	return nfm;
 }
@@ -109,8 +108,7 @@ nfsd_file_mark_get(struct nfsd_file_mark *nfm)
 static void
 nfsd_file_mark_put(struct nfsd_file_mark *nfm)
 {
-	if (atomic_dec_and_test(&nfm->nfm_ref)) {
-
+	if (refcount_dec_and_test(&nfm->nfm_ref)) {
 		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
 		fsnotify_put_mark(&nfm->nfm_mark);
 	}
@@ -133,9 +131,13 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
 						 struct nfsd_file_mark,
 						 nfm_mark));
 			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
-			fsnotify_put_mark(mark);
-			if (likely(nfm))
+			if (nfm) {
+				fsnotify_put_mark(mark);
 				break;
+			}
+			/* Avoid soft lockup race with nfsd_file_mark_put() */
+			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
+			fsnotify_put_mark(mark);
 		} else
 			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
 
@@ -145,7 +147,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
 			return NULL;
 		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
 		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
-		atomic_set(&new->nfm_ref, 1);
+		refcount_set(&new->nfm_ref, 1);
 
 		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
 
@@ -183,7 +185,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
 		nf->nf_flags = 0;
 		nf->nf_inode = inode;
 		nf->nf_hashval = hashval;
-		atomic_set(&nf->nf_ref, 1);
+		refcount_set(&nf->nf_ref, 1);
 		nf->nf_may = may & NFSD_FILE_MAY_MASK;
 		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
 			if (may & NFSD_MAY_WRITE)
@@ -192,6 +194,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
 				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
 		}
 		nf->nf_mark = NULL;
+		init_rwsem(&nf->nf_rwsem);
 		trace_nfsd_file_alloc(nf);
 	}
 	return nf;
@@ -238,13 +241,6 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
 	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
 }
 
-static bool
-nfsd_file_in_use(struct nfsd_file *nf)
-{
-	return nfsd_file_check_writeback(nf) ||
-			nfsd_file_check_write_error(nf);
-}
-
 static void
 nfsd_file_do_unhash(struct nfsd_file *nf)
 {
@@ -256,8 +252,6 @@ nfsd_file_do_unhash(struct nfsd_file *nf)
 		nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
 	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
 	hlist_del_rcu(&nf->nf_node);
-	if (!list_empty(&nf->nf_lru))
-		list_lru_del(&nfsd_file_lru, &nf->nf_lru);
 	atomic_long_dec(&nfsd_filecache_count);
 }
 
@@ -266,6 +260,8 @@ nfsd_file_unhash(struct nfsd_file *nf)
 {
 	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
 		nfsd_file_do_unhash(nf);
+		if (!list_empty(&nf->nf_lru))
+			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
 		return true;
 	}
 	return false;
@@ -283,42 +279,48 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp
 	if (!nfsd_file_unhash(nf))
 		return false;
 	/* keep final reference for nfsd_file_lru_dispose */
-	if (atomic_add_unless(&nf->nf_ref, -1, 1))
+	if (refcount_dec_not_one(&nf->nf_ref))
 		return true;
 
 	list_add(&nf->nf_lru, dispose);
 	return true;
 }
 
-static int
+static void
 nfsd_file_put_noref(struct nfsd_file *nf)
 {
-	int count;
 	trace_nfsd_file_put(nf);
 
-	count = atomic_dec_return(&nf->nf_ref);
-	if (!count) {
+	if (refcount_dec_and_test(&nf->nf_ref)) {
 		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
 		nfsd_file_free(nf);
 	}
-	return count;
 }
 
 void
 nfsd_file_put(struct nfsd_file *nf)
 {
-	bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
-	bool unused = !nfsd_file_in_use(nf);
+	bool is_hashed;
 
 	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
-	if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
-		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
+	if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
+		nfsd_file_put_noref(nf);
+		return;
+	}
+
+	filemap_flush(nf->nf_file->f_mapping);
+	is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
+	nfsd_file_put_noref(nf);
+	if (is_hashed)
+		nfsd_file_schedule_laundrette();
+	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
+		nfsd_file_gc();
 }
 
 struct nfsd_file *
 nfsd_file_get(struct nfsd_file *nf)
 {
-	if (likely(atomic_inc_not_zero(&nf->nf_ref)))
+	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
 		return nf;
 	return NULL;
 }
@@ -344,7 +346,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
 	while(!list_empty(dispose)) {
 		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
 		list_del(&nf->nf_lru);
-		if (!atomic_dec_and_test(&nf->nf_ref))
+		if (!refcount_dec_and_test(&nf->nf_ref))
 			continue;
 		if (nfsd_file_free(nf))
 			flush = true;
@@ -353,6 +355,58 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
 		flush_delayed_fput();
 }
 
+static void
+nfsd_file_list_remove_disposal(struct list_head *dst,
+		struct nfsd_fcache_disposal *l)
+{
+	spin_lock(&l->lock);
+	list_splice_init(&l->freeme, dst);
+	spin_unlock(&l->lock);
+}
+
+static void
+nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
+{
+	struct nfsd_fcache_disposal *l;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(l, &laundrettes, list) {
+		if (l->net == net) {
+			spin_lock(&l->lock);
+			list_splice_tail_init(files, &l->freeme);
+			spin_unlock(&l->lock);
+			queue_work(nfsd_filecache_wq, &l->work);
+			break;
+		}
+	}
+	rcu_read_unlock();
+}
+
+static void
+nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
+		struct net *net)
+{
+	struct nfsd_file *nf, *tmp;
+
+	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
+		if (nf->nf_net == net)
+			list_move_tail(&nf->nf_lru, dst);
+	}
+}
+
+static void
+nfsd_file_dispose_list_delayed(struct list_head *dispose)
+{
+	LIST_HEAD(list);
+	struct nfsd_file *nf;
+
+	while(!list_empty(dispose)) {
+		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
+		nfsd_file_list_add_disposal(&list, nf->nf_net);
+	}
+}
+
 /*
  * Note this can deadlock with nfsd_file_cache_purge.
  */
@@ -375,7 +429,7 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
 	 * counter. Here we check the counter and then test and clear the flag.
 	 * That order is deliberate to ensure that we can do this locklessly.
 	 */
-	if (atomic_read(&nf->nf_ref) > 1)
+	if (refcount_read(&nf->nf_ref) > 1)
 		goto out_skip;
 
 	/*
@@ -386,31 +440,51 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
 		goto out_skip;
 
 	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
-		goto out_rescan;
+		goto out_skip;
 
 	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
 		goto out_skip;
 
 	list_lru_isolate_move(lru, &nf->nf_lru, head);
 	return LRU_REMOVED;
-out_rescan:
-	set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
 out_skip:
 	return LRU_SKIP;
 }
 
-static void
-nfsd_file_lru_dispose(struct list_head *head)
+static unsigned long
+nfsd_file_lru_walk_list(struct shrink_control *sc)
 {
-	while(!list_empty(head)) {
-		struct nfsd_file *nf = list_first_entry(head,
-				struct nfsd_file, nf_lru);
-		list_del_init(&nf->nf_lru);
+	LIST_HEAD(head);
+	struct nfsd_file *nf;
+	unsigned long ret;
+
+	if (sc)
+		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+				nfsd_file_lru_cb, &head);
+	else
+		ret = list_lru_walk(&nfsd_file_lru,
+				nfsd_file_lru_cb,
+				&head, LONG_MAX);
+	list_for_each_entry(nf, &head, nf_lru) {
 		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
 		nfsd_file_do_unhash(nf);
 		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
-		nfsd_file_put_noref(nf);
 	}
+	nfsd_file_dispose_list_delayed(&head);
+	return ret;
+}
+
+static void
+nfsd_file_gc(void)
+{
+	nfsd_file_lru_walk_list(NULL);
+}
+
+static void
+nfsd_file_gc_worker(struct work_struct *work)
+{
+	nfsd_file_gc();
+	nfsd_file_schedule_laundrette();
 }
 
 static unsigned long
@@ -422,12 +496,7 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
 static unsigned long
 nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
 {
-	LIST_HEAD(head);
-	unsigned long ret;
-
-	ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
-	nfsd_file_lru_dispose(&head);
-	return ret;
+	return nfsd_file_lru_walk_list(sc);
 }
 
 static struct shrinker	nfsd_file_shrinker = {
@@ -489,7 +558,7 @@ nfsd_file_close_inode(struct inode *inode)
 
 	__nfsd_file_close_inode(inode, hashval, &dispose);
 	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
-	nfsd_file_dispose_list(&dispose);
+	nfsd_file_dispose_list_delayed(&dispose);
 }
 
 /**
@@ -505,16 +574,11 @@ static void
 nfsd_file_delayed_close(struct work_struct *work)
 {
 	LIST_HEAD(head);
+	struct nfsd_fcache_disposal *l = container_of(work,
+			struct nfsd_fcache_disposal, work);
 
-	list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
-
-	if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
-		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
-
-	if (!list_empty(&head)) {
-		nfsd_file_lru_dispose(&head);
-		flush_delayed_fput();
-	}
+	nfsd_file_list_remove_disposal(&head, l);
+	nfsd_file_dispose_list(&head);
 }
 
 static int
@@ -575,6 +639,10 @@ nfsd_file_cache_init(void)
 	if (nfsd_file_hashtbl)
 		return 0;
 
+	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
+	if (!nfsd_filecache_wq)
+		goto out;
+
 	nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
 				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
 	if (!nfsd_file_hashtbl) {
@@ -628,7 +696,7 @@ nfsd_file_cache_init(void)
 		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
 	}
 
-	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
+	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
 out:
 	return ret;
 out_notifier:
@@ -644,6 +712,8 @@ out_err:
 	nfsd_file_mark_slab = NULL;
 	kfree(nfsd_file_hashtbl);
 	nfsd_file_hashtbl = NULL;
+	destroy_workqueue(nfsd_filecache_wq);
+	nfsd_filecache_wq = NULL;
 	goto out;
 }
 
@@ -682,6 +752,88 @@ nfsd_file_cache_purge(struct net *net)
 	}
 }
 
+static struct nfsd_fcache_disposal *
+nfsd_alloc_fcache_disposal(struct net *net)
+{
+	struct nfsd_fcache_disposal *l;
+
+	l = kmalloc(sizeof(*l), GFP_KERNEL);
+	if (!l)
+		return NULL;
+	INIT_WORK(&l->work, nfsd_file_delayed_close);
+	l->net = net;
+	spin_lock_init(&l->lock);
+	INIT_LIST_HEAD(&l->freeme);
+	return l;
+}
+
+static void
+nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+	rcu_assign_pointer(l->net, NULL);
+	cancel_work_sync(&l->work);
+	nfsd_file_dispose_list(&l->freeme);
+	kfree_rcu(l, rcu);
+}
+
+static void
+nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+	spin_lock(&laundrette_lock);
+	list_add_tail_rcu(&l->list, &laundrettes);
+	spin_unlock(&laundrette_lock);
+}
+
+static void
+nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+	spin_lock(&laundrette_lock);
+	list_del_rcu(&l->list);
+	spin_unlock(&laundrette_lock);
+}
+
+static int
+nfsd_alloc_fcache_disposal_net(struct net *net)
+{
+	struct nfsd_fcache_disposal *l;
+
+	l = nfsd_alloc_fcache_disposal(net);
+	if (!l)
+		return -ENOMEM;
+	nfsd_add_fcache_disposal(l);
+	return 0;
+}
+
+static void
+nfsd_free_fcache_disposal_net(struct net *net)
+{
+	struct nfsd_fcache_disposal *l;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(l, &laundrettes, list) {
+		if (l->net != net)
+			continue;
+		nfsd_del_fcache_disposal(l);
+		rcu_read_unlock();
+		nfsd_free_fcache_disposal(l);
+		return;
+	}
+	rcu_read_unlock();
+}
+
+int
+nfsd_file_cache_start_net(struct net *net)
+{
+	return nfsd_alloc_fcache_disposal_net(net);
+}
+
+void
+nfsd_file_cache_shutdown_net(struct net *net)
+{
+	nfsd_file_cache_purge(net);
+	nfsd_free_fcache_disposal_net(net);
+}
+
 void
 nfsd_file_cache_shutdown(void)
 {
@@ -706,6 +858,8 @@ nfsd_file_cache_shutdown(void)
 	nfsd_file_mark_slab = NULL;
 	kfree(nfsd_file_hashtbl);
 	nfsd_file_hashtbl = NULL;
+	destroy_workqueue(nfsd_filecache_wq);
+	nfsd_filecache_wq = NULL;
 }
 
 static bool
@@ -789,6 +943,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	struct nfsd_file *nf, *new;
 	struct inode *inode;
 	unsigned int hashval;
+	bool retry = true;
 
 	/* FIXME: skip this if fh_dentry is already set? */
 	status = fh_verify(rqstp, fhp, S_IFREG,
@@ -824,6 +979,11 @@ wait_for_construction:
 
 	/* Did construction of this file fail? */
 	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+		if (!retry) {
+			status = nfserr_jukebox;
+			goto out;
+		}
+		retry = false;
 		nfsd_file_put_noref(nf);
 		goto retry;
 	}
@@ -858,7 +1018,7 @@ out:
 open_file:
 	nf = new;
 	/* Take reference for the hashtable */
-	atomic_inc(&nf->nf_ref);
+	refcount_inc(&nf->nf_ref);
 	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
 	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
 	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
@@ -867,7 +1027,8 @@ open_file:
 	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
 			nfsd_file_hashtbl[hashval].nfb_count);
 	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
-	atomic_long_inc(&nfsd_filecache_count);
+	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
+		nfsd_file_gc();
 
 	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
 	if (nf->nf_mark)
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 851d9abf54c2..7872df5a0fe3 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -19,7 +19,7 @@
  */
 struct nfsd_file_mark {
 	struct fsnotify_mark	nfm_mark;
-	atomic_t		nfm_ref;
+	refcount_t		nfm_ref;
 };
 
 /*
@@ -43,14 +43,17 @@ struct nfsd_file {
 	unsigned long		nf_flags;
 	struct inode		*nf_inode;
 	unsigned int		nf_hashval;
-	atomic_t		nf_ref;
+	refcount_t		nf_ref;
 	unsigned char		nf_may;
 	struct nfsd_file_mark	*nf_mark;
+	struct rw_semaphore	nf_rwsem;
 };
 
 int nfsd_file_cache_init(void);
 void nfsd_file_cache_purge(struct net *);
 void nfsd_file_cache_shutdown(void);
+int nfsd_file_cache_start_net(struct net *net);
+void nfsd_file_cache_shutdown_net(struct net *net);
 void nfsd_file_put(struct nfsd_file *nf);
 struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
 void nfsd_file_close_inode_sync(struct inode *inode);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 9a4ef815fb8c..2baf32311e00 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -40,7 +40,7 @@ struct nfsd_net {
 
 	struct lock_manager nfsd4_manager;
 	bool grace_ended;
-	time_t boot_time;
+	time64_t boot_time;
 
 	/* internal mount of the "nfsd" pseudofilesystem: */
 	struct vfsmount *nfsd_mnt;
@@ -92,8 +92,8 @@ struct nfsd_net {
 	bool in_grace;
 	const struct nfsd4_client_tracking_ops *client_tracking_ops;
 
-	time_t nfsd4_lease;
-	time_t nfsd4_grace;
+	time64_t nfsd4_lease;
+	time64_t nfsd4_grace;
 	bool somebody_reclaimed;
 
 	bool track_reclaim_completes;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index cea68d8411ac..288bc76b4574 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -203,7 +203,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
 		RETURN_STATUS(nfserr_io);
 	nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
 			    rqstp->rq_vec, nvecs, &cnt,
-			    resp->committed);
+			    resp->committed, resp->verf);
 	resp->count = cnt;
 	RETURN_STATUS(nfserr);
 }
@@ -683,7 +683,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
 		RETURN_STATUS(nfserr_inval);
 
 	fh_copy(&resp->fh, &argp->fh);
-	nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count);
+	nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count,
+			resp->verf);
 
 	RETURN_STATUS(nfserr);
 }
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 195ab7a0fc89..aae514d40b64 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -32,14 +32,14 @@ static u32	nfs3_ftypes[] = {
  * XDR functions for basic NFS types
  */
 static __be32 *
-encode_time3(__be32 *p, struct timespec *time)
+encode_time3(__be32 *p, struct timespec64 *time)
 {
 	*p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec);
 	return p;
 }
 
 static __be32 *
-decode_time3(__be32 *p, struct timespec *time)
+decode_time3(__be32 *p, struct timespec64 *time)
 {
 	time->tv_sec = ntohl(*p++);
 	time->tv_nsec = ntohl(*p++);
@@ -167,7 +167,6 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 	      struct kstat *stat)
 {
 	struct user_namespace *userns = nfsd_user_namespace(rqstp);
-	struct timespec ts;
 	*p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
 	*p++ = htonl((u32) (stat->mode & S_IALLUGO));
 	*p++ = htonl((u32) stat->nlink);
@@ -183,12 +182,9 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 	*p++ = htonl((u32) MINOR(stat->rdev));
 	p = encode_fsid(p, fhp);
 	p = xdr_encode_hyper(p, stat->ino);
-	ts = timespec64_to_timespec(stat->atime);
-	p = encode_time3(p, &ts);
-	ts = timespec64_to_timespec(stat->mtime);
-	p = encode_time3(p, &ts);
-	ts = timespec64_to_timespec(stat->ctime);
-	p = encode_time3(p, &ts);
+	p = encode_time3(p, &stat->atime);
+	p = encode_time3(p, &stat->mtime);
+	p = encode_time3(p, &stat->ctime);
 
 	return p;
 }
@@ -277,8 +273,8 @@ void fill_pre_wcc(struct svc_fh *fhp)
 		stat.size  = inode->i_size;
 	}
 
-	fhp->fh_pre_mtime = timespec64_to_timespec(stat.mtime);
-	fhp->fh_pre_ctime = timespec64_to_timespec(stat.ctime);
+	fhp->fh_pre_mtime = stat.mtime;
+	fhp->fh_pre_ctime = stat.ctime;
 	fhp->fh_pre_size  = stat.size;
 	fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
 	fhp->fh_pre_saved = true;
@@ -330,7 +326,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 	p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
 
 	if ((args->check_guard = ntohl(*p++)) != 0) { 
-		struct timespec time; 
+		struct timespec64 time;
 		p = decode_time3(p, &time);
 		args->guardtime = time.tv_sec;
 	}
@@ -751,17 +747,13 @@ int
 nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_writeres *resp = rqstp->rq_resp;
-	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
-	__be32 verf[2];
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
 		*p++ = htonl(resp->committed);
-		/* unique identifier, y2038 overflow can be ignored */
-		nfsd_copy_boot_verifier(verf, nn);
-		*p++ = verf[0];
-		*p++ = verf[1];
+		*p++ = resp->verf[0];
+		*p++ = resp->verf[1];
 	}
 	return xdr_ressize_check(rqstp, p);
 }
@@ -1125,16 +1117,12 @@ int
 nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_commitres *resp = rqstp->rq_resp;
-	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
-	__be32 verf[2];
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	/* Write verifier */
 	if (resp->status == 0) {
-		/* unique identifier, y2038 overflow can be ignored */
-		nfsd_copy_boot_verifier(verf, nn);
-		*p++ = verf[0];
-		*p++ = verf[1];
+		*p++ = resp->verf[0];
+		*p++ = resp->verf[1];
 	}
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 24534db87e86..c3b11a715082 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -823,7 +823,16 @@ static const struct rpc_program cb_program = {
 static int max_cb_time(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-	return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
+
+	/*
+	 * nfsd4_lease is set to at most one hour in __nfsd4_write_time,
+	 * so we can use 32-bit math on it. Warn if that assumption
+	 * ever stops being true.
+	 */
+	if (WARN_ON_ONCE(nn->nfsd4_lease > 3600))
+		return 360 * HZ;
+
+	return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ;
 }
 
 static struct workqueue_struct *callback_wq;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2681c70283ce..e12409eca7cc 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -675,7 +675,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
 
 		/* Client gets 2 lease periods to return it */
 		cutoff = ktime_add_ns(task->tk_start,
-					 nn->nfsd4_lease * NSEC_PER_SEC * 2);
+					 (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2);
 
 		if (ktime_before(now, cutoff)) {
 			rpc_delay(task, HZ/100); /* 10 mili-seconds */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4798667af647..0e75f7fb5fec 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -37,6 +37,7 @@
 #include <linux/falloc.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
+#include <linux/sunrpc/addr.h>
 
 #include "idmap.h"
 #include "cache.h"
@@ -232,7 +233,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
 	if (!*resfh)
 		return nfserr_jukebox;
 	fh_init(*resfh, NFS4_FHSIZE);
-	open->op_truncate = 0;
+	open->op_truncate = false;
 
 	if (open->op_create) {
 		/* FIXME: check session persistence and pnfs flags.
@@ -365,7 +366,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
 		return nfserr_inval;
 
-	open->op_created = 0;
+	open->op_created = false;
 	/*
 	 * RFC5661 18.51.3
 	 * Before RECLAIM_COMPLETE done, server should deny new lock
@@ -503,12 +504,20 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    union nfsd4_op_u *u)
 {
 	struct nfsd4_putfh *putfh = &u->putfh;
+	__be32 ret;
 
 	fh_put(&cstate->current_fh);
 	cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
 	memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
 	       putfh->pf_fhlen);
-	return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
+	ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+	if (ret == nfserr_stale && putfh->no_verify) {
+		SET_FH_FLAG(&cstate->current_fh, NFSD4_FH_FOREIGN);
+		ret = 0;
+	}
+#endif
+	return ret;
 }
 
 static __be32
@@ -530,9 +539,9 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		return nfserr_restorefh;
 
 	fh_dup2(&cstate->current_fh, &cstate->save_fh);
-	if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) {
+	if (HAS_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG)) {
 		memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t));
-		SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+		SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
 	}
 	return nfs_ok;
 }
@@ -542,9 +551,9 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     union nfsd4_op_u *u)
 {
 	fh_dup2(&cstate->save_fh, &cstate->current_fh);
-	if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
+	if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG)) {
 		memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
-		SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG);
+		SET_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG);
 	}
 	return nfs_ok;
 }
@@ -581,9 +590,9 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	struct nfsd4_commit *commit = &u->commit;
 
-	gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp));
 	return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
-			     commit->co_count);
+			     commit->co_count,
+			     (__be32 *)commit->co_verf.data);
 }
 
 static __be32
@@ -776,7 +785,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	/* check stateid */
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					&read->rd_stateid, RD_STATE,
-					&read->rd_nf);
+					&read->rd_nf, NULL);
 	if (status) {
 		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
 		goto out;
@@ -948,7 +957,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
 		status = nfs4_preprocess_stateid_op(rqstp, cstate,
 				&cstate->current_fh, &setattr->sa_stateid,
-				WR_STATE, NULL);
+				WR_STATE, NULL, NULL);
 		if (status) {
 			dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
 			return status;
@@ -975,7 +984,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out;
 	status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
-				0, (time_t)0);
+				0, (time64_t)0);
 out:
 	fh_drop_write(&cstate->current_fh);
 	return status;
@@ -999,22 +1008,22 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	trace_nfsd_write_start(rqstp, &cstate->current_fh,
 			       write->wr_offset, cnt);
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-						stateid, WR_STATE, &nf);
+						stateid, WR_STATE, &nf, NULL);
 	if (status) {
 		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
 		return status;
 	}
 
 	write->wr_how_written = write->wr_stable_how;
-	gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
 
 	nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
 				      &write->wr_head, write->wr_buflen);
 	WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
 
-	status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file,
+	status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
 				write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
-				write->wr_how_written);
+				write->wr_how_written,
+				(__be32 *)write->wr_verifier.data);
 	nfsd_file_put(nf);
 
 	write->wr_bytes_written = cnt;
@@ -1034,14 +1043,14 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		return nfserr_nofilehandle;
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
-					    src_stateid, RD_STATE, src);
+					    src_stateid, RD_STATE, src, NULL);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
 		goto out;
 	}
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-					    dst_stateid, WR_STATE, dst);
+					    dst_stateid, WR_STATE, dst, NULL);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
 		goto out_put_src;
@@ -1076,8 +1085,8 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out;
 
-	status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos,
-			dst->nf_file, clone->cl_dst_pos, clone->cl_count,
+	status = nfsd4_clone_file_range(src, clone->cl_src_pos,
+			dst, clone->cl_dst_pos, clone->cl_count,
 			EX_ISSYNC(cstate->current_fh.fh_export));
 
 	nfsd_file_put(dst);
@@ -1135,6 +1144,207 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp)
 	while ((copy = nfsd4_get_copy(clp)) != NULL)
 		nfsd4_stop_copy(copy);
 }
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+
+extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt,
+				   struct nfs_fh *src_fh,
+				   nfs4_stateid *stateid);
+extern void nfs42_ssc_close(struct file *filep);
+
+extern void nfs_sb_deactive(struct super_block *sb);
+
+#define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys"
+
+/**
+ * Support one copy source server for now.
+ */
+static __be32
+nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
+		       struct vfsmount **mount)
+{
+	struct file_system_type *type;
+	struct vfsmount *ss_mnt;
+	struct nfs42_netaddr *naddr;
+	struct sockaddr_storage tmp_addr;
+	size_t tmp_addrlen, match_netid_len = 3;
+	char *startsep = "", *endsep = "", *match_netid = "tcp";
+	char *ipaddr, *dev_name, *raw_data;
+	int len, raw_len;
+	__be32 status = nfserr_inval;
+
+	naddr = &nss->u.nl4_addr;
+	tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
+					 naddr->addr_len,
+					 (struct sockaddr *)&tmp_addr,
+					 sizeof(tmp_addr));
+	if (tmp_addrlen == 0)
+		goto out_err;
+
+	if (tmp_addr.ss_family == AF_INET6) {
+		startsep = "[";
+		endsep = "]";
+		match_netid = "tcp6";
+		match_netid_len = 4;
+	}
+
+	if (naddr->netid_len != match_netid_len ||
+		strncmp(naddr->netid, match_netid, naddr->netid_len))
+		goto out_err;
+
+	/* Construct the raw data for the vfs_kern_mount call */
+	len = RPC_MAX_ADDRBUFLEN + 1;
+	ipaddr = kzalloc(len, GFP_KERNEL);
+	if (!ipaddr)
+		goto out_err;
+
+	rpc_ntop((struct sockaddr *)&tmp_addr, ipaddr, len);
+
+	/* 2 for ipv6 endsep and startsep. 3 for ":/" and trailing '/0'*/
+
+	raw_len = strlen(NFSD42_INTERSSC_MOUNTOPS) + strlen(ipaddr);
+	raw_data = kzalloc(raw_len, GFP_KERNEL);
+	if (!raw_data)
+		goto out_free_ipaddr;
+
+	snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr);
+
+	status = nfserr_nodev;
+	type = get_fs_type("nfs");
+	if (!type)
+		goto out_free_rawdata;
+
+	/* Set the server:<export> for the vfs_kern_mount call */
+	dev_name = kzalloc(len + 5, GFP_KERNEL);
+	if (!dev_name)
+		goto out_free_rawdata;
+	snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
+
+	/* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
+	ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
+	module_put(type->owner);
+	if (IS_ERR(ss_mnt))
+		goto out_free_devname;
+
+	status = 0;
+	*mount = ss_mnt;
+
+out_free_devname:
+	kfree(dev_name);
+out_free_rawdata:
+	kfree(raw_data);
+out_free_ipaddr:
+	kfree(ipaddr);
+out_err:
+	return status;
+}
+
+static void
+nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
+{
+	nfs_sb_deactive(ss_mnt->mnt_sb);
+	mntput(ss_mnt);
+}
+
+/**
+ * nfsd4_setup_inter_ssc
+ *
+ * Verify COPY destination stateid.
+ * Connect to the source server with NFSv4.1.
+ * Create the source struct file for nfsd_copy_range.
+ * Called with COPY cstate:
+ *    SAVED_FH: source filehandle
+ *    CURRENT_FH: destination filehandle
+ */
+static __be32
+nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+		      struct nfsd4_compound_state *cstate,
+		      struct nfsd4_copy *copy, struct vfsmount **mount)
+{
+	struct svc_fh *s_fh = NULL;
+	stateid_t *s_stid = &copy->cp_src_stateid;
+	__be32 status = nfserr_inval;
+
+	/* Verify the destination stateid and set dst struct file*/
+	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+					    &copy->cp_dst_stateid,
+					    WR_STATE, &copy->nf_dst, NULL);
+	if (status)
+		goto out;
+
+	status = nfsd4_interssc_connect(&copy->cp_src, rqstp, mount);
+	if (status)
+		goto out;
+
+	s_fh = &cstate->save_fh;
+
+	copy->c_fh.size = s_fh->fh_handle.fh_size;
+	memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size);
+	copy->stateid.seqid = cpu_to_be32(s_stid->si_generation);
+	memcpy(copy->stateid.other, (void *)&s_stid->si_opaque,
+	       sizeof(stateid_opaque_t));
+
+	status = 0;
+out:
+	return status;
+}
+
+static void
+nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
+			struct nfsd_file *dst)
+{
+	nfs42_ssc_close(src->nf_file);
+	nfsd_file_put(src);
+	nfsd_file_put(dst);
+	mntput(ss_mnt);
+}
+
+#else /* CONFIG_NFSD_V4_2_INTER_SSC */
+
+static __be32
+nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+		      struct nfsd4_compound_state *cstate,
+		      struct nfsd4_copy *copy,
+		      struct vfsmount **mount)
+{
+	*mount = NULL;
+	return nfserr_inval;
+}
+
+static void
+nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
+			struct nfsd_file *dst)
+{
+}
+
+static void
+nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
+{
+}
+
+static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt,
+				   struct nfs_fh *src_fh,
+				   nfs4_stateid *stateid)
+{
+	return NULL;
+}
+#endif /* CONFIG_NFSD_V4_2_INTER_SSC */
+
+static __be32
+nfsd4_setup_intra_ssc(struct svc_rqst *rqstp,
+		      struct nfsd4_compound_state *cstate,
+		      struct nfsd4_copy *copy)
+{
+	return nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
+				 &copy->nf_src, &copy->cp_dst_stateid,
+				 &copy->nf_dst);
+}
+
+static void
+nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst)
+{
+	nfsd_file_put(src);
+	nfsd_file_put(dst);
+}
 
 static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
 {
@@ -1200,12 +1410,16 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
 		status = nfs_ok;
 	}
 
-	nfsd_file_put(copy->nf_src);
-	nfsd_file_put(copy->nf_dst);
+	if (!copy->cp_intra) /* Inter server SSC */
+		nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src,
+					copy->nf_dst);
+	else
+		nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+
 	return status;
 }
 
-static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
+static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
 {
 	dst->cp_src_pos = src->cp_src_pos;
 	dst->cp_dst_pos = src->cp_dst_pos;
@@ -1215,15 +1429,25 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
 	memcpy(&dst->fh, &src->fh, sizeof(src->fh));
 	dst->cp_clp = src->cp_clp;
 	dst->nf_dst = nfsd_file_get(src->nf_dst);
-	dst->nf_src = nfsd_file_get(src->nf_src);
+	dst->cp_intra = src->cp_intra;
+	if (src->cp_intra) /* for inter, file_src doesn't exist yet */
+		dst->nf_src = nfsd_file_get(src->nf_src);
+
 	memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
+	memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server));
+	memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid));
+	memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh));
+	dst->ss_mnt = src->ss_mnt;
+
+	return 0;
 }
 
 static void cleanup_async_copy(struct nfsd4_copy *copy)
 {
-	nfs4_free_cp_state(copy);
+	nfs4_free_copy_state(copy);
 	nfsd_file_put(copy->nf_dst);
-	nfsd_file_put(copy->nf_src);
+	if (copy->cp_intra)
+		nfsd_file_put(copy->nf_src);
 	spin_lock(&copy->cp_clp->async_lock);
 	list_del(&copy->copies);
 	spin_unlock(&copy->cp_clp->async_lock);
@@ -1235,7 +1459,24 @@ static int nfsd4_do_async_copy(void *data)
 	struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
 	struct nfsd4_copy *cb_copy;
 
+	if (!copy->cp_intra) { /* Inter server SSC */
+		copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL);
+		if (!copy->nf_src) {
+			copy->nfserr = nfserr_serverfault;
+			nfsd4_interssc_disconnect(copy->ss_mnt);
+			goto do_callback;
+		}
+		copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh,
+					      &copy->stateid);
+		if (IS_ERR(copy->nf_src->nf_file)) {
+			copy->nfserr = nfserr_offload_denied;
+			nfsd4_interssc_disconnect(copy->ss_mnt);
+			goto do_callback;
+		}
+	}
+
 	copy->nfserr = nfsd4_do_copy(copy, 0);
+do_callback:
 	cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
 	if (!cb_copy)
 		goto out;
@@ -1247,6 +1488,8 @@ static int nfsd4_do_async_copy(void *data)
 			&nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD);
 	nfsd4_run_cb(&cb_copy->cp_cb);
 out:
+	if (!copy->cp_intra)
+		kfree(copy->nf_src);
 	cleanup_async_copy(copy);
 	return 0;
 }
@@ -1259,11 +1502,20 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	__be32 status;
 	struct nfsd4_copy *async_copy = NULL;
 
-	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
-				   &copy->nf_src, &copy->cp_dst_stateid,
-				   &copy->nf_dst);
-	if (status)
-		goto out;
+	if (!copy->cp_intra) { /* Inter server SSC */
+		if (!inter_copy_offload_enable || copy->cp_synchronous) {
+			status = nfserr_notsupp;
+			goto out;
+		}
+		status = nfsd4_setup_inter_ssc(rqstp, cstate, copy,
+				&copy->ss_mnt);
+		if (status)
+			return nfserr_offload_denied;
+	} else {
+		status = nfsd4_setup_intra_ssc(rqstp, cstate, copy);
+		if (status)
+			return status;
+	}
 
 	copy->cp_clp = cstate->clp;
 	memcpy(&copy->fh, &cstate->current_fh.fh_handle,
@@ -1274,15 +1526,15 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		status = nfserrno(-ENOMEM);
 		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
 		if (!async_copy)
-			goto out;
-		if (!nfs4_init_cp_state(nn, copy)) {
-			kfree(async_copy);
-			goto out;
-		}
+			goto out_err;
+		if (!nfs4_init_copy_state(nn, copy))
+			goto out_err;
 		refcount_set(&async_copy->refcount, 1);
 		memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid,
 			sizeof(copy->cp_stateid));
-		dup_copy_fields(copy, async_copy);
+		status = dup_copy_fields(copy, async_copy);
+		if (status)
+			goto out_err;
 		async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
 				async_copy, "%s", "copy thread");
 		if (IS_ERR(async_copy->copy_task))
@@ -1293,13 +1545,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		spin_unlock(&async_copy->cp_clp->async_lock);
 		wake_up_process(async_copy->copy_task);
 		status = nfs_ok;
-	} else
+	} else {
 		status = nfsd4_do_copy(copy, 1);
+	}
 out:
 	return status;
 out_err:
 	if (async_copy)
 		cleanup_async_copy(async_copy);
+	status = nfserrno(-ENOMEM);
+	if (!copy->cp_intra)
+		nfsd4_interssc_disconnect(copy->ss_mnt);
 	goto out;
 }
 
@@ -1310,7 +1566,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
 
 	spin_lock(&clp->async_lock);
 	list_for_each_entry(copy, &clp->async_copies, copies) {
-		if (memcmp(&copy->cp_stateid, stateid, NFS4_STATEID_SIZE))
+		if (memcmp(&copy->cp_stateid.stid, stateid, NFS4_STATEID_SIZE))
 			continue;
 		refcount_inc(&copy->refcount);
 		spin_unlock(&clp->async_lock);
@@ -1326,16 +1582,61 @@ nfsd4_offload_cancel(struct svc_rqst *rqstp,
 		     union nfsd4_op_u *u)
 {
 	struct nfsd4_offload_status *os = &u->offload_status;
-	__be32 status = 0;
 	struct nfsd4_copy *copy;
 	struct nfs4_client *clp = cstate->clp;
 
 	copy = find_async_copy(clp, &os->stateid);
-	if (copy)
+	if (!copy) {
+		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+		return manage_cpntf_state(nn, &os->stateid, clp, NULL);
+	} else
 		nfsd4_stop_copy(copy);
-	else
-		status = nfserr_bad_stateid;
 
+	return nfs_ok;
+}
+
+static __be32
+nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		  union nfsd4_op_u *u)
+{
+	struct nfsd4_copy_notify *cn = &u->copy_notify;
+	__be32 status;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct nfs4_stid *stid;
+	struct nfs4_cpntf_state *cps;
+	struct nfs4_client *clp = cstate->clp;
+
+	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+					&cn->cpn_src_stateid, RD_STATE, NULL,
+					&stid);
+	if (status)
+		return status;
+
+	cn->cpn_sec = nn->nfsd4_lease;
+	cn->cpn_nsec = 0;
+
+	status = nfserrno(-ENOMEM);
+	cps = nfs4_alloc_init_cpntf_state(nn, stid);
+	if (!cps)
+		goto out;
+	memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t));
+	memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t));
+	memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t));
+
+	/* For now, only return one server address in cpn_src, the
+	 * address used by the client to connect to this server.
+	 */
+	cn->cpn_src.nl4_type = NL4_NETADDR;
+	status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
+				 &cn->cpn_src.u.nl4_addr);
+	WARN_ON_ONCE(status);
+	if (status) {
+		nfs4_put_cpntf_state(nn, cps);
+		goto out;
+	}
+out:
+	nfs4_put_stid(stid);
 	return status;
 }
 
@@ -1348,7 +1649,7 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    &fallocate->falloc_stateid,
-					    WR_STATE, &nf);
+					    WR_STATE, &nf, NULL);
 	if (status != nfs_ok) {
 		dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
 		return status;
@@ -1407,7 +1708,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    &seek->seek_stateid,
-					    RD_STATE, &nf);
+					    RD_STATE, &nf, NULL);
 	if (status) {
 		dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
 		return status;
@@ -1912,6 +2213,45 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
 		- rqstp->rq_auth_slack;
 }
 
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+static void
+check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
+{
+	struct nfsd4_op	*op, *current_op = NULL, *saved_op = NULL;
+	struct nfsd4_copy *copy;
+	struct nfsd4_putfh *putfh;
+	int i;
+
+	/* traverse all operation and if it's a COPY compound, mark the
+	 * source filehandle to skip verification
+	 */
+	for (i = 0; i < args->opcnt; i++) {
+		op = &args->ops[i];
+		if (op->opnum == OP_PUTFH)
+			current_op = op;
+		else if (op->opnum == OP_SAVEFH)
+			saved_op = current_op;
+		else if (op->opnum == OP_RESTOREFH)
+			current_op = saved_op;
+		else if (op->opnum == OP_COPY) {
+			copy = (struct nfsd4_copy *)&op->u;
+			if (!saved_op) {
+				op->status = nfserr_nofilehandle;
+				return;
+			}
+			putfh = (struct nfsd4_putfh *)&saved_op->u;
+			if (!copy->cp_intra)
+				putfh->no_verify = true;
+		}
+	}
+}
+#else
+static void
+check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
+{
+}
+#endif
+
 /*
  * COMPOUND call.
  */
@@ -1960,6 +2300,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 		resp->opcnt = 1;
 		goto encode_op;
 	}
+	check_if_stalefh_allowed(args);
 
 	trace_nfsd_compound(rqstp, args->opcnt);
 	while (!status && resp->opcnt < args->opcnt) {
@@ -1975,13 +2316,14 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 				op->status = nfsd4_open_omfg(rqstp, cstate, op);
 			goto encode_op;
 		}
-
-		if (!current_fh->fh_dentry) {
+		if (!current_fh->fh_dentry &&
+				!HAS_FH_FLAG(current_fh, NFSD4_FH_FOREIGN)) {
 			if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
 				op->status = nfserr_nofilehandle;
 				goto encode_op;
 			}
-		} else if (current_fh->fh_export->ex_fslocs.migrated &&
+		} else if (current_fh->fh_export &&
+			   current_fh->fh_export->ex_fslocs.migrated &&
 			  !(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
 			op->status = nfserr_moved;
 			goto encode_op;
@@ -2025,7 +2367,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 			if (op->opdesc->op_flags & OP_CLEAR_STATEID)
 				clear_current_stateid(cstate);
 
-			if (need_wrongsec_check(rqstp))
+			if (current_fh->fh_export &&
+					need_wrongsec_check(rqstp))
 				op->status = check_nfsd_access(current_fh->fh_export, rqstp);
 		}
 encode_op:
@@ -2292,6 +2635,21 @@ static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
 		1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
+					struct nfsd4_op *op)
+{
+	return (op_encode_hdr_size +
+		3 /* cnr_lease_time */ +
+		1 /* We support one cnr_source_server */ +
+		1 /* cnr_stateid seq */ +
+		op_encode_stateid_maxsz /* cnr_stateid */ +
+		1 /* num cnr_source_server*/ +
+		1 /* nl4_type */ +
+		1 /* nl4 size */ +
+		XDR_QUADLEN(NFS4_OPAQUE_LIMIT) /*nl4_loc + nl4_loc_sz */)
+		* sizeof(__be32);
+}
+
 #ifdef CONFIG_NFSD_PNFS
 static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
@@ -2716,6 +3074,12 @@ static const struct nfsd4_operation nfsd4_ops[] = {
 		.op_name = "OP_OFFLOAD_CANCEL",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
+	[OP_COPY_NOTIFY] = {
+		.op_func = nfsd4_copy_notify,
+		.op_flags = OP_MODIFIES_SOMETHING,
+		.op_name = "OP_COPY_NOTIFY",
+		.op_rsize_bop = nfsd4_copy_notify_rsize,
+	},
 };
 
 /**
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 2481e7662128..a8fb18609146 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1445,7 +1445,7 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
 	}
 
 	cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
-	cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
+	cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time;
 	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret)
 		ret = cup->cu_u.cu_msg.cm_status;
@@ -1782,7 +1782,7 @@ nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
 }
 
 static char *
-nfsd4_cltrack_grace_start(time_t grace_start)
+nfsd4_cltrack_grace_start(time64_t grace_start)
 {
 	int copied;
 	size_t len;
@@ -1795,7 +1795,7 @@ nfsd4_cltrack_grace_start(time_t grace_start)
 	if (!result)
 		return result;
 
-	copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld",
+	copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%lld",
 				grace_start);
 	if (copied >= len) {
 		/* just return nothing if output was truncated */
@@ -2004,7 +2004,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
 	char *legacy;
 	char timestr[22]; /* FIXME: better way to determine max size? */
 
-	sprintf(timestr, "%ld", nn->boot_time);
+	sprintf(timestr, "%lld", nn->boot_time);
 	legacy = nfsd4_cltrack_legacy_topdir();
 	nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
 	kfree(legacy);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 369e574c5092..65cfe9ab47be 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -80,6 +80,7 @@ static u64 current_sessionid = 1;
 static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
 static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
 void nfsd4_end_grace(struct nfsd_net *nn);
+static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
 
 /* Locking: */
 
@@ -170,7 +171,7 @@ renew_client_locked(struct nfs4_client *clp)
 			clp->cl_clientid.cl_boot,
 			clp->cl_clientid.cl_id);
 	list_move_tail(&clp->cl_lru, &nn->client_lru);
-	clp->cl_time = get_seconds();
+	clp->cl_time = ktime_get_boottime_seconds();
 }
 
 static void put_client_renew_locked(struct nfs4_client *clp)
@@ -722,6 +723,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 	/* Will be incremented before return to client: */
 	refcount_set(&stid->sc_count, 1);
 	spin_lock_init(&stid->sc_lock);
+	INIT_LIST_HEAD(&stid->sc_cp_list);
 
 	/*
 	 * It shouldn't be a problem to reuse an opaque stateid value.
@@ -741,30 +743,76 @@ out_free:
 /*
  * Create a unique stateid_t to represent each COPY.
  */
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
+			      unsigned char sc_type)
 {
 	int new_id;
 
+	stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
+	stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+	stid->sc_type = sc_type;
+
 	idr_preload(GFP_KERNEL);
 	spin_lock(&nn->s2s_cp_lock);
-	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
+	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
+	stid->stid.si_opaque.so_id = new_id;
 	spin_unlock(&nn->s2s_cp_lock);
 	idr_preload_end();
 	if (new_id < 0)
 		return 0;
-	copy->cp_stateid.si_opaque.so_id = new_id;
-	copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
-	copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
 	return 1;
 }
 
-void nfs4_free_cp_state(struct nfsd4_copy *copy)
+int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+{
+	return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID);
+}
+
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+						     struct nfs4_stid *p_stid)
+{
+	struct nfs4_cpntf_state *cps;
+
+	cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
+	if (!cps)
+		return NULL;
+	cps->cpntf_time = ktime_get_boottime_seconds();
+	refcount_set(&cps->cp_stateid.sc_count, 1);
+	if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID))
+		goto out_free;
+	spin_lock(&nn->s2s_cp_lock);
+	list_add(&cps->cp_list, &p_stid->sc_cp_list);
+	spin_unlock(&nn->s2s_cp_lock);
+	return cps;
+out_free:
+	kfree(cps);
+	return NULL;
+}
+
+void nfs4_free_copy_state(struct nfsd4_copy *copy)
 {
 	struct nfsd_net *nn;
 
+	WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID);
 	nn = net_generic(copy->cp_clp->net, nfsd_net_id);
 	spin_lock(&nn->s2s_cp_lock);
-	idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id);
+	idr_remove(&nn->s2s_cp_stateids,
+		   copy->cp_stateid.stid.si_opaque.so_id);
+	spin_unlock(&nn->s2s_cp_lock);
+}
+
+static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid)
+{
+	struct nfs4_cpntf_state *cps;
+	struct nfsd_net *nn;
+
+	nn = net_generic(net, nfsd_net_id);
+	spin_lock(&nn->s2s_cp_lock);
+	while (!list_empty(&stid->sc_cp_list)) {
+		cps = list_first_entry(&stid->sc_cp_list,
+				       struct nfs4_cpntf_state, cp_list);
+		_free_cpntf_state_locked(nn, cps);
+	}
 	spin_unlock(&nn->s2s_cp_lock);
 }
 
@@ -806,7 +854,7 @@ static void nfs4_free_deleg(struct nfs4_stid *stid)
 static DEFINE_SPINLOCK(blocked_delegations_lock);
 static struct bloom_pair {
 	int	entries, old_entries;
-	time_t	swap_time;
+	time64_t swap_time;
 	int	new; /* index into 'set' */
 	DECLARE_BITMAP(set[2], 256);
 } blocked_delegations;
@@ -818,15 +866,15 @@ static int delegation_blocked(struct knfsd_fh *fh)
 
 	if (bd->entries == 0)
 		return 0;
-	if (seconds_since_boot() - bd->swap_time > 30) {
+	if (ktime_get_seconds() - bd->swap_time > 30) {
 		spin_lock(&blocked_delegations_lock);
-		if (seconds_since_boot() - bd->swap_time > 30) {
+		if (ktime_get_seconds() - bd->swap_time > 30) {
 			bd->entries -= bd->old_entries;
 			bd->old_entries = bd->entries;
 			memset(bd->set[bd->new], 0,
 			       sizeof(bd->set[0]));
 			bd->new = 1-bd->new;
-			bd->swap_time = seconds_since_boot();
+			bd->swap_time = ktime_get_seconds();
 		}
 		spin_unlock(&blocked_delegations_lock);
 	}
@@ -856,7 +904,7 @@ static void block_delegations(struct knfsd_fh *fh)
 	__set_bit((hash>>8)&255, bd->set[bd->new]);
 	__set_bit((hash>>16)&255, bd->set[bd->new]);
 	if (bd->entries == 0)
-		bd->swap_time = seconds_since_boot();
+		bd->swap_time = ktime_get_seconds();
 	bd->entries += 1;
 	spin_unlock(&blocked_delegations_lock);
 }
@@ -915,6 +963,7 @@ nfs4_put_stid(struct nfs4_stid *s)
 		return;
 	}
 	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+	nfs4_free_cpntf_statelist(clp->net, s);
 	spin_unlock(&clp->cl_lock);
 	s->sc_free(s);
 	if (fp)
@@ -1862,7 +1911,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
 	 */
 	if (clid->cl_boot == (u32)nn->boot_time)
 		return 0;
-	dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
+	dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n",
 		clid->cl_boot, clid->cl_id, nn->boot_time);
 	return 1;
 }
@@ -2215,14 +2264,14 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
 	 * This is opaque to client, so no need to byte-swap. Use
 	 * __force to keep sparse happy
 	 */
-	verf[0] = (__force __be32)get_seconds();
+	verf[0] = (__force __be32)(u32)ktime_get_real_seconds();
 	verf[1] = (__force __be32)nn->clverifier_counter++;
 	memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
 }
 
 static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
 {
-	clp->cl_clientid.cl_boot = nn->boot_time;
+	clp->cl_clientid.cl_boot = (u32)nn->boot_time;
 	clp->cl_clientid.cl_id = nn->clientid_counter++;
 	gen_confirm(clp, nn);
 }
@@ -2292,7 +2341,7 @@ static int client_info_show(struct seq_file *m, void *v)
 					clp->cl_nii_domain.len);
 		seq_printf(m, "\nImplementation name: ");
 		seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
-		seq_printf(m, "\nImplementation time: [%ld, %ld]\n",
+		seq_printf(m, "\nImplementation time: [%lld, %ld]\n",
 			clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
 	}
 	drop_client(clp);
@@ -2612,7 +2661,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
 	gen_clid(clp, nn);
 	kref_init(&clp->cl_nfsdfs.cl_ref);
 	nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
-	clp->cl_time = get_seconds();
+	clp->cl_time = ktime_get_boottime_seconds();
 	clear_bit(0, &clp->cl_cb_slot_busy);
 	copy_verf(clp, verf);
 	memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
@@ -2946,8 +2995,7 @@ static __be32 copy_impl_id(struct nfs4_client *clp,
 	xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
 	if (!clp->cl_nii_name.data)
 		return nfserr_jukebox;
-	clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec;
-	clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec;
+	clp->cl_nii_time = exid->nii_time;
 	return 0;
 }
 
@@ -3373,7 +3421,7 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir)
 	case NFS4_CDFC4_BACK_OR_BOTH:
 		*dir = NFS4_CDFC4_BOTH;
 		return nfs_ok;
-	};
+	}
 	return nfserr_inval;
 }
 
@@ -4283,7 +4331,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
 	last = oo->oo_last_closed_stid;
 	oo->oo_last_closed_stid = s;
 	list_move_tail(&oo->oo_close_lru, &nn->close_lru);
-	oo->oo_time = get_seconds();
+	oo->oo_time = ktime_get_boottime_seconds();
 	spin_unlock(&nn->client_lock);
 	if (last)
 		nfs4_put_stid(&last->st_stid);
@@ -4378,7 +4426,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
 	 */
 	spin_lock(&state_lock);
 	if (dp->dl_time == 0) {
-		dp->dl_time = get_seconds();
+		dp->dl_time = ktime_get_boottime_seconds();
 		list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
 	}
 	spin_unlock(&state_lock);
@@ -4490,7 +4538,8 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4
 
 static __be32 lookup_clientid(clientid_t *clid,
 		struct nfsd4_compound_state *cstate,
-		struct nfsd_net *nn)
+		struct nfsd_net *nn,
+		bool sessions)
 {
 	struct nfs4_client *found;
 
@@ -4511,7 +4560,7 @@ static __be32 lookup_clientid(clientid_t *clid,
 	 */
 	WARN_ON_ONCE(cstate->session);
 	spin_lock(&nn->client_lock);
-	found = find_confirmed_client(clid, false, nn);
+	found = find_confirmed_client(clid, sessions, nn);
 	if (!found) {
 		spin_unlock(&nn->client_lock);
 		return nfserr_expired;
@@ -4544,7 +4593,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
 	if (open->op_file == NULL)
 		return nfserr_jukebox;
 
-	status = lookup_clientid(clientid, cstate, nn);
+	status = lookup_clientid(clientid, cstate, nn, false);
 	if (status)
 		return status;
 	clp = cstate->clp;
@@ -4672,7 +4721,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
 		return 0;
 	if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
 		return nfserr_inval;
-	return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
+	return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0);
 }
 
 static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
@@ -5133,7 +5182,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	dprintk("process_renew(%08x/%08x): starting\n", 
 			clid->cl_boot, clid->cl_id);
-	status = lookup_clientid(clid, cstate, nn);
+	status = lookup_clientid(clid, cstate, nn, false);
 	if (status)
 		goto out;
 	clp = cstate->clp;
@@ -5184,9 +5233,8 @@ nfsd4_end_grace(struct nfsd_net *nn)
  */
 static bool clients_still_reclaiming(struct nfsd_net *nn)
 {
-	unsigned long now = get_seconds();
-	unsigned long double_grace_period_end = nn->boot_time +
-						2 * nn->nfsd4_lease;
+	time64_t double_grace_period_end = nn->boot_time +
+					   2 * nn->nfsd4_lease;
 
 	if (nn->track_reclaim_completes &&
 			atomic_read(&nn->nr_reclaim_complete) ==
@@ -5199,12 +5247,12 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
 	 * If we've given them *two* lease times to reclaim, and they're
 	 * still not done, give up:
 	 */
-	if (time_after(now, double_grace_period_end))
+	if (ktime_get_boottime_seconds() > double_grace_period_end)
 		return false;
 	return true;
 }
 
-static time_t
+static time64_t
 nfs4_laundromat(struct nfsd_net *nn)
 {
 	struct nfs4_client *clp;
@@ -5213,8 +5261,11 @@ nfs4_laundromat(struct nfsd_net *nn)
 	struct nfs4_ol_stateid *stp;
 	struct nfsd4_blocked_lock *nbl;
 	struct list_head *pos, *next, reaplist;
-	time_t cutoff = get_seconds() - nn->nfsd4_lease;
-	time_t t, new_timeo = nn->nfsd4_lease;
+	time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease;
+	time64_t t, new_timeo = nn->nfsd4_lease;
+	struct nfs4_cpntf_state *cps;
+	copy_stateid_t *cps_t;
+	int i;
 
 	dprintk("NFSD: laundromat service - starting\n");
 
@@ -5225,10 +5276,20 @@ nfs4_laundromat(struct nfsd_net *nn)
 	dprintk("NFSD: end of grace period\n");
 	nfsd4_end_grace(nn);
 	INIT_LIST_HEAD(&reaplist);
+
+	spin_lock(&nn->s2s_cp_lock);
+	idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
+		cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
+		if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID &&
+				cps->cpntf_time > cutoff)
+			_free_cpntf_state_locked(nn, cps);
+	}
+	spin_unlock(&nn->s2s_cp_lock);
+
 	spin_lock(&nn->client_lock);
 	list_for_each_safe(pos, next, &nn->client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
-		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
+		if (clp->cl_time > cutoff) {
 			t = clp->cl_time - cutoff;
 			new_timeo = min(new_timeo, t);
 			break;
@@ -5251,7 +5312,7 @@ nfs4_laundromat(struct nfsd_net *nn)
 	spin_lock(&state_lock);
 	list_for_each_safe(pos, next, &nn->del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
+		if (dp->dl_time > cutoff) {
 			t = dp->dl_time - cutoff;
 			new_timeo = min(new_timeo, t);
 			break;
@@ -5271,8 +5332,7 @@ nfs4_laundromat(struct nfsd_net *nn)
 	while (!list_empty(&nn->close_lru)) {
 		oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
 					oo_close_lru);
-		if (time_after((unsigned long)oo->oo_time,
-			       (unsigned long)cutoff)) {
+		if (oo->oo_time > cutoff) {
 			t = oo->oo_time - cutoff;
 			new_timeo = min(new_timeo, t);
 			break;
@@ -5302,8 +5362,7 @@ nfs4_laundromat(struct nfsd_net *nn)
 	while (!list_empty(&nn->blocked_locks_lru)) {
 		nbl = list_first_entry(&nn->blocked_locks_lru,
 					struct nfsd4_blocked_lock, nbl_lru);
-		if (time_after((unsigned long)nbl->nbl_time,
-			       (unsigned long)cutoff)) {
+		if (nbl->nbl_time > cutoff) {
 			t = nbl->nbl_time - cutoff;
 			new_timeo = min(new_timeo, t);
 			break;
@@ -5320,7 +5379,7 @@ nfs4_laundromat(struct nfsd_net *nn)
 		free_blocked_lock(nbl);
 	}
 out:
-	new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
+	new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
 	return new_timeo;
 }
 
@@ -5330,13 +5389,13 @@ static void laundromat_main(struct work_struct *);
 static void
 laundromat_main(struct work_struct *laundry)
 {
-	time_t t;
+	time64_t t;
 	struct delayed_work *dwork = to_delayed_work(laundry);
 	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
 					   laundromat_work);
 
 	t = nfs4_laundromat(nn);
-	dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
+	dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t);
 	queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
 }
 
@@ -5521,7 +5580,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
 		CLOSE_STATEID(stateid))
 		return nfserr_bad_stateid;
-	status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
+	status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn,
+				 false);
 	if (status == nfserr_stale_clientid) {
 		if (cstate->session)
 			return nfserr_bad_stateid;
@@ -5600,6 +5660,85 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
 out:
 	return status;
 }
+static void
+_free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
+{
+	WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID);
+	if (!refcount_dec_and_test(&cps->cp_stateid.sc_count))
+		return;
+	list_del(&cps->cp_list);
+	idr_remove(&nn->s2s_cp_stateids,
+		   cps->cp_stateid.stid.si_opaque.so_id);
+	kfree(cps);
+}
+/*
+ * A READ from an inter server to server COPY will have a
+ * copy stateid. Look up the copy notify stateid from the
+ * idr structure and take a reference on it.
+ */
+__be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+			  struct nfs4_client *clp,
+			  struct nfs4_cpntf_state **cps)
+{
+	copy_stateid_t *cps_t;
+	struct nfs4_cpntf_state *state = NULL;
+
+	if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id)
+		return nfserr_bad_stateid;
+	spin_lock(&nn->s2s_cp_lock);
+	cps_t = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id);
+	if (cps_t) {
+		state = container_of(cps_t, struct nfs4_cpntf_state,
+				     cp_stateid);
+		if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) {
+			state = NULL;
+			goto unlock;
+		}
+		if (!clp)
+			refcount_inc(&state->cp_stateid.sc_count);
+		else
+			_free_cpntf_state_locked(nn, state);
+	}
+unlock:
+	spin_unlock(&nn->s2s_cp_lock);
+	if (!state)
+		return nfserr_bad_stateid;
+	if (!clp && state)
+		*cps = state;
+	return 0;
+}
+
+static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+			       struct nfs4_stid **stid)
+{
+	__be32 status;
+	struct nfs4_cpntf_state *cps = NULL;
+	struct nfsd4_compound_state cstate;
+
+	status = manage_cpntf_state(nn, st, NULL, &cps);
+	if (status)
+		return status;
+
+	cps->cpntf_time = ktime_get_boottime_seconds();
+	memset(&cstate, 0, sizeof(cstate));
+	status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true);
+	if (status)
+		goto out;
+	status = nfsd4_lookup_stateid(&cstate, &cps->cp_p_stateid,
+				NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
+				stid, nn);
+	put_client_renew(cstate.clp);
+out:
+	nfs4_put_cpntf_state(nn, cps);
+	return status;
+}
+
+void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
+{
+	spin_lock(&nn->s2s_cp_lock);
+	_free_cpntf_state_locked(nn, cps);
+	spin_unlock(&nn->s2s_cp_lock);
+}
 
 /*
  * Checks for stateid operations
@@ -5607,7 +5746,8 @@ out:
 __be32
 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-		stateid_t *stateid, int flags, struct nfsd_file **nfp)
+		stateid_t *stateid, int flags, struct nfsd_file **nfp,
+		struct nfs4_stid **cstid)
 {
 	struct inode *ino = d_inode(fhp->fh_dentry);
 	struct net *net = SVC_NET(rqstp);
@@ -5629,6 +5769,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 	status = nfsd4_lookup_stateid(cstate, stateid,
 				NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
 				&s, nn);
+	if (status == nfserr_bad_stateid)
+		status = find_cpntf_state(nn, stateid, &s);
 	if (status)
 		return status;
 	status = nfsd4_stid_check_stateid_generation(stateid, s,
@@ -5656,8 +5798,12 @@ done:
 	if (status == nfs_ok && nfp)
 		status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
 out:
-	if (s)
-		nfs4_put_stid(s);
+	if (s) {
+		if (!status && cstid)
+			*cstid = s;
+		else
+			nfs4_put_stid(s);
+	}
 	return status;
 }
 
@@ -6550,7 +6696,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	}
 
 	if (fl_flags & FL_SLEEP) {
-		nbl->nbl_time = jiffies;
+		nbl->nbl_time = ktime_get_boottime_seconds();
 		spin_lock(&nn->blocked_locks_lock);
 		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
 		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
@@ -6657,7 +6803,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		 return nfserr_inval;
 
 	if (!nfsd4_has_session(cstate)) {
-		status = lookup_clientid(&lockt->lt_clientid, cstate, nn);
+		status = lookup_clientid(&lockt->lt_clientid, cstate, nn,
+					 false);
 		if (status)
 			goto out;
 	}
@@ -6841,7 +6988,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
 		clid->cl_boot, clid->cl_id);
 
-	status = lookup_clientid(clid, cstate, nn);
+	status = lookup_clientid(clid, cstate, nn, false);
 	if (status)
 		return status;
 
@@ -6988,7 +7135,7 @@ nfs4_check_open_reclaim(clientid_t *clid,
 	__be32 status;
 
 	/* find clientid in conf_id_hashtbl */
-	status = lookup_clientid(clid, cstate, nn);
+	status = lookup_clientid(clid, cstate, nn, false);
 	if (status)
 		return nfserr_reclaim_bad;
 
@@ -7641,7 +7788,7 @@ static int nfs4_state_create_net(struct net *net)
 		INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
 	nn->conf_name_tree = RB_ROOT;
 	nn->unconf_name_tree = RB_ROOT;
-	nn->boot_time = get_seconds();
+	nn->boot_time = ktime_get_real_seconds();
 	nn->grace_ended = false;
 	nn->nfsd4_manager.block_opens = true;
 	INIT_LIST_HEAD(&nn->nfsd4_manager.list);
@@ -7710,7 +7857,7 @@ nfs4_state_start_net(struct net *net)
 	nfsd4_client_tracking_init(net);
 	if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
 		goto skip_grace;
-	printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
+	printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n",
 	       nn->nfsd4_grace, net->ns.inum);
 	queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
 	return 0;
@@ -7786,7 +7933,8 @@ nfs4_state_shutdown(void)
 static void
 get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
 {
-	if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid))
+	if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG) &&
+	    CURRENT_STATEID(stateid))
 		memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t));
 }
 
@@ -7795,14 +7943,14 @@ put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
 {
 	if (cstate->minorversion) {
 		memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t));
-		SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+		SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
 	}
 }
 
 void
 clear_current_stateid(struct nfsd4_compound_state *cstate)
 {
-	CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+	CLEAR_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
 }
 
 /*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d2dc4c0e22e8..9761512674a0 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -40,6 +40,7 @@
 #include <linux/utsname.h>
 #include <linux/pagemap.h>
 #include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/addr.h>
 
 #include "idmap.h"
 #include "acl.h"
@@ -1744,10 +1745,47 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
 	DECODE_TAIL;
 }
 
+static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
+				      struct nl4_server *ns)
+{
+	DECODE_HEAD;
+	struct nfs42_netaddr *naddr;
+
+	READ_BUF(4);
+	ns->nl4_type = be32_to_cpup(p++);
+
+	/* currently support for 1 inter-server source server */
+	switch (ns->nl4_type) {
+	case NL4_NETADDR:
+		naddr = &ns->u.nl4_addr;
+
+		READ_BUF(4);
+		naddr->netid_len = be32_to_cpup(p++);
+		if (naddr->netid_len > RPCBIND_MAXNETIDLEN)
+			goto xdr_error;
+
+		READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */
+		COPYMEM(naddr->netid, naddr->netid_len);
+
+		naddr->addr_len = be32_to_cpup(p++);
+		if (naddr->addr_len > RPCBIND_MAXUADDRLEN)
+			goto xdr_error;
+
+		READ_BUF(naddr->addr_len);
+		COPYMEM(naddr->addr, naddr->addr_len);
+		break;
+	default:
+		goto xdr_error;
+	}
+	DECODE_TAIL;
+}
+
 static __be32
 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
 {
 	DECODE_HEAD;
+	struct nl4_server *ns_dummy;
+	int i, count;
 
 	status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
 	if (status)
@@ -1762,7 +1800,32 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
 	p = xdr_decode_hyper(p, &copy->cp_count);
 	p++; /* ca_consecutive: we always do consecutive copies */
 	copy->cp_synchronous = be32_to_cpup(p++);
-	/* tmp = be32_to_cpup(p); Source server list not supported */
+
+	count = be32_to_cpup(p++);
+
+	copy->cp_intra = false;
+	if (count == 0) { /* intra-server copy */
+		copy->cp_intra = true;
+		goto intra;
+	}
+
+	/* decode all the supplied server addresses but use first */
+	status = nfsd4_decode_nl4_server(argp, &copy->cp_src);
+	if (status)
+		return status;
+
+	ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL);
+	if (ns_dummy == NULL)
+		return nfserrno(-ENOMEM);
+	for (i = 0; i < count - 1; i++) {
+		status = nfsd4_decode_nl4_server(argp, ns_dummy);
+		if (status) {
+			kfree(ns_dummy);
+			return status;
+		}
+	}
+	kfree(ns_dummy);
+intra:
 
 	DECODE_TAIL;
 }
@@ -1775,6 +1838,18 @@ nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
 }
 
 static __be32
+nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+			 struct nfsd4_copy_notify *cn)
+{
+	int status;
+
+	status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid);
+	if (status)
+		return status;
+	return nfsd4_decode_nl4_server(argp, &cn->cpn_dst);
+}
+
+static __be32
 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
 {
 	DECODE_HEAD;
@@ -1875,7 +1950,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
 	/* new operations for NFSv4.2 */
 	[OP_ALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
 	[OP_COPY]		= (nfsd4_dec)nfsd4_decode_copy,
-	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_copy_notify,
 	[OP_DEALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
 	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_LAYOUTERROR]	= (nfsd4_dec)nfsd4_decode_notsupp,
@@ -2024,11 +2099,11 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
  */
 static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
 {
-	struct timespec ts;
+	struct timespec64 ts;
 	u32 ns;
 
 	ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran);
-	ts = ns_to_timespec(ns);
+	ts = ns_to_timespec64(ns);
 
 	p = xdr_encode_hyper(p, ts.tv_sec);
 	*p++ = cpu_to_be32(ts.tv_nsec);
@@ -4244,6 +4319,46 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp,
 }
 
 static __be32
+nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	struct nfs42_netaddr *addr;
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4);
+	*p++ = cpu_to_be32(ns->nl4_type);
+
+	switch (ns->nl4_type) {
+	case NL4_NETADDR:
+		addr = &ns->u.nl4_addr;
+
+		/* netid_len, netid, uaddr_len, uaddr (port included
+		 * in RPCBIND_MAXUADDRLEN)
+		 */
+		p = xdr_reserve_space(xdr,
+			4 /* netid len */ +
+			(XDR_QUADLEN(addr->netid_len) * 4) +
+			4 /* uaddr len */ +
+			(XDR_QUADLEN(addr->addr_len) * 4));
+		if (!p)
+			return nfserr_resource;
+
+		*p++ = cpu_to_be32(addr->netid_len);
+		p = xdr_encode_opaque_fixed(p, addr->netid,
+					    addr->netid_len);
+		*p++ = cpu_to_be32(addr->addr_len);
+		p = xdr_encode_opaque_fixed(p, addr->addr,
+					addr->addr_len);
+		break;
+	default:
+		WARN_ON_ONCE(ns->nl4_type != NL4_NETADDR);
+		return nfserr_inval;
+	}
+
+	return 0;
+}
+
+static __be32
 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_copy *copy)
 {
@@ -4277,6 +4392,40 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
 }
 
 static __be32
+nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+			 struct nfsd4_copy_notify *cn)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	__be32 *p;
+
+	if (nfserr)
+		return nfserr;
+
+	/* 8 sec, 4 nsec */
+	p = xdr_reserve_space(xdr, 12);
+	if (!p)
+		return nfserr_resource;
+
+	/* cnr_lease_time */
+	p = xdr_encode_hyper(p, cn->cpn_sec);
+	*p++ = cpu_to_be32(cn->cpn_nsec);
+
+	/* cnr_stateid */
+	nfserr = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid);
+	if (nfserr)
+		return nfserr;
+
+	/* cnr_src.nl_nsvr */
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		return nfserr_resource;
+
+	*p++ = cpu_to_be32(1);
+
+	return nfsd42_encode_nl4_server(resp, &cn->cpn_src);
+}
+
+static __be32
 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_seek *seek)
 {
@@ -4373,7 +4522,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
 	/* NFSv4.2 operations */
 	[OP_ALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_COPY]		= (nfsd4_enc)nfsd4_encode_copy,
-	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_copy_notify,
 	[OP_DEALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_LAYOUTERROR]	= (nfsd4_enc)nfsd4_encode_noop,
@@ -4500,8 +4649,6 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
 	__be32 *p;
 	struct nfs4_replay *rp = op->replay;
 
-	BUG_ON(!rp);
-
 	p = xdr_reserve_space(xdr, 8 + rp->rp_buflen);
 	if (!p) {
 		WARN_ON_ONCE(1);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 11b42c523f04..e109a1007704 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -157,11 +157,11 @@ static int exports_proc_open(struct inode *inode, struct file *file)
 	return exports_net_open(current->nsproxy->net_ns, file);
 }
 
-static const struct file_operations exports_proc_operations = {
-	.open		= exports_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
+static const struct proc_ops exports_proc_ops = {
+	.proc_open	= exports_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= seq_release,
 };
 
 static int exports_nfsd_open(struct inode *inode, struct file *file)
@@ -956,7 +956,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
 
 #ifdef CONFIG_NFSD_V4
 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
-				  time_t *time, struct nfsd_net *nn)
+				  time64_t *time, struct nfsd_net *nn)
 {
 	char *mesg = buf;
 	int rv, i;
@@ -984,11 +984,11 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
 		*time = i;
 	}
 
-	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
+	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%lld\n", *time);
 }
 
 static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size,
-				time_t *time, struct nfsd_net *nn)
+				time64_t *time, struct nfsd_net *nn)
 {
 	ssize_t rv;
 
@@ -1431,8 +1431,7 @@ static int create_proc_exports_entry(void)
 	entry = proc_mkdir("fs/nfs", NULL);
 	if (!entry)
 		return -ENOMEM;
-	entry = proc_create("exports", 0, entry,
-				 &exports_proc_operations);
+	entry = proc_create("exports", 0, entry, &exports_proc_ops);
 	if (!entry) {
 		remove_proc_entry("fs/nfs", NULL);
 		return -ENOMEM;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 57b93d95fa5c..2ab5569126b8 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -19,6 +19,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svc_xprt.h>
 #include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/addr.h>
 
 #include <uapi/linux/nfsd/debug.h>
 
@@ -142,7 +143,6 @@ int nfs4_state_start(void);
 int nfs4_state_start_net(struct net *net);
 void nfs4_state_shutdown(void);
 void nfs4_state_shutdown_net(struct net *net);
-void nfs4_reset_lease(time_t leasetime);
 int nfs4_reset_recoverydir(char *recdir);
 char * nfs4_recoverydir(void);
 bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
@@ -153,7 +153,6 @@ static inline int nfs4_state_start(void) { return 0; }
 static inline int nfs4_state_start_net(struct net *net) { return 0; }
 static inline void nfs4_state_shutdown(void) { }
 static inline void nfs4_state_shutdown_net(struct net *net) { }
-static inline void nfs4_reset_lease(time_t leasetime) { }
 static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
 static inline char * nfs4_recoverydir(void) {return NULL; }
 static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
@@ -387,6 +386,37 @@ void		nfsd_lockd_shutdown(void);
 
 extern const u32 nfsd_suppattrs[3][3];
 
+static inline __be32 nfsd4_set_netaddr(struct sockaddr *addr,
+				    struct nfs42_netaddr *netaddr)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
+	unsigned int port;
+	size_t ret_addr, ret_port;
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		port = ntohs(sin->sin_port);
+		sprintf(netaddr->netid, "tcp");
+		netaddr->netid_len = 3;
+		break;
+	case AF_INET6:
+		port = ntohs(sin6->sin6_port);
+		sprintf(netaddr->netid, "tcp6");
+		netaddr->netid_len = 4;
+		break;
+	default:
+		return nfserr_inval;
+	}
+	ret_addr = rpc_ntop(addr, netaddr->addr, sizeof(netaddr->addr));
+	ret_port = snprintf(netaddr->addr + ret_addr,
+			    RPCBIND_MAXUADDRLEN + 1 - ret_addr,
+			    ".%u.%u", port >> 8, port & 0xff);
+	WARN_ON(ret_port >= RPCBIND_MAXUADDRLEN + 1 - ret_addr);
+	netaddr->addr_len = ret_addr + ret_port;
+	return 0;
+}
+
 static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
 {
 	return !((bm1[0] & ~bm2[0]) ||
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 755e256a9103..56cfbc361561 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -35,15 +35,15 @@ typedef struct svc_fh {
 
 	bool			fh_locked;	/* inode locked by us */
 	bool			fh_want_write;	/* remount protection taken */
-
+	int			fh_flags;	/* FH flags */
 #ifdef CONFIG_NFSD_V3
 	bool			fh_post_saved;	/* post-op attrs saved */
 	bool			fh_pre_saved;	/* pre-op attrs saved */
 
 	/* Pre-op attributes saved during fh_lock */
 	__u64			fh_pre_size;	/* size before operation */
-	struct timespec		fh_pre_mtime;	/* mtime before oper */
-	struct timespec		fh_pre_ctime;	/* ctime before oper */
+	struct timespec64	fh_pre_mtime;	/* mtime before oper */
+	struct timespec64	fh_pre_ctime;	/* ctime before oper */
 	/*
 	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
 	 *  to find out if it is valid.
@@ -56,6 +56,9 @@ typedef struct svc_fh {
 #endif /* CONFIG_NFSD_V3 */
 
 } svc_fh;
+#define NFSD4_FH_FOREIGN (1<<0)
+#define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f))
+#define HAS_FH_FLAG(c, f) ((c)->fh_flags & (f))
 
 enum nfsd_fsid {
 	FSID_DEV = 0,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index c83ddac22f38..543bbe0a556e 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -94,7 +94,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
 		 * Solaris, at least, doesn't seem to care what the time
 		 * request is.  We require it be within 30 minutes of now.
 		 */
-		time_t delta = iap->ia_atime.tv_sec - get_seconds();
+		time64_t delta = iap->ia_atime.tv_sec - ktime_get_real_seconds();
 
 		nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
 		if (nfserr)
@@ -113,7 +113,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
 		}
 	}
 
-	nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0);
+	nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0);
 done:
 	return nfsd_return_attrs(nfserr, resp);
 }
@@ -226,7 +226,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
 		return nfserr_io;
 	nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
 			    argp->offset, rqstp->rq_vec, nvecs,
-			    &cnt, NFS_DATA_SYNC);
+			    &cnt, NFS_DATA_SYNC, NULL);
 	return nfsd_return_attrs(nfserr, resp);
 }
 
@@ -380,7 +380,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
 		 */
 		attr->ia_valid &= ATTR_SIZE;
 		if (attr->ia_valid)
-			nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0);
+			nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time64_t)0);
 	}
 
 out_unlock:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e8bee8ff30c5..3b77b904212d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -31,6 +31,12 @@
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
+bool inter_copy_offload_enable;
+EXPORT_SYMBOL_GPL(inter_copy_offload_enable);
+module_param(inter_copy_offload_enable, bool, 0644);
+MODULE_PARM_DESC(inter_copy_offload_enable,
+		 "Enable inter server to server copy offload. Default: false");
+
 extern struct svc_program	nfsd_program;
 static int			nfsd(void *vrqstp);
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@@ -391,20 +397,25 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
 		ret = lockd_up(net, cred);
 		if (ret)
 			goto out_socks;
-		nn->lockd_up = 1;
+		nn->lockd_up = true;
 	}
 
-	ret = nfs4_state_start_net(net);
+	ret = nfsd_file_cache_start_net(net);
 	if (ret)
 		goto out_lockd;
+	ret = nfs4_state_start_net(net);
+	if (ret)
+		goto out_filecache;
 
 	nn->nfsd_net_up = true;
 	return 0;
 
+out_filecache:
+	nfsd_file_cache_shutdown_net(net);
 out_lockd:
 	if (nn->lockd_up) {
 		lockd_down(net);
-		nn->lockd_up = 0;
+		nn->lockd_up = false;
 	}
 out_socks:
 	nfsd_shutdown_generic();
@@ -415,11 +426,11 @@ static void nfsd_shutdown_net(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	nfsd_file_cache_purge(net);
+	nfsd_file_cache_shutdown_net(net);
 	nfs4_state_shutdown_net(net);
 	if (nn->lockd_up) {
 		lockd_down(net);
-		nn->lockd_up = 0;
+		nn->lockd_up = false;
 	}
 	nn->nfsd_net_up = false;
 	nfsd_shutdown_generic();
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d61b83b9654c..68d3f30ee760 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -56,6 +56,14 @@ typedef struct {
 	stateid_opaque_t        si_opaque;
 } stateid_t;
 
+typedef struct {
+	stateid_t		stid;
+#define NFS4_COPY_STID 1
+#define NFS4_COPYNOTIFY_STID 2
+	unsigned char		sc_type;
+	refcount_t		sc_count;
+} copy_stateid_t;
+
 #define STATEID_FMT	"(%08x/%08x/%08x/%08x)"
 #define STATEID_VAL(s) \
 	(s)->si_opaque.so_clid.cl_boot, \
@@ -96,6 +104,7 @@ struct nfs4_stid {
 #define NFS4_REVOKED_DELEG_STID 16
 #define NFS4_CLOSED_DELEG_STID 32
 #define NFS4_LAYOUT_STID 64
+	struct list_head	sc_cp_list;
 	unsigned char		sc_type;
 	stateid_t		sc_stateid;
 	spinlock_t		sc_lock;
@@ -104,6 +113,17 @@ struct nfs4_stid {
 	void			(*sc_free)(struct nfs4_stid *);
 };
 
+/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the
+ * parent OPEN/LOCK/DELEG stateid.
+ */
+struct nfs4_cpntf_state {
+	copy_stateid_t		cp_stateid;
+	struct list_head	cp_list;	/* per parent nfs4_stid */
+	stateid_t		cp_p_stateid;	/* copy of parent's stateid */
+	clientid_t		cp_p_clid;	/* copy of parent's clid */
+	time64_t		cpntf_time;	/* last time stateid used */
+};
+
 /*
  * Represents a delegation stateid. The nfs4_client holds references to these
  * and they are put when it is being destroyed or when the delegation is
@@ -132,7 +152,7 @@ struct nfs4_delegation {
 	struct list_head	dl_recall_lru;  /* delegation recalled */
 	struct nfs4_clnt_odstate *dl_clnt_odstate;
 	u32			dl_type;
-	time_t			dl_time;
+	time64_t		dl_time;
 /* For recall: */
 	int			dl_retries;
 	struct nfsd4_callback	dl_recall;
@@ -310,7 +330,7 @@ struct nfs4_client {
 #endif
 	struct xdr_netobj	cl_name; 	/* id generated by client */
 	nfs4_verifier		cl_verifier; 	/* generated by client */
-	time_t                  cl_time;        /* time of last lease renewal */
+	time64_t		cl_time;	/* time of last lease renewal */
 	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
 	bool			cl_mach_cred;	/* SP4_MACH_CRED in force */
 	struct svc_cred		cl_cred; 	/* setclientid principal */
@@ -320,7 +340,7 @@ struct nfs4_client {
 	/* NFSv4.1 client implementation id: */
 	struct xdr_netobj	cl_nii_domain;
 	struct xdr_netobj	cl_nii_name;
-	struct timespec		cl_nii_time;
+	struct timespec64	cl_nii_time;
 
 	/* for v4.0 and v4.1 callbacks: */
 	struct nfs4_cb_conn	cl_cb_conn;
@@ -449,7 +469,7 @@ struct nfs4_openowner {
 	 */
 	struct list_head	oo_close_lru;
 	struct nfs4_ol_stateid *oo_last_closed_stid;
-	time_t			oo_time; /* time of placement on so_close_lru */
+	time64_t		oo_time; /* time of placement on so_close_lru */
 #define NFS4_OO_CONFIRMED   1
 	unsigned char		oo_flags;
 };
@@ -606,7 +626,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
 struct nfsd4_blocked_lock {
 	struct list_head	nbl_list;
 	struct list_head	nbl_lru;
-	unsigned long		nbl_time;
+	time64_t		nbl_time;
 	struct file_lock	nbl_lock;
 	struct knfsd_fh		nbl_fh;
 	struct nfsd4_callback	nbl_cb;
@@ -618,14 +638,17 @@ struct nfsd4_copy;
 
 extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-		stateid_t *stateid, int flags, struct nfsd_file **filp);
+		stateid_t *stateid, int flags, struct nfsd_file **filp,
+		struct nfs4_stid **cstid);
 __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		     stateid_t *stateid, unsigned char typemask,
 		     struct nfs4_stid **s, struct nfsd_net *nn);
 struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
 				  void (*sc_free)(struct nfs4_stid *));
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
-void nfs4_free_cp_state(struct nfsd4_copy *copy);
+int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
+void nfs4_free_copy_state(struct nfsd4_copy *copy);
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+			struct nfs4_stid *p_stid);
 void nfs4_unhash_stid(struct nfs4_stid *s);
 void nfs4_put_stid(struct nfs4_stid *s);
 void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
@@ -655,6 +678,11 @@ void put_nfs4_file(struct nfs4_file *fi);
 extern void nfs4_put_copy(struct nfsd4_copy *copy);
 extern struct nfsd4_copy *
 find_async_copy(struct nfs4_client *clp, stateid_t *staetid);
+extern void nfs4_put_cpntf_state(struct nfsd_net *nn,
+				 struct nfs4_cpntf_state *cps);
+extern __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+				 struct nfs4_client *clp,
+				 struct nfs4_cpntf_state **cps);
 static inline void get_nfs4_file(struct nfs4_file *fi)
 {
 	refcount_inc(&fi->fi_ref);
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 9bce3b913189..b1bc582b0493 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -84,17 +84,17 @@ static int nfsd_proc_open(struct inode *inode, struct file *file)
 	return single_open(file, nfsd_proc_show, NULL);
 }
 
-static const struct file_operations nfsd_proc_fops = {
-	.open = nfsd_proc_open,
-	.read  = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
+static const struct proc_ops nfsd_proc_ops = {
+	.proc_open	= nfsd_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
 };
 
 void
 nfsd_stat_init(void)
 {
-	svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops);
+	svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
 }
 
 void
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index ffc78a0e28b2..06dd0d337049 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -166,6 +166,12 @@ DEFINE_STATEID_EVENT(layout_recall_done);
 DEFINE_STATEID_EVENT(layout_recall_fail);
 DEFINE_STATEID_EVENT(layout_recall_release);
 
+TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
+TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
+TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
+TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE);
+TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED);
+
 #define show_nf_flags(val)						\
 	__print_flags(val, "|",						\
 		{ 1 << NFSD_FILE_HASHED,	"HASHED" },		\
@@ -195,7 +201,7 @@ DECLARE_EVENT_CLASS(nfsd_file_class,
 	TP_fast_assign(
 		__entry->nf_hashval = nf->nf_hashval;
 		__entry->nf_inode = nf->nf_inode;
-		__entry->nf_ref = atomic_read(&nf->nf_ref);
+		__entry->nf_ref = refcount_read(&nf->nf_ref);
 		__entry->nf_flags = nf->nf_flags;
 		__entry->nf_may = nf->nf_may;
 		__entry->nf_file = nf->nf_file;
@@ -228,7 +234,7 @@ TRACE_EVENT(nfsd_file_acquire,
 	TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
 
 	TP_STRUCT__entry(
-		__field(__be32, xid)
+		__field(u32, xid)
 		__field(unsigned int, hash)
 		__field(void *, inode)
 		__field(unsigned int, may_flags)
@@ -236,27 +242,27 @@ TRACE_EVENT(nfsd_file_acquire,
 		__field(unsigned long, nf_flags)
 		__field(unsigned char, nf_may)
 		__field(struct file *, nf_file)
-		__field(__be32, status)
+		__field(u32, status)
 	),
 
 	TP_fast_assign(
-		__entry->xid = rqstp->rq_xid;
+		__entry->xid = be32_to_cpu(rqstp->rq_xid);
 		__entry->hash = hash;
 		__entry->inode = inode;
 		__entry->may_flags = may_flags;
-		__entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0;
+		__entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0;
 		__entry->nf_flags = nf ? nf->nf_flags : 0;
 		__entry->nf_may = nf ? nf->nf_may : 0;
 		__entry->nf_file = nf ? nf->nf_file : NULL;
-		__entry->status = status;
+		__entry->status = be32_to_cpu(status);
 	),
 
 	TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
-			be32_to_cpu(__entry->xid), __entry->hash, __entry->inode,
+			__entry->xid, __entry->hash, __entry->inode,
 			show_nf_may(__entry->may_flags), __entry->nf_ref,
 			show_nf_flags(__entry->nf_flags),
 			show_nf_may(__entry->nf_may), __entry->nf_file,
-			be32_to_cpu(__entry->status))
+			__entry->status)
 );
 
 DECLARE_EVENT_CLASS(nfsd_file_search_class,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c0dc491537a6..0aa02eb18bd3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -280,19 +280,25 @@ out:
  * Commit metadata changes to stable storage.
  */
 static int
-commit_metadata(struct svc_fh *fhp)
+commit_inode_metadata(struct inode *inode)
 {
-	struct inode *inode = d_inode(fhp->fh_dentry);
 	const struct export_operations *export_ops = inode->i_sb->s_export_op;
 
-	if (!EX_ISSYNC(fhp->fh_export))
-		return 0;
-
 	if (export_ops->commit_metadata)
 		return export_ops->commit_metadata(inode);
 	return sync_inode_metadata(inode, 1);
 }
 
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+	struct inode *inode = d_inode(fhp->fh_dentry);
+
+	if (!EX_ISSYNC(fhp->fh_export))
+		return 0;
+	return commit_inode_metadata(inode);
+}
+
 /*
  * Go over the attributes and take care of the small differences between
  * NFS semantics and what Linux expects.
@@ -358,7 +364,7 @@ out_nfserrno:
  */
 __be32
 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
-	     int check_guard, time_t guardtime)
+	     int check_guard, time64_t guardtime)
 {
 	struct dentry	*dentry;
 	struct inode	*inode;
@@ -524,23 +530,39 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 }
 #endif
 
-__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
-		u64 dst_pos, u64 count, bool sync)
+__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+		struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
 {
+	struct file *src = nf_src->nf_file;
+	struct file *dst = nf_dst->nf_file;
 	loff_t cloned;
+	__be32 ret = 0;
 
+	down_write(&nf_dst->nf_rwsem);
 	cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
-	if (cloned < 0)
-		return nfserrno(cloned);
-	if (count && cloned != count)
-		return nfserrno(-EINVAL);
+	if (cloned < 0) {
+		ret = nfserrno(cloned);
+		goto out_err;
+	}
+	if (count && cloned != count) {
+		ret = nfserrno(-EINVAL);
+		goto out_err;
+	}
 	if (sync) {
 		loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
 		int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
-		if (status < 0)
-			return nfserrno(status);
+
+		if (!status)
+			status = commit_inode_metadata(file_inode(src));
+		if (status < 0) {
+			nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
+						 nfsd_net_id));
+			ret = nfserrno(status);
+		}
 	}
-	return 0;
+out_err:
+	up_write(&nf_dst->nf_rwsem);
+	return ret;
 }
 
 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
@@ -938,10 +960,12 @@ static int wait_for_concurrent_writes(struct file *file)
 }
 
 __be32
-nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
 				loff_t offset, struct kvec *vec, int vlen,
-				unsigned long *cnt, int stable)
+				unsigned long *cnt, int stable,
+				__be32 *verf)
 {
+	struct file		*file = nf->nf_file;
 	struct svc_export	*exp;
 	struct iov_iter		iter;
 	__be32			nfserr;
@@ -972,9 +996,28 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		flags |= RWF_SYNC;
 
 	iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
-	host_err = vfs_iter_write(file, &iter, &pos, flags);
-	if (host_err < 0)
+	if (flags & RWF_SYNC) {
+		down_write(&nf->nf_rwsem);
+		host_err = vfs_iter_write(file, &iter, &pos, flags);
+		if (host_err < 0)
+			nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+						 nfsd_net_id));
+		up_write(&nf->nf_rwsem);
+	} else {
+		down_read(&nf->nf_rwsem);
+		if (verf)
+			nfsd_copy_boot_verifier(verf,
+					net_generic(SVC_NET(rqstp),
+					nfsd_net_id));
+		host_err = vfs_iter_write(file, &iter, &pos, flags);
+		up_read(&nf->nf_rwsem);
+	}
+	if (host_err < 0) {
+		nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+					 nfsd_net_id));
 		goto out_nfserr;
+	}
+	*cnt = host_err;
 	nfsdstats.io_write += *cnt;
 	fsnotify_modify(file);
 
@@ -1036,7 +1079,8 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
  */
 __be32
 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
-	   struct kvec *vec, int vlen, unsigned long *cnt, int stable)
+	   struct kvec *vec, int vlen, unsigned long *cnt, int stable,
+	   __be32 *verf)
 {
 	struct nfsd_file *nf;
 	__be32 err;
@@ -1047,8 +1091,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 	if (err)
 		goto out;
 
-	err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec,
-			vlen, cnt, stable);
+	err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec,
+			vlen, cnt, stable, verf);
 	nfsd_file_put(nf);
 out:
 	trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
@@ -1067,7 +1111,7 @@ out:
  */
 __be32
 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
-               loff_t offset, unsigned long count)
+               loff_t offset, unsigned long count, __be32 *verf)
 {
 	struct nfsd_file	*nf;
 	loff_t			end = LLONG_MAX;
@@ -1086,10 +1130,14 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (err)
 		goto out;
 	if (EX_ISSYNC(fhp->fh_export)) {
-		int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+		int err2;
 
+		down_write(&nf->nf_rwsem);
+		err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
 		switch (err2) {
 		case 0:
+			nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+						nfsd_net_id));
 			break;
 		case -EINVAL:
 			err = nfserr_notsupp;
@@ -1099,7 +1147,10 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			nfsd_reset_boot_verifier(net_generic(nf->nf_net,
 						 nfsd_net_id));
 		}
-	}
+		up_write(&nf->nf_rwsem);
+	} else
+		nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+					nfsd_net_id));
 
 	nfsd_file_put(nf);
 out:
@@ -1123,7 +1174,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
 	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
 		iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
 	if (iap->ia_valid)
-		return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+		return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
 	/* Callers expect file metadata to be committed here */
 	return nfserrno(commit_metadata(resfhp));
 }
@@ -1386,7 +1437,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			    && d_inode(dchild)->i_atime.tv_sec == v_atime
 			    && d_inode(dchild)->i_size  == 0 ) {
 				if (created)
-					*created = 1;
+					*created = true;
 				break;
 			}
 			/* fall through */
@@ -1395,7 +1446,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			    && d_inode(dchild)->i_atime.tv_sec == v_atime
 			    && d_inode(dchild)->i_size  == 0 ) {
 				if (created)
-					*created = 1;
+					*created = true;
 				goto set_attr;
 			}
 			/* fall through */
@@ -1412,7 +1463,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out_nfserr;
 	}
 	if (created)
-		*created = 1;
+		*created = true;
 
 	nfsd_check_ignore_resizing(iap);
 
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index cc110a10bfe8..3eb660ad80d1 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -34,6 +34,8 @@
 #define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
 #define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
 
+struct nfsd_file;
+
 /*
  * Callback function for readdir
  */
@@ -48,15 +50,16 @@ __be32		 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
 				const char *, unsigned int,
 				struct svc_export **, struct dentry **);
 __be32		nfsd_setattr(struct svc_rqst *, struct svc_fh *,
-				struct iattr *, int, time_t);
+				struct iattr *, int, time64_t);
 int nfsd_mountpoint(struct dentry *, struct svc_export *);
 #ifdef CONFIG_NFSD_V4
 __be32          nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
 		    struct xdr_netobj *);
 __be32		nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
 				    struct file *, loff_t, loff_t, int);
-__be32		nfsd4_clone_file_range(struct file *, u64, struct file *,
-				       u64, u64, bool);
+__be32		nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+				       struct nfsd_file *nf_dst, u64 dst_pos,
+				       u64 count, bool sync);
 #endif /* CONFIG_NFSD_V4 */
 __be32		nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
 				char *name, int len, struct iattr *attrs,
@@ -71,7 +74,7 @@ __be32		do_nfsd_create(struct svc_rqst *, struct svc_fh *,
 				struct svc_fh *res, int createmode,
 				u32 *verifier, bool *truncp, bool *created);
 __be32		nfsd_commit(struct svc_rqst *, struct svc_fh *,
-				loff_t, unsigned long);
+				loff_t, unsigned long, __be32 *verf);
 #endif /* CONFIG_NFSD_V3 */
 int 		nfsd_open_break_lease(struct inode *, int);
 __be32		nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
@@ -91,11 +94,12 @@ __be32 		nfsd_read(struct svc_rqst *, struct svc_fh *,
 				loff_t, struct kvec *, int, unsigned long *,
 				u32 *eof);
 __be32 		nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
-				struct kvec *, int, unsigned long *, int);
+				struct kvec *, int, unsigned long *,
+				int stable, __be32 *verf);
 __be32		nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
-				struct file *file, loff_t offset,
+				struct nfsd_file *nf, loff_t offset,
 				struct kvec *vec, int vlen, unsigned long *cnt,
-				int stable);
+				int stable, __be32 *verf);
 __be32		nfsd_readlink(struct svc_rqst *, struct svc_fh *,
 				char *, int *);
 __be32		nfsd_symlink(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 99ff9f403ff1..4155fd71714c 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -14,7 +14,7 @@ struct nfsd3_sattrargs {
 	struct svc_fh		fh;
 	struct iattr		attrs;
 	int			check_guard;
-	time_t			guardtime;
+	time64_t		guardtime;
 };
 
 struct nfsd3_diropargs {
@@ -159,6 +159,7 @@ struct nfsd3_writeres {
 	struct svc_fh		fh;
 	unsigned long		count;
 	int			committed;
+	__be32			verf[2];
 };
 
 struct nfsd3_renameres {
@@ -223,6 +224,7 @@ struct nfsd3_pathconfres {
 struct nfsd3_commitres {
 	__be32			status;
 	struct svc_fh		fh;
+	__be32			verf[2];
 };
 
 struct nfsd3_getaclres {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index f4737d66ee98..db63d39b1507 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -46,9 +46,9 @@
 #define CURRENT_STATE_ID_FLAG (1<<0)
 #define SAVED_STATE_ID_FLAG (1<<1)
 
-#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f))
-#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f))
-#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f))
+#define SET_CSTATE_FLAG(c, f) ((c)->sid_flags |= (f))
+#define HAS_CSTATE_FLAG(c, f) ((c)->sid_flags & (f))
+#define CLEAR_CSTATE_FLAG(c, f) ((c)->sid_flags &= ~(f))
 
 struct nfsd4_compound_state {
 	struct svc_fh		current_fh;
@@ -221,6 +221,7 @@ struct nfsd4_lookup {
 struct nfsd4_putfh {
 	u32		pf_fhlen;           /* request */
 	char		*pf_fhval;          /* request */
+	bool		no_verify;	    /* represents foreigh fh */
 };
 
 struct nfsd4_open {
@@ -518,11 +519,13 @@ struct nfsd42_write_res {
 
 struct nfsd4_copy {
 	/* request */
-	stateid_t	cp_src_stateid;
-	stateid_t	cp_dst_stateid;
-	u64		cp_src_pos;
-	u64		cp_dst_pos;
-	u64		cp_count;
+	stateid_t		cp_src_stateid;
+	stateid_t		cp_dst_stateid;
+	u64			cp_src_pos;
+	u64			cp_dst_pos;
+	u64			cp_count;
+	struct nl4_server	cp_src;
+	bool			cp_intra;
 
 	/* both */
 	bool		cp_synchronous;
@@ -540,13 +543,18 @@ struct nfsd4_copy {
 	struct nfsd_file        *nf_src;
 	struct nfsd_file        *nf_dst;
 
-	stateid_t		cp_stateid;
+	copy_stateid_t		cp_stateid;
 
 	struct list_head	copies;
 	struct task_struct	*copy_task;
 	refcount_t		refcount;
 	bool			stopped;
+
+	struct vfsmount		*ss_mnt;
+	struct nfs_fh		c_fh;
+	nfs4_stateid		stateid;
 };
+extern bool inter_copy_offload_enable;
 
 struct nfsd4_seek {
 	/* request */
@@ -568,6 +576,18 @@ struct nfsd4_offload_status {
 	u32		status;
 };
 
+struct nfsd4_copy_notify {
+	/* request */
+	stateid_t		cpn_src_stateid;
+	struct nl4_server	cpn_dst;
+
+	/* response */
+	stateid_t		cpn_cnr_stateid;
+	u64			cpn_sec;
+	u32			cpn_nsec;
+	struct nl4_server	cpn_src;
+};
+
 struct nfsd4_op {
 	int					opnum;
 	const struct nfsd4_operation *		opdesc;
@@ -627,6 +647,7 @@ struct nfsd4_op {
 		struct nfsd4_clone		clone;
 		struct nfsd4_copy		copy;
 		struct nfsd4_offload_status	offload_status;
+		struct nfsd4_copy_notify	copy_notify;
 		struct nfsd4_seek		seek;
 	} u;
 	struct nfs4_replay *			replay;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 6c7388430ad3..d4359a1df3d5 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2899,18 +2899,12 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 			ia_valid |= ATTR_MTIME | ATTR_CTIME;
 		}
 	}
-	if (ia_valid & ATTR_ATIME) {
-		vi->i_atime = timestamp_truncate(attr->ia_atime,
-					       vi);
-	}
-	if (ia_valid & ATTR_MTIME) {
-		vi->i_mtime = timestamp_truncate(attr->ia_mtime,
-					       vi);
-	}
-	if (ia_valid & ATTR_CTIME) {
-		vi->i_ctime = timestamp_truncate(attr->ia_ctime,
-					       vi);
-	}
+	if (ia_valid & ATTR_ATIME)
+		vi->i_atime = attr->ia_atime;
+	if (ia_valid & ATTR_MTIME)
+		vi->i_mtime = attr->ia_mtime;
+	if (ia_valid & ATTR_CTIME)
+		vi->i_ctime = attr->ia_ctime;
 	mark_inode_dirty(vi);
 out:
 	return err;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9876db52913a..6cd5e4924e4d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2101,17 +2101,15 @@ static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
 static int ocfs2_inode_lock_for_extent_tree(struct inode *inode,
 					    struct buffer_head **di_bh,
 					    int meta_level,
-					    int overwrite_io,
 					    int write_sem,
 					    int wait)
 {
 	int ret = 0;
 
 	if (wait)
-		ret = ocfs2_inode_lock(inode, NULL, meta_level);
+		ret = ocfs2_inode_lock(inode, di_bh, meta_level);
 	else
-		ret = ocfs2_try_inode_lock(inode,
-			overwrite_io ? NULL : di_bh, meta_level);
+		ret = ocfs2_try_inode_lock(inode, di_bh, meta_level);
 	if (ret < 0)
 		goto out;
 
@@ -2136,6 +2134,7 @@ static int ocfs2_inode_lock_for_extent_tree(struct inode *inode,
 
 out_unlock:
 	brelse(*di_bh);
+	*di_bh = NULL;
 	ocfs2_inode_unlock(inode, meta_level);
 out:
 	return ret;
@@ -2177,7 +2176,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 		ret = ocfs2_inode_lock_for_extent_tree(inode,
 						       &di_bh,
 						       meta_level,
-						       overwrite_io,
 						       write_sem,
 						       wait);
 		if (ret < 0) {
@@ -2233,13 +2231,13 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 							   &di_bh,
 							   meta_level,
 							   write_sem);
+			meta_level = 1;
+			write_sem = 1;
 			ret = ocfs2_inode_lock_for_extent_tree(inode,
 							       &di_bh,
 							       meta_level,
-							       overwrite_io,
-							       1,
+							       write_sem,
 							       wait);
-			write_sem = 1;
 			if (ret < 0) {
 				if (ret != -EAGAIN)
 					mlog_errno(ret);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 4180c3ef0a68..939df99d2dec 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -696,7 +696,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
 					       ac, cl);
-	if (IS_ERR(bg_bh) && (PTR_ERR(bg_bh) == -ENOSPC))
+	if (PTR_ERR(bg_bh) == -ENOSPC)
 		bg_bh = ocfs2_block_group_alloc_discontig(handle,
 							  alloc_inode,
 							  ac, cl);
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index 25543a966c48..29eaa4544372 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -273,6 +273,7 @@ static void *help_start(struct seq_file *m, loff_t *pos)
 
 static void *help_next(struct seq_file *m, void *v, loff_t *pos)
 {
+	(*pos)++;
 	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n");
 
 	return NULL;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 6220642fe113..9fc47c2e078d 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -24,7 +24,7 @@
 
 static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
 {
-	pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
+	pr_warn("\"check_copy_up\" module option is obsolete\n");
 	return 0;
 }
 
@@ -123,6 +123,9 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 	loff_t old_pos = 0;
 	loff_t new_pos = 0;
 	loff_t cloned;
+	loff_t data_pos = -1;
+	loff_t hole_len;
+	bool skip_hole = false;
 	int error = 0;
 
 	if (len == 0)
@@ -144,7 +147,11 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 		goto out;
 	/* Couldn't clone, so now we try to copy the data */
 
-	/* FIXME: copy up sparse files efficiently */
+	/* Check if lower fs supports seek operation */
+	if (old_file->f_mode & FMODE_LSEEK &&
+	    old_file->f_op->llseek)
+		skip_hole = true;
+
 	while (len) {
 		size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
 		long bytes;
@@ -157,6 +164,36 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 			break;
 		}
 
+		/*
+		 * Fill zero for hole will cost unnecessary disk space
+		 * and meanwhile slow down the copy-up speed, so we do
+		 * an optimization for hole during copy-up, it relies
+		 * on SEEK_DATA implementation in lower fs so if lower
+		 * fs does not support it, copy-up will behave as before.
+		 *
+		 * Detail logic of hole detection as below:
+		 * When we detect next data position is larger than current
+		 * position we will skip that hole, otherwise we copy
+		 * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually,
+		 * it may not recognize all kind of holes and sometimes
+		 * only skips partial of hole area. However, it will be
+		 * enough for most of the use cases.
+		 */
+
+		if (skip_hole && data_pos < old_pos) {
+			data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
+			if (data_pos > old_pos) {
+				hole_len = data_pos - old_pos;
+				len -= hole_len;
+				old_pos = new_pos = data_pos;
+				continue;
+			} else if (data_pos == -ENXIO) {
+				break;
+			} else if (data_pos < 0) {
+				skip_hole = false;
+			}
+		}
+
 		bytes = do_splice_direct(old_file, &old_pos,
 					 new_file, &new_pos,
 					 this_len, SPLICE_F_MOVE);
@@ -480,7 +517,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 	}
 
 	inode_lock(temp->d_inode);
-	if (c->metacopy)
+	if (S_ISREG(c->stat.mode))
 		err = ovl_set_size(temp, &c->stat);
 	if (!err)
 		err = ovl_set_attr(temp, &c->stat);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 29abdb1d3b5c..8e57d5372b8f 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -35,7 +35,7 @@ int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
 	dput(wdentry);
 
 	if (err) {
-		pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
+		pr_err("cleanup of '%pd2' failed (%i)\n",
 		       wdentry, err);
 	}
 
@@ -53,7 +53,7 @@ static struct dentry *ovl_lookup_temp(struct dentry *workdir)
 
 	temp = lookup_one_len(name, workdir, strlen(name));
 	if (!IS_ERR(temp) && temp->d_inode) {
-		pr_err("overlayfs: workdir/%s already exists\n", name);
+		pr_err("workdir/%s already exists\n", name);
 		dput(temp);
 		temp = ERR_PTR(-EIO);
 	}
@@ -134,7 +134,7 @@ static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry,
 	d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
 			   dentry->d_name.len);
 	if (IS_ERR(d)) {
-		pr_warn("overlayfs: failed lookup after mkdir (%pd2, err=%i).\n",
+		pr_warn("failed lookup after mkdir (%pd2, err=%i).\n",
 			dentry, err);
 		return PTR_ERR(d);
 	}
@@ -267,7 +267,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
 
 	d_instantiate(dentry, inode);
 	if (inode != oip.newinode) {
-		pr_warn_ratelimited("overlayfs: newly created inode found in cache (%pd2)\n",
+		pr_warn_ratelimited("newly created inode found in cache (%pd2)\n",
 				    dentry);
 	}
 
@@ -1009,7 +1009,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
 		spin_unlock(&dentry->d_lock);
 	} else {
 		kfree(redirect);
-		pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
+		pr_warn_ratelimited("failed to set redirect (%i)\n",
 				    err);
 		/* Fall back to userspace copy-up */
 		err = -EXDEV;
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 70e55588aedc..6f54d70cef27 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -30,7 +30,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry)
 	}
 
 	if (err) {
-		pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n",
+		pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n",
 				    dentry, err);
 	}
 
@@ -244,7 +244,7 @@ out:
 	return err;
 
 fail:
-	pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
+	pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
 			    dentry, err, buflen, fh ? (int)fh->fb.len : 0,
 			    fh ? fh->fb.type : 0);
 	goto out;
@@ -358,7 +358,7 @@ static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
  */
 static struct dentry *ovl_lookup_real_one(struct dentry *connected,
 					  struct dentry *real,
-					  struct ovl_layer *layer)
+					  const struct ovl_layer *layer)
 {
 	struct inode *dir = d_inode(connected);
 	struct dentry *this, *parent = NULL;
@@ -406,7 +406,7 @@ out:
 	return this;
 
 fail:
-	pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+	pr_warn_ratelimited("failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
 			    real, layer->idx, connected, err);
 	this = ERR_PTR(err);
 	goto out;
@@ -414,17 +414,16 @@ fail:
 
 static struct dentry *ovl_lookup_real(struct super_block *sb,
 				      struct dentry *real,
-				      struct ovl_layer *layer);
+				      const struct ovl_layer *layer);
 
 /*
  * Lookup an indexed or hashed overlay dentry by real inode.
  */
 static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
 					    struct dentry *real,
-					    struct ovl_layer *layer)
+					    const struct ovl_layer *layer)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
-	struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
 	struct dentry *index = NULL;
 	struct dentry *this = NULL;
 	struct inode *inode;
@@ -466,7 +465,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
 		 * recursive call walks back from indexed upper to the topmost
 		 * connected/hashed upper parent (or up to root).
 		 */
-		this = ovl_lookup_real(sb, upper, &upper_layer);
+		this = ovl_lookup_real(sb, upper, &ofs->layers[0]);
 		dput(upper);
 	}
 
@@ -487,7 +486,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
  */
 static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
 					       struct dentry *real,
-					       struct ovl_layer *layer)
+					       const struct ovl_layer *layer)
 {
 	struct dentry *next, *parent = NULL;
 	struct dentry *ancestor = ERR_PTR(-EIO);
@@ -540,7 +539,7 @@ static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
  */
 static struct dentry *ovl_lookup_real(struct super_block *sb,
 				      struct dentry *real,
-				      struct ovl_layer *layer)
+				      const struct ovl_layer *layer)
 {
 	struct dentry *connected;
 	int err = 0;
@@ -631,7 +630,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
 	return connected;
 
 fail:
-	pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+	pr_warn_ratelimited("failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
 			    real, layer->idx, connected, err);
 	dput(connected);
 	return ERR_PTR(err);
@@ -646,8 +645,7 @@ static struct dentry *ovl_get_dentry(struct super_block *sb,
 				     struct dentry *index)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
-	struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
-	struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer;
+	const struct ovl_layer *layer = upper ? &ofs->layers[0] : lowerpath->layer;
 	struct dentry *real = upper ?: (index ?: lowerpath->dentry);
 
 	/*
@@ -822,7 +820,7 @@ out:
 	return dentry;
 
 out_err:
-	pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
+	pr_warn_ratelimited("failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
 			    fh_len, fh_type, flags, err);
 	dentry = ERR_PTR(err);
 	goto out;
@@ -831,7 +829,7 @@ out_err:
 static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
 				       int fh_len, int fh_type)
 {
-	pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
+	pr_warn_ratelimited("connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
 	return ERR_PTR(-EACCES);
 }
 
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index e235a635d9ec..a5317216de73 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -9,8 +9,19 @@
 #include <linux/xattr.h>
 #include <linux/uio.h>
 #include <linux/uaccess.h>
+#include <linux/splice.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
 #include "overlayfs.h"
 
+struct ovl_aio_req {
+	struct kiocb iocb;
+	struct kiocb *orig_iocb;
+	struct fd fd;
+};
+
+static struct kmem_cache *ovl_aio_request_cachep;
+
 static char ovl_whatisit(struct inode *inode, struct inode *realinode)
 {
 	if (realinode != ovl_inode_upper(inode))
@@ -146,7 +157,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
 	struct inode *inode = file_inode(file);
 	struct fd real;
 	const struct cred *old_cred;
-	ssize_t ret;
+	loff_t ret;
 
 	/*
 	 * The two special cases below do not need to involve real fs,
@@ -171,7 +182,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
 	 * limitations that are more strict than ->s_maxbytes for specific
 	 * files, so we use the real file to perform seeks.
 	 */
-	inode_lock(inode);
+	ovl_inode_lock(inode);
 	real.file->f_pos = file->f_pos;
 
 	old_cred = ovl_override_creds(inode->i_sb);
@@ -179,7 +190,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
 	revert_creds(old_cred);
 
 	file->f_pos = real.file->f_pos;
-	inode_unlock(inode);
+	ovl_inode_unlock(inode);
 
 	fdput(real);
 
@@ -225,6 +236,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
 	return flags;
 }
 
+static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
+{
+	struct kiocb *iocb = &aio_req->iocb;
+	struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+	if (iocb->ki_flags & IOCB_WRITE) {
+		struct inode *inode = file_inode(orig_iocb->ki_filp);
+
+		file_end_write(iocb->ki_filp);
+		ovl_copyattr(ovl_inode_real(inode), inode);
+	}
+
+	orig_iocb->ki_pos = iocb->ki_pos;
+	fdput(aio_req->fd);
+	kmem_cache_free(ovl_aio_request_cachep, aio_req);
+}
+
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+{
+	struct ovl_aio_req *aio_req = container_of(iocb,
+						   struct ovl_aio_req, iocb);
+	struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+	ovl_aio_cleanup_handler(aio_req);
+	orig_iocb->ki_complete(orig_iocb, res, res2);
+}
+
 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct file *file = iocb->ki_filp;
@@ -240,10 +278,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 		return ret;
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
-			    ovl_iocb_to_rwf(iocb));
+	if (is_sync_kiocb(iocb)) {
+		ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+				    ovl_iocb_to_rwf(iocb));
+	} else {
+		struct ovl_aio_req *aio_req;
+
+		ret = -ENOMEM;
+		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+		if (!aio_req)
+			goto out;
+
+		aio_req->fd = real;
+		real.flags = 0;
+		aio_req->orig_iocb = iocb;
+		kiocb_clone(&aio_req->iocb, iocb, real.file);
+		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+		ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+		if (ret != -EIOCBQUEUED)
+			ovl_aio_cleanup_handler(aio_req);
+	}
+out:
 	revert_creds(old_cred);
-
 	ovl_file_accessed(file);
 
 	fdput(real);
@@ -274,15 +330,33 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 		goto out_unlock;
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	file_start_write(real.file);
-	ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
-			     ovl_iocb_to_rwf(iocb));
-	file_end_write(real.file);
+	if (is_sync_kiocb(iocb)) {
+		file_start_write(real.file);
+		ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+				     ovl_iocb_to_rwf(iocb));
+		file_end_write(real.file);
+		/* Update size */
+		ovl_copyattr(ovl_inode_real(inode), inode);
+	} else {
+		struct ovl_aio_req *aio_req;
+
+		ret = -ENOMEM;
+		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+		if (!aio_req)
+			goto out;
+
+		file_start_write(real.file);
+		aio_req->fd = real;
+		real.flags = 0;
+		aio_req->orig_iocb = iocb;
+		kiocb_clone(&aio_req->iocb, iocb, real.file);
+		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
+		if (ret != -EIOCBQUEUED)
+			ovl_aio_cleanup_handler(aio_req);
+	}
+out:
 	revert_creds(old_cred);
-
-	/* Update size */
-	ovl_copyattr(ovl_inode_real(inode), inode);
-
 	fdput(real);
 
 out_unlock:
@@ -291,6 +365,48 @@ out_unlock:
 	return ret;
 }
 
+static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
+			 struct pipe_inode_info *pipe, size_t len,
+			 unsigned int flags)
+{
+	ssize_t ret;
+	struct fd real;
+	const struct cred *old_cred;
+
+	ret = ovl_real_fdget(in, &real);
+	if (ret)
+		return ret;
+
+	old_cred = ovl_override_creds(file_inode(in)->i_sb);
+	ret = generic_file_splice_read(real.file, ppos, pipe, len, flags);
+	revert_creds(old_cred);
+
+	ovl_file_accessed(in);
+	fdput(real);
+	return ret;
+}
+
+static ssize_t
+ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+			  loff_t *ppos, size_t len, unsigned int flags)
+{
+	struct fd real;
+	const struct cred *old_cred;
+	ssize_t ret;
+
+	ret = ovl_real_fdget(out, &real);
+	if (ret)
+		return ret;
+
+	old_cred = ovl_override_creds(file_inode(out)->i_sb);
+	ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
+	revert_creds(old_cred);
+
+	ovl_file_accessed(out);
+	fdput(real);
+	return ret;
+}
+
 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct fd real;
@@ -647,7 +763,25 @@ const struct file_operations ovl_file_operations = {
 	.fadvise	= ovl_fadvise,
 	.unlocked_ioctl	= ovl_ioctl,
 	.compat_ioctl	= ovl_compat_ioctl,
+	.splice_read    = ovl_splice_read,
+	.splice_write   = ovl_splice_write,
 
 	.copy_file_range	= ovl_copy_file_range,
 	.remap_file_range	= ovl_remap_file_range,
 };
+
+int __init ovl_aio_request_cache_init(void)
+{
+	ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
+						   sizeof(struct ovl_aio_req),
+						   0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!ovl_aio_request_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ovl_aio_request_cache_destroy(void)
+{
+	kmem_cache_destroy(ovl_aio_request_cachep);
+}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b045cf1826fc..79e8994e3bc1 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -75,10 +75,9 @@ out:
 	return err;
 }
 
-static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
-			   struct ovl_layer *lower_layer)
+static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
 {
-	bool samefs = ovl_same_sb(dentry->d_sb);
+	bool samefs = ovl_same_fs(dentry->d_sb);
 	unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
 
 	if (samefs) {
@@ -100,12 +99,10 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
 		 * persistent for a given layer configuration.
 		 */
 		if (stat->ino >> shift) {
-			pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
+			pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
 					    dentry, stat->ino, xinobits);
 		} else {
-			if (lower_layer)
-				stat->ino |= ((u64)lower_layer->fsid) << shift;
-
+			stat->ino |= ((u64)fsid) << shift;
 			stat->dev = dentry->d_sb->s_dev;
 			return 0;
 		}
@@ -124,15 +121,14 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
 		 */
 		stat->dev = dentry->d_sb->s_dev;
 		stat->ino = dentry->d_inode->i_ino;
-	} else if (lower_layer && lower_layer->fsid) {
+	} else {
 		/*
 		 * For non-samefs setup, if we cannot map all layers st_ino
 		 * to a unified address space, we need to make sure that st_dev
-		 * is unique per lower fs. Upper layer uses real st_dev and
-		 * lower layers use the unique anonymous bdev assigned to the
-		 * lower fs.
+		 * is unique per underlying fs, so we use the unique anonymous
+		 * bdev assigned to the underlying fs.
 		 */
-		stat->dev = lower_layer->fs->pseudo_dev;
+		stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev;
 	}
 
 	return 0;
@@ -146,8 +142,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 	struct path realpath;
 	const struct cred *old_cred;
 	bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
-	bool samefs = ovl_same_sb(dentry->d_sb);
-	struct ovl_layer *lower_layer = NULL;
+	int fsid = 0;
 	int err;
 	bool metacopy_blocks = false;
 
@@ -168,9 +163,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 	 * If lower filesystem supports NFS file handles, this also guaranties
 	 * persistent st_ino across mount cycle.
 	 */
-	if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
+	if (!is_dir || ovl_same_dev(dentry->d_sb)) {
 		if (!OVL_TYPE_UPPER(type)) {
-			lower_layer = ovl_layer_lower(dentry);
+			fsid = ovl_layer_lower(dentry)->fsid;
 		} else if (OVL_TYPE_ORIGIN(type)) {
 			struct kstat lowerstat;
 			u32 lowermask = STATX_INO | STATX_BLOCKS |
@@ -200,14 +195,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 			if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
 			    (!ovl_verify_lower(dentry->d_sb) &&
 			     (is_dir || lowerstat.nlink == 1))) {
-				lower_layer = ovl_layer_lower(dentry);
-				/*
-				 * Cannot use origin st_dev;st_ino because
-				 * origin inode content may differ from overlay
-				 * inode content.
-				 */
-				if (samefs || lower_layer->fsid)
-					stat->ino = lowerstat.ino;
+				fsid = ovl_layer_lower(dentry)->fsid;
+				stat->ino = lowerstat.ino;
 			}
 
 			/*
@@ -241,7 +230,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 		}
 	}
 
-	err = ovl_map_dev_ino(dentry, stat, lower_layer);
+	err = ovl_map_dev_ino(dentry, stat, fsid);
 	if (err)
 		goto out;
 
@@ -527,6 +516,27 @@ static const struct address_space_operations ovl_aops = {
  * [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
  * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
  * [...] &type->i_mutex_dir_key        (stack_depth=0)
+ *
+ * Locking order w.r.t ovl_want_write() is important for nested overlayfs.
+ *
+ * This chain is valid:
+ * - inode->i_rwsem			(inode_lock[2])
+ * - upper_mnt->mnt_sb->s_writers	(ovl_want_write[0])
+ * - OVL_I(inode)->lock			(ovl_inode_lock[2])
+ * - OVL_I(lowerinode)->lock		(ovl_inode_lock[1])
+ *
+ * And this chain is valid:
+ * - inode->i_rwsem			(inode_lock[2])
+ * - OVL_I(inode)->lock			(ovl_inode_lock[2])
+ * - lowerinode->i_rwsem		(inode_lock[1])
+ * - OVL_I(lowerinode)->lock		(ovl_inode_lock[1])
+ *
+ * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is
+ * held, because it is in reverse order of the non-nested case using the same
+ * upper fs:
+ * - inode->i_rwsem			(inode_lock[1])
+ * - upper_mnt->mnt_sb->s_writers	(ovl_want_write[0])
+ * - OVL_I(inode)->lock			(ovl_inode_lock[1])
  */
 #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
 
@@ -565,7 +575,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
 	 * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
 	 * upper inode i_ino on ovl_inode_init() or ovl_inode_update().
 	 */
-	if (ovl_same_sb(inode->i_sb) || xinobits) {
+	if (ovl_same_dev(inode->i_sb)) {
 		inode->i_ino = ino;
 		if (xinobits && fsid && !(ino >> (64 - xinobits)))
 			inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
@@ -698,7 +708,7 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry,
 	return nlink;
 
 fail:
-	pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
+	pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n",
 			    upperdentry, err);
 	return fallback;
 }
@@ -969,7 +979,7 @@ out:
 	return inode;
 
 out_err:
-	pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err);
+	pr_warn_ratelimited("failed to get inode (%i)\n", err);
 	inode = ERR_PTR(err);
 	goto out;
 }
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 76ff66339173..ed9e129fae04 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -141,10 +141,10 @@ out:
 	return NULL;
 
 fail:
-	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+	pr_warn_ratelimited("failed to get origin (%i)\n", res);
 	goto out;
 invalid:
-	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+	pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
 	goto out;
 }
 
@@ -322,16 +322,16 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
 	struct dentry *origin = NULL;
 	int i;
 
-	for (i = 0; i < ofs->numlower; i++) {
+	for (i = 1; i < ofs->numlayer; i++) {
 		/*
 		 * If lower fs uuid is not unique among lower fs we cannot match
 		 * fh->uuid to layer.
 		 */
-		if (ofs->lower_layers[i].fsid &&
-		    ofs->lower_layers[i].fs->bad_uuid)
+		if (ofs->layers[i].fsid &&
+		    ofs->layers[i].fs->bad_uuid)
 			continue;
 
-		origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
+		origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt,
 					    connected);
 		if (origin)
 			break;
@@ -354,13 +354,13 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
 	}
 	**stackp = (struct ovl_path){
 		.dentry = origin,
-		.layer = &ofs->lower_layers[i]
+		.layer = &ofs->layers[i]
 	};
 
 	return 0;
 
 invalid:
-	pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
+	pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
 			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
 			    d_inode(origin)->i_mode & S_IFMT);
 	dput(origin);
@@ -449,7 +449,7 @@ out:
 
 fail:
 	inode = d_inode(real);
-	pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
+	pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
 			    is_upper ? "upper" : "origin", real,
 			    inode ? inode->i_ino : 0, err);
 	goto out;
@@ -475,7 +475,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
 		return upper ?: ERR_PTR(-ESTALE);
 
 	if (!d_is_dir(upper)) {
-		pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
+		pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
 				    index, upper);
 		dput(upper);
 		return ERR_PTR(-EIO);
@@ -589,12 +589,12 @@ out:
 	return err;
 
 fail:
-	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
+	pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
 			    index, d_inode(index)->i_mode & S_IFMT, err);
 	goto out;
 
 orphan:
-	pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
+	pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
 			    index, d_inode(index)->i_mode & S_IFMT,
 			    d_inode(index)->i_nlink);
 	err = -ENOENT;
@@ -696,7 +696,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
 			index = NULL;
 			goto out;
 		}
-		pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
+		pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
 				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
 				    d_inode(origin)->i_ino, name.len, name.name,
 				    err);
@@ -723,13 +723,13 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
 		 * unlinked, which means that finding a lower origin on lookup
 		 * whose index is a whiteout should be treated as an error.
 		 */
-		pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
+		pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
 				    index, d_inode(index)->i_mode & S_IFMT,
 				    d_inode(origin)->i_mode & S_IFMT);
 		goto fail;
 	} else if (is_dir && verify) {
 		if (!upper) {
-			pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
+			pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
 					    origin, index);
 			goto fail;
 		}
@@ -738,7 +738,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
 		err = ovl_verify_upper(index, upper, false);
 		if (err) {
 			if (err == -ESTALE) {
-				pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
+				pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
 						    upper, origin, index);
 			}
 			goto fail;
@@ -885,7 +885,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 
 	if (!d.stop && poe->numlower) {
 		err = -ENOMEM;
-		stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
+		stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path),
 				GFP_KERNEL);
 		if (!stack)
 			goto out_put_upper;
@@ -967,7 +967,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		 */
 		err = -EPERM;
 		if (d.redirect && !ofs->config.redirect_follow) {
-			pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
+			pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
 					    dentry);
 			goto out_put;
 		}
@@ -994,7 +994,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 
 		err = -EPERM;
 		if (!ofs->config.metacopy) {
-			pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
+			pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n",
 					    dentry);
 			goto out_put;
 		}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index f283b1d69a9e..3623d28aa4fa 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -9,6 +9,9 @@
 #include <linux/fs.h>
 #include "ovl_entry.h"
 
+#undef pr_fmt
+#define pr_fmt(fmt) "overlayfs: " fmt
+
 enum ovl_path_type {
 	__OVL_PATH_UPPER	= (1 << 0),
 	__OVL_PATH_MERGE	= (1 << 1),
@@ -221,7 +224,6 @@ int ovl_want_write(struct dentry *dentry);
 void ovl_drop_write(struct dentry *dentry);
 struct dentry *ovl_workdir(struct dentry *dentry);
 const struct cred *ovl_override_creds(struct super_block *sb);
-struct super_block *ovl_same_sb(struct super_block *sb);
 int ovl_can_decode_fh(struct super_block *sb);
 struct dentry *ovl_indexdir(struct super_block *sb);
 bool ovl_index_all(struct super_block *sb);
@@ -237,7 +239,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
 struct dentry *ovl_dentry_upper(struct dentry *dentry);
 struct dentry *ovl_dentry_lower(struct dentry *dentry);
 struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
-struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
+const struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
 struct dentry *ovl_dentry_real(struct dentry *dentry);
 struct dentry *ovl_i_dentry_upper(struct inode *inode);
 struct inode *ovl_inode_upper(struct inode *inode);
@@ -299,11 +301,21 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 	return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
 }
 
-static inline unsigned int ovl_xino_bits(struct super_block *sb)
+/* All layers on same fs? */
+static inline bool ovl_same_fs(struct super_block *sb)
+{
+	return OVL_FS(sb)->xino_mode == 0;
+}
+
+/* All overlay inodes have same st_dev? */
+static inline bool ovl_same_dev(struct super_block *sb)
 {
-	struct ovl_fs *ofs = sb->s_fs_info;
+	return OVL_FS(sb)->xino_mode >= 0;
+}
 
-	return ofs->xino_bits;
+static inline unsigned int ovl_xino_bits(struct super_block *sb)
+{
+	return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0;
 }
 
 static inline int ovl_inode_lock(struct inode *inode)
@@ -438,6 +450,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
 
 /* file.c */
 extern const struct file_operations ovl_file_operations;
+int __init ovl_aio_request_cache_init(void);
+void ovl_aio_request_cache_destroy(void);
 
 /* copy_up.c */
 int ovl_copy_up(struct dentry *dentry);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 28348c44ea5b..89015ea822e7 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -24,6 +24,8 @@ struct ovl_sb {
 	dev_t pseudo_dev;
 	/* Unusable (conflicting) uuid */
 	bool bad_uuid;
+	/* Used as a lower layer (but maybe also as upper) */
+	bool is_lower;
 };
 
 struct ovl_layer {
@@ -38,18 +40,18 @@ struct ovl_layer {
 };
 
 struct ovl_path {
-	struct ovl_layer *layer;
+	const struct ovl_layer *layer;
 	struct dentry *dentry;
 };
 
 /* private information held for overlayfs's superblock */
 struct ovl_fs {
 	struct vfsmount *upper_mnt;
-	unsigned int numlower;
-	/* Number of unique lower sb that differ from upper sb */
-	unsigned int numlowerfs;
-	struct ovl_layer *lower_layers;
-	struct ovl_sb *lower_fs;
+	unsigned int numlayer;
+	/* Number of unique fs among layers including upper fs */
+	unsigned int numfs;
+	const struct ovl_layer *layers;
+	struct ovl_sb *fs;
 	/* workbasedir is the path at workdir= mount option */
 	struct dentry *workbasedir;
 	/* workdir is the 'work' directory under workbasedir */
@@ -71,10 +73,15 @@ struct ovl_fs {
 	struct inode *workbasedir_trap;
 	struct inode *workdir_trap;
 	struct inode *indexdir_trap;
-	/* Inode numbers in all layers do not use the high xino_bits */
-	unsigned int xino_bits;
+	/* -1: disabled, 0: same fs, 1..32: number of unused ino bits */
+	int xino_mode;
 };
 
+static inline struct ovl_fs *OVL_FS(struct super_block *sb)
+{
+	return (struct ovl_fs *)sb->s_fs_info;
+}
+
 /* private information held for every overlayfs dentry */
 struct ovl_entry {
 	union {
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 47a91c9733a5..40ac9ce2465a 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -441,7 +441,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
 			       const char *name, int namelen)
 {
 	if (ino >> (64 - xinobits)) {
-		pr_warn_ratelimited("overlayfs: d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
+		pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
 				    namelen, name, ino, xinobits);
 		return ino;
 	}
@@ -469,7 +469,7 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
 	int xinobits = ovl_xino_bits(dir->d_sb);
 	int err = 0;
 
-	if (!ovl_same_sb(dir->d_sb) && !xinobits)
+	if (!ovl_same_dev(dir->d_sb))
 		goto out;
 
 	if (p->name[0] == '.') {
@@ -504,7 +504,13 @@ get:
 		if (err)
 			goto fail;
 
-		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
+		/*
+		 * Directory inode is always on overlay st_dev.
+		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
+		 * of xino bits overflow.
+		 */
+		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
+			     dir->d_sb->s_dev != stat.dev);
 		ino = stat.ino;
 	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
 		ino = ovl_remap_lower_ino(ino, xinobits,
@@ -518,7 +524,7 @@ out:
 	return err;
 
 fail:
-	pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",
+	pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
 			    p->name, err);
 	goto out;
 }
@@ -685,7 +691,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
 	int err;
 	struct ovl_dir_file *od = file->private_data;
 	struct dentry *dir = file->f_path.dentry;
-	struct ovl_layer *lower_layer = ovl_layer_lower(dir);
+	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
 	struct ovl_readdir_translate rdt = {
 		.ctx.actor = ovl_fill_real,
 		.orig_ctx = ctx,
@@ -738,7 +744,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
 		 * entries.
 		 */
 		if (ovl_xino_bits(dentry->d_sb) ||
-		    (ovl_same_sb(dentry->d_sb) &&
+		    (ovl_same_fs(dentry->d_sb) &&
 		     (ovl_is_impure_dir(file) ||
 		      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
 			return ovl_iterate_real(file, ctx);
@@ -965,7 +971,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
 
 		dentry = lookup_one_len(p->name, upper, p->len);
 		if (IS_ERR(dentry)) {
-			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
+			pr_err("lookup '%s/%.*s' failed (%i)\n",
 			       upper->d_name.name, p->len, p->name,
 			       (int) PTR_ERR(dentry));
 			continue;
@@ -1147,6 +1153,6 @@ next:
 out:
 	ovl_cache_free(&list);
 	if (err)
-		pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
+		pr_err("failed index dir cleanup (%i)\n", err);
 	return err;
 }
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 7621ff176d15..319fe0d355b0 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -224,14 +224,14 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 	if (ofs->upperdir_locked)
 		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
 	mntput(ofs->upper_mnt);
-	for (i = 0; i < ofs->numlower; i++) {
-		iput(ofs->lower_layers[i].trap);
-		mntput(ofs->lower_layers[i].mnt);
+	for (i = 1; i < ofs->numlayer; i++) {
+		iput(ofs->layers[i].trap);
+		mntput(ofs->layers[i].mnt);
 	}
-	for (i = 0; i < ofs->numlowerfs; i++)
-		free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
-	kfree(ofs->lower_layers);
-	kfree(ofs->lower_fs);
+	kfree(ofs->layers);
+	for (i = 0; i < ofs->numfs; i++)
+		free_anon_bdev(ofs->fs[i].pseudo_dev);
+	kfree(ofs->fs);
 
 	kfree(ofs->config.lowerdir);
 	kfree(ofs->config.upperdir);
@@ -358,7 +358,7 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 	if (ofs->config.nfs_export != ovl_nfs_export_def)
 		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
 						"on" : "off");
-	if (ofs->config.xino != ovl_xino_def())
+	if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
 		seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
 	if (ofs->config.metacopy != ovl_metacopy_def)
 		seq_printf(m, ",metacopy=%s",
@@ -462,7 +462,7 @@ static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
 		if (ovl_redirect_always_follow)
 			config->redirect_follow = true;
 	} else if (strcmp(mode, "nofollow") != 0) {
-		pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
+		pr_err("bad mount option \"redirect_dir=%s\"\n",
 		       mode);
 		return -EINVAL;
 	}
@@ -560,14 +560,15 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			break;
 
 		default:
-			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
+			pr_err("unrecognized mount option \"%s\" or missing value\n",
+					p);
 			return -EINVAL;
 		}
 	}
 
 	/* Workdir is useless in non-upper mount */
 	if (!config->upperdir && config->workdir) {
-		pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+		pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
 			config->workdir);
 		kfree(config->workdir);
 		config->workdir = NULL;
@@ -587,7 +588,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 	/* Resolve metacopy -> redirect_dir dependency */
 	if (config->metacopy && !config->redirect_dir) {
 		if (metacopy_opt && redirect_opt) {
-			pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n",
+			pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
 			       config->redirect_mode);
 			return -EINVAL;
 		}
@@ -596,7 +597,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			 * There was an explicit redirect_dir=... that resulted
 			 * in this conflict.
 			 */
-			pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n",
+			pr_info("disabling metacopy due to redirect_dir=%s\n",
 				config->redirect_mode);
 			config->metacopy = false;
 		} else {
@@ -692,7 +693,7 @@ out_unlock:
 out_dput:
 	dput(work);
 out_err:
-	pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+	pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
 		ofs->config.workdir, name, -err);
 	work = NULL;
 	goto out_unlock;
@@ -716,21 +717,21 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path)
 	int err = -EINVAL;
 
 	if (!*name) {
-		pr_err("overlayfs: empty lowerdir\n");
+		pr_err("empty lowerdir\n");
 		goto out;
 	}
 	err = kern_path(name, LOOKUP_FOLLOW, path);
 	if (err) {
-		pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
+		pr_err("failed to resolve '%s': %i\n", name, err);
 		goto out;
 	}
 	err = -EINVAL;
 	if (ovl_dentry_weird(path->dentry)) {
-		pr_err("overlayfs: filesystem on '%s' not supported\n", name);
+		pr_err("filesystem on '%s' not supported\n", name);
 		goto out_put;
 	}
 	if (!d_is_dir(path->dentry)) {
-		pr_err("overlayfs: '%s' not a directory\n", name);
+		pr_err("'%s' not a directory\n", name);
 		goto out_put;
 	}
 	return 0;
@@ -752,7 +753,7 @@ static int ovl_mount_dir(const char *name, struct path *path)
 
 		if (!err)
 			if (ovl_dentry_remote(path->dentry)) {
-				pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
+				pr_err("filesystem on '%s' not supported as upperdir\n",
 				       tmp);
 				path_put_init(path);
 				err = -EINVAL;
@@ -769,7 +770,7 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
 	int err = vfs_statfs(path, &statfs);
 
 	if (err)
-		pr_err("overlayfs: statfs failed on '%s'\n", name);
+		pr_err("statfs failed on '%s'\n", name);
 	else
 		ofs->namelen = max(ofs->namelen, statfs.f_namelen);
 
@@ -804,13 +805,13 @@ static int ovl_lower_dir(const char *name, struct path *path,
 	     (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
 		ofs->config.index = false;
 		ofs->config.nfs_export = false;
-		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
+		pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
 			name);
 	}
 
 	/* Check if lower fs has 32bit inode numbers */
 	if (fh_type != FILEID_INO32_GEN)
-		ofs->xino_bits = 0;
+		ofs->xino_mode = -1;
 
 	return 0;
 
@@ -996,7 +997,7 @@ static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
 	err = PTR_ERR_OR_ZERO(trap);
 	if (err) {
 		if (err == -ELOOP)
-			pr_err("overlayfs: conflicting %s path\n", name);
+			pr_err("conflicting %s path\n", name);
 		return err;
 	}
 
@@ -1013,11 +1014,11 @@ static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
 static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
 {
 	if (ofs->config.index) {
-		pr_err("overlayfs: %s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
+		pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
 		       name);
 		return -EBUSY;
 	} else {
-		pr_warn("overlayfs: %s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
+		pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
 			name);
 		return 0;
 	}
@@ -1035,7 +1036,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
 
 	/* Upper fs should not be r/o */
 	if (sb_rdonly(upperpath->mnt->mnt_sb)) {
-		pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
+		pr_err("upper fs is r/o, try multi-lower layers mount\n");
 		err = -EINVAL;
 		goto out;
 	}
@@ -1052,7 +1053,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
 	upper_mnt = clone_private_mount(upperpath);
 	err = PTR_ERR(upper_mnt);
 	if (IS_ERR(upper_mnt)) {
-		pr_err("overlayfs: failed to clone upperpath\n");
+		pr_err("failed to clone upperpath\n");
 		goto out;
 	}
 
@@ -1108,7 +1109,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
 	 * kernel upgrade. So warn instead of erroring out.
 	 */
 	if (!err)
-		pr_warn("overlayfs: upper fs needs to support d_type.\n");
+		pr_warn("upper fs needs to support d_type.\n");
 
 	/* Check if upper/work fs supports O_TMPFILE */
 	temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
@@ -1116,7 +1117,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
 	if (ofs->tmpfile)
 		dput(temp);
 	else
-		pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+		pr_warn("upper fs does not support tmpfile.\n");
 
 	/*
 	 * Check if upper/work fs supports trusted.overlay.* xattr
@@ -1126,7 +1127,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
 		ofs->noxattr = true;
 		ofs->config.index = false;
 		ofs->config.metacopy = false;
-		pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
+		pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
 		err = 0;
 	} else {
 		vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
@@ -1136,16 +1137,16 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
 	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
 	if (ofs->config.index && !fh_type) {
 		ofs->config.index = false;
-		pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
+		pr_warn("upper fs does not support file handles, falling back to index=off.\n");
 	}
 
 	/* Check if upper fs has 32bit inode numbers */
 	if (fh_type != FILEID_INO32_GEN)
-		ofs->xino_bits = 0;
+		ofs->xino_mode = -1;
 
 	/* NFS export of r/w mount depends on index */
 	if (ofs->config.nfs_export && !ofs->config.index) {
-		pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
+		pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
 		ofs->config.nfs_export = false;
 	}
 out:
@@ -1165,11 +1166,11 @@ static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
 
 	err = -EINVAL;
 	if (upperpath->mnt != workpath.mnt) {
-		pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
+		pr_err("workdir and upperdir must reside under the same mount\n");
 		goto out;
 	}
 	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
-		pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
+		pr_err("workdir and upperdir must be separate subtrees\n");
 		goto out;
 	}
 
@@ -1210,7 +1211,7 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
 	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
 				true);
 	if (err) {
-		pr_err("overlayfs: failed to verify upper root origin\n");
+		pr_err("failed to verify upper root origin\n");
 		goto out;
 	}
 
@@ -1233,18 +1234,18 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
 			err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
 						upperpath->dentry, true, false);
 			if (err)
-				pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
+				pr_err("failed to verify index dir 'origin' xattr\n");
 		}
 		err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
 		if (err)
-			pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
+			pr_err("failed to verify index dir 'upper' xattr\n");
 
 		/* Cleanup bad/stale/orphan index entries */
 		if (!err)
 			err = ovl_indexdir_cleanup(ofs);
 	}
 	if (err || !ofs->indexdir)
-		pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+		pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
 
 out:
 	mnt_drop_write(mnt);
@@ -1258,7 +1259,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
 	if (!ofs->config.nfs_export && !ofs->upper_mnt)
 		return true;
 
-	for (i = 0; i < ofs->numlowerfs; i++) {
+	for (i = 0; i < ofs->numfs; i++) {
 		/*
 		 * We use uuid to associate an overlay lower file handle with a
 		 * lower layer, so we can accept lower fs with null uuid as long
@@ -1266,8 +1267,9 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
 		 * if we detect multiple lower fs with the same uuid, we
 		 * disable lower file handle decoding on all of them.
 		 */
-		if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid)) {
-			ofs->lower_fs[i].bad_uuid = true;
+		if (ofs->fs[i].is_lower &&
+		    uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
+			ofs->fs[i].bad_uuid = true;
 			return false;
 		}
 	}
@@ -1283,13 +1285,9 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
 	int err;
 	bool bad_uuid = false;
 
-	/* fsid 0 is reserved for upper fs even with non upper overlay */
-	if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
-		return 0;
-
-	for (i = 0; i < ofs->numlowerfs; i++) {
-		if (ofs->lower_fs[i].sb == sb)
-			return i + 1;
+	for (i = 0; i < ofs->numfs; i++) {
+		if (ofs->fs[i].sb == sb)
+			return i;
 	}
 
 	if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
@@ -1297,7 +1295,7 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
 		if (ofs->config.index || ofs->config.nfs_export) {
 			ofs->config.index = false;
 			ofs->config.nfs_export = false;
-			pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
+			pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
 				uuid_is_null(&sb->s_uuid) ? "null" :
 							    "conflicting",
 				path->dentry);
@@ -1306,35 +1304,59 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
 
 	err = get_anon_bdev(&dev);
 	if (err) {
-		pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
+		pr_err("failed to get anonymous bdev for lowerpath\n");
 		return err;
 	}
 
-	ofs->lower_fs[ofs->numlowerfs].sb = sb;
-	ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
-	ofs->lower_fs[ofs->numlowerfs].bad_uuid = bad_uuid;
-	ofs->numlowerfs++;
+	ofs->fs[ofs->numfs].sb = sb;
+	ofs->fs[ofs->numfs].pseudo_dev = dev;
+	ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
 
-	return ofs->numlowerfs;
+	return ofs->numfs++;
 }
 
-static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
-				struct path *stack, unsigned int numlower)
+static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
+			  struct path *stack, unsigned int numlower)
 {
 	int err;
 	unsigned int i;
+	struct ovl_layer *layers;
 
 	err = -ENOMEM;
-	ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
-				    GFP_KERNEL);
-	if (ofs->lower_layers == NULL)
+	layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
+	if (!layers)
 		goto out;
+	ofs->layers = layers;
 
-	ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
-				GFP_KERNEL);
-	if (ofs->lower_fs == NULL)
+	ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
+	if (ofs->fs == NULL)
 		goto out;
 
+	/* idx/fsid 0 are reserved for upper fs even with lower only overlay */
+	ofs->numfs++;
+
+	layers[0].mnt = ofs->upper_mnt;
+	layers[0].idx = 0;
+	layers[0].fsid = 0;
+	ofs->numlayer = 1;
+
+	/*
+	 * All lower layers that share the same fs as upper layer, use the same
+	 * pseudo_dev as upper layer.  Allocate fs[0].pseudo_dev even for lower
+	 * only overlay to simplify ovl_fs_free().
+	 * is_lower will be set if upper fs is shared with a lower layer.
+	 */
+	err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
+	if (err) {
+		pr_err("failed to get anonymous bdev for upper fs\n");
+		goto out;
+	}
+
+	if (ofs->upper_mnt) {
+		ofs->fs[0].sb = ofs->upper_mnt->mnt_sb;
+		ofs->fs[0].is_lower = false;
+	}
+
 	for (i = 0; i < numlower; i++) {
 		struct vfsmount *mnt;
 		struct inode *trap;
@@ -1357,7 +1379,7 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
 		mnt = clone_private_mount(&stack[i]);
 		err = PTR_ERR(mnt);
 		if (IS_ERR(mnt)) {
-			pr_err("overlayfs: failed to clone lowerpath\n");
+			pr_err("failed to clone lowerpath\n");
 			iput(trap);
 			goto out;
 		}
@@ -1368,15 +1390,13 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
 		 */
 		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
 
-		ofs->lower_layers[ofs->numlower].trap = trap;
-		ofs->lower_layers[ofs->numlower].mnt = mnt;
-		ofs->lower_layers[ofs->numlower].idx = i + 1;
-		ofs->lower_layers[ofs->numlower].fsid = fsid;
-		if (fsid) {
-			ofs->lower_layers[ofs->numlower].fs =
-				&ofs->lower_fs[fsid - 1];
-		}
-		ofs->numlower++;
+		layers[ofs->numlayer].trap = trap;
+		layers[ofs->numlayer].mnt = mnt;
+		layers[ofs->numlayer].idx = ofs->numlayer;
+		layers[ofs->numlayer].fsid = fsid;
+		layers[ofs->numlayer].fs = &ofs->fs[fsid];
+		ofs->numlayer++;
+		ofs->fs[fsid].is_lower = true;
 	}
 
 	/*
@@ -1387,22 +1407,23 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
 	 * bits reserved for fsid, it emits a warning and uses the original
 	 * inode number.
 	 */
-	if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
-		ofs->xino_bits = 0;
-		ofs->config.xino = OVL_XINO_OFF;
-	} else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
+	if (ofs->numfs - !ofs->upper_mnt == 1) {
+		if (ofs->config.xino == OVL_XINO_ON)
+			pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
+		ofs->xino_mode = 0;
+	} else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
 		/*
-		 * This is a roundup of number of bits needed for numlowerfs+1
-		 * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
-		 * upper fs even with non upper overlay.
+		 * This is a roundup of number of bits needed for encoding
+		 * fsid, where fsid 0 is reserved for upper fs even with
+		 * lower only overlay.
 		 */
 		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
-		ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
+		ofs->xino_mode = ilog2(ofs->numfs - 1) + 1;
 	}
 
-	if (ofs->xino_bits) {
-		pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
-			ofs->xino_bits);
+	if (ofs->xino_mode > 0) {
+		pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
+			ofs->xino_mode);
 	}
 
 	err = 0;
@@ -1428,15 +1449,15 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
 	err = -EINVAL;
 	stacklen = ovl_split_lowerdirs(lowertmp);
 	if (stacklen > OVL_MAX_STACK) {
-		pr_err("overlayfs: too many lower directories, limit is %d\n",
+		pr_err("too many lower directories, limit is %d\n",
 		       OVL_MAX_STACK);
 		goto out_err;
 	} else if (!ofs->config.upperdir && stacklen == 1) {
-		pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
+		pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
 		goto out_err;
 	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
 		   ofs->config.redirect_follow) {
-		pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
+		pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
 		ofs->config.nfs_export = false;
 	}
 
@@ -1459,11 +1480,11 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
 	err = -EINVAL;
 	sb->s_stack_depth++;
 	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
-		pr_err("overlayfs: maximum fs stacking depth exceeded\n");
+		pr_err("maximum fs stacking depth exceeded\n");
 		goto out_err;
 	}
 
-	err = ovl_get_lower_layers(sb, ofs, stack, numlower);
+	err = ovl_get_layers(sb, ofs, stack, numlower);
 	if (err)
 		goto out_err;
 
@@ -1474,7 +1495,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
 
 	for (i = 0; i < numlower; i++) {
 		oe->lowerstack[i].dentry = dget(stack[i].dentry);
-		oe->lowerstack[i].layer = &ofs->lower_layers[i];
+		oe->lowerstack[i].layer = &ofs->layers[i+1];
 	}
 
 	if (remote)
@@ -1515,7 +1536,7 @@ static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
 	while (!err && parent != next) {
 		if (ovl_lookup_trap_inode(sb, parent)) {
 			err = -ELOOP;
-			pr_err("overlayfs: overlapping %s path\n", name);
+			pr_err("overlapping %s path\n", name);
 		} else if (ovl_is_inuse(parent)) {
 			err = ovl_report_in_use(ofs, name);
 		}
@@ -1555,9 +1576,9 @@ static int ovl_check_overlapping_layers(struct super_block *sb,
 			return err;
 	}
 
-	for (i = 0; i < ofs->numlower; i++) {
+	for (i = 1; i < ofs->numlayer; i++) {
 		err = ovl_check_layer(sb, ofs,
-				      ofs->lower_layers[i].mnt->mnt_root,
+				      ofs->layers[i].mnt->mnt_root,
 				      "lowerdir");
 		if (err)
 			return err;
@@ -1595,7 +1616,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	err = -EINVAL;
 	if (!ofs->config.lowerdir) {
 		if (!silent)
-			pr_err("overlayfs: missing 'lowerdir'\n");
+			pr_err("missing 'lowerdir'\n");
 		goto out_err;
 	}
 
@@ -1603,14 +1624,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
 	if (ofs->config.xino != OVL_XINO_OFF)
-		ofs->xino_bits = BITS_PER_LONG - 32;
+		ofs->xino_mode = BITS_PER_LONG - 32;
 
 	/* alloc/destroy_inode needed for setting up traps in inode cache */
 	sb->s_op = &ovl_super_operations;
 
 	if (ofs->config.upperdir) {
 		if (!ofs->config.workdir) {
-			pr_err("overlayfs: missing 'workdir'\n");
+			pr_err("missing 'workdir'\n");
 			goto out_err;
 		}
 
@@ -1660,13 +1681,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!ofs->indexdir) {
 		ofs->config.index = false;
 		if (ofs->upper_mnt && ofs->config.nfs_export) {
-			pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
+			pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
 			ofs->config.nfs_export = false;
 		}
 	}
 
 	if (ofs->config.metacopy && ofs->config.nfs_export) {
-		pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
+		pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
 		ofs->config.nfs_export = false;
 	}
 
@@ -1749,9 +1770,15 @@ static int __init ovl_init(void)
 	if (ovl_inode_cachep == NULL)
 		return -ENOMEM;
 
-	err = register_filesystem(&ovl_fs_type);
-	if (err)
-		kmem_cache_destroy(ovl_inode_cachep);
+	err = ovl_aio_request_cache_init();
+	if (!err) {
+		err = register_filesystem(&ovl_fs_type);
+		if (!err)
+			return 0;
+
+		ovl_aio_request_cache_destroy();
+	}
+	kmem_cache_destroy(ovl_inode_cachep);
 
 	return err;
 }
@@ -1766,7 +1793,7 @@ static void __exit ovl_exit(void)
 	 */
 	rcu_barrier();
 	kmem_cache_destroy(ovl_inode_cachep);
-
+	ovl_aio_request_cache_destroy();
 }
 
 module_init(ovl_init);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index f5678a3f8350..ea005085803f 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -40,18 +40,6 @@ const struct cred *ovl_override_creds(struct super_block *sb)
 	return override_creds(ofs->creator_cred);
 }
 
-struct super_block *ovl_same_sb(struct super_block *sb)
-{
-	struct ovl_fs *ofs = sb->s_fs_info;
-
-	if (!ofs->numlowerfs)
-		return ofs->upper_mnt->mnt_sb;
-	else if (ofs->numlowerfs == 1 && !ofs->upper_mnt)
-		return ofs->lower_fs[0].sb;
-	else
-		return NULL;
-}
-
 /*
  * Check if underlying fs supports file handles and try to determine encoding
  * type, in order to deduce maximum inode number used by fs.
@@ -198,7 +186,7 @@ struct dentry *ovl_dentry_lower(struct dentry *dentry)
 	return oe->numlower ? oe->lowerstack[0].dentry : NULL;
 }
 
-struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
+const struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
 
@@ -576,7 +564,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
 	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
 
 	if (err == -EOPNOTSUPP) {
-		pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
+		pr_warn("cannot set %s xattr on upper\n", name);
 		ofs->noxattr = true;
 		return xerr;
 	}
@@ -700,7 +688,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
 
 	inode = d_inode(upperdentry);
 	if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
-		pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+		pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
 				    upperdentry, inode->i_ino, inode->i_nlink);
 		/*
 		 * We either have a bug with persistent union nlink or a lower
@@ -739,7 +727,7 @@ out:
 	return;
 
 fail:
-	pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err);
+	pr_err("cleanup index of '%pd2' failed (%i)\n", dentry, err);
 	goto out;
 }
 
@@ -830,7 +818,7 @@ int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
 err_unlock:
 	unlock_rename(workdir, upperdir);
 err:
-	pr_err("overlayfs: failed to lock workdir+upperdir\n");
+	pr_err("failed to lock workdir+upperdir\n");
 	return -EIO;
 }
 
@@ -852,7 +840,7 @@ int ovl_check_metacopy_xattr(struct dentry *dentry)
 
 	return 1;
 out:
-	pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res);
+	pr_warn_ratelimited("failed to get metacopy (%i)\n", res);
 	return res;
 }
 
@@ -899,7 +887,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value,
 	return res;
 
 fail:
-	pr_warn_ratelimited("overlayfs: failed to get xattr %s: err=%zi)\n",
+	pr_warn_ratelimited("failed to get xattr %s: err=%zi)\n",
 			    name, res);
 	kfree(buf);
 	return res;
@@ -931,7 +919,7 @@ char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
 
 	return buf;
 invalid:
-	pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
+	pr_warn_ratelimited("invalid redirect (%s)\n", buf);
 	res = -EINVAL;
 	kfree(buf);
 	return ERR_PTR(res);
diff --git a/fs/pipe.c b/fs/pipe.c
index 57502c3c0fba..5a34d6c22d4c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -108,16 +108,19 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
 /* Drop the inode semaphore and wait for a pipe event, atomically */
 void pipe_wait(struct pipe_inode_info *pipe)
 {
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT(rdwait);
+	DEFINE_WAIT(wrwait);
 
 	/*
 	 * Pipes are system-local resources, so sleeping on them
 	 * is considered a noninteractive wait:
 	 */
-	prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
 	pipe_unlock(pipe);
 	schedule();
-	finish_wait(&pipe->wait, &wait);
+	finish_wait(&pipe->rd_wait, &rdwait);
+	finish_wait(&pipe->wr_wait, &wrwait);
 	pipe_lock(pipe);
 }
 
@@ -286,7 +289,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	size_t total_len = iov_iter_count(to);
 	struct file *filp = iocb->ki_filp;
 	struct pipe_inode_info *pipe = filp->private_data;
-	bool was_full;
+	bool was_full, wake_next_reader = false;
 	ssize_t ret;
 
 	/* Null read succeeds. */
@@ -344,10 +347,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 
 			if (!buf->len) {
 				pipe_buf_release(pipe, buf);
-				spin_lock_irq(&pipe->wait.lock);
+				spin_lock_irq(&pipe->rd_wait.lock);
 				tail++;
 				pipe->tail = tail;
-				spin_unlock_irq(&pipe->wait.lock);
+				spin_unlock_irq(&pipe->rd_wait.lock);
 			}
 			total_len -= chars;
 			if (!total_len)
@@ -384,7 +387,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		 * no data.
 		 */
 		if (unlikely(was_full)) {
-			wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
+			wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
 			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 		}
 
@@ -394,18 +397,23 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		 * since we've done any required wakeups and there's no need
 		 * to mark anything accessed. And we've dropped the lock.
 		 */
-		if (wait_event_interruptible(pipe->wait, pipe_readable(pipe)) < 0)
+		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
 			return -ERESTARTSYS;
 
 		__pipe_lock(pipe);
 		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
+		wake_next_reader = true;
 	}
+	if (pipe_empty(pipe->head, pipe->tail))
+		wake_next_reader = false;
 	__pipe_unlock(pipe);
 
 	if (was_full) {
-		wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
+		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}
+	if (wake_next_reader)
+		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 	if (ret > 0)
 		file_accessed(filp);
 	return ret;
@@ -437,6 +445,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 	size_t total_len = iov_iter_count(from);
 	ssize_t chars;
 	bool was_empty = false;
+	bool wake_next_writer = false;
 
 	/* Null write succeeds. */
 	if (unlikely(total_len == 0))
@@ -515,16 +524,16 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 			 * it, either the reader will consume it or it'll still
 			 * be there for the next write.
 			 */
-			spin_lock_irq(&pipe->wait.lock);
+			spin_lock_irq(&pipe->rd_wait.lock);
 
 			head = pipe->head;
 			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
-				spin_unlock_irq(&pipe->wait.lock);
+				spin_unlock_irq(&pipe->rd_wait.lock);
 				continue;
 			}
 
 			pipe->head = head + 1;
-			spin_unlock_irq(&pipe->wait.lock);
+			spin_unlock_irq(&pipe->rd_wait.lock);
 
 			/* Insert it into the buffer array */
 			buf = &pipe->bufs[head & mask];
@@ -576,14 +585,17 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 		 */
 		__pipe_unlock(pipe);
 		if (was_empty) {
-			wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
+			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		}
-		wait_event_interruptible(pipe->wait, pipe_writable(pipe));
+		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
 		__pipe_lock(pipe);
 		was_empty = pipe_empty(pipe->head, pipe->tail);
+		wake_next_writer = true;
 	}
 out:
+	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+		wake_next_writer = false;
 	__pipe_unlock(pipe);
 
 	/*
@@ -596,9 +608,11 @@ out:
 	 * wake up pending jobs
 	 */
 	if (was_empty) {
-		wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
+		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	}
+	if (wake_next_writer)
+		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
 	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
 		int err = file_update_time(filp);
 		if (err)
@@ -642,12 +656,15 @@ pipe_poll(struct file *filp, poll_table *wait)
 	unsigned int head, tail;
 
 	/*
-	 * Reading only -- no need for acquiring the semaphore.
+	 * Reading pipe state only -- no need for acquiring the semaphore.
 	 *
 	 * But because this is racy, the code has to add the
 	 * entry to the poll table _first_ ..
 	 */
-	poll_wait(filp, &pipe->wait, wait);
+	if (filp->f_mode & FMODE_READ)
+		poll_wait(filp, &pipe->rd_wait, wait);
+	if (filp->f_mode & FMODE_WRITE)
+		poll_wait(filp, &pipe->wr_wait, wait);
 
 	/*
 	 * .. and only then can you do the racy tests. That way,
@@ -706,7 +723,8 @@ pipe_release(struct inode *inode, struct file *file)
 		pipe->writers--;
 
 	if (pipe->readers || pipe->writers) {
-		wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLOUT | EPOLLRDNORM | EPOLLWRNORM | EPOLLERR | EPOLLHUP);
+		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLHUP);
+		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM | EPOLLERR | EPOLLHUP);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}
@@ -789,7 +807,8 @@ struct pipe_inode_info *alloc_pipe_info(void)
 			     GFP_KERNEL_ACCOUNT);
 
 	if (pipe->bufs) {
-		init_waitqueue_head(&pipe->wait);
+		init_waitqueue_head(&pipe->rd_wait);
+		init_waitqueue_head(&pipe->wr_wait);
 		pipe->r_counter = pipe->w_counter = 1;
 		pipe->max_usage = pipe_bufs;
 		pipe->ring_size = pipe_bufs;
@@ -1007,7 +1026,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
 
 static void wake_up_partner(struct pipe_inode_info *pipe)
 {
-	wake_up_interruptible(&pipe->wait);
+	wake_up_interruptible(&pipe->rd_wait);
+	wake_up_interruptible(&pipe->wr_wait);
 }
 
 static int fifo_open(struct inode *inode, struct file *filp)
@@ -1118,13 +1138,13 @@ static int fifo_open(struct inode *inode, struct file *filp)
 
 err_rd:
 	if (!--pipe->readers)
-		wake_up_interruptible(&pipe->wait);
+		wake_up_interruptible(&pipe->wr_wait);
 	ret = -ERESTARTSYS;
 	goto err;
 
 err_wr:
 	if (!--pipe->writers)
-		wake_up_interruptible(&pipe->wait);
+		wake_up_interruptible(&pipe->rd_wait);
 	ret = -ERESTARTSYS;
 	goto err;
 
@@ -1251,7 +1271,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
 	pipe->max_usage = nr_slots;
 	pipe->tail = tail;
 	pipe->head = head;
-	wake_up_interruptible_all(&pipe->wait);
+	wake_up_interruptible_all(&pipe->rd_wait);
+	wake_up_interruptible_all(&pipe->wr_wait);
 	return pipe->max_usage * PAGE_SIZE;
 
 out_revert_acct:
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index ead487e80510..bd08616ed8ba 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -33,3 +33,4 @@ proc-$(CONFIG_PROC_KCORE)	+= kcore.o
 proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
 proc-$(CONFIG_PRINTK)	+= kmsg.o
 proc-$(CONFIG_PROC_PAGE_MONITOR)	+= page.o
+proc-$(CONFIG_BOOT_CONFIG)	+= bootconfig.o
diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c
new file mode 100644
index 000000000000..9955d75c0585
--- /dev/null
+++ b/fs/proc/bootconfig.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * /proc/bootconfig - Extra boot configuration
+ */
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/printk.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/bootconfig.h>
+#include <linux/slab.h>
+
+static char *saved_boot_config;
+
+static int boot_config_proc_show(struct seq_file *m, void *v)
+{
+	if (saved_boot_config)
+		seq_puts(m, saved_boot_config);
+	return 0;
+}
+
+/* Rest size of buffer */
+#define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0)
+
+/* Return the needed total length if @size is 0 */
+static int __init copy_xbc_key_value_list(char *dst, size_t size)
+{
+	struct xbc_node *leaf, *vnode;
+	const char *val;
+	char *key, *end = dst + size;
+	int ret = 0;
+
+	key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL);
+
+	xbc_for_each_key_value(leaf, val) {
+		ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX);
+		if (ret < 0)
+			break;
+		ret = snprintf(dst, rest(dst, end), "%s = ", key);
+		if (ret < 0)
+			break;
+		dst += ret;
+		vnode = xbc_node_get_child(leaf);
+		if (vnode && xbc_node_is_array(vnode)) {
+			xbc_array_for_each_value(vnode, val) {
+				ret = snprintf(dst, rest(dst, end), "\"%s\"%s",
+					val, vnode->next ? ", " : "\n");
+				if (ret < 0)
+					goto out;
+				dst += ret;
+			}
+		} else {
+			ret = snprintf(dst, rest(dst, end), "\"%s\"\n", val);
+			if (ret < 0)
+				break;
+			dst += ret;
+		}
+	}
+out:
+	kfree(key);
+
+	return ret < 0 ? ret : dst - (end - size);
+}
+
+static int __init proc_boot_config_init(void)
+{
+	int len;
+
+	len = copy_xbc_key_value_list(NULL, 0);
+	if (len < 0)
+		return len;
+
+	if (len > 0) {
+		saved_boot_config = kzalloc(len + 1, GFP_KERNEL);
+		if (!saved_boot_config)
+			return -ENOMEM;
+
+		len = copy_xbc_key_value_list(saved_boot_config, len + 1);
+		if (len < 0) {
+			kfree(saved_boot_config);
+			return len;
+		}
+	}
+
+	proc_create_single("bootconfig", 0, NULL, boot_config_proc_show);
+
+	return 0;
+}
+fs_initcall(proc_boot_config_init);
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
index 96f1087e372c..c1dea9b8222e 100644
--- a/fs/proc/cpuinfo.c
+++ b/fs/proc/cpuinfo.c
@@ -16,16 +16,16 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
 	return seq_open(file, &cpuinfo_op);
 }
 
-static const struct file_operations proc_cpuinfo_operations = {
-	.open		= cpuinfo_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
+static const struct proc_ops cpuinfo_proc_ops = {
+	.proc_open	= cpuinfo_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= seq_release,
 };
 
 static int __init proc_cpuinfo_init(void)
 {
-	proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
+	proc_create("cpuinfo", 0, NULL, &cpuinfo_proc_ops);
 	return 0;
 }
 fs_initcall(proc_cpuinfo_init);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 074e9585c699..3faed94e4b65 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -473,7 +473,7 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
 	ent = __proc_create(&parent, name, S_IFDIR | mode, 2);
 	if (ent) {
 		ent->data = data;
-		ent->proc_fops = &proc_dir_operations;
+		ent->proc_dir_ops = &proc_dir_operations;
 		ent->proc_iops = &proc_dir_inode_operations;
 		ent = proc_register(parent, ent);
 	}
@@ -503,7 +503,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
 	ent = __proc_create(&parent, name, mode, 2);
 	if (ent) {
 		ent->data = NULL;
-		ent->proc_fops = NULL;
+		ent->proc_dir_ops = NULL;
 		ent->proc_iops = NULL;
 		ent = proc_register(parent, ent);
 	}
@@ -533,25 +533,23 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
 
 struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 		struct proc_dir_entry *parent,
-		const struct file_operations *proc_fops, void *data)
+		const struct proc_ops *proc_ops, void *data)
 {
 	struct proc_dir_entry *p;
 
-	BUG_ON(proc_fops == NULL);
-
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
-	p->proc_fops = proc_fops;
+	p->proc_ops = proc_ops;
 	return proc_register(parent, p);
 }
 EXPORT_SYMBOL(proc_create_data);
  
 struct proc_dir_entry *proc_create(const char *name, umode_t mode,
 				   struct proc_dir_entry *parent,
-				   const struct file_operations *proc_fops)
+				   const struct proc_ops *proc_ops)
 {
-	return proc_create_data(name, mode, parent, proc_fops, NULL);
+	return proc_create_data(name, mode, parent, proc_ops, NULL);
 }
 EXPORT_SYMBOL(proc_create);
 
@@ -573,11 +571,11 @@ static int proc_seq_release(struct inode *inode, struct file *file)
 	return seq_release(inode, file);
 }
 
-static const struct file_operations proc_seq_fops = {
-	.open		= proc_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= proc_seq_release,
+static const struct proc_ops proc_seq_ops = {
+	.proc_open	= proc_seq_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= proc_seq_release,
 };
 
 struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
@@ -589,7 +587,7 @@ struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
-	p->proc_fops = &proc_seq_fops;
+	p->proc_ops = &proc_seq_ops;
 	p->seq_ops = ops;
 	p->state_size = state_size;
 	return proc_register(parent, p);
@@ -603,11 +601,11 @@ static int proc_single_open(struct inode *inode, struct file *file)
 	return single_open(file, de->single_show, de->data);
 }
 
-static const struct file_operations proc_single_fops = {
-	.open		= proc_single_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
+static const struct proc_ops proc_single_ops = {
+	.proc_open	= proc_single_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
 };
 
 struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
@@ -619,7 +617,7 @@ struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
-	p->proc_fops = &proc_single_fops;
+	p->proc_ops = &proc_single_ops;
 	p->single_show = show;
 	return proc_register(parent, p);
 }
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index dbe43a50caf2..6da18316d209 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -163,7 +163,7 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 		pdeo->closing = true;
 		spin_unlock(&pde->pde_unload_lock);
 		file = pdeo->file;
-		pde->proc_fops->release(file_inode(file), file);
+		pde->proc_ops->proc_release(file_inode(file), file);
 		spin_lock(&pde->pde_unload_lock);
 		/* After ->release. */
 		list_del(&pdeo->lh);
@@ -200,12 +200,12 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	loff_t rv = -EINVAL;
 	if (use_pde(pde)) {
-		typeof_member(struct file_operations, llseek) llseek;
+		typeof_member(struct proc_ops, proc_lseek) lseek;
 
-		llseek = pde->proc_fops->llseek;
-		if (!llseek)
-			llseek = default_llseek;
-		rv = llseek(file, offset, whence);
+		lseek = pde->proc_ops->proc_lseek;
+		if (!lseek)
+			lseek = default_llseek;
+		rv = lseek(file, offset, whence);
 		unuse_pde(pde);
 	}
 	return rv;
@@ -216,9 +216,9 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count,
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	ssize_t rv = -EIO;
 	if (use_pde(pde)) {
-		typeof_member(struct file_operations, read) read;
+		typeof_member(struct proc_ops, proc_read) read;
 
-		read = pde->proc_fops->read;
+		read = pde->proc_ops->proc_read;
 		if (read)
 			rv = read(file, buf, count, ppos);
 		unuse_pde(pde);
@@ -231,9 +231,9 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	ssize_t rv = -EIO;
 	if (use_pde(pde)) {
-		typeof_member(struct file_operations, write) write;
+		typeof_member(struct proc_ops, proc_write) write;
 
-		write = pde->proc_fops->write;
+		write = pde->proc_ops->proc_write;
 		if (write)
 			rv = write(file, buf, count, ppos);
 		unuse_pde(pde);
@@ -246,9 +246,9 @@ static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts)
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	__poll_t rv = DEFAULT_POLLMASK;
 	if (use_pde(pde)) {
-		typeof_member(struct file_operations, poll) poll;
+		typeof_member(struct proc_ops, proc_poll) poll;
 
-		poll = pde->proc_fops->poll;
+		poll = pde->proc_ops->proc_poll;
 		if (poll)
 			rv = poll(file, pts);
 		unuse_pde(pde);
@@ -261,9 +261,9 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	long rv = -ENOTTY;
 	if (use_pde(pde)) {
-		typeof_member(struct file_operations, unlocked_ioctl) ioctl;
+		typeof_member(struct proc_ops, proc_ioctl) ioctl;
 
-		ioctl = pde->proc_fops->unlocked_ioctl;
+		ioctl = pde->proc_ops->proc_ioctl;
 		if (ioctl)
 			rv = ioctl(file, cmd, arg);
 		unuse_pde(pde);
@@ -277,9 +277,9 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	long rv = -ENOTTY;
 	if (use_pde(pde)) {
-		typeof_member(struct file_operations, compat_ioctl) compat_ioctl;
+		typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl;
 
-		compat_ioctl = pde->proc_fops->compat_ioctl;
+		compat_ioctl = pde->proc_ops->proc_compat_ioctl;
 		if (compat_ioctl)
 			rv = compat_ioctl(file, cmd, arg);
 		unuse_pde(pde);
@@ -293,9 +293,9 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	int rv = -EIO;
 	if (use_pde(pde)) {
-		typeof_member(struct file_operations, mmap) mmap;
+		typeof_member(struct proc_ops, proc_mmap) mmap;
 
-		mmap = pde->proc_fops->mmap;
+		mmap = pde->proc_ops->proc_mmap;
 		if (mmap)
 			rv = mmap(file, vma);
 		unuse_pde(pde);
@@ -312,9 +312,9 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
 	unsigned long rv = -EIO;
 
 	if (use_pde(pde)) {
-		typeof_member(struct file_operations, get_unmapped_area) get_area;
+		typeof_member(struct proc_ops, proc_get_unmapped_area) get_area;
 
-		get_area = pde->proc_fops->get_unmapped_area;
+		get_area = pde->proc_ops->proc_get_unmapped_area;
 #ifdef CONFIG_MMU
 		if (!get_area)
 			get_area = current->mm->get_unmapped_area;
@@ -333,8 +333,8 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 {
 	struct proc_dir_entry *pde = PDE(inode);
 	int rv = 0;
-	typeof_member(struct file_operations, open) open;
-	typeof_member(struct file_operations, release) release;
+	typeof_member(struct proc_ops, proc_open) open;
+	typeof_member(struct proc_ops, proc_release) release;
 	struct pde_opener *pdeo;
 
 	/*
@@ -351,7 +351,7 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	if (!use_pde(pde))
 		return -ENOENT;
 
-	release = pde->proc_fops->release;
+	release = pde->proc_ops->proc_release;
 	if (release) {
 		pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL);
 		if (!pdeo) {
@@ -360,7 +360,7 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 		}
 	}
 
-	open = pde->proc_fops->open;
+	open = pde->proc_ops->proc_open;
 	if (open)
 		rv = open(inode, file);
 
@@ -468,21 +468,23 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
 			inode->i_size = de->size;
 		if (de->nlink)
 			set_nlink(inode, de->nlink);
-		WARN_ON(!de->proc_iops);
-		inode->i_op = de->proc_iops;
-		if (de->proc_fops) {
-			if (S_ISREG(inode->i_mode)) {
+
+		if (S_ISREG(inode->i_mode)) {
+			inode->i_op = de->proc_iops;
+			inode->i_fop = &proc_reg_file_ops;
 #ifdef CONFIG_COMPAT
-				if (!de->proc_fops->compat_ioctl)
-					inode->i_fop =
-						&proc_reg_file_ops_no_compat;
-				else
-#endif
-					inode->i_fop = &proc_reg_file_ops;
-			} else {
-				inode->i_fop = de->proc_fops;
+			if (!de->proc_ops->proc_compat_ioctl) {
+				inode->i_fop = &proc_reg_file_ops_no_compat;
 			}
-		}
+#endif
+		} else if (S_ISDIR(inode->i_mode)) {
+			inode->i_op = de->proc_iops;
+			inode->i_fop = de->proc_dir_ops;
+		} else if (S_ISLNK(inode->i_mode)) {
+			inode->i_op = de->proc_iops;
+			inode->i_fop = NULL;
+		} else
+			BUG();
 	} else
 	       pde_put(de);
 	return inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 0f3b557c9b77..41587276798e 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -39,7 +39,10 @@ struct proc_dir_entry {
 	spinlock_t pde_unload_lock;
 	struct completion *pde_unload_completion;
 	const struct inode_operations *proc_iops;
-	const struct file_operations *proc_fops;
+	union {
+		const struct proc_ops *proc_ops;
+		const struct file_operations *proc_dir_ops;
+	};
 	const struct dentry_operations *proc_dops;
 	union {
 		const struct seq_operations *seq_ops;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e2ed8e08cc7a..8ba492d44e68 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -574,11 +574,11 @@ static int release_kcore(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static const struct file_operations proc_kcore_operations = {
-	.read		= read_kcore,
-	.open		= open_kcore,
-	.release	= release_kcore,
-	.llseek		= default_llseek,
+static const struct proc_ops kcore_proc_ops = {
+	.proc_read	= read_kcore,
+	.proc_open	= open_kcore,
+	.proc_release	= release_kcore,
+	.proc_lseek	= default_llseek,
 };
 
 /* just remember that we have to update kcore */
@@ -637,8 +637,7 @@ static void __init add_modules_range(void)
 
 static int __init proc_kcore_init(void)
 {
-	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
-				      &proc_kcore_operations);
+	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &kcore_proc_ops);
 	if (!proc_root_kcore) {
 		pr_err("couldn't create /proc/kcore\n");
 		return 0; /* Always returns 0. */
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index 4f4a2abb225e..ec1b7d2fb773 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -49,17 +49,17 @@ static __poll_t kmsg_poll(struct file *file, poll_table *wait)
 }
 
 
-static const struct file_operations proc_kmsg_operations = {
-	.read		= kmsg_read,
-	.poll		= kmsg_poll,
-	.open		= kmsg_open,
-	.release	= kmsg_release,
-	.llseek		= generic_file_llseek,
+static const struct proc_ops kmsg_proc_ops = {
+	.proc_read	= kmsg_read,
+	.proc_poll	= kmsg_poll,
+	.proc_open	= kmsg_open,
+	.proc_release	= kmsg_release,
+	.proc_lseek	= generic_file_llseek,
 };
 
 static int __init proc_kmsg_init(void)
 {
-	proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
+	proc_create("kmsg", S_IRUSR, NULL, &kmsg_proc_ops);
 	return 0;
 }
 fs_initcall(proc_kmsg_init);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 7c952ee732e6..f909243d4a66 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -21,6 +21,21 @@
 #define KPMMASK (KPMSIZE - 1)
 #define KPMBITS (KPMSIZE * BITS_PER_BYTE)
 
+static inline unsigned long get_max_dump_pfn(void)
+{
+#ifdef CONFIG_SPARSEMEM
+	/*
+	 * The memmap of early sections is completely populated and marked
+	 * online even if max_pfn does not fall on a section boundary -
+	 * pfn_to_online_page() will succeed on all pages. Allow inspecting
+	 * these memmaps.
+	 */
+	return round_up(max_pfn, PAGES_PER_SECTION);
+#else
+	return max_pfn;
+#endif
+}
+
 /* /proc/kpagecount - an array exposing page counts
  *
  * Each entry is a u64 representing the corresponding
@@ -29,6 +44,7 @@
 static ssize_t kpagecount_read(struct file *file, char __user *buf,
 			     size_t count, loff_t *ppos)
 {
+	const unsigned long max_dump_pfn = get_max_dump_pfn();
 	u64 __user *out = (u64 __user *)buf;
 	struct page *ppage;
 	unsigned long src = *ppos;
@@ -37,9 +53,11 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
 	u64 pcount;
 
 	pfn = src / KPMSIZE;
-	count = min_t(size_t, count, (max_pfn * KPMSIZE) - src);
 	if (src & KPMMASK || count & KPMMASK)
 		return -EINVAL;
+	if (src >= max_dump_pfn * KPMSIZE)
+		return 0;
+	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
 
 	while (count > 0) {
 		/*
@@ -71,9 +89,9 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
 	return ret;
 }
 
-static const struct file_operations proc_kpagecount_operations = {
-	.llseek = mem_lseek,
-	.read = kpagecount_read,
+static const struct proc_ops kpagecount_proc_ops = {
+	.proc_lseek	= mem_lseek,
+	.proc_read	= kpagecount_read,
 };
 
 /* /proc/kpageflags - an array exposing page flags
@@ -206,6 +224,7 @@ u64 stable_page_flags(struct page *page)
 static ssize_t kpageflags_read(struct file *file, char __user *buf,
 			     size_t count, loff_t *ppos)
 {
+	const unsigned long max_dump_pfn = get_max_dump_pfn();
 	u64 __user *out = (u64 __user *)buf;
 	struct page *ppage;
 	unsigned long src = *ppos;
@@ -213,9 +232,11 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
 	ssize_t ret = 0;
 
 	pfn = src / KPMSIZE;
-	count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
 	if (src & KPMMASK || count & KPMMASK)
 		return -EINVAL;
+	if (src >= max_dump_pfn * KPMSIZE)
+		return 0;
+	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
 
 	while (count > 0) {
 		/*
@@ -242,15 +263,16 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
 	return ret;
 }
 
-static const struct file_operations proc_kpageflags_operations = {
-	.llseek = mem_lseek,
-	.read = kpageflags_read,
+static const struct proc_ops kpageflags_proc_ops = {
+	.proc_lseek	= mem_lseek,
+	.proc_read	= kpageflags_read,
 };
 
 #ifdef CONFIG_MEMCG
 static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
+	const unsigned long max_dump_pfn = get_max_dump_pfn();
 	u64 __user *out = (u64 __user *)buf;
 	struct page *ppage;
 	unsigned long src = *ppos;
@@ -259,9 +281,11 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
 	u64 ino;
 
 	pfn = src / KPMSIZE;
-	count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
 	if (src & KPMMASK || count & KPMMASK)
 		return -EINVAL;
+	if (src >= max_dump_pfn * KPMSIZE)
+		return 0;
+	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
 
 	while (count > 0) {
 		/*
@@ -293,18 +317,18 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
 	return ret;
 }
 
-static const struct file_operations proc_kpagecgroup_operations = {
-	.llseek = mem_lseek,
-	.read = kpagecgroup_read,
+static const struct proc_ops kpagecgroup_proc_ops = {
+	.proc_lseek	= mem_lseek,
+	.proc_read	= kpagecgroup_read,
 };
 #endif /* CONFIG_MEMCG */
 
 static int __init proc_page_init(void)
 {
-	proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
-	proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
+	proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops);
+	proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops);
 #ifdef CONFIG_MEMCG
-	proc_create("kpagecgroup", S_IRUSR, NULL, &proc_kpagecgroup_operations);
+	proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops);
 #endif
 	return 0;
 }
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 76ae278df1c4..4888c5224442 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -90,12 +90,12 @@ static int seq_release_net(struct inode *ino, struct file *f)
 	return 0;
 }
 
-static const struct file_operations proc_net_seq_fops = {
-	.open		= seq_open_net,
-	.read		= seq_read,
-	.write		= proc_simple_write,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
+static const struct proc_ops proc_net_seq_ops = {
+	.proc_open	= seq_open_net,
+	.proc_read	= seq_read,
+	.proc_write	= proc_simple_write,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= seq_release_net,
 };
 
 struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
@@ -108,7 +108,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
 	if (!p)
 		return NULL;
 	pde_force_lookup(p);
-	p->proc_fops = &proc_net_seq_fops;
+	p->proc_ops = &proc_net_seq_ops;
 	p->seq_ops = ops;
 	p->state_size = state_size;
 	return proc_register(parent, p);
@@ -152,7 +152,7 @@ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode
 	if (!p)
 		return NULL;
 	pde_force_lookup(p);
-	p->proc_fops = &proc_net_seq_fops;
+	p->proc_ops = &proc_net_seq_ops;
 	p->seq_ops = ops;
 	p->state_size = state_size;
 	p->write = write;
@@ -183,12 +183,12 @@ static int single_release_net(struct inode *ino, struct file *f)
 	return single_release(ino, f);
 }
 
-static const struct file_operations proc_net_single_fops = {
-	.open		= single_open_net,
-	.read		= seq_read,
-	.write		= proc_simple_write,
-	.llseek		= seq_lseek,
-	.release	= single_release_net,
+static const struct proc_ops proc_net_single_ops = {
+	.proc_open	= single_open_net,
+	.proc_read	= seq_read,
+	.proc_write	= proc_simple_write,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release_net,
 };
 
 struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
@@ -201,7 +201,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
 	if (!p)
 		return NULL;
 	pde_force_lookup(p);
-	p->proc_fops = &proc_net_single_fops;
+	p->proc_ops = &proc_net_single_ops;
 	p->single_show = show;
 	return proc_register(parent, p);
 }
@@ -244,7 +244,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
 	if (!p)
 		return NULL;
 	pde_force_lookup(p);
-	p->proc_fops = &proc_net_single_fops;
+	p->proc_ops = &proc_net_single_ops;
 	p->single_show = show;
 	p->write = write;
 	return proc_register(parent, p);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d80989b6c344..c75bb4632ed1 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1720,7 +1720,7 @@ int __init proc_sys_init(void)
 
 	proc_sys_root = proc_mkdir("sys", NULL);
 	proc_sys_root->proc_iops = &proc_sys_dir_operations;
-	proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
+	proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations;
 	proc_sys_root->nlink = 0;
 
 	return sysctl_init();
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 0b7c8dffc9ae..608233dfd29c 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -41,24 +41,19 @@ enum proc_param {
 	Opt_hidepid,
 };
 
-static const struct fs_parameter_spec proc_param_specs[] = {
+static const struct fs_parameter_spec proc_fs_parameters[] = {
 	fsparam_u32("gid",	Opt_gid),
 	fsparam_u32("hidepid",	Opt_hidepid),
 	{}
 };
 
-static const struct fs_parameter_description proc_fs_parameters = {
-	.name		= "proc",
-	.specs		= proc_param_specs,
-};
-
 static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct proc_fs_context *ctx = fc->fs_private;
 	struct fs_parse_result result;
 	int opt;
 
-	opt = fs_parse(fc, &proc_fs_parameters, param, &result);
+	opt = fs_parse(fc, proc_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
 
@@ -71,7 +66,7 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		ctx->hidepid = result.uint_32;
 		if (ctx->hidepid < HIDEPID_OFF ||
 		    ctx->hidepid > HIDEPID_INVISIBLE)
-			return invalf(fc, "proc: hidepid value must be between 0 and 2.\n");
+			return invalfc(fc, "hidepid value must be between 0 and 2.\n");
 		break;
 
 	default:
@@ -207,7 +202,7 @@ static void proc_kill_sb(struct super_block *sb)
 static struct file_system_type proc_fs_type = {
 	.name			= "proc",
 	.init_fs_context	= proc_init_fs_context,
-	.parameters		= &proc_fs_parameters,
+	.parameters		= proc_fs_parameters,
 	.kill_sb		= proc_kill_sb,
 	.fs_flags		= FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM,
 };
@@ -292,7 +287,7 @@ struct proc_dir_entry proc_root = {
 	.nlink		= 2, 
 	.refcnt		= REFCOUNT_INIT(1),
 	.proc_iops	= &proc_root_inode_operations, 
-	.proc_fops	= &proc_root_operations,
+	.proc_dir_ops	= &proc_root_operations,
 	.parent		= &proc_root,
 	.subdir		= RB_ROOT,
 	.name		= "/proc",
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index fd931d3e77be..0449edf460f5 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -223,16 +223,16 @@ static int stat_open(struct inode *inode, struct file *file)
 	return single_open_size(file, show_stat, NULL, size);
 }
 
-static const struct file_operations proc_stat_operations = {
-	.open		= stat_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
+static const struct proc_ops stat_proc_ops = {
+	.proc_open	= stat_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
 };
 
 static int __init proc_stat_init(void)
 {
-	proc_create("stat", 0, NULL, &proc_stat_operations);
+	proc_create("stat", 0, NULL, &stat_proc_ops);
 	return 0;
 }
 fs_initcall(proc_stat_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9442631fd4af..3ba9ae83bff5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -505,7 +505,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 
 #ifdef CONFIG_SHMEM
 static int smaps_pte_hole(unsigned long addr, unsigned long end,
-		struct mm_walk *walk)
+			  __always_unused int depth, struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 
@@ -1282,7 +1282,7 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
 }
 
 static int pagemap_pte_hole(unsigned long start, unsigned long end,
-				struct mm_walk *walk)
+			    __always_unused int depth, struct mm_walk *walk)
 {
 	struct pagemapread *pm = walk->private;
 	unsigned long addr = start;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7b13988796e1..7dc800cce354 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -667,10 +667,10 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 }
 #endif
 
-static const struct file_operations proc_vmcore_operations = {
-	.read		= read_vmcore,
-	.llseek		= default_llseek,
-	.mmap		= mmap_vmcore,
+static const struct proc_ops vmcore_proc_ops = {
+	.proc_read	= read_vmcore,
+	.proc_lseek	= default_llseek,
+	.proc_mmap	= mmap_vmcore,
 };
 
 static struct vmcore* __init get_new_element(void)
@@ -1555,7 +1555,7 @@ static int __init vmcore_init(void)
 	elfcorehdr_free(elfcorehdr_addr);
 	elfcorehdr_addr = ELFCORE_ADDR_ERR;
 
-	proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
+	proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &vmcore_proc_ops);
 	if (proc_vmcore)
 		proc_vmcore->size = vmcore_size;
 	return 0;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index d82636e8eb65..ee179a81b3da 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -181,23 +181,18 @@ enum ramfs_param {
 	Opt_mode,
 };
 
-static const struct fs_parameter_spec ramfs_param_specs[] = {
+const struct fs_parameter_spec ramfs_fs_parameters[] = {
 	fsparam_u32oct("mode",	Opt_mode),
 	{}
 };
 
-const struct fs_parameter_description ramfs_fs_parameters = {
-	.name		= "ramfs",
-	.specs		= ramfs_param_specs,
-};
-
 static int ramfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct fs_parse_result result;
 	struct ramfs_fs_info *fsi = fc->s_fs_info;
 	int opt;
 
-	opt = fs_parse(fc, &ramfs_fs_parameters, param, &result);
+	opt = fs_parse(fc, ramfs_fs_parameters, param, &result);
 	if (opt < 0) {
 		/*
 		 * We might like to report bad mount options here;
@@ -278,7 +273,7 @@ static void ramfs_kill_sb(struct super_block *sb)
 static struct file_system_type ramfs_fs_type = {
 	.name		= "ramfs",
 	.init_fs_context = ramfs_init_fs_context,
-	.parameters	= &ramfs_fs_parameters,
+	.parameters	= ramfs_fs_parameters,
 	.kill_sb	= ramfs_kill_sb,
 	.fs_flags	= FS_USERNS_MOUNT,
 };
diff --git a/fs/read_write.c b/fs/read_write.c
index 7458fccc59e1..59d819c5b92e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -939,6 +939,34 @@ out:
 	return ret;
 }
 
+ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
+			   struct iov_iter *iter)
+{
+	size_t tot_len;
+	ssize_t ret = 0;
+
+	if (!file->f_op->read_iter)
+		return -EINVAL;
+	if (!(file->f_mode & FMODE_READ))
+		return -EBADF;
+	if (!(file->f_mode & FMODE_CAN_READ))
+		return -EINVAL;
+
+	tot_len = iov_iter_count(iter);
+	if (!tot_len)
+		goto out;
+	ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
+	if (ret < 0)
+		return ret;
+
+	ret = call_read_iter(file, iocb, iter);
+out:
+	if (ret >= 0)
+		fsnotify_access(file);
+	return ret;
+}
+EXPORT_SYMBOL(vfs_iocb_iter_read);
+
 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		rwf_t flags)
 {
@@ -975,6 +1003,34 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
 	return ret;
 }
 
+ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
+			    struct iov_iter *iter)
+{
+	size_t tot_len;
+	ssize_t ret = 0;
+
+	if (!file->f_op->write_iter)
+		return -EINVAL;
+	if (!(file->f_mode & FMODE_WRITE))
+		return -EBADF;
+	if (!(file->f_mode & FMODE_CAN_WRITE))
+		return -EINVAL;
+
+	tot_len = iov_iter_count(iter);
+	if (!tot_len)
+		return 0;
+	ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
+	if (ret < 0)
+		return ret;
+
+	ret = call_write_iter(file, iocb, iter);
+	if (ret > 0)
+		fsnotify_modify(file);
+
+	return ret;
+}
+EXPORT_SYMBOL(vfs_iocb_iter_write);
+
 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		rwf_t flags)
 {
diff --git a/fs/splice.c b/fs/splice.c
index 3009652a41c8..d671936d0aad 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -165,8 +165,8 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
 static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
 {
 	smp_mb();
-	if (waitqueue_active(&pipe->wait))
-		wake_up_interruptible(&pipe->wait);
+	if (waitqueue_active(&pipe->rd_wait))
+		wake_up_interruptible(&pipe->rd_wait);
 	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 }
 
@@ -462,8 +462,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
 {
 	smp_mb();
-	if (waitqueue_active(&pipe->wait))
-		wake_up_interruptible(&pipe->wait);
+	if (waitqueue_active(&pipe->wr_wait))
+		wake_up_interruptible(&pipe->wr_wait);
 	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 }
 
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index d41c21fef138..c4ab045926b7 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -449,7 +449,7 @@ int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj,
 	}
 
 	link = kernfs_create_link(kobj->sd, target_name, entry);
-	if (IS_ERR(link) && PTR_ERR(link) == -EEXIST)
+	if (PTR_ERR(link) == -EEXIST)
 		sysfs_warn_dup(kobj->sd, target_name);
 
 	kernfs_put(entry);
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 0caa151cae4e..0ee8c6dfb036 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -330,7 +330,10 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 		parent = tracefs_mount->mnt_root;
 
 	inode_lock(parent->d_inode);
-	dentry = lookup_one_len(name, parent, strlen(name));
+	if (unlikely(IS_DEADDIR(parent->d_inode)))
+		dentry = ERR_PTR(-ENOENT);
+	else
+		dentry = lookup_one_len(name, parent, strlen(name));
 	if (!IS_ERR(dentry) && dentry->d_inode) {
 		dput(dentry);
 		dentry = ERR_PTR(-EEXIST);
@@ -499,122 +502,27 @@ __init struct dentry *tracefs_create_instance_dir(const char *name,
 	return dentry;
 }
 
-static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
+static void remove_one(struct dentry *victim)
 {
-	int ret = 0;
-
-	if (simple_positive(dentry)) {
-		if (dentry->d_inode) {
-			dget(dentry);
-			switch (dentry->d_inode->i_mode & S_IFMT) {
-			case S_IFDIR:
-				ret = simple_rmdir(parent->d_inode, dentry);
-				if (!ret)
-					fsnotify_rmdir(parent->d_inode, dentry);
-				break;
-			default:
-				simple_unlink(parent->d_inode, dentry);
-				fsnotify_unlink(parent->d_inode, dentry);
-				break;
-			}
-			if (!ret)
-				d_delete(dentry);
-			dput(dentry);
-		}
-	}
-	return ret;
-}
-
-/**
- * tracefs_remove - removes a file or directory from the tracefs filesystem
- * @dentry: a pointer to a the dentry of the file or directory to be
- *          removed.
- *
- * This function removes a file or directory in tracefs that was previously
- * created with a call to another tracefs function (like
- * tracefs_create_file() or variants thereof.)
- */
-void tracefs_remove(struct dentry *dentry)
-{
-	struct dentry *parent;
-	int ret;
-
-	if (IS_ERR_OR_NULL(dentry))
-		return;
-
-	parent = dentry->d_parent;
-	inode_lock(parent->d_inode);
-	ret = __tracefs_remove(dentry, parent);
-	inode_unlock(parent->d_inode);
-	if (!ret)
-		simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+	simple_release_fs(&tracefs_mount, &tracefs_mount_count);
 }
 
 /**
- * tracefs_remove_recursive - recursively removes a directory
+ * tracefs_remove - recursively removes a directory
  * @dentry: a pointer to a the dentry of the directory to be removed.
  *
  * This function recursively removes a directory tree in tracefs that
  * was previously created with a call to another tracefs function
  * (like tracefs_create_file() or variants thereof.)
  */
-void tracefs_remove_recursive(struct dentry *dentry)
+void tracefs_remove(struct dentry *dentry)
 {
-	struct dentry *child, *parent;
-
 	if (IS_ERR_OR_NULL(dentry))
 		return;
 
-	parent = dentry;
- down:
-	inode_lock(parent->d_inode);
- loop:
-	/*
-	 * The parent->d_subdirs is protected by the d_lock. Outside that
-	 * lock, the child can be unlinked and set to be freed which can
-	 * use the d_u.d_child as the rcu head and corrupt this list.
-	 */
-	spin_lock(&parent->d_lock);
-	list_for_each_entry(child, &parent->d_subdirs, d_child) {
-		if (!simple_positive(child))
-			continue;
-
-		/* perhaps simple_empty(child) makes more sense */
-		if (!list_empty(&child->d_subdirs)) {
-			spin_unlock(&parent->d_lock);
-			inode_unlock(parent->d_inode);
-			parent = child;
-			goto down;
-		}
-
-		spin_unlock(&parent->d_lock);
-
-		if (!__tracefs_remove(child, parent))
-			simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-
-		/*
-		 * The parent->d_lock protects agaist child from unlinking
-		 * from d_subdirs. When releasing the parent->d_lock we can
-		 * no longer trust that the next pointer is valid.
-		 * Restart the loop. We'll skip this one with the
-		 * simple_positive() check.
-		 */
-		goto loop;
-	}
-	spin_unlock(&parent->d_lock);
-
-	inode_unlock(parent->d_inode);
-	child = parent;
-	parent = parent->d_parent;
-	inode_lock(parent->d_inode);
-
-	if (child != dentry)
-		/* go up */
-		goto loop;
-
-	if (!__tracefs_remove(child, parent))
-		simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-	inode_unlock(parent->d_inode);
+	simple_pin_fs(&trace_fs_type, &tracefs_mount, &tracefs_mount_count);
+	simple_recursive_removal(dentry, remove_one);
+	simple_release_fs(&tracefs_mount, &tracefs_mount_count);
 }
 
 /**
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index bc4dec5b1633..743928efffc1 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1080,18 +1080,12 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr)
 		inode->i_uid = attr->ia_uid;
 	if (attr->ia_valid & ATTR_GID)
 		inode->i_gid = attr->ia_gid;
-	if (attr->ia_valid & ATTR_ATIME) {
-		inode->i_atime = timestamp_truncate(attr->ia_atime,
-						  inode);
-	}
-	if (attr->ia_valid & ATTR_MTIME) {
-		inode->i_mtime = timestamp_truncate(attr->ia_mtime,
-						  inode);
-	}
-	if (attr->ia_valid & ATTR_CTIME) {
-		inode->i_ctime = timestamp_truncate(attr->ia_ctime,
-						  inode);
-	}
+	if (attr->ia_valid & ATTR_ATIME)
+		inode->i_atime = attr->ia_atime;
+	if (attr->ia_valid & ATTR_MTIME)
+		inode->i_mtime = attr->ia_mtime;
+	if (attr->ia_valid & ATTR_CTIME)
+		inode->i_ctime = attr->ia_ctime;
 	if (attr->ia_valid & ATTR_MODE) {
 		umode_t mode = attr->ia_mode;
 
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 17c90dff7266..4b4b65b48c57 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -84,7 +84,6 @@ static int create_default_filesystem(struct ubifs_info *c)
 	int idx_node_size;
 	long long tmp64, main_bytes;
 	__le64 tmp_le64;
-	__le32 tmp_le32;
 	struct timespec64 ts;
 	u8 hash[UBIFS_HASH_ARR_SZ];
 	u8 hash_lpt[UBIFS_HASH_ARR_SZ];
@@ -291,16 +290,14 @@ static int create_default_filesystem(struct ubifs_info *c)
 	ino->creat_sqnum = cpu_to_le64(++c->max_sqnum);
 	ino->nlink = cpu_to_le32(2);
 
-	ktime_get_real_ts64(&ts);
-	ts = timespec64_trunc(ts, DEFAULT_TIME_GRAN);
+	ktime_get_coarse_real_ts64(&ts);
 	tmp_le64 = cpu_to_le64(ts.tv_sec);
 	ino->atime_sec   = tmp_le64;
 	ino->ctime_sec   = tmp_le64;
 	ino->mtime_sec   = tmp_le64;
-	tmp_le32 = cpu_to_le32(ts.tv_nsec);
-	ino->atime_nsec  = tmp_le32;
-	ino->ctime_nsec  = tmp_le32;
-	ino->mtime_nsec  = tmp_le32;
+	ino->atime_nsec  = 0;
+	ino->ctime_nsec  = 0;
+	ino->mtime_nsec  = 0;
 	ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO);
 	ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ);
 
diff --git a/fs/utimes.c b/fs/utimes.c
index c952b6b3d8a0..1d17ce98cb80 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -36,14 +36,14 @@ static int utimes_common(const struct path *path, struct timespec64 *times)
 		if (times[0].tv_nsec == UTIME_OMIT)
 			newattrs.ia_valid &= ~ATTR_ATIME;
 		else if (times[0].tv_nsec != UTIME_NOW) {
-			newattrs.ia_atime = timestamp_truncate(times[0], inode);
+			newattrs.ia_atime = times[0];
 			newattrs.ia_valid |= ATTR_ATIME_SET;
 		}
 
 		if (times[1].tv_nsec == UTIME_OMIT)
 			newattrs.ia_valid &= ~ATTR_MTIME;
 		else if (times[1].tv_nsec != UTIME_NOW) {
-			newattrs.ia_mtime = timestamp_truncate(times[1], inode);
+			newattrs.ia_mtime = times[1];
 			newattrs.ia_valid |= ATTR_MTIME_SET;
 		}
 		/*
diff --git a/fs/vboxsf/Kconfig b/fs/vboxsf/Kconfig
new file mode 100644
index 000000000000..b84586ae08b3
--- /dev/null
+++ b/fs/vboxsf/Kconfig
@@ -0,0 +1,10 @@
+config VBOXSF_FS
+	tristate "VirtualBox guest shared folder (vboxsf) support"
+	depends on X86 && VBOXGUEST
+	select NLS
+	help
+	  VirtualBox hosts can share folders with guests, this driver
+	  implements the Linux-guest side of this allowing folders exported
+	  by the host to be mounted under Linux.
+
+	  If you want to use shared folders in VirtualBox guests, answer Y or M.
diff --git a/fs/vboxsf/Makefile b/fs/vboxsf/Makefile
new file mode 100644
index 000000000000..9e4328e79623
--- /dev/null
+++ b/fs/vboxsf/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: MIT
+
+obj-$(CONFIG_VBOXSF_FS) += vboxsf.o
+
+vboxsf-y := dir.o file.o utils.o vboxsf_wrappers.o super.o
diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c
new file mode 100644
index 000000000000..dd147b490982
--- /dev/null
+++ b/fs/vboxsf/dir.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: MIT
+/*
+ * VirtualBox Guest Shared Folders support: Directory inode and file operations
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#include <linux/namei.h>
+#include <linux/vbox_utils.h>
+#include "vfsmod.h"
+
+static int vboxsf_dir_open(struct inode *inode, struct file *file)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
+	struct shfl_createparms params = {};
+	struct vboxsf_dir_info *sf_d;
+	int err;
+
+	sf_d = vboxsf_dir_info_alloc();
+	if (!sf_d)
+		return -ENOMEM;
+
+	params.handle = SHFL_HANDLE_NIL;
+	params.create_flags = SHFL_CF_DIRECTORY | SHFL_CF_ACT_OPEN_IF_EXISTS |
+			      SHFL_CF_ACT_FAIL_IF_NEW | SHFL_CF_ACCESS_READ;
+
+	err = vboxsf_create_at_dentry(file_dentry(file), &params);
+	if (err)
+		goto err_free_dir_info;
+
+	if (params.result != SHFL_FILE_EXISTS) {
+		err = -ENOENT;
+		goto err_close;
+	}
+
+	err = vboxsf_dir_read_all(sbi, sf_d, params.handle);
+	if (err)
+		goto err_close;
+
+	vboxsf_close(sbi->root, params.handle);
+	file->private_data = sf_d;
+	return 0;
+
+err_close:
+	vboxsf_close(sbi->root, params.handle);
+err_free_dir_info:
+	vboxsf_dir_info_free(sf_d);
+	return err;
+}
+
+static int vboxsf_dir_release(struct inode *inode, struct file *file)
+{
+	if (file->private_data)
+		vboxsf_dir_info_free(file->private_data);
+
+	return 0;
+}
+
+static unsigned int vboxsf_get_d_type(u32 mode)
+{
+	unsigned int d_type;
+
+	switch (mode & SHFL_TYPE_MASK) {
+	case SHFL_TYPE_FIFO:
+		d_type = DT_FIFO;
+		break;
+	case SHFL_TYPE_DEV_CHAR:
+		d_type = DT_CHR;
+		break;
+	case SHFL_TYPE_DIRECTORY:
+		d_type = DT_DIR;
+		break;
+	case SHFL_TYPE_DEV_BLOCK:
+		d_type = DT_BLK;
+		break;
+	case SHFL_TYPE_FILE:
+		d_type = DT_REG;
+		break;
+	case SHFL_TYPE_SYMLINK:
+		d_type = DT_LNK;
+		break;
+	case SHFL_TYPE_SOCKET:
+		d_type = DT_SOCK;
+		break;
+	case SHFL_TYPE_WHITEOUT:
+		d_type = DT_WHT;
+		break;
+	default:
+		d_type = DT_UNKNOWN;
+		break;
+	}
+	return d_type;
+}
+
+static bool vboxsf_dir_emit(struct file *dir, struct dir_context *ctx)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(file_inode(dir)->i_sb);
+	struct vboxsf_dir_info *sf_d = dir->private_data;
+	struct shfl_dirinfo *info;
+	struct vboxsf_dir_buf *b;
+	unsigned int d_type;
+	loff_t i, cur = 0;
+	ino_t fake_ino;
+	void *end;
+	int err;
+
+	list_for_each_entry(b, &sf_d->info_list, head) {
+try_next_entry:
+		if (ctx->pos >= cur + b->entries) {
+			cur += b->entries;
+			continue;
+		}
+
+		/*
+		 * Note the vboxsf_dir_info objects we are iterating over here
+		 * are variable sized, so the info pointer may end up being
+		 * unaligned. This is how we get the data from the host.
+		 * Since vboxsf is only supported on x86 machines this is not
+		 * a problem.
+		 */
+		for (i = 0, info = b->buf; i < ctx->pos - cur; i++) {
+			end = &info->name.string.utf8[info->name.size];
+			/* Only happens if the host gives us corrupt data */
+			if (WARN_ON(end > (b->buf + b->used)))
+				return false;
+			info = end;
+		}
+
+		end = &info->name.string.utf8[info->name.size];
+		if (WARN_ON(end > (b->buf + b->used)))
+			return false;
+
+		/* Info now points to the right entry, emit it. */
+		d_type = vboxsf_get_d_type(info->info.attr.mode);
+
+		/*
+		 * On 32 bit systems pos is 64 signed, while ino is 32 bit
+		 * unsigned so fake_ino may overflow, check for this.
+		 */
+		if ((ino_t)(ctx->pos + 1) != (u64)(ctx->pos + 1)) {
+			vbg_err("vboxsf: fake ino overflow, truncating dir\n");
+			return false;
+		}
+		fake_ino = ctx->pos + 1;
+
+		if (sbi->nls) {
+			char d_name[NAME_MAX];
+
+			err = vboxsf_nlscpy(sbi, d_name, NAME_MAX,
+					    info->name.string.utf8,
+					    info->name.length);
+			if (err) {
+				/* skip erroneous entry and proceed */
+				ctx->pos += 1;
+				goto try_next_entry;
+			}
+
+			return dir_emit(ctx, d_name, strlen(d_name),
+					fake_ino, d_type);
+		}
+
+		return dir_emit(ctx, info->name.string.utf8, info->name.length,
+				fake_ino, d_type);
+	}
+
+	return false;
+}
+
+static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx)
+{
+	bool emitted;
+
+	do {
+		emitted = vboxsf_dir_emit(dir, ctx);
+		if (emitted)
+			ctx->pos += 1;
+	} while (emitted);
+
+	return 0;
+}
+
+const struct file_operations vboxsf_dir_fops = {
+	.open = vboxsf_dir_open,
+	.iterate = vboxsf_dir_iterate,
+	.release = vboxsf_dir_release,
+	.read = generic_read_dir,
+	.llseek = generic_file_llseek,
+};
+
+/*
+ * This is called during name resolution/lookup to check if the @dentry in
+ * the cache is still valid. the job is handled by vboxsf_inode_revalidate.
+ */
+static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	if (d_really_is_positive(dentry))
+		return vboxsf_inode_revalidate(dentry) == 0;
+	else
+		return vboxsf_stat_dentry(dentry, NULL) == -ENOENT;
+}
+
+const struct dentry_operations vboxsf_dentry_ops = {
+	.d_revalidate = vboxsf_dentry_revalidate
+};
+
+/* iops */
+
+static struct dentry *vboxsf_dir_lookup(struct inode *parent,
+					struct dentry *dentry,
+					unsigned int flags)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
+	struct shfl_fsobjinfo fsinfo;
+	struct inode *inode;
+	int err;
+
+	dentry->d_time = jiffies;
+
+	err = vboxsf_stat_dentry(dentry, &fsinfo);
+	if (err) {
+		inode = (err == -ENOENT) ? NULL : ERR_PTR(err);
+	} else {
+		inode = vboxsf_new_inode(parent->i_sb);
+		if (!IS_ERR(inode))
+			vboxsf_init_inode(sbi, inode, &fsinfo);
+	}
+
+	return d_splice_alias(inode, dentry);
+}
+
+static int vboxsf_dir_instantiate(struct inode *parent, struct dentry *dentry,
+				  struct shfl_fsobjinfo *info)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
+	struct vboxsf_inode *sf_i;
+	struct inode *inode;
+
+	inode = vboxsf_new_inode(parent->i_sb);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	sf_i = VBOXSF_I(inode);
+	/* The host may have given us different attr then requested */
+	sf_i->force_restat = 1;
+	vboxsf_init_inode(sbi, inode, info);
+
+	d_instantiate(dentry, inode);
+
+	return 0;
+}
+
+static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry,
+			     umode_t mode, int is_dir)
+{
+	struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
+	struct shfl_createparms params = {};
+	int err;
+
+	params.handle = SHFL_HANDLE_NIL;
+	params.create_flags = SHFL_CF_ACT_CREATE_IF_NEW |
+			      SHFL_CF_ACT_FAIL_IF_EXISTS |
+			      SHFL_CF_ACCESS_READWRITE |
+			      (is_dir ? SHFL_CF_DIRECTORY : 0);
+	params.info.attr.mode = (mode & 0777) |
+				(is_dir ? SHFL_TYPE_DIRECTORY : SHFL_TYPE_FILE);
+	params.info.attr.additional = SHFLFSOBJATTRADD_NOTHING;
+
+	err = vboxsf_create_at_dentry(dentry, &params);
+	if (err)
+		return err;
+
+	if (params.result != SHFL_FILE_CREATED)
+		return -EPERM;
+
+	vboxsf_close(sbi->root, params.handle);
+
+	err = vboxsf_dir_instantiate(parent, dentry, &params.info);
+	if (err)
+		return err;
+
+	/* parent directory access/change time changed */
+	sf_parent_i->force_restat = 1;
+
+	return 0;
+}
+
+static int vboxsf_dir_mkfile(struct inode *parent, struct dentry *dentry,
+			     umode_t mode, bool excl)
+{
+	return vboxsf_dir_create(parent, dentry, mode, 0);
+}
+
+static int vboxsf_dir_mkdir(struct inode *parent, struct dentry *dentry,
+			    umode_t mode)
+{
+	return vboxsf_dir_create(parent, dentry, mode, 1);
+}
+
+static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
+	struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
+	struct inode *inode = d_inode(dentry);
+	struct shfl_string *path;
+	u32 flags;
+	int err;
+
+	if (S_ISDIR(inode->i_mode))
+		flags = SHFL_REMOVE_DIR;
+	else
+		flags = SHFL_REMOVE_FILE;
+
+	if (S_ISLNK(inode->i_mode))
+		flags |= SHFL_REMOVE_SYMLINK;
+
+	path = vboxsf_path_from_dentry(sbi, dentry);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	err = vboxsf_remove(sbi->root, path, flags);
+	__putname(path);
+	if (err)
+		return err;
+
+	/* parent directory access/change time changed */
+	sf_parent_i->force_restat = 1;
+
+	return 0;
+}
+
+static int vboxsf_dir_rename(struct inode *old_parent,
+			     struct dentry *old_dentry,
+			     struct inode *new_parent,
+			     struct dentry *new_dentry,
+			     unsigned int flags)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(old_parent->i_sb);
+	struct vboxsf_inode *sf_old_parent_i = VBOXSF_I(old_parent);
+	struct vboxsf_inode *sf_new_parent_i = VBOXSF_I(new_parent);
+	u32 shfl_flags = SHFL_RENAME_FILE | SHFL_RENAME_REPLACE_IF_EXISTS;
+	struct shfl_string *old_path, *new_path;
+	int err;
+
+	if (flags)
+		return -EINVAL;
+
+	old_path = vboxsf_path_from_dentry(sbi, old_dentry);
+	if (IS_ERR(old_path))
+		return PTR_ERR(old_path);
+
+	new_path = vboxsf_path_from_dentry(sbi, new_dentry);
+	if (IS_ERR(new_path)) {
+		err = PTR_ERR(new_path);
+		goto err_put_old_path;
+	}
+
+	if (d_inode(old_dentry)->i_mode & S_IFDIR)
+		shfl_flags = 0;
+
+	err = vboxsf_rename(sbi->root, old_path, new_path, shfl_flags);
+	if (err == 0) {
+		/* parent directories access/change time changed */
+		sf_new_parent_i->force_restat = 1;
+		sf_old_parent_i->force_restat = 1;
+	}
+
+	__putname(new_path);
+err_put_old_path:
+	__putname(old_path);
+	return err;
+}
+
+static int vboxsf_dir_symlink(struct inode *parent, struct dentry *dentry,
+			      const char *symname)
+{
+	struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
+	int symname_size = strlen(symname) + 1;
+	struct shfl_string *path, *ssymname;
+	struct shfl_fsobjinfo info;
+	int err;
+
+	path = vboxsf_path_from_dentry(sbi, dentry);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	ssymname = kmalloc(SHFLSTRING_HEADER_SIZE + symname_size, GFP_KERNEL);
+	if (!ssymname) {
+		__putname(path);
+		return -ENOMEM;
+	}
+	ssymname->length = symname_size - 1;
+	ssymname->size = symname_size;
+	memcpy(ssymname->string.utf8, symname, symname_size);
+
+	err = vboxsf_symlink(sbi->root, path, ssymname, &info);
+	kfree(ssymname);
+	__putname(path);
+	if (err) {
+		/* -EROFS means symlinks are note support -> -EPERM */
+		return (err == -EROFS) ? -EPERM : err;
+	}
+
+	err = vboxsf_dir_instantiate(parent, dentry, &info);
+	if (err)
+		return err;
+
+	/* parent directory access/change time changed */
+	sf_parent_i->force_restat = 1;
+	return 0;
+}
+
+const struct inode_operations vboxsf_dir_iops = {
+	.lookup  = vboxsf_dir_lookup,
+	.create  = vboxsf_dir_mkfile,
+	.mkdir   = vboxsf_dir_mkdir,
+	.rmdir   = vboxsf_dir_unlink,
+	.unlink  = vboxsf_dir_unlink,
+	.rename  = vboxsf_dir_rename,
+	.symlink = vboxsf_dir_symlink,
+	.getattr = vboxsf_getattr,
+	.setattr = vboxsf_setattr,
+};
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
new file mode 100644
index 000000000000..c4ab5996d97a
--- /dev/null
+++ b/fs/vboxsf/file.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: MIT
+/*
+ * VirtualBox Guest Shared Folders support: Regular file inode and file ops.
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/sizes.h>
+#include "vfsmod.h"
+
+struct vboxsf_handle {
+	u64 handle;
+	u32 root;
+	u32 access_flags;
+	struct kref refcount;
+	struct list_head head;
+};
+
+static int vboxsf_file_open(struct inode *inode, struct file *file)
+{
+	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
+	struct shfl_createparms params = {};
+	struct vboxsf_handle *sf_handle;
+	u32 access_flags = 0;
+	int err;
+
+	sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL);
+	if (!sf_handle)
+		return -ENOMEM;
+
+	/*
+	 * We check the value of params.handle afterwards to find out if
+	 * the call succeeded or failed, as the API does not seem to cleanly
+	 * distinguish error and informational messages.
+	 *
+	 * Furthermore, we must set params.handle to SHFL_HANDLE_NIL to
+	 * make the shared folders host service use our mode parameter.
+	 */
+	params.handle = SHFL_HANDLE_NIL;
+	if (file->f_flags & O_CREAT) {
+		params.create_flags |= SHFL_CF_ACT_CREATE_IF_NEW;
+		/*
+		 * We ignore O_EXCL, as the Linux kernel seems to call create
+		 * beforehand itself, so O_EXCL should always fail.
+		 */
+		if (file->f_flags & O_TRUNC)
+			params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
+		else
+			params.create_flags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
+	} else {
+		params.create_flags |= SHFL_CF_ACT_FAIL_IF_NEW;
+		if (file->f_flags & O_TRUNC)
+			params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
+	}
+
+	switch (file->f_flags & O_ACCMODE) {
+	case O_RDONLY:
+		access_flags |= SHFL_CF_ACCESS_READ;
+		break;
+
+	case O_WRONLY:
+		access_flags |= SHFL_CF_ACCESS_WRITE;
+		break;
+
+	case O_RDWR:
+		access_flags |= SHFL_CF_ACCESS_READWRITE;
+		break;
+
+	default:
+		WARN_ON(1);
+	}
+
+	if (file->f_flags & O_APPEND)
+		access_flags |= SHFL_CF_ACCESS_APPEND;
+
+	params.create_flags |= access_flags;
+	params.info.attr.mode = inode->i_mode;
+
+	err = vboxsf_create_at_dentry(file_dentry(file), &params);
+	if (err == 0 && params.handle == SHFL_HANDLE_NIL)
+		err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT;
+	if (err) {
+		kfree(sf_handle);
+		return err;
+	}
+
+	/* the host may have given us different attr then requested */
+	sf_i->force_restat = 1;
+
+	/* init our handle struct and add it to the inode's handles list */
+	sf_handle->handle = params.handle;
+	sf_handle->root = VBOXSF_SBI(inode->i_sb)->root;
+	sf_handle->access_flags = access_flags;
+	kref_init(&sf_handle->refcount);
+
+	mutex_lock(&sf_i->handle_list_mutex);
+	list_add(&sf_handle->head, &sf_i->handle_list);
+	mutex_unlock(&sf_i->handle_list_mutex);
+
+	file->private_data = sf_handle;
+	return 0;
+}
+
+static void vboxsf_handle_release(struct kref *refcount)
+{
+	struct vboxsf_handle *sf_handle =
+		container_of(refcount, struct vboxsf_handle, refcount);
+
+	vboxsf_close(sf_handle->root, sf_handle->handle);
+	kfree(sf_handle);
+}
+
+static int vboxsf_file_release(struct inode *inode, struct file *file)
+{
+	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
+	struct vboxsf_handle *sf_handle = file->private_data;
+
+	/*
+	 * When a file is closed on our (the guest) side, we want any subsequent
+	 * accesses done on the host side to see all changes done from our side.
+	 */
+	filemap_write_and_wait(inode->i_mapping);
+
+	mutex_lock(&sf_i->handle_list_mutex);
+	list_del(&sf_handle->head);
+	mutex_unlock(&sf_i->handle_list_mutex);
+
+	kref_put(&sf_handle->refcount, vboxsf_handle_release);
+	return 0;
+}
+
+/*
+ * Write back dirty pages now, because there may not be any suitable
+ * open files later
+ */
+static void vboxsf_vma_close(struct vm_area_struct *vma)
+{
+	filemap_write_and_wait(vma->vm_file->f_mapping);
+}
+
+static const struct vm_operations_struct vboxsf_file_vm_ops = {
+	.close		= vboxsf_vma_close,
+	.fault		= filemap_fault,
+	.map_pages	= filemap_map_pages,
+};
+
+static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int err;
+
+	err = generic_file_mmap(file, vma);
+	if (!err)
+		vma->vm_ops = &vboxsf_file_vm_ops;
+
+	return err;
+}
+
+/*
+ * Note that since we are accessing files on the host's filesystem, files
+ * may always be changed underneath us by the host!
+ *
+ * The vboxsf API between the guest and the host does not offer any functions
+ * to deal with this. There is no inode-generation to check for changes, no
+ * events / callback on changes and no way to lock files.
+ *
+ * To avoid returning stale data when a file gets *opened* on our (the guest)
+ * side, we do a "stat" on the host side, then compare the mtime with the
+ * last known mtime and invalidate the page-cache if they differ.
+ * This is done from vboxsf_inode_revalidate().
+ *
+ * When reads are done through the read_iter fop, it is possible to do
+ * further cache revalidation then, there are 3 options to deal with this:
+ *
+ * 1)  Rely solely on the revalidation done at open time
+ * 2)  Do another "stat" and compare mtime again. Unfortunately the vboxsf
+ *     host API does not allow stat on handles, so we would need to use
+ *     file->f_path.dentry and the stat will then fail if the file was unlinked
+ *     or renamed (and there is no thing like NFS' silly-rename). So we get:
+ * 2a) "stat" and compare mtime, on stat failure invalidate the cache
+ * 2b) "stat" and compare mtime, on stat failure do nothing
+ * 3)  Simply always call invalidate_inode_pages2_range on the range of the read
+ *
+ * Currently we are keeping things KISS and using option 1. this allows
+ * directly using generic_file_read_iter without wrapping it.
+ *
+ * This means that only data written on the host side before open() on
+ * the guest side is guaranteed to be seen by the guest. If necessary
+ * we may provide other read-cache strategies in the future and make this
+ * configurable through a mount option.
+ */
+const struct file_operations vboxsf_reg_fops = {
+	.llseek = generic_file_llseek,
+	.read_iter = generic_file_read_iter,
+	.write_iter = generic_file_write_iter,
+	.mmap = vboxsf_file_mmap,
+	.open = vboxsf_file_open,
+	.release = vboxsf_file_release,
+	.fsync = noop_fsync,
+	.splice_read = generic_file_splice_read,
+};
+
+const struct inode_operations vboxsf_reg_iops = {
+	.getattr = vboxsf_getattr,
+	.setattr = vboxsf_setattr
+};
+
+static int vboxsf_readpage(struct file *file, struct page *page)
+{
+	struct vboxsf_handle *sf_handle = file->private_data;
+	loff_t off = page_offset(page);
+	u32 nread = PAGE_SIZE;
+	u8 *buf;
+	int err;
+
+	buf = kmap(page);
+
+	err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf);
+	if (err == 0) {
+		memset(&buf[nread], 0, PAGE_SIZE - nread);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	} else {
+		SetPageError(page);
+	}
+
+	kunmap(page);
+	unlock_page(page);
+	return err;
+}
+
+static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i)
+{
+	struct vboxsf_handle *h, *sf_handle = NULL;
+
+	mutex_lock(&sf_i->handle_list_mutex);
+	list_for_each_entry(h, &sf_i->handle_list, head) {
+		if (h->access_flags == SHFL_CF_ACCESS_WRITE ||
+		    h->access_flags == SHFL_CF_ACCESS_READWRITE) {
+			kref_get(&h->refcount);
+			sf_handle = h;
+			break;
+		}
+	}
+	mutex_unlock(&sf_i->handle_list_mutex);
+
+	return sf_handle;
+}
+
+static int vboxsf_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
+	struct vboxsf_handle *sf_handle;
+	loff_t off = page_offset(page);
+	loff_t size = i_size_read(inode);
+	u32 nwrite = PAGE_SIZE;
+	u8 *buf;
+	int err;
+
+	if (off + PAGE_SIZE > size)
+		nwrite = size & ~PAGE_MASK;
+
+	sf_handle = vboxsf_get_write_handle(sf_i);
+	if (!sf_handle)
+		return -EBADF;
+
+	buf = kmap(page);
+	err = vboxsf_write(sf_handle->root, sf_handle->handle,
+			   off, &nwrite, buf);
+	kunmap(page);
+
+	kref_put(&sf_handle->refcount, vboxsf_handle_release);
+
+	if (err == 0) {
+		ClearPageError(page);
+		/* mtime changed */
+		sf_i->force_restat = 1;
+	} else {
+		ClearPageUptodate(page);
+	}
+
+	unlock_page(page);
+	return err;
+}
+
+static int vboxsf_write_end(struct file *file, struct address_space *mapping,
+			    loff_t pos, unsigned int len, unsigned int copied,
+			    struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+	struct vboxsf_handle *sf_handle = file->private_data;
+	unsigned int from = pos & ~PAGE_MASK;
+	u32 nwritten = len;
+	u8 *buf;
+	int err;
+
+	/* zero the stale part of the page if we did a short copy */
+	if (!PageUptodate(page) && copied < len)
+		zero_user(page, from + copied, len - copied);
+
+	buf = kmap(page);
+	err = vboxsf_write(sf_handle->root, sf_handle->handle,
+			   pos, &nwritten, buf + from);
+	kunmap(page);
+
+	if (err) {
+		nwritten = 0;
+		goto out;
+	}
+
+	/* mtime changed */
+	VBOXSF_I(inode)->force_restat = 1;
+
+	if (!PageUptodate(page) && nwritten == PAGE_SIZE)
+		SetPageUptodate(page);
+
+	pos += nwritten;
+	if (pos > inode->i_size)
+		i_size_write(inode, pos);
+
+out:
+	unlock_page(page);
+	put_page(page);
+
+	return nwritten;
+}
+
+/*
+ * Note simple_write_begin does not read the page from disk on partial writes
+ * this is ok since vboxsf_write_end only writes the written parts of the
+ * page and it does not call SetPageUptodate for partial writes.
+ */
+const struct address_space_operations vboxsf_reg_aops = {
+	.readpage = vboxsf_readpage,
+	.writepage = vboxsf_writepage,
+	.set_page_dirty = __set_page_dirty_nobuffers,
+	.write_begin = simple_write_begin,
+	.write_end = vboxsf_write_end,
+};
+
+static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode,
+				   struct delayed_call *done)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
+	struct shfl_string *path;
+	char *link;
+	int err;
+
+	if (!dentry)
+		return ERR_PTR(-ECHILD);
+
+	path = vboxsf_path_from_dentry(sbi, dentry);
+	if (IS_ERR(path))
+		return ERR_CAST(path);
+
+	link = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (!link) {
+		__putname(path);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	err = vboxsf_readlink(sbi->root, path, PATH_MAX, link);
+	__putname(path);
+	if (err) {
+		kfree(link);
+		return ERR_PTR(err);
+	}
+
+	set_delayed_call(done, kfree_link, link);
+	return link;
+}
+
+const struct inode_operations vboxsf_lnk_iops = {
+	.get_link = vboxsf_get_link
+};
diff --git a/fs/vboxsf/shfl_hostintf.h b/fs/vboxsf/shfl_hostintf.h
new file mode 100644
index 000000000000..aca829062c12
--- /dev/null
+++ b/fs/vboxsf/shfl_hostintf.h
@@ -0,0 +1,901 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * VirtualBox Shared Folders: host interface definition.
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#ifndef SHFL_HOSTINTF_H
+#define SHFL_HOSTINTF_H
+
+#include <linux/vbox_vmmdev_types.h>
+
+/* The max in/out buffer size for a FN_READ or FN_WRITE call */
+#define SHFL_MAX_RW_COUNT           (16 * SZ_1M)
+
+/*
+ * Structures shared between guest and the service
+ * can be relocated and use offsets to point to variable
+ * length parts.
+ *
+ * Shared folders protocol works with handles.
+ * Before doing any action on a file system object,
+ * one have to obtain the object handle via a SHFL_FN_CREATE
+ * request. A handle must be closed with SHFL_FN_CLOSE.
+ */
+
+enum {
+	SHFL_FN_QUERY_MAPPINGS = 1,	/* Query mappings changes. */
+	SHFL_FN_QUERY_MAP_NAME = 2,	/* Query map name. */
+	SHFL_FN_CREATE = 3,		/* Open/create object. */
+	SHFL_FN_CLOSE = 4,		/* Close object handle. */
+	SHFL_FN_READ = 5,		/* Read object content. */
+	SHFL_FN_WRITE = 6,		/* Write new object content. */
+	SHFL_FN_LOCK = 7,		/* Lock/unlock a range in the object. */
+	SHFL_FN_LIST = 8,		/* List object content. */
+	SHFL_FN_INFORMATION = 9,	/* Query/set object information. */
+	/* Note function number 10 is not used! */
+	SHFL_FN_REMOVE = 11,		/* Remove object */
+	SHFL_FN_MAP_FOLDER_OLD = 12,	/* Map folder (legacy) */
+	SHFL_FN_UNMAP_FOLDER = 13,	/* Unmap folder */
+	SHFL_FN_RENAME = 14,		/* Rename object */
+	SHFL_FN_FLUSH = 15,		/* Flush file */
+	SHFL_FN_SET_UTF8 = 16,		/* Select UTF8 filename encoding */
+	SHFL_FN_MAP_FOLDER = 17,	/* Map folder */
+	SHFL_FN_READLINK = 18,		/* Read symlink dest (as of VBox 4.0) */
+	SHFL_FN_SYMLINK = 19,		/* Create symlink (as of VBox 4.0) */
+	SHFL_FN_SET_SYMLINKS = 20,	/* Ask host to show symlinks (4.0+) */
+};
+
+/* Root handles for a mapping are of type u32, Root handles are unique. */
+#define SHFL_ROOT_NIL		UINT_MAX
+
+/* Shared folders handle for an opened object are of type u64. */
+#define SHFL_HANDLE_NIL		ULLONG_MAX
+
+/* Hardcoded maximum length (in chars) of a shared folder name. */
+#define SHFL_MAX_LEN         (256)
+/* Hardcoded maximum number of shared folder mapping available to the guest. */
+#define SHFL_MAX_MAPPINGS    (64)
+
+/** Shared folder string buffer structure. */
+struct shfl_string {
+	/** Allocated size of the string member in bytes. */
+	u16 size;
+
+	/** Length of string without trailing nul in bytes. */
+	u16 length;
+
+	/** UTF-8 or UTF-16 string. Nul terminated. */
+	union {
+		u8 utf8[2];
+		u16 utf16[1];
+		u16 ucs2[1]; /* misnomer, use utf16. */
+	} string;
+};
+VMMDEV_ASSERT_SIZE(shfl_string, 6);
+
+/* The size of shfl_string w/o the string part. */
+#define SHFLSTRING_HEADER_SIZE  4
+
+/* Calculate size of the string. */
+static inline u32 shfl_string_buf_size(const struct shfl_string *string)
+{
+	return string ? SHFLSTRING_HEADER_SIZE + string->size : 0;
+}
+
+/* Set user id on execution (S_ISUID). */
+#define SHFL_UNIX_ISUID             0004000U
+/* Set group id on execution (S_ISGID). */
+#define SHFL_UNIX_ISGID             0002000U
+/* Sticky bit (S_ISVTX / S_ISTXT). */
+#define SHFL_UNIX_ISTXT             0001000U
+
+/* Owner readable (S_IRUSR). */
+#define SHFL_UNIX_IRUSR             0000400U
+/* Owner writable (S_IWUSR). */
+#define SHFL_UNIX_IWUSR             0000200U
+/* Owner executable (S_IXUSR). */
+#define SHFL_UNIX_IXUSR             0000100U
+
+/* Group readable (S_IRGRP). */
+#define SHFL_UNIX_IRGRP             0000040U
+/* Group writable (S_IWGRP). */
+#define SHFL_UNIX_IWGRP             0000020U
+/* Group executable (S_IXGRP). */
+#define SHFL_UNIX_IXGRP             0000010U
+
+/* Other readable (S_IROTH). */
+#define SHFL_UNIX_IROTH             0000004U
+/* Other writable (S_IWOTH). */
+#define SHFL_UNIX_IWOTH             0000002U
+/* Other executable (S_IXOTH). */
+#define SHFL_UNIX_IXOTH             0000001U
+
+/* Named pipe (fifo) (S_IFIFO). */
+#define SHFL_TYPE_FIFO              0010000U
+/* Character device (S_IFCHR). */
+#define SHFL_TYPE_DEV_CHAR          0020000U
+/* Directory (S_IFDIR). */
+#define SHFL_TYPE_DIRECTORY         0040000U
+/* Block device (S_IFBLK). */
+#define SHFL_TYPE_DEV_BLOCK         0060000U
+/* Regular file (S_IFREG). */
+#define SHFL_TYPE_FILE              0100000U
+/* Symbolic link (S_IFLNK). */
+#define SHFL_TYPE_SYMLINK           0120000U
+/* Socket (S_IFSOCK). */
+#define SHFL_TYPE_SOCKET            0140000U
+/* Whiteout (S_IFWHT). */
+#define SHFL_TYPE_WHITEOUT          0160000U
+/* Type mask (S_IFMT). */
+#define SHFL_TYPE_MASK              0170000U
+
+/* Checks the mode flags indicate a directory (S_ISDIR). */
+#define SHFL_IS_DIRECTORY(m)   (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_DIRECTORY)
+/* Checks the mode flags indicate a symbolic link (S_ISLNK). */
+#define SHFL_IS_SYMLINK(m)     (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_SYMLINK)
+
+/** The available additional information in a shfl_fsobjattr object. */
+enum shfl_fsobjattr_add {
+	/** No additional information is available / requested. */
+	SHFLFSOBJATTRADD_NOTHING = 1,
+	/**
+	 * The additional unix attributes (shfl_fsobjattr::u::unix_attr) are
+	 *  available / requested.
+	 */
+	SHFLFSOBJATTRADD_UNIX,
+	/**
+	 * The additional extended attribute size (shfl_fsobjattr::u::size) is
+	 *  available / requested.
+	 */
+	SHFLFSOBJATTRADD_EASIZE,
+	/**
+	 * The last valid item (inclusive).
+	 * The valid range is SHFLFSOBJATTRADD_NOTHING thru
+	 * SHFLFSOBJATTRADD_LAST.
+	 */
+	SHFLFSOBJATTRADD_LAST = SHFLFSOBJATTRADD_EASIZE,
+
+	/** The usual 32-bit hack. */
+	SHFLFSOBJATTRADD_32BIT_SIZE_HACK = 0x7fffffff
+};
+
+/**
+ * Additional unix Attributes, these are available when
+ * shfl_fsobjattr.additional == SHFLFSOBJATTRADD_UNIX.
+ */
+struct shfl_fsobjattr_unix {
+	/**
+	 * The user owning the filesystem object (st_uid).
+	 * This field is ~0U if not supported.
+	 */
+	u32 uid;
+
+	/**
+	 * The group the filesystem object is assigned (st_gid).
+	 * This field is ~0U if not supported.
+	 */
+	u32 gid;
+
+	/**
+	 * Number of hard links to this filesystem object (st_nlink).
+	 * This field is 1 if the filesystem doesn't support hardlinking or
+	 * the information isn't available.
+	 */
+	u32 hardlinks;
+
+	/**
+	 * The device number of the device which this filesystem object resides
+	 * on (st_dev). This field is 0 if this information is not available.
+	 */
+	u32 inode_id_device;
+
+	/**
+	 * The unique identifier (within the filesystem) of this filesystem
+	 * object (st_ino). Together with inode_id_device, this field can be
+	 * used as a OS wide unique id, when both their values are not 0.
+	 * This field is 0 if the information is not available.
+	 */
+	u64 inode_id;
+
+	/**
+	 * User flags (st_flags).
+	 * This field is 0 if this information is not available.
+	 */
+	u32 flags;
+
+	/**
+	 * The current generation number (st_gen).
+	 * This field is 0 if this information is not available.
+	 */
+	u32 generation_id;
+
+	/**
+	 * The device number of a char. or block device type object (st_rdev).
+	 * This field is 0 if the file isn't a char. or block device or when
+	 * the OS doesn't use the major+minor device idenfication scheme.
+	 */
+	u32 device;
+} __packed;
+
+/** Extended attribute size. */
+struct shfl_fsobjattr_easize {
+	/** Size of EAs. */
+	s64 cb;
+} __packed;
+
+/** Shared folder filesystem object attributes. */
+struct shfl_fsobjattr {
+	/** Mode flags (st_mode). SHFL_UNIX_*, SHFL_TYPE_*, and SHFL_DOS_*. */
+	u32 mode;
+
+	/** The additional attributes available. */
+	enum shfl_fsobjattr_add additional;
+
+	/**
+	 * Additional attributes.
+	 *
+	 * Unless explicitly specified to an API, the API can provide additional
+	 * data as it is provided by the underlying OS.
+	 */
+	union {
+		struct shfl_fsobjattr_unix unix_attr;
+		struct shfl_fsobjattr_easize size;
+	} __packed u;
+} __packed;
+VMMDEV_ASSERT_SIZE(shfl_fsobjattr, 44);
+
+struct shfl_timespec {
+	s64 ns_relative_to_unix_epoch;
+};
+
+/** Filesystem object information structure. */
+struct shfl_fsobjinfo {
+	/**
+	 * Logical size (st_size).
+	 * For normal files this is the size of the file.
+	 * For symbolic links, this is the length of the path name contained
+	 * in the symbolic link.
+	 * For other objects this fields needs to be specified.
+	 */
+	s64 size;
+
+	/** Disk allocation size (st_blocks * DEV_BSIZE). */
+	s64 allocated;
+
+	/** Time of last access (st_atime). */
+	struct shfl_timespec access_time;
+
+	/** Time of last data modification (st_mtime). */
+	struct shfl_timespec modification_time;
+
+	/**
+	 * Time of last status change (st_ctime).
+	 * If not available this is set to modification_time.
+	 */
+	struct shfl_timespec change_time;
+
+	/**
+	 * Time of file birth (st_birthtime).
+	 * If not available this is set to change_time.
+	 */
+	struct shfl_timespec birth_time;
+
+	/** Attributes. */
+	struct shfl_fsobjattr attr;
+
+} __packed;
+VMMDEV_ASSERT_SIZE(shfl_fsobjinfo, 92);
+
+/**
+ * result of an open/create request.
+ * Along with handle value the result code
+ * identifies what has happened while
+ * trying to open the object.
+ */
+enum shfl_create_result {
+	SHFL_NO_RESULT,
+	/** Specified path does not exist. */
+	SHFL_PATH_NOT_FOUND,
+	/** Path to file exists, but the last component does not. */
+	SHFL_FILE_NOT_FOUND,
+	/** File already exists and either has been opened or not. */
+	SHFL_FILE_EXISTS,
+	/** New file was created. */
+	SHFL_FILE_CREATED,
+	/** Existing file was replaced or overwritten. */
+	SHFL_FILE_REPLACED
+};
+
+/* No flags. Initialization value. */
+#define SHFL_CF_NONE                  (0x00000000)
+
+/*
+ * Only lookup the object, do not return a handle. When this is set all other
+ * flags are ignored.
+ */
+#define SHFL_CF_LOOKUP                (0x00000001)
+
+/*
+ * Open parent directory of specified object.
+ * Useful for the corresponding Windows FSD flag
+ * and for opening paths like \\dir\\*.* to search the 'dir'.
+ */
+#define SHFL_CF_OPEN_TARGET_DIRECTORY (0x00000002)
+
+/* Create/open a directory. */
+#define SHFL_CF_DIRECTORY             (0x00000004)
+
+/*
+ *  Open/create action to do if object exists
+ *  and if the object does not exists.
+ *  REPLACE file means atomically DELETE and CREATE.
+ *  OVERWRITE file means truncating the file to 0 and
+ *  setting new size.
+ *  When opening an existing directory REPLACE and OVERWRITE
+ *  actions are considered invalid, and cause returning
+ *  FILE_EXISTS with NIL handle.
+ */
+#define SHFL_CF_ACT_MASK_IF_EXISTS      (0x000000f0)
+#define SHFL_CF_ACT_MASK_IF_NEW         (0x00000f00)
+
+/* What to do if object exists. */
+#define SHFL_CF_ACT_OPEN_IF_EXISTS      (0x00000000)
+#define SHFL_CF_ACT_FAIL_IF_EXISTS      (0x00000010)
+#define SHFL_CF_ACT_REPLACE_IF_EXISTS   (0x00000020)
+#define SHFL_CF_ACT_OVERWRITE_IF_EXISTS (0x00000030)
+
+/* What to do if object does not exist. */
+#define SHFL_CF_ACT_CREATE_IF_NEW       (0x00000000)
+#define SHFL_CF_ACT_FAIL_IF_NEW         (0x00000100)
+
+/* Read/write requested access for the object. */
+#define SHFL_CF_ACCESS_MASK_RW          (0x00003000)
+
+/* No access requested. */
+#define SHFL_CF_ACCESS_NONE             (0x00000000)
+/* Read access requested. */
+#define SHFL_CF_ACCESS_READ             (0x00001000)
+/* Write access requested. */
+#define SHFL_CF_ACCESS_WRITE            (0x00002000)
+/* Read/Write access requested. */
+#define SHFL_CF_ACCESS_READWRITE	(0x00003000)
+
+/* Requested share access for the object. */
+#define SHFL_CF_ACCESS_MASK_DENY        (0x0000c000)
+
+/* Allow any access. */
+#define SHFL_CF_ACCESS_DENYNONE         (0x00000000)
+/* Do not allow read. */
+#define SHFL_CF_ACCESS_DENYREAD         (0x00004000)
+/* Do not allow write. */
+#define SHFL_CF_ACCESS_DENYWRITE        (0x00008000)
+/* Do not allow access. */
+#define SHFL_CF_ACCESS_DENYALL          (0x0000c000)
+
+/* Requested access to attributes of the object. */
+#define SHFL_CF_ACCESS_MASK_ATTR        (0x00030000)
+
+/* No access requested. */
+#define SHFL_CF_ACCESS_ATTR_NONE        (0x00000000)
+/* Read access requested. */
+#define SHFL_CF_ACCESS_ATTR_READ        (0x00010000)
+/* Write access requested. */
+#define SHFL_CF_ACCESS_ATTR_WRITE       (0x00020000)
+/* Read/Write access requested. */
+#define SHFL_CF_ACCESS_ATTR_READWRITE   (0x00030000)
+
+/*
+ * The file is opened in append mode.
+ * Ignored if SHFL_CF_ACCESS_WRITE is not set.
+ */
+#define SHFL_CF_ACCESS_APPEND           (0x00040000)
+
+/** Create parameters buffer struct for SHFL_FN_CREATE call */
+struct shfl_createparms {
+	/** Returned handle of opened object. */
+	u64 handle;
+
+	/** Returned result of the operation */
+	enum shfl_create_result result;
+
+	/** SHFL_CF_* */
+	u32 create_flags;
+
+	/**
+	 * Attributes of object to create and
+	 * returned actual attributes of opened/created object.
+	 */
+	struct shfl_fsobjinfo info;
+} __packed;
+
+/** Shared Folder directory information */
+struct shfl_dirinfo {
+	/** Full information about the object. */
+	struct shfl_fsobjinfo info;
+	/**
+	 * The length of the short field (number of UTF16 chars).
+	 * It is 16-bit for reasons of alignment.
+	 */
+	u16 short_name_len;
+	/**
+	 * The short name for 8.3 compatibility.
+	 * Empty string if not available.
+	 */
+	u16 short_name[14];
+	struct shfl_string name;
+};
+
+/** Shared folder filesystem properties. */
+struct shfl_fsproperties {
+	/**
+	 * The maximum size of a filesystem object name.
+	 * This does not include the '\\0'.
+	 */
+	u32 max_component_len;
+
+	/**
+	 * True if the filesystem is remote.
+	 * False if the filesystem is local.
+	 */
+	bool remote;
+
+	/**
+	 * True if the filesystem is case sensitive.
+	 * False if the filesystem is case insensitive.
+	 */
+	bool case_sensitive;
+
+	/**
+	 * True if the filesystem is mounted read only.
+	 * False if the filesystem is mounted read write.
+	 */
+	bool read_only;
+
+	/**
+	 * True if the filesystem can encode unicode object names.
+	 * False if it can't.
+	 */
+	bool supports_unicode;
+
+	/**
+	 * True if the filesystem is compresses.
+	 * False if it isn't or we don't know.
+	 */
+	bool compressed;
+
+	/**
+	 * True if the filesystem compresses of individual files.
+	 * False if it doesn't or we don't know.
+	 */
+	bool file_compression;
+};
+VMMDEV_ASSERT_SIZE(shfl_fsproperties, 12);
+
+struct shfl_volinfo {
+	s64 total_allocation_bytes;
+	s64 available_allocation_bytes;
+	u32 bytes_per_allocation_unit;
+	u32 bytes_per_sector;
+	u32 serial;
+	struct shfl_fsproperties properties;
+};
+
+
+/** SHFL_FN_MAP_FOLDER Parameters structure. */
+struct shfl_map_folder {
+	/**
+	 * pointer, in:
+	 * Points to struct shfl_string buffer.
+	 */
+	struct vmmdev_hgcm_function_parameter path;
+
+	/**
+	 * pointer, out: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * pointer, in: UTF16
+	 * Path delimiter
+	 */
+	struct vmmdev_hgcm_function_parameter delimiter;
+
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Case senstive flag
+	 */
+	struct vmmdev_hgcm_function_parameter case_sensitive;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_MAP_FOLDER (4)
+
+
+/** SHFL_FN_UNMAP_FOLDER Parameters structure. */
+struct shfl_unmap_folder {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_UNMAP_FOLDER (1)
+
+
+/** SHFL_FN_CREATE Parameters structure. */
+struct shfl_create {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * pointer, in:
+	 * Points to struct shfl_string buffer.
+	 */
+	struct vmmdev_hgcm_function_parameter path;
+
+	/**
+	 * pointer, in/out:
+	 * Points to struct shfl_createparms buffer.
+	 */
+	struct vmmdev_hgcm_function_parameter parms;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_CREATE (3)
+
+
+/** SHFL_FN_CLOSE Parameters structure. */
+struct shfl_close {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * value64, in:
+	 * SHFLHANDLE (u64) of object to close.
+	 */
+	struct vmmdev_hgcm_function_parameter handle;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_CLOSE (2)
+
+
+/** SHFL_FN_READ Parameters structure. */
+struct shfl_read {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * value64, in:
+	 * SHFLHANDLE (u64) of object to read from.
+	 */
+	struct vmmdev_hgcm_function_parameter handle;
+
+	/**
+	 * value64, in:
+	 * Offset to read from.
+	 */
+	struct vmmdev_hgcm_function_parameter offset;
+
+	/**
+	 * value64, in/out:
+	 * Bytes to read/How many were read.
+	 */
+	struct vmmdev_hgcm_function_parameter cb;
+
+	/**
+	 * pointer, out:
+	 * Buffer to place data to.
+	 */
+	struct vmmdev_hgcm_function_parameter buffer;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_READ (5)
+
+
+/** SHFL_FN_WRITE Parameters structure. */
+struct shfl_write {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * value64, in:
+	 * SHFLHANDLE (u64) of object to write to.
+	 */
+	struct vmmdev_hgcm_function_parameter handle;
+
+	/**
+	 * value64, in:
+	 * Offset to write to.
+	 */
+	struct vmmdev_hgcm_function_parameter offset;
+
+	/**
+	 * value64, in/out:
+	 * Bytes to write/How many were written.
+	 */
+	struct vmmdev_hgcm_function_parameter cb;
+
+	/**
+	 * pointer, in:
+	 * Data to write.
+	 */
+	struct vmmdev_hgcm_function_parameter buffer;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_WRITE (5)
+
+
+/*
+ * SHFL_FN_LIST
+ * Listing information includes variable length RTDIRENTRY[EX] structures.
+ */
+
+#define SHFL_LIST_NONE			0
+#define SHFL_LIST_RETURN_ONE		1
+
+/** SHFL_FN_LIST Parameters structure. */
+struct shfl_list {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * value64, in:
+	 * SHFLHANDLE (u64) of object to be listed.
+	 */
+	struct vmmdev_hgcm_function_parameter handle;
+
+	/**
+	 * value32, in:
+	 * List flags SHFL_LIST_*.
+	 */
+	struct vmmdev_hgcm_function_parameter flags;
+
+	/**
+	 * value32, in/out:
+	 * Bytes to be used for listing information/How many bytes were used.
+	 */
+	struct vmmdev_hgcm_function_parameter cb;
+
+	/**
+	 * pointer, in/optional
+	 * Points to struct shfl_string buffer that specifies a search path.
+	 */
+	struct vmmdev_hgcm_function_parameter path;
+
+	/**
+	 * pointer, out:
+	 * Buffer to place listing information to. (struct shfl_dirinfo)
+	 */
+	struct vmmdev_hgcm_function_parameter buffer;
+
+	/**
+	 * value32, in/out:
+	 * Indicates a key where the listing must be resumed.
+	 * in: 0 means start from begin of object.
+	 * out: 0 means listing completed.
+	 */
+	struct vmmdev_hgcm_function_parameter resume_point;
+
+	/**
+	 * pointer, out:
+	 * Number of files returned
+	 */
+	struct vmmdev_hgcm_function_parameter file_count;
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_LIST (8)
+
+
+/** SHFL_FN_READLINK Parameters structure. */
+struct shfl_readLink {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * pointer, in:
+	 * Points to struct shfl_string buffer.
+	 */
+	struct vmmdev_hgcm_function_parameter path;
+
+	/**
+	 * pointer, out:
+	 * Buffer to place data to.
+	 */
+	struct vmmdev_hgcm_function_parameter buffer;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_READLINK (3)
+
+
+/* SHFL_FN_INFORMATION */
+
+/* Mask of Set/Get bit. */
+#define SHFL_INFO_MODE_MASK    (0x1)
+/* Get information */
+#define SHFL_INFO_GET          (0x0)
+/* Set information */
+#define SHFL_INFO_SET          (0x1)
+
+/* Get name of the object. */
+#define SHFL_INFO_NAME         (0x2)
+/* Set size of object (extend/trucate); only applies to file objects */
+#define SHFL_INFO_SIZE         (0x4)
+/* Get/Set file object info. */
+#define SHFL_INFO_FILE         (0x8)
+/* Get volume information. */
+#define SHFL_INFO_VOLUME       (0x10)
+
+/** SHFL_FN_INFORMATION Parameters structure. */
+struct shfl_information {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * value64, in:
+	 * SHFLHANDLE (u64) of object to be listed.
+	 */
+	struct vmmdev_hgcm_function_parameter handle;
+
+	/**
+	 * value32, in:
+	 * SHFL_INFO_*
+	 */
+	struct vmmdev_hgcm_function_parameter flags;
+
+	/**
+	 * value32, in/out:
+	 * Bytes to be used for information/How many bytes were used.
+	 */
+	struct vmmdev_hgcm_function_parameter cb;
+
+	/**
+	 * pointer, in/out:
+	 * Information to be set/get (shfl_fsobjinfo or shfl_string). Do not
+	 * forget to set the shfl_fsobjinfo::attr::additional for a get
+	 * operation as well.
+	 */
+	struct vmmdev_hgcm_function_parameter info;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_INFORMATION (5)
+
+
+/* SHFL_FN_REMOVE */
+
+#define SHFL_REMOVE_FILE        (0x1)
+#define SHFL_REMOVE_DIR         (0x2)
+#define SHFL_REMOVE_SYMLINK     (0x4)
+
+/** SHFL_FN_REMOVE Parameters structure. */
+struct shfl_remove {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * pointer, in:
+	 * Points to struct shfl_string buffer.
+	 */
+	struct vmmdev_hgcm_function_parameter path;
+
+	/**
+	 * value32, in:
+	 * remove flags (file/directory)
+	 */
+	struct vmmdev_hgcm_function_parameter flags;
+
+};
+
+#define SHFL_CPARMS_REMOVE  (3)
+
+
+/* SHFL_FN_RENAME */
+
+#define SHFL_RENAME_FILE                (0x1)
+#define SHFL_RENAME_DIR                 (0x2)
+#define SHFL_RENAME_REPLACE_IF_EXISTS   (0x4)
+
+/** SHFL_FN_RENAME Parameters structure. */
+struct shfl_rename {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * pointer, in:
+	 * Points to struct shfl_string src.
+	 */
+	struct vmmdev_hgcm_function_parameter src;
+
+	/**
+	 * pointer, in:
+	 * Points to struct shfl_string dest.
+	 */
+	struct vmmdev_hgcm_function_parameter dest;
+
+	/**
+	 * value32, in:
+	 * rename flags (file/directory)
+	 */
+	struct vmmdev_hgcm_function_parameter flags;
+
+};
+
+#define SHFL_CPARMS_RENAME  (4)
+
+
+/** SHFL_FN_SYMLINK Parameters structure. */
+struct shfl_symlink {
+	/**
+	 * pointer, in: SHFLROOT (u32)
+	 * Root handle of the mapping which name is queried.
+	 */
+	struct vmmdev_hgcm_function_parameter root;
+
+	/**
+	 * pointer, in:
+	 * Points to struct shfl_string of path for the new symlink.
+	 */
+	struct vmmdev_hgcm_function_parameter new_path;
+
+	/**
+	 * pointer, in:
+	 * Points to struct shfl_string of destination for symlink.
+	 */
+	struct vmmdev_hgcm_function_parameter old_path;
+
+	/**
+	 * pointer, out:
+	 * Information about created symlink.
+	 */
+	struct vmmdev_hgcm_function_parameter info;
+
+};
+
+#define SHFL_CPARMS_SYMLINK  (4)
+
+#endif
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
new file mode 100644
index 000000000000..675e26989376
--- /dev/null
+++ b/fs/vboxsf/super.c
@@ -0,0 +1,491 @@
+// SPDX-License-Identifier: MIT
+/*
+ * VirtualBox Guest Shared Folders support: Virtual File System.
+ *
+ * Module initialization/finalization
+ * File system registration/deregistration
+ * Superblock reading
+ * Few utility functions
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#include <linux/idr.h>
+#include <linux/fs_parser.h>
+#include <linux/magic.h>
+#include <linux/module.h>
+#include <linux/nls.h>
+#include <linux/statfs.h>
+#include <linux/vbox_utils.h>
+#include "vfsmod.h"
+
+#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */
+
+#define VBSF_MOUNT_SIGNATURE_BYTE_0 ('\000')
+#define VBSF_MOUNT_SIGNATURE_BYTE_1 ('\377')
+#define VBSF_MOUNT_SIGNATURE_BYTE_2 ('\376')
+#define VBSF_MOUNT_SIGNATURE_BYTE_3 ('\375')
+
+static int follow_symlinks;
+module_param(follow_symlinks, int, 0444);
+MODULE_PARM_DESC(follow_symlinks,
+		 "Let host resolve symlinks rather than showing them");
+
+static DEFINE_IDA(vboxsf_bdi_ida);
+static DEFINE_MUTEX(vboxsf_setup_mutex);
+static bool vboxsf_setup_done;
+static struct super_operations vboxsf_super_ops; /* forward declaration */
+static struct kmem_cache *vboxsf_inode_cachep;
+
+static char * const vboxsf_default_nls = CONFIG_NLS_DEFAULT;
+
+enum  { opt_nls, opt_uid, opt_gid, opt_ttl, opt_dmode, opt_fmode,
+	opt_dmask, opt_fmask };
+
+static const struct fs_parameter_spec vboxsf_fs_parameters[] = {
+	fsparam_string	("nls",		opt_nls),
+	fsparam_u32	("uid",		opt_uid),
+	fsparam_u32	("gid",		opt_gid),
+	fsparam_u32	("ttl",		opt_ttl),
+	fsparam_u32oct	("dmode",	opt_dmode),
+	fsparam_u32oct	("fmode",	opt_fmode),
+	fsparam_u32oct	("dmask",	opt_dmask),
+	fsparam_u32oct	("fmask",	opt_fmask),
+	{}
+};
+
+static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct vboxsf_fs_context *ctx = fc->fs_private;
+	struct fs_parse_result result;
+	kuid_t uid;
+	kgid_t gid;
+	int opt;
+
+	opt = fs_parse(fc, vboxsf_fs_parameters, param, &result);
+	if (opt < 0)
+		return opt;
+
+	switch (opt) {
+	case opt_nls:
+		if (ctx->nls_name || fc->purpose != FS_CONTEXT_FOR_MOUNT) {
+			vbg_err("vboxsf: Cannot reconfigure nls option\n");
+			return -EINVAL;
+		}
+		ctx->nls_name = param->string;
+		param->string = NULL;
+		break;
+	case opt_uid:
+		uid = make_kuid(current_user_ns(), result.uint_32);
+		if (!uid_valid(uid))
+			return -EINVAL;
+		ctx->o.uid = uid;
+		break;
+	case opt_gid:
+		gid = make_kgid(current_user_ns(), result.uint_32);
+		if (!gid_valid(gid))
+			return -EINVAL;
+		ctx->o.gid = gid;
+		break;
+	case opt_ttl:
+		ctx->o.ttl = msecs_to_jiffies(result.uint_32);
+		break;
+	case opt_dmode:
+		if (result.uint_32 & ~0777)
+			return -EINVAL;
+		ctx->o.dmode = result.uint_32;
+		ctx->o.dmode_set = true;
+		break;
+	case opt_fmode:
+		if (result.uint_32 & ~0777)
+			return -EINVAL;
+		ctx->o.fmode = result.uint_32;
+		ctx->o.fmode_set = true;
+		break;
+	case opt_dmask:
+		if (result.uint_32 & ~07777)
+			return -EINVAL;
+		ctx->o.dmask = result.uint_32;
+		break;
+	case opt_fmask:
+		if (result.uint_32 & ~07777)
+			return -EINVAL;
+		ctx->o.fmask = result.uint_32;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	struct vboxsf_fs_context *ctx = fc->fs_private;
+	struct shfl_string *folder_name, root_path;
+	struct vboxsf_sbi *sbi;
+	struct dentry *droot;
+	struct inode *iroot;
+	char *nls_name;
+	size_t size;
+	int err;
+
+	if (!fc->source)
+		return -EINVAL;
+
+	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+	if (!sbi)
+		return -ENOMEM;
+
+	sbi->o = ctx->o;
+	idr_init(&sbi->ino_idr);
+	spin_lock_init(&sbi->ino_idr_lock);
+	sbi->next_generation = 1;
+	sbi->bdi_id = -1;
+
+	/* Load nls if not utf8 */
+	nls_name = ctx->nls_name ? ctx->nls_name : vboxsf_default_nls;
+	if (strcmp(nls_name, "utf8") != 0) {
+		if (nls_name == vboxsf_default_nls)
+			sbi->nls = load_nls_default();
+		else
+			sbi->nls = load_nls(nls_name);
+
+		if (!sbi->nls) {
+			vbg_err("vboxsf: Count not load '%s' nls\n", nls_name);
+			err = -EINVAL;
+			goto fail_free;
+		}
+	}
+
+	sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL);
+	if (sbi->bdi_id < 0) {
+		err = sbi->bdi_id;
+		goto fail_free;
+	}
+
+	err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id);
+	if (err)
+		goto fail_free;
+
+	/* Turn source into a shfl_string and map the folder */
+	size = strlen(fc->source) + 1;
+	folder_name = kmalloc(SHFLSTRING_HEADER_SIZE + size, GFP_KERNEL);
+	if (!folder_name) {
+		err = -ENOMEM;
+		goto fail_free;
+	}
+	folder_name->size = size;
+	folder_name->length = size - 1;
+	strlcpy(folder_name->string.utf8, fc->source, size);
+	err = vboxsf_map_folder(folder_name, &sbi->root);
+	kfree(folder_name);
+	if (err) {
+		vbg_err("vboxsf: Host rejected mount of '%s' with error %d\n",
+			fc->source, err);
+		goto fail_free;
+	}
+
+	root_path.length = 1;
+	root_path.size = 2;
+	root_path.string.utf8[0] = '/';
+	root_path.string.utf8[1] = 0;
+	err = vboxsf_stat(sbi, &root_path, &sbi->root_info);
+	if (err)
+		goto fail_unmap;
+
+	sb->s_magic = VBOXSF_SUPER_MAGIC;
+	sb->s_blocksize = 1024;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_op = &vboxsf_super_ops;
+	sb->s_d_op = &vboxsf_dentry_ops;
+
+	iroot = iget_locked(sb, 0);
+	if (!iroot) {
+		err = -ENOMEM;
+		goto fail_unmap;
+	}
+	vboxsf_init_inode(sbi, iroot, &sbi->root_info);
+	unlock_new_inode(iroot);
+
+	droot = d_make_root(iroot);
+	if (!droot) {
+		err = -ENOMEM;
+		goto fail_unmap;
+	}
+
+	sb->s_root = droot;
+	sb->s_fs_info = sbi;
+	return 0;
+
+fail_unmap:
+	vboxsf_unmap_folder(sbi->root);
+fail_free:
+	if (sbi->bdi_id >= 0)
+		ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+	if (sbi->nls)
+		unload_nls(sbi->nls);
+	idr_destroy(&sbi->ino_idr);
+	kfree(sbi);
+	return err;
+}
+
+static void vboxsf_inode_init_once(void *data)
+{
+	struct vboxsf_inode *sf_i = data;
+
+	mutex_init(&sf_i->handle_list_mutex);
+	inode_init_once(&sf_i->vfs_inode);
+}
+
+static struct inode *vboxsf_alloc_inode(struct super_block *sb)
+{
+	struct vboxsf_inode *sf_i;
+
+	sf_i = kmem_cache_alloc(vboxsf_inode_cachep, GFP_NOFS);
+	if (!sf_i)
+		return NULL;
+
+	sf_i->force_restat = 0;
+	INIT_LIST_HEAD(&sf_i->handle_list);
+
+	return &sf_i->vfs_inode;
+}
+
+static void vboxsf_free_inode(struct inode *inode)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sbi->ino_idr_lock, flags);
+	idr_remove(&sbi->ino_idr, inode->i_ino);
+	spin_unlock_irqrestore(&sbi->ino_idr_lock, flags);
+	kmem_cache_free(vboxsf_inode_cachep, VBOXSF_I(inode));
+}
+
+static void vboxsf_put_super(struct super_block *sb)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(sb);
+
+	vboxsf_unmap_folder(sbi->root);
+	if (sbi->bdi_id >= 0)
+		ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+	if (sbi->nls)
+		unload_nls(sbi->nls);
+
+	/*
+	 * vboxsf_free_inode uses the idr, make sure all delayed rcu free
+	 * inodes are flushed.
+	 */
+	rcu_barrier();
+	idr_destroy(&sbi->ino_idr);
+	kfree(sbi);
+}
+
+static int vboxsf_statfs(struct dentry *dentry, struct kstatfs *stat)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct shfl_volinfo shfl_volinfo;
+	struct vboxsf_sbi *sbi;
+	u32 buf_len;
+	int err;
+
+	sbi = VBOXSF_SBI(sb);
+	buf_len = sizeof(shfl_volinfo);
+	err = vboxsf_fsinfo(sbi->root, 0, SHFL_INFO_GET | SHFL_INFO_VOLUME,
+			    &buf_len, &shfl_volinfo);
+	if (err)
+		return err;
+
+	stat->f_type = VBOXSF_SUPER_MAGIC;
+	stat->f_bsize = shfl_volinfo.bytes_per_allocation_unit;
+
+	do_div(shfl_volinfo.total_allocation_bytes,
+	       shfl_volinfo.bytes_per_allocation_unit);
+	stat->f_blocks = shfl_volinfo.total_allocation_bytes;
+
+	do_div(shfl_volinfo.available_allocation_bytes,
+	       shfl_volinfo.bytes_per_allocation_unit);
+	stat->f_bfree  = shfl_volinfo.available_allocation_bytes;
+	stat->f_bavail = shfl_volinfo.available_allocation_bytes;
+
+	stat->f_files = 1000;
+	/*
+	 * Don't return 0 here since the guest may then think that it is not
+	 * possible to create any more files.
+	 */
+	stat->f_ffree = 1000000;
+	stat->f_fsid.val[0] = 0;
+	stat->f_fsid.val[1] = 0;
+	stat->f_namelen = 255;
+	return 0;
+}
+
+static struct super_operations vboxsf_super_ops = {
+	.alloc_inode	= vboxsf_alloc_inode,
+	.free_inode	= vboxsf_free_inode,
+	.put_super	= vboxsf_put_super,
+	.statfs		= vboxsf_statfs,
+};
+
+static int vboxsf_setup(void)
+{
+	int err;
+
+	mutex_lock(&vboxsf_setup_mutex);
+
+	if (vboxsf_setup_done)
+		goto success;
+
+	vboxsf_inode_cachep =
+		kmem_cache_create("vboxsf_inode_cache",
+				  sizeof(struct vboxsf_inode), 0,
+				  (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
+				   SLAB_ACCOUNT),
+				  vboxsf_inode_init_once);
+	if (!vboxsf_inode_cachep) {
+		err = -ENOMEM;
+		goto fail_nomem;
+	}
+
+	err = vboxsf_connect();
+	if (err) {
+		vbg_err("vboxsf: err %d connecting to guest PCI-device\n", err);
+		vbg_err("vboxsf: make sure you are inside a VirtualBox VM\n");
+		vbg_err("vboxsf: and check dmesg for vboxguest errors\n");
+		goto fail_free_cache;
+	}
+
+	err = vboxsf_set_utf8();
+	if (err) {
+		vbg_err("vboxsf_setutf8 error %d\n", err);
+		goto fail_disconnect;
+	}
+
+	if (!follow_symlinks) {
+		err = vboxsf_set_symlinks();
+		if (err)
+			vbg_warn("vboxsf: Unable to show symlinks: %d\n", err);
+	}
+
+	vboxsf_setup_done = true;
+success:
+	mutex_unlock(&vboxsf_setup_mutex);
+	return 0;
+
+fail_disconnect:
+	vboxsf_disconnect();
+fail_free_cache:
+	kmem_cache_destroy(vboxsf_inode_cachep);
+fail_nomem:
+	mutex_unlock(&vboxsf_setup_mutex);
+	return err;
+}
+
+static int vboxsf_parse_monolithic(struct fs_context *fc, void *data)
+{
+	char *options = data;
+
+	if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 &&
+		       options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 &&
+		       options[2] == VBSF_MOUNT_SIGNATURE_BYTE_2 &&
+		       options[3] == VBSF_MOUNT_SIGNATURE_BYTE_3) {
+		vbg_err("vboxsf: Old binary mount data not supported, remove obsolete mount.vboxsf and/or update your VBoxService.\n");
+		return -EINVAL;
+	}
+
+	return generic_parse_monolithic(fc, data);
+}
+
+static int vboxsf_get_tree(struct fs_context *fc)
+{
+	int err;
+
+	err = vboxsf_setup();
+	if (err)
+		return err;
+
+	return get_tree_nodev(fc, vboxsf_fill_super);
+}
+
+static int vboxsf_reconfigure(struct fs_context *fc)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(fc->root->d_sb);
+	struct vboxsf_fs_context *ctx = fc->fs_private;
+	struct inode *iroot = fc->root->d_sb->s_root->d_inode;
+
+	/* Apply changed options to the root inode */
+	sbi->o = ctx->o;
+	vboxsf_init_inode(sbi, iroot, &sbi->root_info);
+
+	return 0;
+}
+
+static void vboxsf_free_fc(struct fs_context *fc)
+{
+	struct vboxsf_fs_context *ctx = fc->fs_private;
+
+	kfree(ctx->nls_name);
+	kfree(ctx);
+}
+
+static const struct fs_context_operations vboxsf_context_ops = {
+	.free			= vboxsf_free_fc,
+	.parse_param		= vboxsf_parse_param,
+	.parse_monolithic	= vboxsf_parse_monolithic,
+	.get_tree		= vboxsf_get_tree,
+	.reconfigure		= vboxsf_reconfigure,
+};
+
+static int vboxsf_init_fs_context(struct fs_context *fc)
+{
+	struct vboxsf_fs_context *ctx;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	current_uid_gid(&ctx->o.uid, &ctx->o.gid);
+
+	fc->fs_private = ctx;
+	fc->ops = &vboxsf_context_ops;
+	return 0;
+}
+
+static struct file_system_type vboxsf_fs_type = {
+	.owner			= THIS_MODULE,
+	.name			= "vboxsf",
+	.init_fs_context	= vboxsf_init_fs_context,
+	.kill_sb		= kill_anon_super
+};
+
+/* Module initialization/finalization handlers */
+static int __init vboxsf_init(void)
+{
+	return register_filesystem(&vboxsf_fs_type);
+}
+
+static void __exit vboxsf_fini(void)
+{
+	unregister_filesystem(&vboxsf_fs_type);
+
+	mutex_lock(&vboxsf_setup_mutex);
+	if (vboxsf_setup_done) {
+		vboxsf_disconnect();
+		/*
+		 * Make sure all delayed rcu free inodes are flushed
+		 * before we destroy the cache.
+		 */
+		rcu_barrier();
+		kmem_cache_destroy(vboxsf_inode_cachep);
+	}
+	mutex_unlock(&vboxsf_setup_mutex);
+}
+
+module_init(vboxsf_init);
+module_exit(vboxsf_fini);
+
+MODULE_DESCRIPTION("Oracle VM VirtualBox Module for Host File System Access");
+MODULE_AUTHOR("Oracle Corporation");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_FS("vboxsf");
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
new file mode 100644
index 000000000000..96bd160da48b
--- /dev/null
+++ b/fs/vboxsf/utils.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: MIT
+/*
+ * VirtualBox Guest Shared Folders support: Utility functions.
+ * Mainly conversion from/to VirtualBox/Linux data structures.
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#include <linux/namei.h>
+#include <linux/nls.h>
+#include <linux/sizes.h>
+#include <linux/vfs.h>
+#include "vfsmod.h"
+
+struct inode *vboxsf_new_inode(struct super_block *sb)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(sb);
+	struct inode *inode;
+	unsigned long flags;
+	int cursor, ret;
+	u32 gen;
+
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	idr_preload(GFP_KERNEL);
+	spin_lock_irqsave(&sbi->ino_idr_lock, flags);
+	cursor = idr_get_cursor(&sbi->ino_idr);
+	ret = idr_alloc_cyclic(&sbi->ino_idr, inode, 1, 0, GFP_ATOMIC);
+	if (ret >= 0 && ret < cursor)
+		sbi->next_generation++;
+	gen = sbi->next_generation;
+	spin_unlock_irqrestore(&sbi->ino_idr_lock, flags);
+	idr_preload_end();
+
+	if (ret < 0) {
+		iput(inode);
+		return ERR_PTR(ret);
+	}
+
+	inode->i_ino = ret;
+	inode->i_generation = gen;
+	return inode;
+}
+
+/* set [inode] attributes based on [info], uid/gid based on [sbi] */
+void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode,
+		       const struct shfl_fsobjinfo *info)
+{
+	const struct shfl_fsobjattr *attr;
+	s64 allocated;
+	int mode;
+
+	attr = &info->attr;
+
+#define mode_set(r) ((attr->mode & (SHFL_UNIX_##r)) ? (S_##r) : 0)
+
+	mode = mode_set(IRUSR);
+	mode |= mode_set(IWUSR);
+	mode |= mode_set(IXUSR);
+
+	mode |= mode_set(IRGRP);
+	mode |= mode_set(IWGRP);
+	mode |= mode_set(IXGRP);
+
+	mode |= mode_set(IROTH);
+	mode |= mode_set(IWOTH);
+	mode |= mode_set(IXOTH);
+
+#undef mode_set
+
+	/* We use the host-side values for these */
+	inode->i_flags |= S_NOATIME | S_NOCMTIME;
+	inode->i_mapping->a_ops = &vboxsf_reg_aops;
+
+	if (SHFL_IS_DIRECTORY(attr->mode)) {
+		inode->i_mode = sbi->o.dmode_set ? sbi->o.dmode : mode;
+		inode->i_mode &= ~sbi->o.dmask;
+		inode->i_mode |= S_IFDIR;
+		inode->i_op = &vboxsf_dir_iops;
+		inode->i_fop = &vboxsf_dir_fops;
+		/*
+		 * XXX: this probably should be set to the number of entries
+		 * in the directory plus two (. ..)
+		 */
+		set_nlink(inode, 1);
+	} else if (SHFL_IS_SYMLINK(attr->mode)) {
+		inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode;
+		inode->i_mode &= ~sbi->o.fmask;
+		inode->i_mode |= S_IFLNK;
+		inode->i_op = &vboxsf_lnk_iops;
+		set_nlink(inode, 1);
+	} else {
+		inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode;
+		inode->i_mode &= ~sbi->o.fmask;
+		inode->i_mode |= S_IFREG;
+		inode->i_op = &vboxsf_reg_iops;
+		inode->i_fop = &vboxsf_reg_fops;
+		set_nlink(inode, 1);
+	}
+
+	inode->i_uid = sbi->o.uid;
+	inode->i_gid = sbi->o.gid;
+
+	inode->i_size = info->size;
+	inode->i_blkbits = 12;
+	/* i_blocks always in units of 512 bytes! */
+	allocated = info->allocated + 511;
+	do_div(allocated, 512);
+	inode->i_blocks = allocated;
+
+	inode->i_atime = ns_to_timespec64(
+				 info->access_time.ns_relative_to_unix_epoch);
+	inode->i_ctime = ns_to_timespec64(
+				 info->change_time.ns_relative_to_unix_epoch);
+	inode->i_mtime = ns_to_timespec64(
+			   info->modification_time.ns_relative_to_unix_epoch);
+}
+
+int vboxsf_create_at_dentry(struct dentry *dentry,
+			    struct shfl_createparms *params)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
+	struct shfl_string *path;
+	int err;
+
+	path = vboxsf_path_from_dentry(sbi, dentry);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	err = vboxsf_create(sbi->root, path, params);
+	__putname(path);
+
+	return err;
+}
+
+int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path,
+		struct shfl_fsobjinfo *info)
+{
+	struct shfl_createparms params = {};
+	int err;
+
+	params.handle = SHFL_HANDLE_NIL;
+	params.create_flags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW;
+
+	err = vboxsf_create(sbi->root, path, &params);
+	if (err)
+		return err;
+
+	if (params.result != SHFL_FILE_EXISTS)
+		return -ENOENT;
+
+	if (info)
+		*info = params.info;
+
+	return 0;
+}
+
+int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
+	struct shfl_string *path;
+	int err;
+
+	path = vboxsf_path_from_dentry(sbi, dentry);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	err = vboxsf_stat(sbi, path, info);
+	__putname(path);
+	return err;
+}
+
+int vboxsf_inode_revalidate(struct dentry *dentry)
+{
+	struct vboxsf_sbi *sbi;
+	struct vboxsf_inode *sf_i;
+	struct shfl_fsobjinfo info;
+	struct timespec64 prev_mtime;
+	struct inode *inode;
+	int err;
+
+	if (!dentry || !d_really_is_positive(dentry))
+		return -EINVAL;
+
+	inode = d_inode(dentry);
+	prev_mtime = inode->i_mtime;
+	sf_i = VBOXSF_I(inode);
+	sbi = VBOXSF_SBI(dentry->d_sb);
+	if (!sf_i->force_restat) {
+		if (time_before(jiffies, dentry->d_time + sbi->o.ttl))
+			return 0;
+	}
+
+	err = vboxsf_stat_dentry(dentry, &info);
+	if (err)
+		return err;
+
+	dentry->d_time = jiffies;
+	sf_i->force_restat = 0;
+	vboxsf_init_inode(sbi, inode, &info);
+
+	/*
+	 * If the file was changed on the host side we need to invalidate the
+	 * page-cache for it.  Note this also gets triggered by our own writes,
+	 * this is unavoidable.
+	 */
+	if (timespec64_compare(&inode->i_mtime, &prev_mtime) > 0)
+		invalidate_inode_pages2(inode->i_mapping);
+
+	return 0;
+}
+
+int vboxsf_getattr(const struct path *path, struct kstat *kstat,
+		   u32 request_mask, unsigned int flags)
+{
+	int err;
+	struct dentry *dentry = path->dentry;
+	struct inode *inode = d_inode(dentry);
+	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
+
+	switch (flags & AT_STATX_SYNC_TYPE) {
+	case AT_STATX_DONT_SYNC:
+		err = 0;
+		break;
+	case AT_STATX_FORCE_SYNC:
+		sf_i->force_restat = 1;
+		/* fall-through */
+	default:
+		err = vboxsf_inode_revalidate(dentry);
+	}
+	if (err)
+		return err;
+
+	generic_fillattr(d_inode(dentry), kstat);
+	return 0;
+}
+
+int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry));
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
+	struct shfl_createparms params = {};
+	struct shfl_fsobjinfo info = {};
+	u32 buf_len;
+	int err;
+
+	params.handle = SHFL_HANDLE_NIL;
+	params.create_flags = SHFL_CF_ACT_OPEN_IF_EXISTS |
+			      SHFL_CF_ACT_FAIL_IF_NEW |
+			      SHFL_CF_ACCESS_ATTR_WRITE;
+
+	/* this is at least required for Posix hosts */
+	if (iattr->ia_valid & ATTR_SIZE)
+		params.create_flags |= SHFL_CF_ACCESS_WRITE;
+
+	err = vboxsf_create_at_dentry(dentry, &params);
+	if (err || params.result != SHFL_FILE_EXISTS)
+		return err ? err : -ENOENT;
+
+#define mode_set(r) ((iattr->ia_mode & (S_##r)) ? SHFL_UNIX_##r : 0)
+
+	/*
+	 * Setting the file size and setting the other attributes has to
+	 * be handled separately.
+	 */
+	if (iattr->ia_valid & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME)) {
+		if (iattr->ia_valid & ATTR_MODE) {
+			info.attr.mode = mode_set(IRUSR);
+			info.attr.mode |= mode_set(IWUSR);
+			info.attr.mode |= mode_set(IXUSR);
+			info.attr.mode |= mode_set(IRGRP);
+			info.attr.mode |= mode_set(IWGRP);
+			info.attr.mode |= mode_set(IXGRP);
+			info.attr.mode |= mode_set(IROTH);
+			info.attr.mode |= mode_set(IWOTH);
+			info.attr.mode |= mode_set(IXOTH);
+
+			if (iattr->ia_mode & S_IFDIR)
+				info.attr.mode |= SHFL_TYPE_DIRECTORY;
+			else
+				info.attr.mode |= SHFL_TYPE_FILE;
+		}
+
+		if (iattr->ia_valid & ATTR_ATIME)
+			info.access_time.ns_relative_to_unix_epoch =
+					    timespec64_to_ns(&iattr->ia_atime);
+
+		if (iattr->ia_valid & ATTR_MTIME)
+			info.modification_time.ns_relative_to_unix_epoch =
+					    timespec64_to_ns(&iattr->ia_mtime);
+
+		/*
+		 * Ignore ctime (inode change time) as it can't be set
+		 * from userland anyway.
+		 */
+
+		buf_len = sizeof(info);
+		err = vboxsf_fsinfo(sbi->root, params.handle,
+				    SHFL_INFO_SET | SHFL_INFO_FILE, &buf_len,
+				    &info);
+		if (err) {
+			vboxsf_close(sbi->root, params.handle);
+			return err;
+		}
+
+		/* the host may have given us different attr then requested */
+		sf_i->force_restat = 1;
+	}
+
+#undef mode_set
+
+	if (iattr->ia_valid & ATTR_SIZE) {
+		memset(&info, 0, sizeof(info));
+		info.size = iattr->ia_size;
+		buf_len = sizeof(info);
+		err = vboxsf_fsinfo(sbi->root, params.handle,
+				    SHFL_INFO_SET | SHFL_INFO_SIZE, &buf_len,
+				    &info);
+		if (err) {
+			vboxsf_close(sbi->root, params.handle);
+			return err;
+		}
+
+		/* the host may have given us different attr then requested */
+		sf_i->force_restat = 1;
+	}
+
+	vboxsf_close(sbi->root, params.handle);
+
+	/* Update the inode with what the host has actually given us. */
+	if (sf_i->force_restat)
+		vboxsf_inode_revalidate(dentry);
+
+	return 0;
+}
+
+/*
+ * [dentry] contains string encoded in coding system that corresponds
+ * to [sbi]->nls, we must convert it to UTF8 here.
+ * Returns a shfl_string allocated through __getname (must be freed using
+ * __putname), or an ERR_PTR on error.
+ */
+struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi,
+					    struct dentry *dentry)
+{
+	struct shfl_string *shfl_path;
+	int path_len, out_len, nb;
+	char *buf, *path;
+	wchar_t uni;
+	u8 *out;
+
+	buf = __getname();
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	path = dentry_path_raw(dentry, buf, PATH_MAX);
+	if (IS_ERR(path)) {
+		__putname(buf);
+		return ERR_CAST(path);
+	}
+	path_len = strlen(path);
+
+	if (sbi->nls) {
+		shfl_path = __getname();
+		if (!shfl_path) {
+			__putname(buf);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		out = shfl_path->string.utf8;
+		out_len = PATH_MAX - SHFLSTRING_HEADER_SIZE - 1;
+
+		while (path_len) {
+			nb = sbi->nls->char2uni(path, path_len, &uni);
+			if (nb < 0) {
+				__putname(shfl_path);
+				__putname(buf);
+				return ERR_PTR(-EINVAL);
+			}
+			path += nb;
+			path_len -= nb;
+
+			nb = utf32_to_utf8(uni, out, out_len);
+			if (nb < 0) {
+				__putname(shfl_path);
+				__putname(buf);
+				return ERR_PTR(-ENAMETOOLONG);
+			}
+			out += nb;
+			out_len -= nb;
+		}
+		*out = 0;
+		shfl_path->length = out - shfl_path->string.utf8;
+		shfl_path->size = shfl_path->length + 1;
+		__putname(buf);
+	} else {
+		if ((SHFLSTRING_HEADER_SIZE + path_len + 1) > PATH_MAX) {
+			__putname(buf);
+			return ERR_PTR(-ENAMETOOLONG);
+		}
+		/*
+		 * dentry_path stores the name at the end of buf, but the
+		 * shfl_string string we return must be properly aligned.
+		 */
+		shfl_path = (struct shfl_string *)buf;
+		memmove(shfl_path->string.utf8, path, path_len);
+		shfl_path->string.utf8[path_len] = 0;
+		shfl_path->length = path_len;
+		shfl_path->size = path_len + 1;
+	}
+
+	return shfl_path;
+}
+
+int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
+		  const unsigned char *utf8_name, size_t utf8_len)
+{
+	const char *in;
+	char *out;
+	size_t out_len;
+	size_t out_bound_len;
+	size_t in_bound_len;
+
+	in = utf8_name;
+	in_bound_len = utf8_len;
+
+	out = name;
+	out_len = 0;
+	/* Reserve space for terminating 0 */
+	out_bound_len = name_bound_len - 1;
+
+	while (in_bound_len) {
+		int nb;
+		unicode_t uni;
+
+		nb = utf8_to_utf32(in, in_bound_len, &uni);
+		if (nb < 0)
+			return -EINVAL;
+
+		in += nb;
+		in_bound_len -= nb;
+
+		nb = sbi->nls->uni2char(uni, out, out_bound_len);
+		if (nb < 0)
+			return nb;
+
+		out += nb;
+		out_bound_len -= nb;
+		out_len += nb;
+	}
+
+	*out = 0;
+
+	return 0;
+}
+
+static struct vboxsf_dir_buf *vboxsf_dir_buf_alloc(struct list_head *list)
+{
+	struct vboxsf_dir_buf *b;
+
+	b = kmalloc(sizeof(*b), GFP_KERNEL);
+	if (!b)
+		return NULL;
+
+	b->buf = kmalloc(DIR_BUFFER_SIZE, GFP_KERNEL);
+	if (!b->buf) {
+		kfree(b);
+		return NULL;
+	}
+
+	b->entries = 0;
+	b->used = 0;
+	b->free = DIR_BUFFER_SIZE;
+	list_add(&b->head, list);
+
+	return b;
+}
+
+static void vboxsf_dir_buf_free(struct vboxsf_dir_buf *b)
+{
+	list_del(&b->head);
+	kfree(b->buf);
+	kfree(b);
+}
+
+struct vboxsf_dir_info *vboxsf_dir_info_alloc(void)
+{
+	struct vboxsf_dir_info *p;
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	INIT_LIST_HEAD(&p->info_list);
+	return p;
+}
+
+void vboxsf_dir_info_free(struct vboxsf_dir_info *p)
+{
+	struct list_head *list, *pos, *tmp;
+
+	list = &p->info_list;
+	list_for_each_safe(pos, tmp, list) {
+		struct vboxsf_dir_buf *b;
+
+		b = list_entry(pos, struct vboxsf_dir_buf, head);
+		vboxsf_dir_buf_free(b);
+	}
+	kfree(p);
+}
+
+int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d,
+			u64 handle)
+{
+	struct vboxsf_dir_buf *b;
+	u32 entries, size;
+	int err = 0;
+	void *buf;
+
+	/* vboxsf_dirinfo returns 1 on end of dir */
+	while (err == 0) {
+		b = vboxsf_dir_buf_alloc(&sf_d->info_list);
+		if (!b) {
+			err = -ENOMEM;
+			break;
+		}
+
+		buf = b->buf;
+		size = b->free;
+
+		err = vboxsf_dirinfo(sbi->root, handle, NULL, 0, 0,
+				     &size, buf, &entries);
+		if (err < 0)
+			break;
+
+		b->entries += entries;
+		b->free -= size;
+		b->used += size;
+	}
+
+	if (b && b->used == 0)
+		vboxsf_dir_buf_free(b);
+
+	/* -EILSEQ means the host could not translate a filename, ignore */
+	if (err > 0 || err == -EILSEQ)
+		err = 0;
+
+	return err;
+}
diff --git a/fs/vboxsf/vboxsf_wrappers.c b/fs/vboxsf/vboxsf_wrappers.c
new file mode 100644
index 000000000000..bfc78a097dae
--- /dev/null
+++ b/fs/vboxsf/vboxsf_wrappers.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Wrapper functions for the shfl host calls.
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vbox_err.h>
+#include <linux/vbox_utils.h>
+#include "vfsmod.h"
+
+#define SHFL_REQUEST \
+	(VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV_OTHER | \
+	 VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN)
+
+static u32 vboxsf_client_id;
+
+int vboxsf_connect(void)
+{
+	struct vbg_dev *gdev;
+	struct vmmdev_hgcm_service_location loc;
+	int err, vbox_status;
+
+	loc.type = VMMDEV_HGCM_LOC_LOCALHOST_EXISTING;
+	strcpy(loc.u.localhost.service_name, "VBoxSharedFolders");
+
+	gdev = vbg_get_gdev();
+	if (IS_ERR(gdev))
+		return -ENODEV;	/* No guest-device */
+
+	err = vbg_hgcm_connect(gdev, SHFL_REQUEST, &loc,
+			       &vboxsf_client_id, &vbox_status);
+	vbg_put_gdev(gdev);
+
+	return err ? err : vbg_status_code_to_errno(vbox_status);
+}
+
+void vboxsf_disconnect(void)
+{
+	struct vbg_dev *gdev;
+	int vbox_status;
+
+	gdev = vbg_get_gdev();
+	if (IS_ERR(gdev))
+		return;   /* guest-device is gone, already disconnected */
+
+	vbg_hgcm_disconnect(gdev, SHFL_REQUEST, vboxsf_client_id, &vbox_status);
+	vbg_put_gdev(gdev);
+}
+
+static int vboxsf_call(u32 function, void *parms, u32 parm_count, int *status)
+{
+	struct vbg_dev *gdev;
+	int err, vbox_status;
+
+	gdev = vbg_get_gdev();
+	if (IS_ERR(gdev))
+		return -ESHUTDOWN; /* guest-dev removed underneath us */
+
+	err = vbg_hgcm_call(gdev, SHFL_REQUEST, vboxsf_client_id, function,
+			    U32_MAX, parms, parm_count, &vbox_status);
+	vbg_put_gdev(gdev);
+
+	if (err < 0)
+		return err;
+
+	if (status)
+		*status = vbox_status;
+
+	return vbg_status_code_to_errno(vbox_status);
+}
+
+int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root)
+{
+	struct shfl_map_folder parms;
+	int err, status;
+
+	parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
+	parms.path.u.pointer.size = shfl_string_buf_size(folder_name);
+	parms.path.u.pointer.u.linear_addr = (uintptr_t)folder_name;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = 0;
+
+	parms.delimiter.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.delimiter.u.value32 = '/';
+
+	parms.case_sensitive.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.case_sensitive.u.value32 = 1;
+
+	err = vboxsf_call(SHFL_FN_MAP_FOLDER, &parms, SHFL_CPARMS_MAP_FOLDER,
+			  &status);
+	if (err == -ENOSYS && status == VERR_NOT_IMPLEMENTED)
+		vbg_err("%s: Error host is too old\n", __func__);
+
+	*root = parms.root.u.value32;
+	return err;
+}
+
+int vboxsf_unmap_folder(u32 root)
+{
+	struct shfl_unmap_folder parms;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	return vboxsf_call(SHFL_FN_UNMAP_FOLDER, &parms,
+			   SHFL_CPARMS_UNMAP_FOLDER, NULL);
+}
+
+/**
+ * vboxsf_create - Create a new file or folder
+ * @root:         Root of the shared folder in which to create the file
+ * @parsed_path:  The path of the file or folder relative to the shared folder
+ * @param:        create_parms Parameters for file/folder creation.
+ *
+ * Create a new file or folder or open an existing one in a shared folder.
+ * Note this function always returns 0 / success unless an exceptional condition
+ * occurs - out of memory, invalid arguments, etc. If the file or folder could
+ * not be opened or created, create_parms->handle will be set to
+ * SHFL_HANDLE_NIL on return.  In this case the value in create_parms->result
+ * provides information as to why (e.g. SHFL_FILE_EXISTS), create_parms->result
+ * is also set on success as additional information.
+ *
+ * Returns:
+ * 0 or negative errno value.
+ */
+int vboxsf_create(u32 root, struct shfl_string *parsed_path,
+		  struct shfl_createparms *create_parms)
+{
+	struct shfl_create parms;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
+	parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
+	parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
+
+	parms.parms.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
+	parms.parms.u.pointer.size = sizeof(struct shfl_createparms);
+	parms.parms.u.pointer.u.linear_addr = (uintptr_t)create_parms;
+
+	return vboxsf_call(SHFL_FN_CREATE, &parms, SHFL_CPARMS_CREATE, NULL);
+}
+
+int vboxsf_close(u32 root, u64 handle)
+{
+	struct shfl_close parms;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+	parms.handle.u.value64 = handle;
+
+	return vboxsf_call(SHFL_FN_CLOSE, &parms, SHFL_CPARMS_CLOSE, NULL);
+}
+
+int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags)
+{
+	struct shfl_remove parms;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+	parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
+	parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
+
+	parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.flags.u.value32 = flags;
+
+	return vboxsf_call(SHFL_FN_REMOVE, &parms, SHFL_CPARMS_REMOVE, NULL);
+}
+
+int vboxsf_rename(u32 root, struct shfl_string *src_path,
+		  struct shfl_string *dest_path, u32 flags)
+{
+	struct shfl_rename parms;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	parms.src.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+	parms.src.u.pointer.size = shfl_string_buf_size(src_path);
+	parms.src.u.pointer.u.linear_addr = (uintptr_t)src_path;
+
+	parms.dest.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+	parms.dest.u.pointer.size = shfl_string_buf_size(dest_path);
+	parms.dest.u.pointer.u.linear_addr = (uintptr_t)dest_path;
+
+	parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.flags.u.value32 = flags;
+
+	return vboxsf_call(SHFL_FN_RENAME, &parms, SHFL_CPARMS_RENAME, NULL);
+}
+
+int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf)
+{
+	struct shfl_read parms;
+	int err;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+	parms.handle.u.value64 = handle;
+	parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+	parms.offset.u.value64 = offset;
+	parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.cb.u.value32 = *buf_len;
+	parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
+	parms.buffer.u.pointer.size = *buf_len;
+	parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+	err = vboxsf_call(SHFL_FN_READ, &parms, SHFL_CPARMS_READ, NULL);
+
+	*buf_len = parms.cb.u.value32;
+	return err;
+}
+
+int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf)
+{
+	struct shfl_write parms;
+	int err;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+	parms.handle.u.value64 = handle;
+	parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+	parms.offset.u.value64 = offset;
+	parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.cb.u.value32 = *buf_len;
+	parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+	parms.buffer.u.pointer.size = *buf_len;
+	parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+	err = vboxsf_call(SHFL_FN_WRITE, &parms, SHFL_CPARMS_WRITE, NULL);
+
+	*buf_len = parms.cb.u.value32;
+	return err;
+}
+
+/* Returns 0 on success, 1 on end-of-dir, negative errno otherwise */
+int vboxsf_dirinfo(u32 root, u64 handle,
+		   struct shfl_string *parsed_path, u32 flags, u32 index,
+		   u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count)
+{
+	struct shfl_list parms;
+	int err, status;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+	parms.handle.u.value64 = handle;
+	parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.flags.u.value32 = flags;
+	parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.cb.u.value32 = *buf_len;
+	if (parsed_path) {
+		parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+		parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
+		parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
+	} else {
+		parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_IN;
+		parms.path.u.pointer.size = 0;
+		parms.path.u.pointer.u.linear_addr = 0;
+	}
+
+	parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
+	parms.buffer.u.pointer.size = *buf_len;
+	parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+	parms.resume_point.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.resume_point.u.value32 = index;
+	parms.file_count.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.file_count.u.value32 = 0;	/* out parameter only */
+
+	err = vboxsf_call(SHFL_FN_LIST, &parms, SHFL_CPARMS_LIST, &status);
+	if (err == -ENODATA && status == VERR_NO_MORE_FILES)
+		err = 1;
+
+	*buf_len = parms.cb.u.value32;
+	*file_count = parms.file_count.u.value32;
+	return err;
+}
+
+int vboxsf_fsinfo(u32 root, u64 handle, u32 flags,
+		  u32 *buf_len, void *buf)
+{
+	struct shfl_information parms;
+	int err;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+	parms.handle.u.value64 = handle;
+	parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.flags.u.value32 = flags;
+	parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.cb.u.value32 = *buf_len;
+	parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
+	parms.info.u.pointer.size = *buf_len;
+	parms.info.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+	err = vboxsf_call(SHFL_FN_INFORMATION, &parms, SHFL_CPARMS_INFORMATION,
+			  NULL);
+
+	*buf_len = parms.cb.u.value32;
+	return err;
+}
+
+int vboxsf_readlink(u32 root, struct shfl_string *parsed_path,
+		    u32 buf_len, u8 *buf)
+{
+	struct shfl_readLink parms;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+	parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
+	parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
+
+	parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
+	parms.buffer.u.pointer.size = buf_len;
+	parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+	return vboxsf_call(SHFL_FN_READLINK, &parms, SHFL_CPARMS_READLINK,
+			   NULL);
+}
+
+int vboxsf_symlink(u32 root, struct shfl_string *new_path,
+		   struct shfl_string *old_path, struct shfl_fsobjinfo *buf)
+{
+	struct shfl_symlink parms;
+
+	parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+	parms.root.u.value32 = root;
+
+	parms.new_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+	parms.new_path.u.pointer.size = shfl_string_buf_size(new_path);
+	parms.new_path.u.pointer.u.linear_addr = (uintptr_t)new_path;
+
+	parms.old_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+	parms.old_path.u.pointer.size = shfl_string_buf_size(old_path);
+	parms.old_path.u.pointer.u.linear_addr = (uintptr_t)old_path;
+
+	parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
+	parms.info.u.pointer.size = sizeof(struct shfl_fsobjinfo);
+	parms.info.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+	return vboxsf_call(SHFL_FN_SYMLINK, &parms, SHFL_CPARMS_SYMLINK, NULL);
+}
+
+int vboxsf_set_utf8(void)
+{
+	return vboxsf_call(SHFL_FN_SET_UTF8, NULL, 0, NULL);
+}
+
+int vboxsf_set_symlinks(void)
+{
+	return vboxsf_call(SHFL_FN_SET_SYMLINKS, NULL, 0, NULL);
+}
diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h
new file mode 100644
index 000000000000..18f95b00fc33
--- /dev/null
+++ b/fs/vboxsf/vfsmod.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * VirtualBox Guest Shared Folders support: module header.
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#ifndef VFSMOD_H
+#define VFSMOD_H
+
+#include <linux/backing-dev.h>
+#include <linux/idr.h>
+#include "shfl_hostintf.h"
+
+#define DIR_BUFFER_SIZE SZ_16K
+
+/* The cast is to prevent assignment of void * to pointers of arbitrary type */
+#define VBOXSF_SBI(sb)	((struct vboxsf_sbi *)(sb)->s_fs_info)
+#define VBOXSF_I(i)	container_of(i, struct vboxsf_inode, vfs_inode)
+
+struct vboxsf_options {
+	unsigned long ttl;
+	kuid_t uid;
+	kgid_t gid;
+	bool dmode_set;
+	bool fmode_set;
+	umode_t dmode;
+	umode_t fmode;
+	umode_t dmask;
+	umode_t fmask;
+};
+
+struct vboxsf_fs_context {
+	struct vboxsf_options o;
+	char *nls_name;
+};
+
+/* per-shared folder information */
+struct vboxsf_sbi {
+	struct vboxsf_options o;
+	struct shfl_fsobjinfo root_info;
+	struct idr ino_idr;
+	spinlock_t ino_idr_lock; /* This protects ino_idr */
+	struct nls_table *nls;
+	u32 next_generation;
+	u32 root;
+	int bdi_id;
+};
+
+/* per-inode information */
+struct vboxsf_inode {
+	/* some information was changed, update data on next revalidate */
+	int force_restat;
+	/* list of open handles for this inode + lock protecting it */
+	struct list_head handle_list;
+	/* This mutex protects handle_list accesses */
+	struct mutex handle_list_mutex;
+	/* The VFS inode struct */
+	struct inode vfs_inode;
+};
+
+struct vboxsf_dir_info {
+	struct list_head info_list;
+};
+
+struct vboxsf_dir_buf {
+	size_t entries;
+	size_t free;
+	size_t used;
+	void *buf;
+	struct list_head head;
+};
+
+/* globals */
+extern const struct inode_operations vboxsf_dir_iops;
+extern const struct inode_operations vboxsf_lnk_iops;
+extern const struct inode_operations vboxsf_reg_iops;
+extern const struct file_operations vboxsf_dir_fops;
+extern const struct file_operations vboxsf_reg_fops;
+extern const struct address_space_operations vboxsf_reg_aops;
+extern const struct dentry_operations vboxsf_dentry_ops;
+
+/* from utils.c */
+struct inode *vboxsf_new_inode(struct super_block *sb);
+void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode,
+		       const struct shfl_fsobjinfo *info);
+int vboxsf_create_at_dentry(struct dentry *dentry,
+			    struct shfl_createparms *params);
+int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path,
+		struct shfl_fsobjinfo *info);
+int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info);
+int vboxsf_inode_revalidate(struct dentry *dentry);
+int vboxsf_getattr(const struct path *path, struct kstat *kstat,
+		   u32 request_mask, unsigned int query_flags);
+int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr);
+struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi,
+					    struct dentry *dentry);
+int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
+		  const unsigned char *utf8_name, size_t utf8_len);
+struct vboxsf_dir_info *vboxsf_dir_info_alloc(void);
+void vboxsf_dir_info_free(struct vboxsf_dir_info *p);
+int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d,
+			u64 handle);
+
+/* from vboxsf_wrappers.c */
+int vboxsf_connect(void);
+void vboxsf_disconnect(void);
+
+int vboxsf_create(u32 root, struct shfl_string *parsed_path,
+		  struct shfl_createparms *create_parms);
+
+int vboxsf_close(u32 root, u64 handle);
+int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags);
+int vboxsf_rename(u32 root, struct shfl_string *src_path,
+		  struct shfl_string *dest_path, u32 flags);
+
+int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf);
+int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf);
+
+int vboxsf_dirinfo(u32 root, u64 handle,
+		   struct shfl_string *parsed_path, u32 flags, u32 index,
+		   u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count);
+int vboxsf_fsinfo(u32 root, u64 handle, u32 flags,
+		  u32 *buf_len, void *buf);
+
+int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root);
+int vboxsf_unmap_folder(u32 root);
+
+int vboxsf_readlink(u32 root, struct shfl_string *parsed_path,
+		    u32 buf_len, u8 *buf);
+int vboxsf_symlink(u32 root, struct shfl_string *new_path,
+		   struct shfl_string *old_path, struct shfl_fsobjinfo *buf);
+
+int vboxsf_set_utf8(void);
+int vboxsf_set_symlinks(void);
+
+#endif
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 14fbdf22b7e7..08d6beb54f8c 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -23,25 +23,28 @@
 #include "xfs_ag_resv.h"
 #include "xfs_health.h"
 
-static struct xfs_buf *
+static int
 xfs_get_aghdr_buf(
 	struct xfs_mount	*mp,
 	xfs_daddr_t		blkno,
 	size_t			numblks,
+	struct xfs_buf		**bpp,
 	const struct xfs_buf_ops *ops)
 {
 	struct xfs_buf		*bp;
+	int			error;
 
-	bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0);
-	if (!bp)
-		return NULL;
+	error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp);
+	if (error)
+		return error;
 
 	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
 	bp->b_bn = blkno;
 	bp->b_maps[0].bm_bn = blkno;
 	bp->b_ops = ops;
 
-	return bp;
+	*bpp = bp;
+	return 0;
 }
 
 static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id)
@@ -340,13 +343,13 @@ xfs_ag_init_hdr(
 	struct aghdr_init_data	*id,
 	aghdr_init_work_f	work,
 	const struct xfs_buf_ops *ops)
-
 {
 	struct xfs_buf		*bp;
+	int			error;
 
-	bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, ops);
-	if (!bp)
-		return -ENOMEM;
+	error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops);
+	if (error)
+		return error;
 
 	(*work)(mp, bp, id);
 
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index fc93fd88ec89..d8053bc96c4d 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1070,11 +1070,11 @@ xfs_alloc_ag_vextent_small(
 	if (args->datatype & XFS_ALLOC_USERDATA) {
 		struct xfs_buf	*bp;
 
-		bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
-		if (XFS_IS_CORRUPT(args->mp, !bp)) {
-			error = -EFSCORRUPTED;
+		error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
+				XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
+				args->mp->m_bsize, 0, &bp);
+		if (error)
 			goto error;
-		}
 		xfs_trans_binval(args->tp, bp);
 	}
 	*fbnop = args->agbno = fbno;
@@ -2347,9 +2347,11 @@ xfs_free_agfl_block(
 	if (error)
 		return error;
 
-	bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
-	if (XFS_IS_CORRUPT(tp->t_mountp, !bp))
-		return -EFSCORRUPTED;
+	error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
+			XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
+			tp->t_mountp->m_bsize, 0, &bp);
+	if (error)
+		return error;
 	xfs_trans_binval(tp, bp);
 
 	return 0;
@@ -2500,12 +2502,11 @@ xfs_alloc_fix_freelist(
 
 	if (!pag->pagf_init) {
 		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
-		if (error)
+		if (error) {
+			/* Couldn't lock the AGF so skip this AG. */
+			if (error == -EAGAIN)
+				error = 0;
 			goto out_no_agbp;
-		if (!pag->pagf_init) {
-			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
-			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
-			goto out_agbp_relse;
 		}
 	}
 
@@ -2531,11 +2532,10 @@ xfs_alloc_fix_freelist(
 	 */
 	if (!agbp) {
 		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
-		if (error)
-			goto out_no_agbp;
-		if (!agbp) {
-			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
-			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
+		if (error) {
+			/* Couldn't lock the AGF so skip this AG. */
+			if (error == -EAGAIN)
+				error = 0;
 			goto out_no_agbp;
 		}
 	}
@@ -2766,11 +2766,10 @@ xfs_alloc_pagf_init(
 	xfs_buf_t		*bp;
 	int			error;
 
-	if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
-		return error;
-	if (bp)
+	error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp);
+	if (!error)
 		xfs_trans_brelse(tp, bp);
-	return 0;
+	return error;
 }
 
 /*
@@ -2956,14 +2955,11 @@ xfs_read_agf(
 	trace_xfs_read_agf(mp, agno);
 
 	ASSERT(agno != NULLAGNUMBER);
-	error = xfs_trans_read_buf(
-			mp, tp, mp->m_ddev_targp,
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
 			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
 			XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
 	if (error)
 		return error;
-	if (!*bpp)
-		return 0;
 
 	ASSERT(!(*bpp)->b_error);
 	xfs_buf_set_ref(*bpp, XFS_AGF_REF);
@@ -2987,14 +2983,15 @@ xfs_alloc_read_agf(
 
 	trace_xfs_alloc_read_agf(mp, agno);
 
+	/* We don't support trylock when freeing. */
+	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
+			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
 	ASSERT(agno != NULLAGNUMBER);
 	error = xfs_read_agf(mp, tp, agno,
 			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
 			bpp);
 	if (error)
 		return error;
-	if (!*bpp)
-		return 0;
 	ASSERT(!(*bpp)->b_error);
 
 	agf = XFS_BUF_TO_AGF(*bpp);
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index a266d05df146..8b7f74b3bea2 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -418,20 +418,10 @@ xfs_attr_rmtval_get(
 			       (map[i].br_startblock != HOLESTARTBLOCK));
 			dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
 			dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			bp = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 0,
-					&xfs_attr3_rmt_buf_ops);
-			if (!bp)
-				return -ENOMEM;
-			error = bp->b_error;
-			if (error) {
-				xfs_buf_ioerror_alert(bp, __func__);
-				xfs_buf_relse(bp);
-
-				/* bad CRC means corrupted metadata */
-				if (error == -EFSBADCRC)
-					error = -EFSCORRUPTED;
+			error = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt,
+					0, &bp, &xfs_attr3_rmt_buf_ops);
+			if (error)
 				return error;
-			}
 
 			error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
 							&offset, &valuelen,
@@ -555,9 +545,9 @@ xfs_attr_rmtval_set(
 		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
 		dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
 
-		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt);
-		if (!bp)
-			return -ENOMEM;
+		error = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, &bp);
+		if (error)
+			return error;
 		bp->b_ops = &xfs_attr3_rmt_buf_ops;
 
 		xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 4c2e046fbfad..9a6d7a84689a 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -730,11 +730,11 @@ xfs_bmap_extents_to_btree(
 	cur->bc_private.b.allocated++;
 	ip->i_d.di_nblocks++;
 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
-	abp = xfs_btree_get_bufl(mp, tp, args.fsbno);
-	if (XFS_IS_CORRUPT(mp, !abp)) {
-		error = -EFSCORRUPTED;
+	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+			XFS_FSB_TO_DADDR(mp, args.fsbno),
+			mp->m_bsize, 0, &abp);
+	if (error)
 		goto out_unreserve_dquot;
-	}
 
 	/*
 	 * Fill in the child block.
@@ -878,7 +878,11 @@ xfs_bmap_local_to_extents(
 	ASSERT(args.fsbno != NULLFSBLOCK);
 	ASSERT(args.len == 1);
 	tp->t_firstblock = args.fsbno;
-	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno);
+	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
+			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
+			args.mp->m_bsize, 0, &bp);
+	if (error)
+		goto done;
 
 	/*
 	 * Initialize the block, copy the data and log the remote buffer.
@@ -3307,11 +3311,12 @@ xfs_bmap_longest_free_extent(
 	pag = xfs_perag_get(mp, ag);
 	if (!pag->pagf_init) {
 		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
-		if (error)
-			goto out;
-
-		if (!pag->pagf_init) {
-			*notinit = 1;
+		if (error) {
+			/* Couldn't lock the AGF, so skip this AG. */
+			if (error == -EAGAIN) {
+				*notinit = 1;
+				error = 0;
+			}
 			goto out;
 		}
 	}
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index b22c7e928eb1..fd300dc93ca4 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -679,42 +679,6 @@ xfs_btree_get_block(
 }
 
 /*
- * Get a buffer for the block, return it with no data read.
- * Long-form addressing.
- */
-xfs_buf_t *				/* buffer for fsbno */
-xfs_btree_get_bufl(
-	xfs_mount_t	*mp,		/* file system mount point */
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_fsblock_t	fsbno)		/* file system block number */
-{
-	xfs_daddr_t		d;		/* real disk block address */
-
-	ASSERT(fsbno != NULLFSBLOCK);
-	d = XFS_FSB_TO_DADDR(mp, fsbno);
-	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
-}
-
-/*
- * Get a buffer for the block, return it with no data read.
- * Short-form addressing.
- */
-xfs_buf_t *				/* buffer for agno/agbno */
-xfs_btree_get_bufs(
-	xfs_mount_t	*mp,		/* file system mount point */
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_agnumber_t	agno,		/* allocation group number */
-	xfs_agblock_t	agbno)		/* allocation group block number */
-{
-	xfs_daddr_t		d;		/* real disk block address */
-
-	ASSERT(agno != NULLAGNUMBER);
-	ASSERT(agbno != NULLAGBLOCK);
-	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
-}
-
-/*
  * Change the cursor to point to the first record at the given level.
  * Other levels are unaffected.
  */
@@ -1270,11 +1234,10 @@ xfs_btree_get_buf_block(
 	error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
 	if (error)
 		return error;
-	*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
-				 mp->m_bsize, 0);
-
-	if (!*bpp)
-		return -ENOMEM;
+	error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize,
+			0, bpp);
+	if (error)
+		return error;
 
 	(*bpp)->b_ops = cur->bc_ops->buf_ops;
 	*block = XFS_BUF_TO_BLOCK(*bpp);
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index fb9b2121c628..3eff7c321d43 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -297,27 +297,6 @@ xfs_btree_dup_cursor(
 	xfs_btree_cur_t		**ncur);/* output cursor */
 
 /*
- * Get a buffer for the block, return it with no data read.
- * Long-form addressing.
- */
-struct xfs_buf *				/* buffer for fsbno */
-xfs_btree_get_bufl(
-	struct xfs_mount	*mp,	/* file system mount point */
-	struct xfs_trans	*tp,	/* transaction pointer */
-	xfs_fsblock_t		fsbno);	/* file system block number */
-
-/*
- * Get a buffer for the block, return it with no data read.
- * Short-form addressing.
- */
-struct xfs_buf *				/* buffer for agno/agbno */
-xfs_btree_get_bufs(
-	struct xfs_mount	*mp,	/* file system mount point */
-	struct xfs_trans	*tp,	/* transaction pointer */
-	xfs_agnumber_t		agno,	/* allocation group number */
-	xfs_agblock_t		agbno);	/* allocation group block number */
-
-/*
  * Compute first and last byte offsets for the fields given.
  * Interprets the offsets table, which contains struct field offsets.
  */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 8c3eafe280ed..875e04f82541 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2591,13 +2591,9 @@ xfs_da_get_buf(
 	if (error || nmap == 0)
 		goto out_free;
 
-	bp = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0);
-	error = bp ? bp->b_error : -EIO;
-	if (error) {
-		if (bp)
-			xfs_trans_brelse(tp, bp);
+	error = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0, &bp);
+	if (error)
 		goto out_free;
-	}
 
 	*bpp = bp;
 
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 5b759af4d165..bf161e930f1d 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -276,6 +276,7 @@ xfs_ialloc_inode_init(
 	int			i, j;
 	xfs_daddr_t		d;
 	xfs_ino_t		ino = 0;
+	int			error;
 
 	/*
 	 * Loop over the new block(s), filling in the inodes.  For small block
@@ -327,12 +328,11 @@ xfs_ialloc_inode_init(
 		 */
 		d = XFS_AGB_TO_DADDR(mp, agno, agbno +
 				(j * M_IGEO(mp)->blocks_per_cluster));
-		fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
-					 mp->m_bsize *
-					 M_IGEO(mp)->blocks_per_cluster,
-					 XBF_UNMAPPED);
-		if (!fbuf)
-			return -ENOMEM;
+		error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
+				mp->m_bsize * M_IGEO(mp)->blocks_per_cluster,
+				XBF_UNMAPPED, &fbuf);
+		if (error)
+			return error;
 
 		/* Initialize the inode buffers and log them appropriately. */
 		fbuf->b_ops = &xfs_inode_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index d7d702ee4d1a..6e1665f2cb67 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1177,8 +1177,6 @@ xfs_refcount_finish_one(
 				XFS_ALLOC_FLAG_FREEING, &agbp);
 		if (error)
 			return error;
-		if (XFS_IS_CORRUPT(tp->t_mountp, !agbp))
-			return -EFSCORRUPTED;
 
 		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
 		if (!rcur) {
@@ -1718,10 +1716,6 @@ xfs_refcount_recover_cow_leftovers(
 	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
 	if (error)
 		goto out_trans;
-	if (!agbp) {
-		error = -ENOMEM;
-		goto out_trans;
-	}
 	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
 
 	/* Find all the leftover CoW staging extents. */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 0ac69751fe85..2f60fc3c99a0 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -985,9 +985,9 @@ xfs_update_secondary_sbs(
 	for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) {
 		struct xfs_buf		*bp;
 
-		bp = xfs_buf_get(mp->m_ddev_targp,
+		error = xfs_buf_get(mp->m_ddev_targp,
 				 XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
-				 XFS_FSS_TO_BB(mp, 1));
+				 XFS_FSS_TO_BB(mp, 1), &bp);
 		/*
 		 * If we get an error reading or writing alternate superblocks,
 		 * continue.  xfs_repair chooses the "best" superblock based
@@ -995,12 +995,12 @@ xfs_update_secondary_sbs(
 		 * superblocks un-updated than updated, and xfs_repair may
 		 * pick them over the properly-updated primary.
 		 */
-		if (!bp) {
+		if (error) {
 			xfs_warn(mp,
 		"error allocating secondary superblock for ag %d",
 				agno);
 			if (!saved_error)
-				saved_error = -ENOMEM;
+				saved_error = error;
 			continue;
 		}
 
@@ -1185,13 +1185,14 @@ xfs_sb_get_secondary(
 	struct xfs_buf		**bpp)
 {
 	struct xfs_buf		*bp;
+	int			error;
 
 	ASSERT(agno != 0 && agno != NULLAGNUMBER);
-	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 			XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
-			XFS_FSS_TO_BB(mp, 1), 0);
-	if (!bp)
-		return -ENOMEM;
+			XFS_FSS_TO_BB(mp, 1), 0, &bp);
+	if (error)
+		return error;
 	bp->b_ops = &xfs_sb_buf_ops;
 	xfs_buf_oneshot(bp);
 	*bpp = bp;
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 7a1a38b636a9..d5e6db9af434 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -659,8 +659,6 @@ xrep_agfl(
 	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
 	if (error)
 		return error;
-	if (!agf_bp)
-		return -ENOMEM;
 
 	/*
 	 * Make sure we have the AGFL buffer, as scrub might have decided it
@@ -735,8 +733,6 @@ xrep_agi_find_btrees(
 	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
 	if (error)
 		return error;
-	if (!agf_bp)
-		return -ENOMEM;
 
 	/* Find the btree roots. */
 	error = xrep_find_ag_btree_roots(sc, agf_bp, fab, NULL);
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 7251c66a82c9..ec2064ed3c30 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -83,9 +83,6 @@ xchk_fscount_warmup(
 		error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &agf_bp);
 		if (error)
 			break;
-		error = -ENOMEM;
-		if (!agf_bp || !agi_bp)
-			break;
 
 		/*
 		 * These are supposed to be initialized by the header read
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index b70a88bc975e..e489d7a8446a 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -341,13 +341,17 @@ xrep_init_btblock(
 	struct xfs_trans		*tp = sc->tp;
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_buf			*bp;
+	int				error;
 
 	trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
 			XFS_FSB_TO_AGBNO(mp, fsb), btnum);
 
 	ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
-	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
-			XFS_FSB_TO_BB(mp, 1), 0);
+	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+			XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0,
+			&bp);
+	if (error)
+		return error;
 	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
 	xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno);
 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
@@ -542,8 +546,6 @@ xrep_reap_block(
 		error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf_bp);
 		if (error)
 			return error;
-		if (!agf_bp)
-			return -ENOMEM;
 	} else {
 		agf_bp = sc->sa.agf_bp;
 	}
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 8fbb841cd6fe..bbfa6ba84dcd 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -205,11 +205,12 @@ xfs_attr3_node_inactive(
 		/*
 		 * Remove the subsidiary block from the cache and from the log.
 		 */
-		child_bp = xfs_trans_get_buf(*trans, mp->m_ddev_targp,
+		error = xfs_trans_get_buf(*trans, mp->m_ddev_targp,
 				child_blkno,
-				XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0);
-		if (!child_bp)
-			return -EIO;
+				XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0,
+				&child_bp);
+		if (error)
+			return error;
 		error = bp->b_error;
 		if (error) {
 			xfs_trans_brelse(*trans, child_bp);
@@ -298,10 +299,10 @@ xfs_attr3_root_inactive(
 	/*
 	 * Invalidate the incore copy of the root block.
 	 */
-	bp = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno,
-			XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0);
-	if (!bp)
-		return -EIO;
+	error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno,
+			XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp);
+	if (error)
+		return error;
 	error = bp->b_error;
 	if (error) {
 		xfs_trans_brelse(*trans, bp);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a0229c368e78..217e4f82a44a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -198,20 +198,22 @@ xfs_buf_free_maps(
 	}
 }
 
-static struct xfs_buf *
+static int
 _xfs_buf_alloc(
 	struct xfs_buftarg	*target,
 	struct xfs_buf_map	*map,
 	int			nmaps,
-	xfs_buf_flags_t		flags)
+	xfs_buf_flags_t		flags,
+	struct xfs_buf		**bpp)
 {
 	struct xfs_buf		*bp;
 	int			error;
 	int			i;
 
+	*bpp = NULL;
 	bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
 	if (unlikely(!bp))
-		return NULL;
+		return -ENOMEM;
 
 	/*
 	 * We don't want certain flags to appear in b_flags unless they are
@@ -239,7 +241,7 @@ _xfs_buf_alloc(
 	error = xfs_buf_get_maps(bp, nmaps);
 	if (error)  {
 		kmem_cache_free(xfs_buf_zone, bp);
-		return NULL;
+		return error;
 	}
 
 	bp->b_bn = map[0].bm_bn;
@@ -256,7 +258,8 @@ _xfs_buf_alloc(
 	XFS_STATS_INC(bp->b_mount, xb_create);
 	trace_xfs_buf_init(bp, _RET_IP_);
 
-	return bp;
+	*bpp = bp;
+	return 0;
 }
 
 /*
@@ -682,53 +685,39 @@ xfs_buf_incore(
  * cache hits, as metadata intensive workloads will see 3 orders of magnitude
  * more hits than misses.
  */
-struct xfs_buf *
+int
 xfs_buf_get_map(
 	struct xfs_buftarg	*target,
 	struct xfs_buf_map	*map,
 	int			nmaps,
-	xfs_buf_flags_t		flags)
+	xfs_buf_flags_t		flags,
+	struct xfs_buf		**bpp)
 {
 	struct xfs_buf		*bp;
 	struct xfs_buf		*new_bp;
 	int			error = 0;
 
+	*bpp = NULL;
 	error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
-
-	switch (error) {
-	case 0:
-		/* cache hit */
+	if (!error)
 		goto found;
-	case -EAGAIN:
-		/* cache hit, trylock failure, caller handles failure */
-		ASSERT(flags & XBF_TRYLOCK);
-		return NULL;
-	case -ENOENT:
-		/* cache miss, go for insert */
-		break;
-	case -EFSCORRUPTED:
-	default:
-		/*
-		 * None of the higher layers understand failure types
-		 * yet, so return NULL to signal a fatal lookup error.
-		 */
-		return NULL;
-	}
+	if (error != -ENOENT)
+		return error;
 
-	new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
-	if (unlikely(!new_bp))
-		return NULL;
+	error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
+	if (error)
+		return error;
 
 	error = xfs_buf_allocate_memory(new_bp, flags);
 	if (error) {
 		xfs_buf_free(new_bp);
-		return NULL;
+		return error;
 	}
 
 	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
 	if (error) {
 		xfs_buf_free(new_bp);
-		return NULL;
+		return error;
 	}
 
 	if (bp != new_bp)
@@ -741,7 +730,7 @@ found:
 			xfs_warn(target->bt_mount,
 				"%s: failed to map pagesn", __func__);
 			xfs_buf_relse(bp);
-			return NULL;
+			return error;
 		}
 	}
 
@@ -754,7 +743,8 @@ found:
 
 	XFS_STATS_INC(target->bt_mount, xb_get);
 	trace_xfs_buf_get(bp, flags, _RET_IP_);
-	return bp;
+	*bpp = bp;
+	return 0;
 }
 
 STATIC int
@@ -806,46 +796,77 @@ xfs_buf_reverify(
 	return bp->b_error;
 }
 
-xfs_buf_t *
+int
 xfs_buf_read_map(
 	struct xfs_buftarg	*target,
 	struct xfs_buf_map	*map,
 	int			nmaps,
 	xfs_buf_flags_t		flags,
-	const struct xfs_buf_ops *ops)
+	struct xfs_buf		**bpp,
+	const struct xfs_buf_ops *ops,
+	xfs_failaddr_t		fa)
 {
 	struct xfs_buf		*bp;
+	int			error;
 
 	flags |= XBF_READ;
+	*bpp = NULL;
 
-	bp = xfs_buf_get_map(target, map, nmaps, flags);
-	if (!bp)
-		return NULL;
+	error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
+	if (error)
+		return error;
 
 	trace_xfs_buf_read(bp, flags, _RET_IP_);
 
 	if (!(bp->b_flags & XBF_DONE)) {
+		/* Initiate the buffer read and wait. */
 		XFS_STATS_INC(target->bt_mount, xb_get_read);
 		bp->b_ops = ops;
-		_xfs_buf_read(bp, flags);
-		return bp;
+		error = _xfs_buf_read(bp, flags);
+
+		/* Readahead iodone already dropped the buffer, so exit. */
+		if (flags & XBF_ASYNC)
+			return 0;
+	} else {
+		/* Buffer already read; all we need to do is check it. */
+		error = xfs_buf_reverify(bp, ops);
+
+		/* Readahead already finished; drop the buffer and exit. */
+		if (flags & XBF_ASYNC) {
+			xfs_buf_relse(bp);
+			return 0;
+		}
+
+		/* We do not want read in the flags */
+		bp->b_flags &= ~XBF_READ;
+		ASSERT(bp->b_ops != NULL || ops == NULL);
 	}
 
-	xfs_buf_reverify(bp, ops);
+	/*
+	 * If we've had a read error, then the contents of the buffer are
+	 * invalid and should not be used. To ensure that a followup read tries
+	 * to pull the buffer from disk again, we clear the XBF_DONE flag and
+	 * mark the buffer stale. This ensures that anyone who has a current
+	 * reference to the buffer will interpret it's contents correctly and
+	 * future cache lookups will also treat it as an empty, uninitialised
+	 * buffer.
+	 */
+	if (error) {
+		if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
+			xfs_buf_ioerror_alert(bp, fa);
 
-	if (flags & XBF_ASYNC) {
-		/*
-		 * Read ahead call which is already satisfied,
-		 * drop the buffer
-		 */
+		bp->b_flags &= ~XBF_DONE;
+		xfs_buf_stale(bp);
 		xfs_buf_relse(bp);
-		return NULL;
+
+		/* bad CRC means corrupted metadata */
+		if (error == -EFSBADCRC)
+			error = -EFSCORRUPTED;
+		return error;
 	}
 
-	/* We do not want read in the flags */
-	bp->b_flags &= ~XBF_READ;
-	ASSERT(bp->b_ops != NULL || ops == NULL);
-	return bp;
+	*bpp = bp;
+	return 0;
 }
 
 /*
@@ -859,11 +880,14 @@ xfs_buf_readahead_map(
 	int			nmaps,
 	const struct xfs_buf_ops *ops)
 {
+	struct xfs_buf		*bp;
+
 	if (bdi_read_congested(target->bt_bdev->bd_bdi))
 		return;
 
 	xfs_buf_read_map(target, map, nmaps,
-		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops);
+		     XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
+		     __this_address);
 }
 
 /*
@@ -880,12 +904,13 @@ xfs_buf_read_uncached(
 	const struct xfs_buf_ops *ops)
 {
 	struct xfs_buf		*bp;
+	int			error;
 
 	*bpp = NULL;
 
-	bp = xfs_buf_get_uncached(target, numblks, flags);
-	if (!bp)
-		return -ENOMEM;
+	error = xfs_buf_get_uncached(target, numblks, flags, &bp);
+	if (error)
+		return error;
 
 	/* set up the buffer for a read IO */
 	ASSERT(bp->b_map_count == 1);
@@ -896,7 +921,7 @@ xfs_buf_read_uncached(
 
 	xfs_buf_submit(bp);
 	if (bp->b_error) {
-		int	error = bp->b_error;
+		error = bp->b_error;
 		xfs_buf_relse(bp);
 		return error;
 	}
@@ -905,20 +930,23 @@ xfs_buf_read_uncached(
 	return 0;
 }
 
-xfs_buf_t *
+int
 xfs_buf_get_uncached(
 	struct xfs_buftarg	*target,
 	size_t			numblks,
-	int			flags)
+	int			flags,
+	struct xfs_buf		**bpp)
 {
 	unsigned long		page_count;
 	int			error, i;
 	struct xfs_buf		*bp;
 	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
 
+	*bpp = NULL;
+
 	/* flags might contain irrelevant bits, pass only what we care about */
-	bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT);
-	if (unlikely(bp == NULL))
+	error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
+	if (error)
 		goto fail;
 
 	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
@@ -928,8 +956,10 @@ xfs_buf_get_uncached(
 
 	for (i = 0; i < page_count; i++) {
 		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
-		if (!bp->b_pages[i])
+		if (!bp->b_pages[i]) {
+			error = -ENOMEM;
 			goto fail_free_mem;
+		}
 	}
 	bp->b_flags |= _XBF_PAGES;
 
@@ -941,7 +971,8 @@ xfs_buf_get_uncached(
 	}
 
 	trace_xfs_buf_get_uncached(bp, _RET_IP_);
-	return bp;
+	*bpp = bp;
+	return 0;
 
  fail_free_mem:
 	while (--i >= 0)
@@ -951,7 +982,7 @@ xfs_buf_get_uncached(
 	xfs_buf_free_maps(bp);
 	kmem_cache_free(xfs_buf_zone, bp);
  fail:
-	return NULL;
+	return error;
 }
 
 /*
@@ -1205,10 +1236,10 @@ __xfs_buf_ioerror(
 void
 xfs_buf_ioerror_alert(
 	struct xfs_buf		*bp,
-	const char		*func)
+	xfs_failaddr_t		func)
 {
 	xfs_alert(bp->b_mount,
-"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
+"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
 			func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
 			-bp->b_error);
 }
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 56e081dd1d96..d79a1fe5d738 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -192,37 +192,40 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
 			   xfs_daddr_t blkno, size_t numblks,
 			   xfs_buf_flags_t flags);
 
-struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target,
-			       struct xfs_buf_map *map, int nmaps,
-			       xfs_buf_flags_t flags);
-struct xfs_buf *xfs_buf_read_map(struct xfs_buftarg *target,
-			       struct xfs_buf_map *map, int nmaps,
-			       xfs_buf_flags_t flags,
-			       const struct xfs_buf_ops *ops);
+int xfs_buf_get_map(struct xfs_buftarg *target, struct xfs_buf_map *map,
+		int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp);
+int xfs_buf_read_map(struct xfs_buftarg *target, struct xfs_buf_map *map,
+		int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp,
+		const struct xfs_buf_ops *ops, xfs_failaddr_t fa);
 void xfs_buf_readahead_map(struct xfs_buftarg *target,
 			       struct xfs_buf_map *map, int nmaps,
 			       const struct xfs_buf_ops *ops);
 
-static inline struct xfs_buf *
+static inline int
 xfs_buf_get(
 	struct xfs_buftarg	*target,
 	xfs_daddr_t		blkno,
-	size_t			numblks)
+	size_t			numblks,
+	struct xfs_buf		**bpp)
 {
 	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
-	return xfs_buf_get_map(target, &map, 1, 0);
+
+	return xfs_buf_get_map(target, &map, 1, 0, bpp);
 }
 
-static inline struct xfs_buf *
+static inline int
 xfs_buf_read(
 	struct xfs_buftarg	*target,
 	xfs_daddr_t		blkno,
 	size_t			numblks,
 	xfs_buf_flags_t		flags,
+	struct xfs_buf		**bpp,
 	const struct xfs_buf_ops *ops)
 {
 	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
-	return xfs_buf_read_map(target, &map, 1, flags, ops);
+
+	return xfs_buf_read_map(target, &map, 1, flags, bpp, ops,
+			__builtin_return_address(0));
 }
 
 static inline void
@@ -236,8 +239,8 @@ xfs_buf_readahead(
 	return xfs_buf_readahead_map(target, &map, 1, ops);
 }
 
-struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
-				int flags);
+int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags,
+		struct xfs_buf **bpp);
 int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
 			  size_t numblks, int flags, struct xfs_buf **bpp,
 			  const struct xfs_buf_ops *ops);
@@ -259,7 +262,7 @@ extern void xfs_buf_ioend(struct xfs_buf *bp);
 extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
 		xfs_failaddr_t failaddr);
 #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
-extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
+extern void xfs_buf_ioerror_alert(struct xfs_buf *bp, xfs_failaddr_t fa);
 
 extern int __xfs_buf_submit(struct xfs_buf *bp, bool);
 static inline int xfs_buf_submit(struct xfs_buf *bp)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 5be8973a452c..663810e6cd59 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1113,7 +1113,7 @@ xfs_buf_iodone_callback_error(
 	if (bp->b_target != lasttarg ||
 	    time_after(jiffies, (lasttime + 5*HZ))) {
 		lasttime = jiffies;
-		xfs_buf_ioerror_alert(bp, __func__);
+		xfs_buf_ioerror_alert(bp, __this_address);
 	}
 	lasttarg = bp->b_target;
 
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index cae613620175..0b8350e84d28 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -45,7 +45,7 @@ xfs_trim_extents(
 	xfs_log_force(mp, XFS_LOG_SYNC);
 
 	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
-	if (error || !agbp)
+	if (error)
 		goto out_put_perag;
 
 	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9cfd3209f52b..d223e1ae90a6 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -320,10 +320,10 @@ xfs_dquot_disk_alloc(
 	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
 
 	/* now we can just get the buffer (there's nothing to read yet) */
-	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
-			mp->m_quotainfo->qi_dqchunklen, 0);
-	if (!bp)
-		return -ENOMEM;
+	error = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
+			mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+	if (error)
+		return error;
 	bp->b_ops = &xfs_dquot_buf_ops;
 
 	/*
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 5f12b5d8527a..1a88025e68a3 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -159,16 +159,15 @@ xfs_filestream_pick_ag(
 
 		if (!pag->pagf_init) {
 			err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
-			if (err && !trylock) {
+			if (err) {
 				xfs_perag_put(pag);
-				return err;
+				if (err != -EAGAIN)
+					return err;
+				/* Couldn't lock the AGF, skip this AG. */
+				continue;
 			}
 		}
 
-		/* Might fail sometimes during the 1st pass with trylock set. */
-		if (!pag->pagf_init)
-			goto next_ag;
-
 		/* Keep track of the AG with the most free blocks. */
 		if (pag->pagf_freeblks > maxfree) {
 			maxfree = pag->pagf_freeblks;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1979a0055763..c5077e6326c7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2546,6 +2546,7 @@ xfs_ifree_cluster(
 	struct xfs_perag	*pag;
 	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
 	xfs_ino_t		inum;
+	int			error;
 
 	inum = xic->first_ino;
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
@@ -2574,12 +2575,11 @@ xfs_ifree_cluster(
 		 * complete before we get a lock on it, and hence we may fail
 		 * to mark all the active inodes on the buffer stale.
 		 */
-		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
-					mp->m_bsize * igeo->blocks_per_cluster,
-					XBF_UNMAPPED);
-
-		if (!bp)
-			return -ENOMEM;
+		error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
+				mp->m_bsize * igeo->blocks_per_cluster,
+				XBF_UNMAPPED, &bp);
+		if (error)
+			return error;
 
 		/*
 		 * This buffer may not have been correctly initialised as we
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0d683fb96396..25cfc85dbaa7 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -294,7 +294,7 @@ xlog_recover_iodone(
 		 * this during recovery. One strike!
 		 */
 		if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
-			xfs_buf_ioerror_alert(bp, __func__);
+			xfs_buf_ioerror_alert(bp, __this_address);
 			xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
 		}
 	}
@@ -2745,15 +2745,10 @@ xlog_recover_buffer_pass2(
 	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
 		buf_flags |= XBF_UNMAPPED;
 
-	bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
-			  buf_flags, NULL);
-	if (!bp)
-		return -ENOMEM;
-	error = bp->b_error;
-	if (error) {
-		xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
-		goto out_release;
-	}
+	error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
+			  buf_flags, &bp, NULL);
+	if (error)
+		return error;
 
 	/*
 	 * Recover the buffer only if we get an LSN from it and it's less than
@@ -2950,17 +2945,10 @@ xlog_recover_inode_pass2(
 	}
 	trace_xfs_log_recover_inode_recover(log, in_f);
 
-	bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
-			  &xfs_inode_buf_ops);
-	if (!bp) {
-		error = -ENOMEM;
+	error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
+			0, &bp, &xfs_inode_buf_ops);
+	if (error)
 		goto error;
-	}
-	error = bp->b_error;
-	if (error) {
-		xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)");
-		goto out_release;
-	}
 	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
 	dip = xfs_buf_offset(bp, in_f->ilf_boffset);
 
@@ -5639,7 +5627,7 @@ xlog_do_recover(
 	error = xfs_buf_submit(bp);
 	if (error) {
 		if (!XFS_FORCED_SHUTDOWN(mp)) {
-			xfs_buf_ioerror_alert(bp, __func__);
+			xfs_buf_ioerror_alert(bp, __this_address);
 			ASSERT(0);
 		}
 		xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index e723b267a247..b0ce04ffd3cd 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -143,8 +143,6 @@ xfs_reflink_find_shared(
 	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
 	if (error)
 		return error;
-	if (!agbp)
-		return -ENOMEM;
 
 	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
 
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index d42b5a2047e0..6209e7b6b895 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -826,12 +826,10 @@ xfs_growfs_rt_alloc(
 			 * Get a buffer for the block.
 			 */
 			d = XFS_FSB_TO_DADDR(mp, fsbno);
-			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
-				mp->m_bsize, 0);
-			if (bp == NULL) {
-				error = -EIO;
+			error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
+					mp->m_bsize, 0, &bp);
+			if (error)
 				goto out_trans_cancel;
-			}
 			memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
 			xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 			/*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 760901783944..2094386af8ac 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -62,7 +62,7 @@ enum {
 	Opt_discard, Opt_nodiscard, Opt_dax,
 };
 
-static const struct fs_parameter_spec xfs_param_specs[] = {
+static const struct fs_parameter_spec xfs_fs_parameters[] = {
 	fsparam_u32("logbufs",		Opt_logbufs),
 	fsparam_string("logbsize",	Opt_logbsize),
 	fsparam_string("logdev",	Opt_logdev),
@@ -106,11 +106,6 @@ static const struct fs_parameter_spec xfs_param_specs[] = {
 	{}
 };
 
-static const struct fs_parameter_description xfs_fs_parameters = {
-	.name		= "xfs",
-	.specs		= xfs_param_specs,
-};
-
 struct proc_xfs_info {
 	uint64_t	flag;
 	char		*str;
@@ -1120,7 +1115,7 @@ xfs_fc_parse_param(
 	int			size = 0;
 	int			opt;
 
-	opt = fs_parse(fc, &xfs_fs_parameters, param, &result);
+	opt = fs_parse(fc, xfs_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
 
@@ -1782,7 +1777,7 @@ static struct file_system_type xfs_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "xfs",
 	.init_fs_context	= xfs_init_fs_context,
-	.parameters		= &xfs_fs_parameters,
+	.parameters		= xfs_fs_parameters,
 	.kill_sb		= kill_block_super,
 	.fs_flags		= FS_REQUIRES_DEV,
 };
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index a25502bc2071..d762d42ed0ff 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -53,20 +53,10 @@ xfs_readlink_bmap_ilocked(
 		d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
 		byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
 
-		bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
-				  &xfs_symlink_buf_ops);
-		if (!bp)
-			return -ENOMEM;
-		error = bp->b_error;
-		if (error) {
-			xfs_buf_ioerror_alert(bp, __func__);
-			xfs_buf_relse(bp);
-
-			/* bad CRC means corrupted metadata */
-			if (error == -EFSBADCRC)
-				error = -EFSCORRUPTED;
-			goto out;
-		}
+		error = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
+				&bp, &xfs_symlink_buf_ops);
+		if (error)
+			return error;
 		byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
 		if (pathlen < byte_cnt)
 			byte_cnt = pathlen;
@@ -290,12 +280,10 @@ xfs_symlink(
 
 			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
 			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
-			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
-					       BTOBB(byte_cnt), 0);
-			if (!bp) {
-				error = -ENOMEM;
+			error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
+					       BTOBB(byte_cnt), 0, &bp);
+			if (error)
 				goto out_trans_cancel;
-			}
 			bp->b_ops = &xfs_symlink_buf_ops;
 
 			byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
@@ -433,13 +421,12 @@ xfs_inactive_symlink_rmt(
 	 * Invalidate the block(s). No validation is done.
 	 */
 	for (i = 0; i < nmaps; i++) {
-		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
-			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
-			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
-		if (!bp) {
-			error = -ENOMEM;
+		error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+				XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
+				XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0,
+				&bp);
+		if (error)
 			goto error_trans_cancel;
-		}
 		xfs_trans_binval(tp, bp);
 	}
 	/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 64d7f171ebd3..752c7fef9de7 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -169,21 +169,21 @@ int		xfs_trans_alloc_empty(struct xfs_mount *mp,
 			struct xfs_trans **tpp);
 void		xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
 
-struct xfs_buf	*xfs_trans_get_buf_map(struct xfs_trans *tp,
-				       struct xfs_buftarg *target,
-				       struct xfs_buf_map *map, int nmaps,
-				       uint flags);
+int xfs_trans_get_buf_map(struct xfs_trans *tp, struct xfs_buftarg *target,
+		struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags,
+		struct xfs_buf **bpp);
 
-static inline struct xfs_buf *
+static inline int
 xfs_trans_get_buf(
 	struct xfs_trans	*tp,
 	struct xfs_buftarg	*target,
 	xfs_daddr_t		blkno,
 	int			numblks,
-	uint			flags)
+	uint			flags,
+	struct xfs_buf		**bpp)
 {
 	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
-	return xfs_trans_get_buf_map(tp, target, &map, 1, flags);
+	return xfs_trans_get_buf_map(tp, target, &map, 1, flags, bpp);
 }
 
 int		xfs_trans_read_buf_map(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index b5b3a78ef31c..08174ffa2118 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -112,19 +112,22 @@ xfs_trans_bjoin(
  * If the transaction pointer is NULL, make this just a normal
  * get_buf() call.
  */
-struct xfs_buf *
+int
 xfs_trans_get_buf_map(
 	struct xfs_trans	*tp,
 	struct xfs_buftarg	*target,
 	struct xfs_buf_map	*map,
 	int			nmaps,
-	xfs_buf_flags_t		flags)
+	xfs_buf_flags_t		flags,
+	struct xfs_buf		**bpp)
 {
 	xfs_buf_t		*bp;
 	struct xfs_buf_log_item	*bip;
+	int			error;
 
+	*bpp = NULL;
 	if (!tp)
-		return xfs_buf_get_map(target, map, nmaps, flags);
+		return xfs_buf_get_map(target, map, nmaps, flags, bpp);
 
 	/*
 	 * If we find the buffer in the cache with this transaction
@@ -146,19 +149,20 @@ xfs_trans_get_buf_map(
 		ASSERT(atomic_read(&bip->bli_refcount) > 0);
 		bip->bli_recur++;
 		trace_xfs_trans_get_buf_recur(bip);
-		return bp;
+		*bpp = bp;
+		return 0;
 	}
 
-	bp = xfs_buf_get_map(target, map, nmaps, flags);
-	if (bp == NULL) {
-		return NULL;
-	}
+	error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
+	if (error)
+		return error;
 
 	ASSERT(!bp->b_error);
 
 	_xfs_trans_bjoin(tp, bp, 1);
 	trace_xfs_trans_get_buf(bp->b_log_item);
-	return bp;
+	*bpp = bp;
+	return 0;
 }
 
 /*
@@ -276,7 +280,7 @@ xfs_trans_read_buf_map(
 		ASSERT(bp->b_ops != NULL);
 		error = xfs_buf_reverify(bp, ops);
 		if (error) {
-			xfs_buf_ioerror_alert(bp, __func__);
+			xfs_buf_ioerror_alert(bp, __return_address);
 
 			if (tp->t_flags & XFS_TRANS_DIRTY)
 				xfs_force_shutdown(tp->t_mountp,
@@ -298,36 +302,17 @@ xfs_trans_read_buf_map(
 		return 0;
 	}
 
-	bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
-	if (!bp) {
-		if (!(flags & XBF_TRYLOCK))
-			return -ENOMEM;
-		return tp ? 0 : -EAGAIN;
-	}
-
-	/*
-	 * If we've had a read error, then the contents of the buffer are
-	 * invalid and should not be used. To ensure that a followup read tries
-	 * to pull the buffer from disk again, we clear the XBF_DONE flag and
-	 * mark the buffer stale. This ensures that anyone who has a current
-	 * reference to the buffer will interpret it's contents correctly and
-	 * future cache lookups will also treat it as an empty, uninitialised
-	 * buffer.
-	 */
-	if (bp->b_error) {
-		error = bp->b_error;
-		if (!XFS_FORCED_SHUTDOWN(mp))
-			xfs_buf_ioerror_alert(bp, __func__);
-		bp->b_flags &= ~XBF_DONE;
-		xfs_buf_stale(bp);
-
+	error = xfs_buf_read_map(target, map, nmaps, flags, &bp, ops,
+			__return_address);
+	switch (error) {
+	case 0:
+		break;
+	default:
 		if (tp && (tp->t_flags & XFS_TRANS_DIRTY))
 			xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
-		xfs_buf_relse(bp);
-
-		/* bad CRC means corrupted metadata */
-		if (error == -EFSBADCRC)
-			error = -EFSCORRUPTED;
+		/* fall through */
+	case -ENOMEM:
+	case -EAGAIN:
 		return error;
 	}