62 files changed, 1301 insertions, 1292 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 6b868f0e0c4c..067e3d340c35 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -167,10 +167,25 @@ static int __init aio_setup(void)
 }
 __initcall(aio_setup);
 
+static void put_aio_ring_file(struct kioctx *ctx)
+{
+	struct file *aio_ring_file = ctx->aio_ring_file;
+	if (aio_ring_file) {
+		truncate_setsize(aio_ring_file->f_inode, 0);
+
+		/* Prevent further access to the kioctx from migratepages */
+		spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock);
+		aio_ring_file->f_inode->i_mapping->private_data = NULL;
+		ctx->aio_ring_file = NULL;
+		spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock);
+
+		fput(aio_ring_file);
+	}
+}
+
 static void aio_free_ring(struct kioctx *ctx)
 {
 	int i;
-	struct file *aio_ring_file = ctx->aio_ring_file;
 
 	for (i = 0; i < ctx->nr_pages; i++) {
 		pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
@@ -178,14 +193,10 @@ static void aio_free_ring(struct kioctx *ctx)
 		put_page(ctx->ring_pages[i]);
 	}
 
+	put_aio_ring_file(ctx);
+
 	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
 		kfree(ctx->ring_pages);
-
-	if (aio_ring_file) {
-		truncate_setsize(aio_ring_file->f_inode, 0);
-		fput(aio_ring_file);
-		ctx->aio_ring_file = NULL;
-	}
 }
 
 static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
@@ -207,9 +218,8 @@ static int aio_set_page_dirty(struct page *page)
 static int aio_migratepage(struct address_space *mapping, struct page *new,
 			struct page *old, enum migrate_mode mode)
 {
-	struct kioctx *ctx = mapping->private_data;
+	struct kioctx *ctx;
 	unsigned long flags;
-	unsigned idx = old->index;
 	int rc;
 
 	/* Writeback must be complete */
@@ -224,10 +234,23 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 
 	get_page(new);
 
-	spin_lock_irqsave(&ctx->completion_lock, flags);
-	migrate_page_copy(new, old);
-	ctx->ring_pages[idx] = new;
-	spin_unlock_irqrestore(&ctx->completion_lock, flags);
+	/* We can potentially race against kioctx teardown here.  Use the
+	 * address_space's private data lock to protect the mapping's
+	 * private_data.
+	 */
+	spin_lock(&mapping->private_lock);
+	ctx = mapping->private_data;
+	if (ctx) {
+		pgoff_t idx;
+		spin_lock_irqsave(&ctx->completion_lock, flags);
+		migrate_page_copy(new, old);
+		idx = old->index;
+		if (idx < (pgoff_t)ctx->nr_pages)
+			ctx->ring_pages[idx] = new;
+		spin_unlock_irqrestore(&ctx->completion_lock, flags);
+	} else
+		rc = -EBUSY;
+	spin_unlock(&mapping->private_lock);
 
 	return rc;
 }
@@ -617,8 +640,7 @@ out_freepcpu:
 out_freeref:
 	free_percpu(ctx->users.pcpu_count);
 out_freectx:
-	if (ctx->aio_ring_file)
-		fput(ctx->aio_ring_file);
+	put_aio_ring_file(ctx);
 	kmem_cache_free(kioctx_cachep, ctx);
 	pr_debug("error allocating ioctx %d\n", err);
 	return ERR_PTR(err);
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 58b7d14b08ee..08cc08f037a6 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -107,7 +107,8 @@ static void check_idle_worker(struct btrfs_worker_thread *worker)
 		worker->idle = 1;
 
 		/* the list may be empty if the worker is just starting */
-		if (!list_empty(&worker->worker_list)) {
+		if (!list_empty(&worker->worker_list) &&
+		    !worker->workers->stopping) {
 			list_move(&worker->worker_list,
 				 &worker->workers->idle_list);
 		}
@@ -127,7 +128,8 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
 		spin_lock_irqsave(&worker->workers->lock, flags);
 		worker->idle = 0;
 
-		if (!list_empty(&worker->worker_list)) {
+		if (!list_empty(&worker->worker_list) &&
+		    !worker->workers->stopping) {
 			list_move_tail(&worker->worker_list,
 				      &worker->workers->worker_list);
 		}
@@ -412,6 +414,7 @@ void btrfs_stop_workers(struct btrfs_workers *workers)
 	int can_stop;
 
 	spin_lock_irq(&workers->lock);
+	workers->stopping = 1;
 	list_splice_init(&workers->idle_list, &workers->worker_list);
 	while (!list_empty(&workers->worker_list)) {
 		cur = workers->worker_list.next;
@@ -455,6 +458,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
 	workers->ordered = 0;
 	workers->atomic_start_pending = 0;
 	workers->atomic_worker_start = async_helper;
+	workers->stopping = 0;
 }
 
 /*
@@ -480,15 +484,19 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
 	atomic_set(&worker->num_pending, 0);
 	atomic_set(&worker->refs, 1);
 	worker->workers = workers;
-	worker->task = kthread_run(worker_loop, worker,
-				   "btrfs-%s-%d", workers->name,
-				   workers->num_workers + 1);
+	worker->task = kthread_create(worker_loop, worker,
+				      "btrfs-%s-%d", workers->name,
+				      workers->num_workers + 1);
 	if (IS_ERR(worker->task)) {
 		ret = PTR_ERR(worker->task);
-		kfree(worker);
 		goto fail;
 	}
+
 	spin_lock_irq(&workers->lock);
+	if (workers->stopping) {
+		spin_unlock_irq(&workers->lock);
+		goto fail_kthread;
+	}
 	list_add_tail(&worker->worker_list, &workers->idle_list);
 	worker->idle = 1;
 	workers->num_workers++;
@@ -496,8 +504,13 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
 	WARN_ON(workers->num_workers_starting < 0);
 	spin_unlock_irq(&workers->lock);
 
+	wake_up_process(worker->task);
 	return 0;
+
+fail_kthread:
+	kthread_stop(worker->task);
 fail:
+	kfree(worker);
 	spin_lock_irq(&workers->lock);
 	workers->num_workers_starting--;
 	spin_unlock_irq(&workers->lock);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 063698b90ce2..1f26792683ed 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -107,6 +107,8 @@ struct btrfs_workers {
 
 	/* extra name for this worker, used for current->name */
 	char *name;
+
+	int stopping;
 };
 
 void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 70681686e8dc..9efb94e95858 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -535,10 +535,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
 
 	btrfs_rm_dev_replace_srcdev(fs_info, src_device);
-	if (src_device->bdev) {
-		/* zero out the old super */
-		btrfs_scratch_superblock(src_device);
-	}
+
 	/*
 	 * this is again a consistent state where no dev_replace procedure
 	 * is running, the target device is part of the filesystem, the
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4ae17ed13b32..62176ad89846 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1561,8 +1561,9 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
-struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
-					      struct btrfs_key *location)
+struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+				     struct btrfs_key *location,
+				     bool check_ref)
 {
 	struct btrfs_root *root;
 	int ret;
@@ -1586,7 +1587,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
 again:
 	root = btrfs_lookup_fs_root(fs_info, location->objectid);
 	if (root) {
-		if (btrfs_root_refs(&root->root_item) == 0)
+		if (check_ref && btrfs_root_refs(&root->root_item) == 0)
 			return ERR_PTR(-ENOENT);
 		return root;
 	}
@@ -1595,7 +1596,7 @@ again:
 	if (IS_ERR(root))
 		return root;
 
-	if (btrfs_root_refs(&root->root_item) == 0) {
+	if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
 		ret = -ENOENT;
 		goto fail;
 	}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b71acd6e1e5b..5ce2a7da8b11 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -68,8 +68,17 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
 int btrfs_init_fs_root(struct btrfs_root *root);
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
 			 struct btrfs_root *root);
-struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
-					      struct btrfs_key *location);
+
+struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+				     struct btrfs_key *key,
+				     bool check_ref);
+static inline struct btrfs_root *
+btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
+			   struct btrfs_key *location)
+{
+	return btrfs_get_fs_root(fs_info, location, true);
+}
+
 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
 void btrfs_btree_balance_dirty(struct btrfs_root *root);
 void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c09a40db53db..51731b76900d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -145,8 +145,16 @@ int __init extent_io_init(void)
 				     offsetof(struct btrfs_io_bio, bio));
 	if (!btrfs_bioset)
 		goto free_buffer_cache;
+
+	if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
+		goto free_bioset;
+
 	return 0;
 
+free_bioset:
+	bioset_free(btrfs_bioset);
+	btrfs_bioset = NULL;
+
 free_buffer_cache:
 	kmem_cache_destroy(extent_buffer_cache);
 	extent_buffer_cache = NULL;
@@ -1482,10 +1490,8 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
 		cur_start = state->end + 1;
 		node = rb_next(node);
 		total_bytes += state->end - state->start + 1;
-		if (total_bytes >= max_bytes) {
-			*end = *start + max_bytes - 1;
+		if (total_bytes >= max_bytes)
 			break;
-		}
 		if (!node)
 			break;
 	}
@@ -1614,7 +1620,7 @@ again:
 		*start = delalloc_start;
 		*end = delalloc_end;
 		free_extent_state(cached_state);
-		return found;
+		return 0;
 	}
 
 	/*
@@ -1627,10 +1633,9 @@ again:
 
 	/*
 	 * make sure to limit the number of pages we try to lock down
-	 * if we're looping.
 	 */
-	if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
-		delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+	if (delalloc_end + 1 - delalloc_start > max_bytes)
+		delalloc_end = delalloc_start + max_bytes - 1;
 
 	/* step two, lock all the pages after the page that has start */
 	ret = lock_delalloc_pages(inode, locked_page,
@@ -1641,8 +1646,7 @@ again:
 		 */
 		free_extent_state(cached_state);
 		if (!loops) {
-			unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
-			max_bytes = PAGE_CACHE_SIZE - offset;
+			max_bytes = PAGE_CACHE_SIZE;
 			loops = 1;
 			goto again;
 		} else {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 22ebc13b6c99..51e3afa78354 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6437,6 +6437,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
 
 	if (btrfs_extent_readonly(root, disk_bytenr))
 		goto out;
+	btrfs_release_path(path);
 
 	/*
 	 * look for other files referencing this extent, if we
@@ -7986,7 +7987,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 
 	/* check for collisions, even if the  name isn't there */
-	ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+	ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
 			     new_dentry->d_name.name,
 			     new_dentry->d_name.len);
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index a5a26320503f..4a355726151e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -588,7 +588,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
 	else
 		key.offset = (u64)-1;
 
-	return btrfs_read_fs_root_no_name(fs_info, &key);
+	return btrfs_get_fs_root(fs_info, &key, false);
 }
 
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 0b1f4ef8db98..ec71ea44d2b4 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -299,11 +299,6 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
 			continue;
 		}
 
-		if (btrfs_root_refs(&root->root_item) == 0) {
-			btrfs_add_dead_root(root);
-			continue;
-		}
-
 		err = btrfs_init_fs_root(root);
 		if (err) {
 			btrfs_free_fs_root(root);
@@ -318,6 +313,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
 			btrfs_free_fs_root(root);
 			break;
 		}
+
+		if (btrfs_root_refs(&root->root_item) == 0)
+			btrfs_add_dead_root(root);
 	}
 
 	btrfs_free_path(path);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e7a95356df83..8c81bdc1ef9b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1838,11 +1838,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	assert_qgroups_uptodate(trans);
 	update_super_roots(root);
 
-	if (!root->fs_info->log_root_recovering) {
-		btrfs_set_super_log_root(root->fs_info->super_copy, 0);
-		btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
-	}
-
+	btrfs_set_super_log_root(root->fs_info->super_copy, 0);
+	btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
 	memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
 	       sizeof(*root->fs_info->super_copy));
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a10645830223..043b215769c2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1716,6 +1716,7 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
 				 struct btrfs_device *srcdev)
 {
 	WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
+
 	list_del_rcu(&srcdev->dev_list);
 	list_del_rcu(&srcdev->dev_alloc_list);
 	fs_info->fs_devices->num_devices--;
@@ -1725,9 +1726,13 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
 	}
 	if (srcdev->can_discard)
 		fs_info->fs_devices->num_can_discard--;
-	if (srcdev->bdev)
+	if (srcdev->bdev) {
 		fs_info->fs_devices->open_devices--;
 
+		/* zero out the old super */
+		btrfs_scratch_superblock(srcdev);
+	}
+
 	call_rcu(&srcdev->rcu, free_device);
 }
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 4d7433534f5c..6024877335ca 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1005,9 +1005,19 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	struct buffer_head *bh;
 	sector_t end_block;
 	int ret = 0;		/* Will call free_more_memory() */
+	gfp_t gfp_mask;
 
-	page = find_or_create_page(inode->i_mapping, index,
-		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
+	gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
+	gfp_mask |= __GFP_MOVABLE;
+	/*
+	 * XXX: __getblk_slow() can not really deal with failure and
+	 * will endlessly loop on improvised global reclaim.  Prefer
+	 * looping in the allocator rather than here, at least that
+	 * code knows what it's doing.
+	 */
+	gfp_mask |= __GFP_NOFAIL;
+
+	page = find_or_create_page(inode->i_mapping, index, gfp_mask);
 	if (!page)
 		return ret;
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a16b4e58bcc6..77fc5e181077 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -120,14 +120,16 @@ cifs_read_super(struct super_block *sb)
 {
 	struct inode *inode;
 	struct cifs_sb_info *cifs_sb;
+	struct cifs_tcon *tcon;
 	int rc = 0;
 
 	cifs_sb = CIFS_SB(sb);
+	tcon = cifs_sb_master_tcon(cifs_sb);
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL)
 		sb->s_flags |= MS_POSIXACL;
 
-	if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES)
+	if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files)
 		sb->s_maxbytes = MAX_LFS_FILESIZE;
 	else
 		sb->s_maxbytes = MAX_NON_LFS;
@@ -147,7 +149,7 @@ cifs_read_super(struct super_block *sb)
 		goto out_no_root;
 	}
 
-	if (cifs_sb_master_tcon(cifs_sb)->nocase)
+	if (tcon->nocase)
 		sb->s_d_op = &cifs_ci_dentry_ops;
 	else
 		sb->s_d_op = &cifs_dentry_ops;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index ea723a5e8226..6d0b07217ac9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -132,5 +132,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.01"
+#define CIFS_VERSION   "2.02"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index cfa14c80ef3b..52b6f6c26bfc 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -547,9 +547,6 @@ struct TCP_Server_Info {
 	unsigned int max_rw;	/* maxRw specifies the maximum */
 	/* message size the server can send or receive for */
 	/* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
-	unsigned int max_vcs;	/* maximum number of smb sessions, at least
-				   those that can be specified uniquely with
-				   vcnumbers */
 	unsigned int capabilities; /* selective disabling of caps by smb sess */
 	int timeAdj;  /* Adjust for difference in server time zone in sec */
 	__u64 CurrentMid;         /* multiplex id - rotating counter */
@@ -715,7 +712,6 @@ struct cifs_ses {
 	enum statusEnum status;
 	unsigned overrideSecFlg;  /* if non-zero override global sec flags */
 	__u16 ipc_tid;		/* special tid for connection to IPC share */
-	__u16 vcnum;
 	char *serverOS;		/* name of operating system underlying server */
 	char *serverNOS;	/* name of network operating system of server */
 	char *serverDomain;	/* security realm of server */
@@ -1272,6 +1268,7 @@ struct dfs_info3_param {
 #define CIFS_FATTR_DELETE_PENDING	0x2
 #define CIFS_FATTR_NEED_REVAL		0x4
 #define CIFS_FATTR_INO_COLLISION	0x8
+#define CIFS_FATTR_UNKNOWN_NLINK	0x10
 
 struct cifs_fattr {
 	u32		cf_flags;
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 948676db8e2e..08f9dfb1a894 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1491,15 +1491,30 @@ struct file_notify_information {
 	__u8  FileName[0];
 } __attribute__((packed));
 
-struct reparse_data {
-	__u32	ReparseTag;
-	__u16	ReparseDataLength;
+/* For IO_REPARSE_TAG_SYMLINK */
+struct reparse_symlink_data {
+	__le32	ReparseTag;
+	__le16	ReparseDataLength;
 	__u16	Reserved;
-	__u16	SubstituteNameOffset;
-	__u16	SubstituteNameLength;
-	__u16	PrintNameOffset;
-	__u16	PrintNameLength;
-	__u32	Flags;
+	__le16	SubstituteNameOffset;
+	__le16	SubstituteNameLength;
+	__le16	PrintNameOffset;
+	__le16	PrintNameLength;
+	__le32	Flags;
+	char	PathBuffer[0];
+} __attribute__((packed));
+
+/* For IO_REPARSE_TAG_NFS */
+#define NFS_SPECFILE_LNK	0x00000000014B4E4C
+#define NFS_SPECFILE_CHR	0x0000000000524843
+#define NFS_SPECFILE_BLK	0x00000000004B4C42
+#define NFS_SPECFILE_FIFO	0x000000004F464946
+#define NFS_SPECFILE_SOCK	0x000000004B434F53
+struct reparse_posix_data {
+	__le32	ReparseTag;
+	__le16	ReparseDataLength;
+	__u16	Reserved;
+	__le64	InodeType; /* LNK, FIFO, CHR etc. */
 	char	PathBuffer[0];
 } __attribute__((packed));
 
@@ -2652,26 +2667,7 @@ typedef struct file_xattr_info {
 } __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute info
 					      level 0x205 */
 
-
-/* flags for chattr command */
-#define EXT_SECURE_DELETE		0x00000001 /* EXT3_SECRM_FL */
-#define EXT_ENABLE_UNDELETE		0x00000002 /* EXT3_UNRM_FL */
-/* Reserved for compress file 0x4 */
-#define EXT_SYNCHRONOUS			0x00000008 /* EXT3_SYNC_FL */
-#define EXT_IMMUTABLE_FL		0x00000010 /* EXT3_IMMUTABLE_FL */
-#define EXT_OPEN_APPEND_ONLY		0x00000020 /* EXT3_APPEND_FL */
-#define EXT_DO_NOT_BACKUP		0x00000040 /* EXT3_NODUMP_FL */
-#define EXT_NO_UPDATE_ATIME		0x00000080 /* EXT3_NOATIME_FL */
-/* 0x100 through 0x800 reserved for compression flags and are GET-ONLY */
-#define EXT_HASH_TREE_INDEXED_DIR	0x00001000 /* GET-ONLY EXT3_INDEX_FL */
-/* 0x2000 reserved for IMAGIC_FL */
-#define EXT_JOURNAL_THIS_FILE	0x00004000 /* GET-ONLY EXT3_JOURNAL_DATA_FL */
-/* 0x8000 reserved for EXT3_NOTAIL_FL */
-#define EXT_SYNCHRONOUS_DIR		0x00010000 /* EXT3_DIRSYNC_FL */
-#define EXT_TOPDIR			0x00020000 /* EXT3_TOPDIR_FL */
-
-#define EXT_SET_MASK			0x000300FF
-#define EXT_GET_MASK			0x0003DFFF
+/* flags for lsattr and chflags commands removed arein uapi/linux/fs.h */
 
 typedef struct file_chattr_info {
 	__le64	mask; /* list of all possible attribute bits */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index a3d74fea1623..ccd31ab815d4 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -463,7 +463,6 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
 			       cifs_max_pending);
 	set_credits(server, server->maxReq);
 	server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
-	server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
 	/* even though we do not use raw we might as well set this
 	accurately, in case we ever find a need for it */
 	if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
@@ -3089,7 +3088,8 @@ CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
 	bool is_unicode;
 	unsigned int sub_len;
 	char *sub_start;
-	struct reparse_data *reparse_buf;
+	struct reparse_symlink_data *reparse_buf;
+	struct reparse_posix_data *posix_buf;
 	__u32 data_offset, data_count;
 	char *end_of_smb;
 
@@ -3138,20 +3138,47 @@ CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
 		goto qreparse_out;
 	}
 	end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
-	reparse_buf = (struct reparse_data *)
+	reparse_buf = (struct reparse_symlink_data *)
 				((char *)&pSMBr->hdr.Protocol + data_offset);
 	if ((char *)reparse_buf >= end_of_smb) {
 		rc = -EIO;
 		goto qreparse_out;
 	}
-	if ((reparse_buf->PathBuffer + reparse_buf->PrintNameOffset +
-				reparse_buf->PrintNameLength) > end_of_smb) {
+	if (reparse_buf->ReparseTag == cpu_to_le32(IO_REPARSE_TAG_NFS)) {
+		cifs_dbg(FYI, "NFS style reparse tag\n");
+		posix_buf =  (struct reparse_posix_data *)reparse_buf;
+
+		if (posix_buf->InodeType != cpu_to_le64(NFS_SPECFILE_LNK)) {
+			cifs_dbg(FYI, "unsupported file type 0x%llx\n",
+				 le64_to_cpu(posix_buf->InodeType));
+			rc = -EOPNOTSUPP;
+			goto qreparse_out;
+		}
+		is_unicode = true;
+		sub_len = le16_to_cpu(reparse_buf->ReparseDataLength);
+		if (posix_buf->PathBuffer + sub_len > end_of_smb) {
+			cifs_dbg(FYI, "reparse buf beyond SMB\n");
+			rc = -EIO;
+			goto qreparse_out;
+		}
+		*symlinkinfo = cifs_strndup_from_utf16(posix_buf->PathBuffer,
+				sub_len, is_unicode, nls_codepage);
+		goto qreparse_out;
+	} else if (reparse_buf->ReparseTag !=
+			cpu_to_le32(IO_REPARSE_TAG_SYMLINK)) {
+		rc = -EOPNOTSUPP;
+		goto qreparse_out;
+	}
+
+	/* Reparse tag is NTFS symlink */
+	sub_start = le16_to_cpu(reparse_buf->SubstituteNameOffset) +
+				reparse_buf->PathBuffer;
+	sub_len = le16_to_cpu(reparse_buf->SubstituteNameLength);
+	if (sub_start + sub_len > end_of_smb) {
 		cifs_dbg(FYI, "reparse buf beyond SMB\n");
 		rc = -EIO;
 		goto qreparse_out;
 	}
-	sub_start = reparse_buf->SubstituteNameOffset + reparse_buf->PathBuffer;
-	sub_len = reparse_buf->SubstituteNameLength;
 	if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
 		is_unicode = true;
 	else
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index eb955b525e55..7ddddf2e2504 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3254,6 +3254,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 	/*
 	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
 	 * immediately if the cookie is negative
+	 *
+	 * After this point, every page in the list might have PG_fscache set,
+	 * so we will need to clean that up off of every page we don't use.
 	 */
 	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
 					 &num_pages);
@@ -3376,6 +3379,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		kref_put(&rdata->refcount, cifs_readdata_release);
 	}
 
+	/* Any pages that have been shown to fscache but didn't get added to
+	 * the pagecache must be uncached before they get returned to the
+	 * allocator.
+	 */
+	cifs_fscache_readpages_cancel(mapping->host, page_list);
 	return rc;
 }
 
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index fe2492d2a8fc..8d4b7bc8ae91 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -223,6 +223,13 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
 		fscache_uncache_page(CIFS_I(inode)->fscache, page);
 }
 
+void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages)
+{
+	cifs_dbg(FYI, "%s: (fsc: %p, i: %p)\n",
+		 __func__, CIFS_I(inode)->fscache, inode);
+	fscache_readpages_cancel(CIFS_I(inode)->fscache, pages);
+}
+
 void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
 {
 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 63539323e0b9..24794b6cd8ec 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -54,6 +54,7 @@ extern int __cifs_readpages_from_fscache(struct inode *,
 					 struct address_space *,
 					 struct list_head *,
 					 unsigned *);
+extern void __cifs_fscache_readpages_cancel(struct inode *, struct list_head *);
 
 extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
 
@@ -91,6 +92,13 @@ static inline void cifs_readpage_to_fscache(struct inode *inode,
 		__cifs_readpage_to_fscache(inode, page);
 }
 
+static inline void cifs_fscache_readpages_cancel(struct inode *inode,
+						 struct list_head *pages)
+{
+	if (CIFS_I(inode)->fscache)
+		return __cifs_fscache_readpages_cancel(inode, pages);
+}
+
 #else /* CONFIG_CIFS_FSCACHE */
 static inline int cifs_fscache_register(void) { return 0; }
 static inline void cifs_fscache_unregister(void) {}
@@ -131,6 +139,11 @@ static inline int cifs_readpages_from_fscache(struct inode *inode,
 static inline void cifs_readpage_to_fscache(struct inode *inode,
 			struct page *page) {}
 
+static inline void cifs_fscache_readpages_cancel(struct inode *inode,
+						 struct list_head *pages)
+{
+}
+
 #endif /* CONFIG_CIFS_FSCACHE */
 
 #endif /* _CIFS_FSCACHE_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f9ff9c173f78..867b7cdc794a 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -120,6 +120,33 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
 	cifs_i->invalid_mapping = true;
 }
 
+/*
+ * copy nlink to the inode, unless it wasn't provided.  Provide
+ * sane values if we don't have an existing one and none was provided
+ */
+static void
+cifs_nlink_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
+{
+	/*
+	 * if we're in a situation where we can't trust what we
+	 * got from the server (readdir, some non-unix cases)
+	 * fake reasonable values
+	 */
+	if (fattr->cf_flags & CIFS_FATTR_UNKNOWN_NLINK) {
+		/* only provide fake values on a new inode */
+		if (inode->i_state & I_NEW) {
+			if (fattr->cf_cifsattrs & ATTR_DIRECTORY)
+				set_nlink(inode, 2);
+			else
+				set_nlink(inode, 1);
+		}
+		return;
+	}
+
+	/* we trust the server, so update it */
+	set_nlink(inode, fattr->cf_nlink);
+}
+
 /* populate an inode with info from a cifs_fattr struct */
 void
 cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
@@ -134,7 +161,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
 	inode->i_mtime = fattr->cf_mtime;
 	inode->i_ctime = fattr->cf_ctime;
 	inode->i_rdev = fattr->cf_rdev;
-	set_nlink(inode, fattr->cf_nlink);
+	cifs_nlink_fattr_to_inode(inode, fattr);
 	inode->i_uid = fattr->cf_uid;
 	inode->i_gid = fattr->cf_gid;
 
@@ -541,6 +568,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 	fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
 	fattr->cf_createtime = le64_to_cpu(info->CreationTime);
 
+	fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
 	if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
 		fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
 		fattr->cf_dtype = DT_DIR;
@@ -548,7 +576,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 		 * Server can return wrong NumberOfLinks value for directories
 		 * when Unix extensions are disabled - fake it.
 		 */
-		fattr->cf_nlink = 2;
+		if (!tcon->unix_ext)
+			fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
 	} else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
 		fattr->cf_mode = S_IFLNK;
 		fattr->cf_dtype = DT_LNK;
@@ -561,11 +590,15 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 		if (fattr->cf_cifsattrs & ATTR_READONLY)
 			fattr->cf_mode &= ~(S_IWUGO);
 
-		fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
-		if (fattr->cf_nlink < 1) {
-			cifs_dbg(1, "replacing bogus file nlink value %u\n",
+		/*
+		 * Don't accept zero nlink from non-unix servers unless
+		 * delete is pending.  Instead mark it as unknown.
+		 */
+		if ((fattr->cf_nlink < 1) && !tcon->unix_ext &&
+		    !info->DeletePending) {
+			cifs_dbg(1, "bogus file nlink value %u\n",
 				fattr->cf_nlink);
-			fattr->cf_nlink = 1;
+			fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
 		}
 	}
 
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index af847e1cf1c1..651a5279607b 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -780,7 +780,9 @@ static const struct {
 	ERRDOS, ERRnoaccess, 0xc0000290}, {
 	ERRDOS, ERRbadfunc, 0xc000029c}, {
 	ERRDOS, ERRsymlink, NT_STATUS_STOPPED_ON_SYMLINK}, {
-	ERRDOS, ERRinvlevel, 0x007c0001}, };
+	ERRDOS, ERRinvlevel, 0x007c0001}, {
+	0, 0, 0 }
+};
 
 /*****************************************************************************
  Print an error message from the status code
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 42ef03be089f..53a75f3d0179 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -180,6 +180,9 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
 		fattr->cf_dtype = DT_REG;
 	}
 
+	/* non-unix readdir doesn't provide nlink */
+	fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
+
 	if (fattr->cf_cifsattrs & ATTR_READONLY)
 		fattr->cf_mode &= ~S_IWUGO;
 
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 5f99b7f19e78..e87387dbf39f 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -32,88 +32,6 @@
 #include <linux/slab.h>
 #include "cifs_spnego.h"
 
-/*
- * Checks if this is the first smb session to be reconnected after
- * the socket has been reestablished (so we know whether to use vc 0).
- * Called while holding the cifs_tcp_ses_lock, so do not block
- */
-static bool is_first_ses_reconnect(struct cifs_ses *ses)
-{
-	struct list_head *tmp;
-	struct cifs_ses *tmp_ses;
-
-	list_for_each(tmp, &ses->server->smb_ses_list) {
-		tmp_ses = list_entry(tmp, struct cifs_ses,
-				     smb_ses_list);
-		if (tmp_ses->need_reconnect == false)
-			return false;
-	}
-	/* could not find a session that was already connected,
-	   this must be the first one we are reconnecting */
-	return true;
-}
-
-/*
- *	vc number 0 is treated specially by some servers, and should be the
- *      first one we request.  After that we can use vcnumbers up to maxvcs,
- *	one for each smb session (some Windows versions set maxvcs incorrectly
- *	so maxvc=1 can be ignored).  If we have too many vcs, we can reuse
- *	any vc but zero (some servers reset the connection on vcnum zero)
- *
- */
-static __le16 get_next_vcnum(struct cifs_ses *ses)
-{
-	__u16 vcnum = 0;
-	struct list_head *tmp;
-	struct cifs_ses *tmp_ses;
-	__u16 max_vcs = ses->server->max_vcs;
-	__u16 i;
-	int free_vc_found = 0;
-
-	/* Quoting the MS-SMB specification: "Windows-based SMB servers set this
-	field to one but do not enforce this limit, which allows an SMB client
-	to establish more virtual circuits than allowed by this value ... but
-	other server implementations can enforce this limit." */
-	if (max_vcs < 2)
-		max_vcs = 0xFFFF;
-
-	spin_lock(&cifs_tcp_ses_lock);
-	if ((ses->need_reconnect) && is_first_ses_reconnect(ses))
-			goto get_vc_num_exit;  /* vcnum will be zero */
-	for (i = ses->server->srv_count - 1; i < max_vcs; i++) {
-		if (i == 0) /* this is the only connection, use vc 0 */
-			break;
-
-		free_vc_found = 1;
-
-		list_for_each(tmp, &ses->server->smb_ses_list) {
-			tmp_ses = list_entry(tmp, struct cifs_ses,
-					     smb_ses_list);
-			if (tmp_ses->vcnum == i) {
-				free_vc_found = 0;
-				break; /* found duplicate, try next vcnum */
-			}
-		}
-		if (free_vc_found)
-			break; /* we found a vcnumber that will work - use it */
-	}
-
-	if (i == 0)
-		vcnum = 0; /* for most common case, ie if one smb session, use
-			      vc zero.  Also for case when no free vcnum, zero
-			      is safest to send (some clients only send zero) */
-	else if (free_vc_found == 0)
-		vcnum = 1;  /* we can not reuse vc=0 safely, since some servers
-				reset all uids on that, but 1 is ok. */
-	else
-		vcnum = i;
-	ses->vcnum = vcnum;
-get_vc_num_exit:
-	spin_unlock(&cifs_tcp_ses_lock);
-
-	return cpu_to_le16(vcnum);
-}
-
 static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
 {
 	__u32 capabilities = 0;
@@ -128,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
 					CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
 					USHRT_MAX));
 	pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
-	pSMB->req.VcNumber = get_next_vcnum(ses);
+	pSMB->req.VcNumber = __constant_cpu_to_le16(1);
 
 	/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
 
@@ -582,9 +500,9 @@ select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
 				return NTLMv2;
 			if (global_secflags & CIFSSEC_MAY_NTLM)
 				return NTLM;
-			/* Fallthrough */
 		default:
-			return Unspecified;
+			/* Fallthrough to attempt LANMAN authentication next */
+			break;
 		}
 	case CIFS_NEGFLAVOR_LANMAN:
 		switch (requested) {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index eba0efde66d7..edccb5252462 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -687,6 +687,10 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
 	else
 		return -EIO;
 
+	/* no need to send SMB logoff if uid already closed due to reconnect */
+	if (ses->need_reconnect)
+		goto smb2_session_already_dead;
+
 	rc = small_smb2_init(SMB2_LOGOFF, NULL, (void **) &req);
 	if (rc)
 		return rc;
@@ -701,6 +705,8 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
 	 * No tcon so can't do
 	 * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]);
 	 */
+
+smb2_session_already_dead:
 	return rc;
 }
 
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index d952ee48f4dc..a4b2391fe66e 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -97,9 +97,23 @@
 #define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC /* BB add struct */
 #define FSCTL_SRV_READ_HASH          0x001441BB /* BB add struct */
 
+/* See FSCC 2.1.2.5 */
 #define IO_REPARSE_TAG_MOUNT_POINT   0xA0000003
 #define IO_REPARSE_TAG_HSM           0xC0000004
 #define IO_REPARSE_TAG_SIS           0x80000007
+#define IO_REPARSE_TAG_HSM2          0x80000006
+#define IO_REPARSE_TAG_DRIVER_EXTENDER 0x80000005
+/* Used by the DFS filter. See MS-DFSC */
+#define IO_REPARSE_TAG_DFS           0x8000000A
+/* Used by the DFS filter See MS-DFSC */
+#define IO_REPARSE_TAG_DFSR          0x80000012
+#define IO_REPARSE_TAG_FILTER_MANAGER 0x8000000B
+/* See section MS-FSCC 2.1.2.4 */
+#define IO_REPARSE_TAG_SYMLINK       0xA000000C
+#define IO_REPARSE_TAG_DEDUP         0x80000013
+#define IO_REPARSE_APPXSTREAM	     0xC0000014
+/* NFS symlinks, Win 8/SMB3 and later */
+#define IO_REPARSE_TAG_NFS           0x80000014
 
 /* fsctl flags */
 /* If Flags is set to this value, the request is an FSCTL not ioctl request */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 6fdcb1b4a106..800b938e4061 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -410,8 +410,13 @@ static int
 wait_for_free_request(struct TCP_Server_Info *server, const int timeout,
 		      const int optype)
 {
-	return wait_for_free_credits(server, timeout,
-				server->ops->get_credits_field(server, optype));
+	int *val;
+
+	val = server->ops->get_credits_field(server, optype);
+	/* Since an echo is already inflight, no need to wait to send another */
+	if (*val <= 0 && optype == CIFS_ECHO_OP)
+		return -EAGAIN;
+	return wait_for_free_credits(server, timeout, val);
 }
 
 static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
diff --git a/fs/dcache.c b/fs/dcache.c
index 41000305d716..ae6ebb88ceff 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -542,7 +542,7 @@ EXPORT_SYMBOL(d_drop);
  * If ref is non-zero, then decrement the refcount too.
  * Returns dentry requiring refcount drop, or NULL if we're done.
  */
-static inline struct dentry *
+static struct dentry *
 dentry_kill(struct dentry *dentry, int unlock_on_failure)
 	__releases(dentry->d_lock)
 {
@@ -630,7 +630,8 @@ repeat:
 			goto kill_it;
 	}
 
-	dentry->d_flags |= DCACHE_REFERENCED;
+	if (!(dentry->d_flags & DCACHE_REFERENCED))
+		dentry->d_flags |= DCACHE_REFERENCED;
 	dentry_lru_add(dentry);
 
 	dentry->d_lockref.count--;
@@ -1331,14 +1332,6 @@ rename_retry:
  * list is non-empty and continue searching.
  */
 
-/**
- * have_submounts - check for mounts over a dentry
- * @parent: dentry to check.
- *
- * Return true if the parent or its subdirectories contain
- * a mount point
- */
-
 static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
 {
 	int *ret = data;
@@ -1349,6 +1342,13 @@ static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
 	return D_WALK_CONTINUE;
 }
 
+/**
+ * have_submounts - check for mounts over a dentry
+ * @parent: dentry to check.
+ *
+ * Return true if the parent or its subdirectories contain
+ * a mount point
+ */
 int have_submounts(struct dentry *parent)
 {
 	int ret = 0;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c88e355f7635..000eae2782b6 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -408,7 +408,7 @@ static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat,
 				    struct page *page)
 {
 	return ecryptfs_lower_header_size(crypt_stat) +
-	       (page->index << PAGE_CACHE_SHIFT);
+	       ((loff_t)page->index << PAGE_CACHE_SHIFT);
 }
 
 /**
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 7d52806c2119..4725a07f003c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1149,7 +1149,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
 	struct ecryptfs_msg_ctx *msg_ctx;
 	struct ecryptfs_message *msg = NULL;
 	char *auth_tok_sig;
-	char *payload;
+	char *payload = NULL;
 	size_t payload_len = 0;
 	int rc;
 
@@ -1203,6 +1203,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
 	}
 out:
 	kfree(msg);
+	kfree(payload);
 	return rc;
 }
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 473e09da7d02..810c28fb8c3c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,7 +34,6 @@
 #include <linux/mutex.h>
 #include <linux/anon_inodes.h>
 #include <linux/device.h>
-#include <linux/freezer.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/mman.h>
@@ -1605,8 +1604,7 @@ fetch_events:
 			}
 
 			spin_unlock_irqrestore(&ep->lock, flags);
-			if (!freezable_schedule_hrtimeout_range(to, slack,
-								HRTIMER_MODE_ABS))
+			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
 				timed_out = 1;
 
 			spin_lock_irqsave(&ep->lock, flags);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1194b1f0f839..f8cde46de9cd 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1783,7 +1783,7 @@ retry:
 		d_tmpfile(dentry, inode);
 		err = ext3_orphan_add(handle, inode);
 		if (err)
-			goto err_drop_inode;
+			goto err_unlock_inode;
 		mark_inode_dirty(inode);
 		unlock_new_inode(inode);
 	}
@@ -1791,10 +1791,9 @@ retry:
 	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
-err_drop_inode:
+err_unlock_inode:
 	ext3_journal_stop(handle);
 	unlock_new_inode(inode);
-	iput(inode);
 	return err;
 }
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0d424d7ac02b..e274e9c1171f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2563,7 +2563,7 @@ retry:
 			break;
 	}
 	blk_finish_plug(&plug);
-	if (!ret && !cycled) {
+	if (!ret && !cycled && wbc->nr_to_write > 0) {
 		cycled = 1;
 		mpd.last_page = writeback_index - 1;
 		mpd.first_page = 0;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1bec5a5c1e45..5a0408d7b114 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2319,7 +2319,7 @@ retry:
 		d_tmpfile(dentry, inode);
 		err = ext4_orphan_add(handle, inode);
 		if (err)
-			goto err_drop_inode;
+			goto err_unlock_inode;
 		mark_inode_dirty(inode);
 		unlock_new_inode(inode);
 	}
@@ -2328,10 +2328,9 @@ retry:
 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
-err_drop_inode:
+err_unlock_inode:
 	ext4_journal_stop(handle);
 	unlock_new_inode(inode);
-	iput(inode);
 	return err;
 }
 
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c081e34f717f..03e9bebba198 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1350,6 +1350,8 @@ retry:
 				    s_min_extra_isize) {
 					tried_min_extra_isize++;
 					new_extra_isize = s_min_extra_isize;
+					kfree(is); is = NULL;
+					kfree(bs); bs = NULL;
 					goto retry;
 				}
 				error = -1;
diff --git a/fs/file_table.c b/fs/file_table.c
index abdd15ad13c9..e900ca518635 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -297,7 +297,7 @@ void flush_delayed_fput(void)
 	delayed_fput(NULL);
 }
 
-static DECLARE_WORK(delayed_fput_work, delayed_fput);
+static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
 
 void fput(struct file *file)
 {
@@ -317,7 +317,7 @@ void fput(struct file *file)
 		}
 
 		if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
-			schedule_work(&delayed_fput_work);
+			schedule_delayed_work(&delayed_fput_work, 1);
 	}
 }
 
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 62b43b577bfc..b7989f2ab4c4 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -182,6 +182,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 	struct inode *inode;
 	struct dentry *parent;
 	struct fuse_conn *fc;
+	struct fuse_inode *fi;
 	int ret;
 
 	inode = ACCESS_ONCE(entry->d_inode);
@@ -228,7 +229,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 		if (!err && !outarg.nodeid)
 			err = -ENOENT;
 		if (!err) {
-			struct fuse_inode *fi = get_fuse_inode(inode);
+			fi = get_fuse_inode(inode);
 			if (outarg.nodeid != get_node_id(inode)) {
 				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
 				goto invalid;
@@ -246,8 +247,11 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 				       attr_version);
 		fuse_change_entry_timeout(entry, &outarg);
 	} else if (inode) {
-		fc = get_fuse_conn(inode);
-		if (fc->readdirplus_auto) {
+		fi = get_fuse_inode(inode);
+		if (flags & LOOKUP_RCU) {
+			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
+				return -ECHILD;
+		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
 			parent = dget_parent(entry);
 			fuse_advise_use_readdirplus(parent->d_inode);
 			dput(parent);
@@ -259,7 +263,8 @@ out:
 
 invalid:
 	ret = 0;
-	if (check_submounts_and_drop(entry) != 0)
+
+	if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0)
 		ret = 1;
 	goto out;
 }
@@ -1063,6 +1068,8 @@ static int fuse_access(struct inode *inode, int mask)
 	struct fuse_access_in inarg;
 	int err;
 
+	BUG_ON(mask & MAY_NOT_BLOCK);
+
 	if (fc->no_access)
 		return 0;
 
@@ -1150,9 +1157,6 @@ static int fuse_permission(struct inode *inode, int mask)
 		   noticed immediately, only after the attribute
 		   timeout has expired */
 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
-		if (mask & MAY_NOT_BLOCK)
-			return -ECHILD;
-
 		err = fuse_access(inode, mask);
 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
 		if (!(inode->i_mode & S_IXUGO)) {
@@ -1291,6 +1295,8 @@ static int fuse_direntplus_link(struct file *file,
 	}
 
 found:
+	if (fc->readdirplus_auto)
+		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
 	fuse_change_entry_timeout(dentry, o);
 
 	err = 0;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d409deafc67b..4598345ab87d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2467,6 +2467,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 {
 	struct fuse_file *ff = file->private_data;
 	struct inode *inode = file->f_inode;
+	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_conn *fc = ff->fc;
 	struct fuse_req *req;
 	struct fuse_fallocate_in inarg = {
@@ -2484,10 +2485,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 
 	if (lock_inode) {
 		mutex_lock(&inode->i_mutex);
-		if (mode & FALLOC_FL_PUNCH_HOLE)
-			fuse_set_nowrite(inode);
+		if (mode & FALLOC_FL_PUNCH_HOLE) {
+			loff_t endbyte = offset + length - 1;
+			err = filemap_write_and_wait_range(inode->i_mapping,
+							   offset, endbyte);
+			if (err)
+				goto out;
+
+			fuse_sync_writes(inode);
+		}
 	}
 
+	if (!(mode & FALLOC_FL_KEEP_SIZE))
+		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
 	req = fuse_get_req_nopages(fc);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -2520,11 +2531,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 	fuse_invalidate_attr(inode);
 
 out:
-	if (lock_inode) {
-		if (mode & FALLOC_FL_PUNCH_HOLE)
-			fuse_release_nowrite(inode);
+	if (!(mode & FALLOC_FL_KEEP_SIZE))
+		clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
+	if (lock_inode)
 		mutex_unlock(&inode->i_mutex);
-	}
 
 	return err;
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5ced199b50bb..5b9e6f3b6aef 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -115,6 +115,8 @@ struct fuse_inode {
 enum {
 	/** Advise readdirplus  */
 	FUSE_I_ADVISE_RDPLUS,
+	/** Initialized with readdirplus */
+	FUSE_I_INIT_RDPLUS,
 	/** An operation changing file size is in progress  */
 	FUSE_I_SIZE_UNSTABLE,
 };
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index c1a3e603279c..7f464c513ba0 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -95,7 +95,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 
 	if (insert_inode_locked(inode) < 0) {
 		rc = -EINVAL;
-		goto fail_unlock;
+		goto fail_put;
 	}
 
 	inode_init_owner(inode, parent, mode);
@@ -156,7 +156,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 fail_drop:
 	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
-fail_unlock:
 	clear_nlink(inode);
 	unlock_new_inode(inode);
 fail_put:
diff --git a/fs/namei.c b/fs/namei.c
index 645268f23eb6..caa28051e197 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2294,10 +2294,11 @@ out:
  * path_mountpoint - look up a path to be umounted
  * @dfd:	directory file descriptor to start walk from
  * @name:	full pathname to walk
+ * @path:	pointer to container for result
  * @flags:	lookup flags
  *
  * Look up the given name, but don't attempt to revalidate the last component.
- * Returns 0 and "path" will be valid on success; Retuns error otherwise.
+ * Returns 0 and "path" will be valid on success; Returns error otherwise.
  */
 static int
 path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 9f8ef9b7674d..8eaa1ba793fc 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -288,10 +288,14 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
 static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
-	int rv = -EIO;
-	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+	unsigned long rv = -EIO;
+	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long) = NULL;
 	if (use_pde(pde)) {
-		get_unmapped_area = pde->proc_fops->get_unmapped_area;
+#ifdef CONFIG_MMU
+		get_unmapped_area = current->mm->get_unmapped_area;
+#endif
+		if (pde->proc_fops->get_unmapped_area)
+			get_unmapped_area = pde->proc_fops->get_unmapped_area;
 		if (get_unmapped_area)
 			rv = get_unmapped_area(file, orig_addr, len, pgoff, flags);
 		unuse_pde(pde);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7366e9d63cee..390bdab01c3c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -941,6 +941,8 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 		frame = pte_pfn(pte);
 		flags = PM_PRESENT;
 		page = vm_normal_page(vma, addr, pte);
+		if (pte_soft_dirty(pte))
+			flags2 |= __PM_SOFT_DIRTY;
 	} else if (is_swap_pte(pte)) {
 		swp_entry_t entry;
 		if (pte_swp_soft_dirty(pte))
@@ -960,7 +962,7 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 
 	if (page && !PageAnon(page))
 		flags |= PM_FILE;
-	if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte))
+	if ((vma->vm_flags & VM_SOFTDIRTY))
 		flags2 |= __PM_SOFT_DIRTY;
 
 	*pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
diff --git a/fs/select.c b/fs/select.c
index 35d4adc749d9..dfd5cb18c012 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -238,8 +238,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
 
 	set_current_state(state);
 	if (!pwq->triggered)
-		rc = freezable_schedule_hrtimeout_range(expires, slack,
-							HRTIMER_MODE_ABS);
+		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
 	__set_current_state(TASK_RUNNING);
 
 	/*
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3135c2525c76..a290157265ef 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -328,6 +328,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
 				m->read_pos = offset;
 				retval = file->f_pos = offset;
 			}
+		} else {
+			file->f_pos = offset;
 		}
 	}
 	file->f_version = m->version;
diff --git a/fs/statfs.c b/fs/statfs.c
index c219e733f553..083dc0ac9140 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -94,7 +94,7 @@ retry:
 
 int fd_statfs(int fd, struct kstatfs *st)
 {
-	struct fd f = fdget(fd);
+	struct fd f = fdget_raw(fd);
 	int error = -EBADF;
 	if (f.file) {
 		error = vfs_statfs(&f.file->f_path, st);
diff --git a/fs/super.c b/fs/super.c
index 3a96c9783a8b..0225c20f8770 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -264,6 +264,8 @@ out_free_sb:
  */
 static inline void destroy_super(struct super_block *s)
 {
+	list_lru_destroy(&s->s_dentry_lru);
+	list_lru_destroy(&s->s_inode_lru);
 #ifdef CONFIG_SMP
 	free_percpu(s->s_files);
 #endif
@@ -323,8 +325,6 @@ void deactivate_locked_super(struct super_block *s)
 
 		/* caches are now gone, we can safely kill the shrinker now */
 		unregister_shrinker(&s->s_shrink);
-		list_lru_destroy(&s->s_dentry_lru);
-		list_lru_destroy(&s->s_inode_lru);
 
 		put_filesystem(fs);
 		put_super(s);
diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile
index 7a1ceb946b80..8876ac183373 100644
--- a/fs/sysfs/Makefile
+++ b/fs/sysfs/Makefile
@@ -2,5 +2,4 @@
 # Makefile for the sysfs virtual filesystem
 #
 
-obj-y		:= inode.o file.o dir.o symlink.o mount.o bin.o \
-		   group.o
+obj-y		:= inode.o file.o dir.o symlink.o mount.o group.o
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
deleted file mode 100644
index c590cabd57bb..000000000000
--- a/fs/sysfs/bin.c
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- * fs/sysfs/bin.c - sysfs binary file implementation
- *
- * Copyright (c) 2003 Patrick Mochel
- * Copyright (c) 2003 Matthew Wilcox
- * Copyright (c) 2004 Silicon Graphics, Inc.
- * Copyright (c) 2007 SUSE Linux Products GmbH
- * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
- *
- * This file is released under the GPLv2.
- *
- * Please see Documentation/filesystems/sysfs.txt for more information.
- */
-
-#undef DEBUG
-
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/kobject.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/mm.h>
-#include <linux/uaccess.h>
-
-#include "sysfs.h"
-
-/*
- * There's one bin_buffer for each open file.
- *
- * filp->private_data points to bin_buffer and
- * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s
- * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
- */
-static DEFINE_MUTEX(sysfs_bin_lock);
-
-struct bin_buffer {
-	struct mutex			mutex;
-	void				*buffer;
-	int				mmapped;
-	const struct vm_operations_struct *vm_ops;
-	struct file			*file;
-	struct hlist_node		list;
-};
-
-static int
-fill_read(struct file *file, char *buffer, loff_t off, size_t count)
-{
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
-	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
-	int rc;
-
-	/* need attr_sd for attr, its parent for kobj */
-	if (!sysfs_get_active(attr_sd))
-		return -ENODEV;
-
-	rc = -EIO;
-	if (attr->read)
-		rc = attr->read(file, kobj, attr, buffer, off, count);
-
-	sysfs_put_active(attr_sd);
-
-	return rc;
-}
-
-static ssize_t
-read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
-{
-	struct bin_buffer *bb = file->private_data;
-	int size = file_inode(file)->i_size;
-	loff_t offs = *off;
-	int count = min_t(size_t, bytes, PAGE_SIZE);
-	char *temp;
-
-	if (!bytes)
-		return 0;
-
-	if (size) {
-		if (offs > size)
-			return 0;
-		if (offs + count > size)
-			count = size - offs;
-	}
-
-	temp = kmalloc(count, GFP_KERNEL);
-	if (!temp)
-		return -ENOMEM;
-
-	mutex_lock(&bb->mutex);
-
-	count = fill_read(file, bb->buffer, offs, count);
-	if (count < 0) {
-		mutex_unlock(&bb->mutex);
-		goto out_free;
-	}
-
-	memcpy(temp, bb->buffer, count);
-
-	mutex_unlock(&bb->mutex);
-
-	if (copy_to_user(userbuf, temp, count)) {
-		count = -EFAULT;
-		goto out_free;
-	}
-
-	pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
-
-	*off = offs + count;
-
- out_free:
-	kfree(temp);
-	return count;
-}
-
-static int
-flush_write(struct file *file, char *buffer, loff_t offset, size_t count)
-{
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
-	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
-	int rc;
-
-	/* need attr_sd for attr, its parent for kobj */
-	if (!sysfs_get_active(attr_sd))
-		return -ENODEV;
-
-	rc = -EIO;
-	if (attr->write)
-		rc = attr->write(file, kobj, attr, buffer, offset, count);
-
-	sysfs_put_active(attr_sd);
-
-	return rc;
-}
-
-static ssize_t write(struct file *file, const char __user *userbuf,
-		     size_t bytes, loff_t *off)
-{
-	struct bin_buffer *bb = file->private_data;
-	int size = file_inode(file)->i_size;
-	loff_t offs = *off;
-	int count = min_t(size_t, bytes, PAGE_SIZE);
-	char *temp;
-
-	if (!bytes)
-		return 0;
-
-	if (size) {
-		if (offs > size)
-			return 0;
-		if (offs + count > size)
-			count = size - offs;
-	}
-
-	temp = memdup_user(userbuf, count);
-	if (IS_ERR(temp))
-		return PTR_ERR(temp);
-
-	mutex_lock(&bb->mutex);
-
-	memcpy(bb->buffer, temp, count);
-
-	count = flush_write(file, bb->buffer, offs, count);
-	mutex_unlock(&bb->mutex);
-
-	if (count > 0)
-		*off = offs + count;
-
-	kfree(temp);
-	return count;
-}
-
-static void bin_vma_open(struct vm_area_struct *vma)
-{
-	struct file *file = vma->vm_file;
-	struct bin_buffer *bb = file->private_data;
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-
-	if (!bb->vm_ops)
-		return;
-
-	if (!sysfs_get_active(attr_sd))
-		return;
-
-	if (bb->vm_ops->open)
-		bb->vm_ops->open(vma);
-
-	sysfs_put_active(attr_sd);
-}
-
-static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct file *file = vma->vm_file;
-	struct bin_buffer *bb = file->private_data;
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-	int ret;
-
-	if (!bb->vm_ops)
-		return VM_FAULT_SIGBUS;
-
-	if (!sysfs_get_active(attr_sd))
-		return VM_FAULT_SIGBUS;
-
-	ret = VM_FAULT_SIGBUS;
-	if (bb->vm_ops->fault)
-		ret = bb->vm_ops->fault(vma, vmf);
-
-	sysfs_put_active(attr_sd);
-	return ret;
-}
-
-static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct file *file = vma->vm_file;
-	struct bin_buffer *bb = file->private_data;
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-	int ret;
-
-	if (!bb->vm_ops)
-		return VM_FAULT_SIGBUS;
-
-	if (!sysfs_get_active(attr_sd))
-		return VM_FAULT_SIGBUS;
-
-	ret = 0;
-	if (bb->vm_ops->page_mkwrite)
-		ret = bb->vm_ops->page_mkwrite(vma, vmf);
-	else
-		file_update_time(file);
-
-	sysfs_put_active(attr_sd);
-	return ret;
-}
-
-static int bin_access(struct vm_area_struct *vma, unsigned long addr,
-		  void *buf, int len, int write)
-{
-	struct file *file = vma->vm_file;
-	struct bin_buffer *bb = file->private_data;
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-	int ret;
-
-	if (!bb->vm_ops)
-		return -EINVAL;
-
-	if (!sysfs_get_active(attr_sd))
-		return -EINVAL;
-
-	ret = -EINVAL;
-	if (bb->vm_ops->access)
-		ret = bb->vm_ops->access(vma, addr, buf, len, write);
-
-	sysfs_put_active(attr_sd);
-	return ret;
-}
-
-#ifdef CONFIG_NUMA
-static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
-{
-	struct file *file = vma->vm_file;
-	struct bin_buffer *bb = file->private_data;
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-	int ret;
-
-	if (!bb->vm_ops)
-		return 0;
-
-	if (!sysfs_get_active(attr_sd))
-		return -EINVAL;
-
-	ret = 0;
-	if (bb->vm_ops->set_policy)
-		ret = bb->vm_ops->set_policy(vma, new);
-
-	sysfs_put_active(attr_sd);
-	return ret;
-}
-
-static struct mempolicy *bin_get_policy(struct vm_area_struct *vma,
-					unsigned long addr)
-{
-	struct file *file = vma->vm_file;
-	struct bin_buffer *bb = file->private_data;
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-	struct mempolicy *pol;
-
-	if (!bb->vm_ops)
-		return vma->vm_policy;
-
-	if (!sysfs_get_active(attr_sd))
-		return vma->vm_policy;
-
-	pol = vma->vm_policy;
-	if (bb->vm_ops->get_policy)
-		pol = bb->vm_ops->get_policy(vma, addr);
-
-	sysfs_put_active(attr_sd);
-	return pol;
-}
-
-static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
-			const nodemask_t *to, unsigned long flags)
-{
-	struct file *file = vma->vm_file;
-	struct bin_buffer *bb = file->private_data;
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-	int ret;
-
-	if (!bb->vm_ops)
-		return 0;
-
-	if (!sysfs_get_active(attr_sd))
-		return 0;
-
-	ret = 0;
-	if (bb->vm_ops->migrate)
-		ret = bb->vm_ops->migrate(vma, from, to, flags);
-
-	sysfs_put_active(attr_sd);
-	return ret;
-}
-#endif
-
-static const struct vm_operations_struct bin_vm_ops = {
-	.open		= bin_vma_open,
-	.fault		= bin_fault,
-	.page_mkwrite	= bin_page_mkwrite,
-	.access		= bin_access,
-#ifdef CONFIG_NUMA
-	.set_policy	= bin_set_policy,
-	.get_policy	= bin_get_policy,
-	.migrate	= bin_migrate,
-#endif
-};
-
-static int mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct bin_buffer *bb = file->private_data;
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
-	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
-	int rc;
-
-	mutex_lock(&bb->mutex);
-
-	/* need attr_sd for attr, its parent for kobj */
-	rc = -ENODEV;
-	if (!sysfs_get_active(attr_sd))
-		goto out_unlock;
-
-	rc = -EINVAL;
-	if (!attr->mmap)
-		goto out_put;
-
-	rc = attr->mmap(file, kobj, attr, vma);
-	if (rc)
-		goto out_put;
-
-	/*
-	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
-	 * to satisfy versions of X which crash if the mmap fails: that
-	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
-	 */
-	if (vma->vm_file != file)
-		goto out_put;
-
-	rc = -EINVAL;
-	if (bb->mmapped && bb->vm_ops != vma->vm_ops)
-		goto out_put;
-
-	/*
-	 * It is not possible to successfully wrap close.
-	 * So error if someone is trying to use close.
-	 */
-	rc = -EINVAL;
-	if (vma->vm_ops && vma->vm_ops->close)
-		goto out_put;
-
-	rc = 0;
-	bb->mmapped = 1;
-	bb->vm_ops = vma->vm_ops;
-	vma->vm_ops = &bin_vm_ops;
-out_put:
-	sysfs_put_active(attr_sd);
-out_unlock:
-	mutex_unlock(&bb->mutex);
-
-	return rc;
-}
-
-static int open(struct inode *inode, struct file *file)
-{
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
-	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
-	struct bin_buffer *bb = NULL;
-	int error;
-
-	/* binary file operations requires both @sd and its parent */
-	if (!sysfs_get_active(attr_sd))
-		return -ENODEV;
-
-	error = -EACCES;
-	if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
-		goto err_out;
-	if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
-		goto err_out;
-
-	error = -ENOMEM;
-	bb = kzalloc(sizeof(*bb), GFP_KERNEL);
-	if (!bb)
-		goto err_out;
-
-	bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!bb->buffer)
-		goto err_out;
-
-	mutex_init(&bb->mutex);
-	bb->file = file;
-	file->private_data = bb;
-
-	mutex_lock(&sysfs_bin_lock);
-	hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
-	mutex_unlock(&sysfs_bin_lock);
-
-	/* open succeeded, put active references */
-	sysfs_put_active(attr_sd);
-	return 0;
-
- err_out:
-	sysfs_put_active(attr_sd);
-	kfree(bb);
-	return error;
-}
-
-static int release(struct inode *inode, struct file *file)
-{
-	struct bin_buffer *bb = file->private_data;
-
-	mutex_lock(&sysfs_bin_lock);
-	hlist_del(&bb->list);
-	mutex_unlock(&sysfs_bin_lock);
-
-	kfree(bb->buffer);
-	kfree(bb);
-	return 0;
-}
-
-const struct file_operations bin_fops = {
-	.read		= read,
-	.write		= write,
-	.mmap		= mmap,
-	.llseek		= generic_file_llseek,
-	.open		= open,
-	.release	= release,
-};
-
-
-void unmap_bin_file(struct sysfs_dirent *attr_sd)
-{
-	struct bin_buffer *bb;
-
-	if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
-		return;
-
-	mutex_lock(&sysfs_bin_lock);
-
-	hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) {
-		struct inode *inode = file_inode(bb->file);
-
-		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
-	}
-
-	mutex_unlock(&sysfs_bin_lock);
-}
-
-/**
- *	sysfs_create_bin_file - create binary file for object.
- *	@kobj:	object.
- *	@attr:	attribute descriptor.
- */
-int sysfs_create_bin_file(struct kobject *kobj,
-			  const struct bin_attribute *attr)
-{
-	BUG_ON(!kobj || !kobj->sd || !attr);
-
-	return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
-}
-EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
-
-/**
- *	sysfs_remove_bin_file - remove binary file for object.
- *	@kobj:	object.
- *	@attr:	attribute descriptor.
- */
-void sysfs_remove_bin_file(struct kobject *kobj,
-			   const struct bin_attribute *attr)
-{
-	sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name);
-}
-EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 4d83cedb9fcb..5e73d6626e50 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -26,21 +26,21 @@
 #include "sysfs.h"
 
 DEFINE_MUTEX(sysfs_mutex);
-DEFINE_SPINLOCK(sysfs_assoc_lock);
+DEFINE_SPINLOCK(sysfs_symlink_target_lock);
 
-#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb);
+#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb)
 
 static DEFINE_SPINLOCK(sysfs_ino_lock);
 static DEFINE_IDA(sysfs_ino_ida);
 
 /**
  *	sysfs_name_hash
- *	@ns:   Namespace tag to hash
  *	@name: Null terminated string to hash
+ *	@ns:   Namespace tag to hash
  *
  *	Returns 31 bit hash of ns + name (so it fits in an off_t )
  */
-static unsigned int sysfs_name_hash(const void *ns, const char *name)
+static unsigned int sysfs_name_hash(const char *name, const void *ns)
 {
 	unsigned long hash = init_name_hash();
 	unsigned int len = strlen(name);
@@ -56,8 +56,8 @@ static unsigned int sysfs_name_hash(const void *ns, const char *name)
 	return hash;
 }
 
-static int sysfs_name_compare(unsigned int hash, const void *ns,
-	const char *name, const struct sysfs_dirent *sd)
+static int sysfs_name_compare(unsigned int hash, const char *name,
+			      const void *ns, const struct sysfs_dirent *sd)
 {
 	if (hash != sd->s_hash)
 		return hash - sd->s_hash;
@@ -69,7 +69,7 @@ static int sysfs_name_compare(unsigned int hash, const void *ns,
 static int sysfs_sd_compare(const struct sysfs_dirent *left,
 			    const struct sysfs_dirent *right)
 {
-	return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name,
+	return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns,
 				  right);
 }
 
@@ -132,24 +132,6 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
 	rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
 }
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
-/* Test for attributes that want to ignore lockdep for read-locking */
-static bool ignore_lockdep(struct sysfs_dirent *sd)
-{
-	return sysfs_type(sd) == SYSFS_KOBJ_ATTR &&
-			sd->s_attr.attr->ignore_lockdep;
-}
-
-#else
-
-static inline bool ignore_lockdep(struct sysfs_dirent *sd)
-{
-	return true;
-}
-
-#endif
-
 /**
  *	sysfs_get_active - get an active reference to sysfs_dirent
  *	@sd: sysfs_dirent to get an active reference to
@@ -168,7 +150,7 @@ struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
 	if (!atomic_inc_unless_negative(&sd->s_active))
 		return NULL;
 
-	if (likely(!ignore_lockdep(sd)))
+	if (likely(!sysfs_ignore_lockdep(sd)))
 		rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
 	return sd;
 }
@@ -187,7 +169,7 @@ void sysfs_put_active(struct sysfs_dirent *sd)
 	if (unlikely(!sd))
 		return;
 
-	if (likely(!ignore_lockdep(sd)))
+	if (likely(!sysfs_ignore_lockdep(sd)))
 		rwsem_release(&sd->dep_map, 1, _RET_IP_);
 	v = atomic_dec_return(&sd->s_active);
 	if (likely(v != SD_DEACTIVATED_BIAS))
@@ -400,22 +382,19 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
 /**
  *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
  *	@acxt: pointer to sysfs_addrm_cxt to be used
- *	@parent_sd: parent sysfs_dirent
  *
- *	This function is called when the caller is about to add or
- *	remove sysfs_dirent under @parent_sd.  This function acquires
- *	sysfs_mutex.  @acxt is used to keep and pass context to
- *	other addrm functions.
+ *	This function is called when the caller is about to add or remove
+ *	sysfs_dirent.  This function acquires sysfs_mutex.  @acxt is used
+ *	to keep and pass context to other addrm functions.
  *
  *	LOCKING:
  *	Kernel thread context (may sleep).  sysfs_mutex is locked on
  *	return.
  */
-void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
-		       struct sysfs_dirent *parent_sd)
+void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
+	__acquires(sysfs_mutex)
 {
 	memset(acxt, 0, sizeof(*acxt));
-	acxt->parent_sd = parent_sd;
 
 	mutex_lock(&sysfs_mutex);
 }
@@ -424,10 +403,11 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
  *	__sysfs_add_one - add sysfs_dirent to parent without warning
  *	@acxt: addrm context to use
  *	@sd: sysfs_dirent to be added
+ *	@parent_sd: the parent sysfs_dirent to add @sd to
  *
- *	Get @acxt->parent_sd and set sd->s_parent to it and increment
- *	nlink of parent inode if @sd is a directory and link into the
- *	children list of the parent.
+ *	Get @parent_sd and set @sd->s_parent to it and increment nlink of
+ *	the parent inode if @sd is a directory and link into the children
+ *	list of the parent.
  *
  *	This function should be called between calls to
  *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
@@ -440,27 +420,28 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
  *	0 on success, -EEXIST if entry with the given name already
  *	exists.
  */
-int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
+		    struct sysfs_dirent *parent_sd)
 {
 	struct sysfs_inode_attrs *ps_iattr;
 	int ret;
 
-	if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) {
+	if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) {
 		WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
-			sysfs_ns_type(acxt->parent_sd) ? "required" : "invalid",
-			acxt->parent_sd->s_name, sd->s_name);
+			sysfs_ns_type(parent_sd) ? "required" : "invalid",
+			parent_sd->s_name, sd->s_name);
 		return -EINVAL;
 	}
 
-	sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
-	sd->s_parent = sysfs_get(acxt->parent_sd);
+	sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
+	sd->s_parent = sysfs_get(parent_sd);
 
 	ret = sysfs_link_sibling(sd);
 	if (ret)
 		return ret;
 
 	/* Update timestamps on the parent */
-	ps_iattr = acxt->parent_sd->s_iattr;
+	ps_iattr = parent_sd->s_iattr;
 	if (ps_iattr) {
 		struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
 		ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
@@ -490,14 +471,32 @@ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
 	return path;
 }
 
+void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name)
+{
+	char *path;
+
+	path = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (path) {
+		sysfs_pathname(parent, path);
+		strlcat(path, "/", PATH_MAX);
+		strlcat(path, name, PATH_MAX);
+	}
+
+	WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n",
+	     path ? path : name);
+
+	kfree(path);
+}
+
 /**
  *	sysfs_add_one - add sysfs_dirent to parent
  *	@acxt: addrm context to use
  *	@sd: sysfs_dirent to be added
+ *	@parent_sd: the parent sysfs_dirent to add @sd to
  *
- *	Get @acxt->parent_sd and set sd->s_parent to it and increment
- *	nlink of parent inode if @sd is a directory and link into the
- *	children list of the parent.
+ *	Get @parent_sd and set @sd->s_parent to it and increment nlink of
+ *	the parent inode if @sd is a directory and link into the children
+ *	list of the parent.
  *
  *	This function should be called between calls to
  *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
@@ -510,23 +509,15 @@ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
  *	0 on success, -EEXIST if entry with the given name already
  *	exists.
  */
-int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
+		  struct sysfs_dirent *parent_sd)
 {
 	int ret;
 
-	ret = __sysfs_add_one(acxt, sd);
-	if (ret == -EEXIST) {
-		char *path = kzalloc(PATH_MAX, GFP_KERNEL);
-		WARN(1, KERN_WARNING
-		     "sysfs: cannot create duplicate filename '%s'\n",
-		     (path == NULL) ? sd->s_name
-				    : (sysfs_pathname(acxt->parent_sd, path),
-				       strlcat(path, "/", PATH_MAX),
-				       strlcat(path, sd->s_name, PATH_MAX),
-				       path));
-		kfree(path);
-	}
+	ret = __sysfs_add_one(acxt, sd, parent_sd);
 
+	if (ret == -EEXIST)
+		sysfs_warn_dup(parent_sd, sd->s_name);
 	return ret;
 }
 
@@ -545,16 +536,22 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
  *	LOCKING:
  *	Determined by sysfs_addrm_start().
  */
-void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
+			     struct sysfs_dirent *sd)
 {
 	struct sysfs_inode_attrs *ps_iattr;
 
-	BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED);
+	/*
+	 * Removal can be called multiple times on the same node.  Only the
+	 * first invocation is effective and puts the base ref.
+	 */
+	if (sd->s_flags & SYSFS_FLAG_REMOVED)
+		return;
 
 	sysfs_unlink_sibling(sd);
 
 	/* Update timestamps on the parent */
-	ps_iattr = acxt->parent_sd->s_iattr;
+	ps_iattr = sd->s_parent->s_iattr;
 	if (ps_iattr) {
 		struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
 		ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
@@ -577,6 +574,7 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
  *	sysfs_mutex is released.
  */
 void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
+	__releases(sysfs_mutex)
 {
 	/* release resources acquired by sysfs_addrm_start() */
 	mutex_unlock(&sysfs_mutex);
@@ -588,7 +586,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
 		acxt->removed = sd->u.removed_list;
 
 		sysfs_deactivate(sd);
-		unmap_bin_file(sd);
+		sysfs_unmap_bin_file(sd);
 		sysfs_put(sd);
 	}
 }
@@ -597,6 +595,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
  *	sysfs_find_dirent - find sysfs_dirent with the given name
  *	@parent_sd: sysfs_dirent to search under
  *	@name: name to look for
+ *	@ns: the namespace tag to use
  *
  *	Look for sysfs_dirent with name @name under @parent_sd.
  *
@@ -607,8 +606,8 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
  *	Pointer to sysfs_dirent if found, NULL if not.
  */
 struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
-				       const void *ns,
-				       const unsigned char *name)
+				       const unsigned char *name,
+				       const void *ns)
 {
 	struct rb_node *node = parent_sd->s_dir.children.rb_node;
 	unsigned int hash;
@@ -620,13 +619,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
 		return NULL;
 	}
 
-	hash = sysfs_name_hash(ns, name);
+	hash = sysfs_name_hash(name, ns);
 	while (node) {
 		struct sysfs_dirent *sd;
 		int result;
 
 		sd = to_sysfs_dirent(node);
-		result = sysfs_name_compare(hash, ns, name, sd);
+		result = sysfs_name_compare(hash, name, ns, sd);
 		if (result < 0)
 			node = node->rb_left;
 		else if (result > 0)
@@ -638,9 +637,10 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
 }
 
 /**
- *	sysfs_get_dirent - find and get sysfs_dirent with the given name
+ *	sysfs_get_dirent_ns - find and get sysfs_dirent with the given name
  *	@parent_sd: sysfs_dirent to search under
  *	@name: name to look for
+ *	@ns: the namespace tag to use
  *
  *	Look for sysfs_dirent with name @name under @parent_sd and get
  *	it if found.
@@ -651,24 +651,25 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
  *	RETURNS:
  *	Pointer to sysfs_dirent if found, NULL if not.
  */
-struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
-				      const void *ns,
-				      const unsigned char *name)
+struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
+					 const unsigned char *name,
+					 const void *ns)
 {
 	struct sysfs_dirent *sd;
 
 	mutex_lock(&sysfs_mutex);
-	sd = sysfs_find_dirent(parent_sd, ns, name);
+	sd = sysfs_find_dirent(parent_sd, name, ns);
 	sysfs_get(sd);
 	mutex_unlock(&sysfs_mutex);
 
 	return sd;
 }
-EXPORT_SYMBOL_GPL(sysfs_get_dirent);
+EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns);
 
 static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
-	enum kobj_ns_type type, const void *ns, const char *name,
-	struct sysfs_dirent **p_sd)
+		      enum kobj_ns_type type,
+		      const char *name, const void *ns,
+		      struct sysfs_dirent **p_sd)
 {
 	umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
 	struct sysfs_addrm_cxt acxt;
@@ -685,8 +686,8 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
 	sd->s_dir.kobj = kobj;
 
 	/* link in */
-	sysfs_addrm_start(&acxt, parent_sd);
-	rc = sysfs_add_one(&acxt, sd);
+	sysfs_addrm_start(&acxt);
+	rc = sysfs_add_one(&acxt, sd, parent_sd);
 	sysfs_addrm_finish(&acxt);
 
 	if (rc == 0)
@@ -701,7 +702,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
 			struct sysfs_dirent **p_sd)
 {
 	return create_dir(kobj, kobj->sd,
-			  KOBJ_NS_TYPE_NONE, NULL, name, p_sd);
+			  KOBJ_NS_TYPE_NONE, name, NULL, p_sd);
 }
 
 /**
@@ -730,14 +731,14 @@ static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
 }
 
 /**
- *	sysfs_create_dir - create a directory for an object.
- *	@kobj:		object we're creating directory for.
+ * sysfs_create_dir_ns - create a directory for an object with a namespace tag
+ * @kobj: object we're creating directory for
+ * @ns: the namespace tag to use
  */
-int sysfs_create_dir(struct kobject *kobj)
+int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 {
 	enum kobj_ns_type type;
 	struct sysfs_dirent *parent_sd, *sd;
-	const void *ns = NULL;
 	int error = 0;
 
 	BUG_ON(!kobj);
@@ -750,11 +751,9 @@ int sysfs_create_dir(struct kobject *kobj)
 	if (!parent_sd)
 		return -ENOENT;
 
-	if (sysfs_ns_type(parent_sd))
-		ns = kobj->ktype->namespace(kobj);
 	type = sysfs_read_ns_type(kobj);
 
-	error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd);
+	error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd);
 	if (!error)
 		kobj->sd = sd;
 	return error;
@@ -776,7 +775,7 @@ static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
 	type = sysfs_ns_type(parent_sd);
 	ns = sysfs_info(dir->i_sb)->ns[type];
 
-	sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name);
+	sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns);
 
 	/* no such entry */
 	if (!sd) {
@@ -807,41 +806,128 @@ const struct inode_operations sysfs_dir_inode_operations = {
 	.setxattr	= sysfs_setxattr,
 };
 
-static void remove_dir(struct sysfs_dirent *sd)
+static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos)
 {
-	struct sysfs_addrm_cxt acxt;
+	struct sysfs_dirent *last;
 
-	sysfs_addrm_start(&acxt, sd->s_parent);
-	sysfs_remove_one(&acxt, sd);
-	sysfs_addrm_finish(&acxt);
+	while (true) {
+		struct rb_node *rbn;
+
+		last = pos;
+
+		if (sysfs_type(pos) != SYSFS_DIR)
+			break;
+
+		rbn = rb_first(&pos->s_dir.children);
+		if (!rbn)
+			break;
+
+		pos = to_sysfs_dirent(rbn);
+	}
+
+	return last;
 }
 
-void sysfs_remove_subdir(struct sysfs_dirent *sd)
+/**
+ * sysfs_next_descendant_post - find the next descendant for post-order walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @root: sysfs_dirent whose descendants to walk
+ *
+ * Find the next descendant to visit for post-order traversal of @root's
+ * descendants.  @root is included in the iteration and the last node to be
+ * visited.
+ */
+static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos,
+						       struct sysfs_dirent *root)
 {
-	remove_dir(sd);
+	struct rb_node *rbn;
+
+	lockdep_assert_held(&sysfs_mutex);
+
+	/* if first iteration, visit leftmost descendant which may be root */
+	if (!pos)
+		return sysfs_leftmost_descendant(root);
+
+	/* if we visited @root, we're done */
+	if (pos == root)
+		return NULL;
+
+	/* if there's an unvisited sibling, visit its leftmost descendant */
+	rbn = rb_next(&pos->s_rb);
+	if (rbn)
+		return sysfs_leftmost_descendant(to_sysfs_dirent(rbn));
+
+	/* no sibling left, visit parent */
+	return pos->s_parent;
 }
 
+static void __sysfs_remove(struct sysfs_addrm_cxt *acxt,
+			   struct sysfs_dirent *sd)
+{
+	struct sysfs_dirent *pos, *next;
+
+	if (!sd)
+		return;
 
-static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
+	pr_debug("sysfs %s: removing\n", sd->s_name);
+
+	next = NULL;
+	do {
+		pos = next;
+		next = sysfs_next_descendant_post(pos, sd);
+		if (pos)
+			sysfs_remove_one(acxt, pos);
+	} while (next);
+}
+
+/**
+ * sysfs_remove - remove a sysfs_dirent recursively
+ * @sd: the sysfs_dirent to remove
+ *
+ * Remove @sd along with all its subdirectories and files.
+ */
+void sysfs_remove(struct sysfs_dirent *sd)
 {
 	struct sysfs_addrm_cxt acxt;
-	struct rb_node *pos;
 
-	if (!dir_sd)
-		return;
+	sysfs_addrm_start(&acxt);
+	__sysfs_remove(&acxt, sd);
+	sysfs_addrm_finish(&acxt);
+}
 
-	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
-	sysfs_addrm_start(&acxt, dir_sd);
-	pos = rb_first(&dir_sd->s_dir.children);
-	while (pos) {
-		struct sysfs_dirent *sd = to_sysfs_dirent(pos);
-		pos = rb_next(pos);
-		if (sysfs_type(sd) != SYSFS_DIR)
-			sysfs_remove_one(&acxt, sd);
+/**
+ * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it
+ * @dir_sd: parent of the target
+ * @name: name of the sysfs_dirent to remove
+ * @ns: namespace tag of the sysfs_dirent to remove
+ *
+ * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove
+ * it.  Returns 0 on success, -ENOENT if such entry doesn't exist.
+ */
+int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
+			  const void *ns)
+{
+	struct sysfs_addrm_cxt acxt;
+	struct sysfs_dirent *sd;
+
+	if (!dir_sd) {
+		WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
+			name);
+		return -ENOENT;
 	}
+
+	sysfs_addrm_start(&acxt);
+
+	sd = sysfs_find_dirent(dir_sd, name, ns);
+	if (sd)
+		__sysfs_remove(&acxt, sd);
+
 	sysfs_addrm_finish(&acxt);
 
-	remove_dir(dir_sd);
+	if (sd)
+		return 0;
+	else
+		return -ENOENT;
 }
 
 /**
@@ -852,21 +938,34 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
  *	the directory before we remove the directory, and we've inlined
  *	what used to be sysfs_rmdir() below, instead of calling separately.
  */
-
 void sysfs_remove_dir(struct kobject *kobj)
 {
 	struct sysfs_dirent *sd = kobj->sd;
 
-	spin_lock(&sysfs_assoc_lock);
+	/*
+	 * In general, kboject owner is responsible for ensuring removal
+	 * doesn't race with other operations and sysfs doesn't provide any
+	 * protection; however, when @kobj is used as a symlink target, the
+	 * symlinking entity usually doesn't own @kobj and thus has no
+	 * control over removal.  @kobj->sd may be removed anytime and
+	 * symlink code may end up dereferencing an already freed sd.
+	 *
+	 * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation
+	 * against symlink operations so that symlink code can safely
+	 * dereference @kobj->sd.
+	 */
+	spin_lock(&sysfs_symlink_target_lock);
 	kobj->sd = NULL;
-	spin_unlock(&sysfs_assoc_lock);
+	spin_unlock(&sysfs_symlink_target_lock);
 
-	__sysfs_remove_dir(sd);
+	if (sd) {
+		WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR);
+		sysfs_remove(sd);
+	}
 }
 
-int sysfs_rename(struct sysfs_dirent *sd,
-	struct sysfs_dirent *new_parent_sd, const void *new_ns,
-	const char *new_name)
+int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
+		 const char *new_name, const void *new_ns)
 {
 	int error;
 
@@ -878,7 +977,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
 		goto out;	/* nothing to rename */
 
 	error = -EEXIST;
-	if (sysfs_find_dirent(new_parent_sd, new_ns, new_name))
+	if (sysfs_find_dirent(new_parent_sd, new_name, new_ns))
 		goto out;
 
 	/* rename sysfs_dirent */
@@ -899,7 +998,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
 	sysfs_get(new_parent_sd);
 	sysfs_put(sd->s_parent);
 	sd->s_ns = new_ns;
-	sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
+	sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
 	sd->s_parent = new_parent_sd;
 	sysfs_link_sibling(sd);
 
@@ -909,30 +1008,25 @@ int sysfs_rename(struct sysfs_dirent *sd,
 	return error;
 }
 
-int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
+int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
+			const void *new_ns)
 {
 	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
-	const void *new_ns = NULL;
-
-	if (sysfs_ns_type(parent_sd))
-		new_ns = kobj->ktype->namespace(kobj);
 
-	return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name);
+	return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns);
 }
 
-int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
+int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
+		      const void *new_ns)
 {
 	struct sysfs_dirent *sd = kobj->sd;
 	struct sysfs_dirent *new_parent_sd;
-	const void *new_ns = NULL;
 
 	BUG_ON(!sd->s_parent);
-	if (sysfs_ns_type(sd->s_parent))
-		new_ns = kobj->ktype->namespace(kobj);
 	new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
 		new_parent_kobj->sd : &sysfs_root;
 
-	return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name);
+	return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns);
 }
 
 /* Relationship between s_mode and the DT_xxx types */
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 15ef5eb13663..79b5da2acbe1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -21,70 +21,114 @@
 #include <linux/mutex.h>
 #include <linux/limits.h>
 #include <linux/uaccess.h>
+#include <linux/seq_file.h>
+#include <linux/mm.h>
 
 #include "sysfs.h"
 
 /*
- * There's one sysfs_buffer for each open file and one
- * sysfs_open_dirent for each sysfs_dirent with one or more open
- * files.
+ * There's one sysfs_open_file for each open file and one sysfs_open_dirent
+ * for each sysfs_dirent with one or more open files.
  *
- * filp->private_data points to sysfs_buffer and
- * sysfs_dirent->s_attr.open points to sysfs_open_dirent.  s_attr.open
- * is protected by sysfs_open_dirent_lock.
+ * sysfs_dirent->s_attr.open points to sysfs_open_dirent.  s_attr.open is
+ * protected by sysfs_open_dirent_lock.
+ *
+ * filp->private_data points to seq_file whose ->private points to
+ * sysfs_open_file.  sysfs_open_files are chained at
+ * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
  */
 static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
+static DEFINE_MUTEX(sysfs_open_file_mutex);
 
 struct sysfs_open_dirent {
 	atomic_t		refcnt;
 	atomic_t		event;
 	wait_queue_head_t	poll;
-	struct list_head	buffers; /* goes through sysfs_buffer.list */
+	struct list_head	files; /* goes through sysfs_open_file.list */
 };
 
-struct sysfs_buffer {
-	size_t			count;
-	loff_t			pos;
-	char			*page;
-	const struct sysfs_ops	*ops;
+struct sysfs_open_file {
+	struct sysfs_dirent	*sd;
+	struct file		*file;
 	struct mutex		mutex;
-	int			needs_read_fill;
 	int			event;
 	struct list_head	list;
+
+	bool			mmapped;
+	const struct vm_operations_struct *vm_ops;
 };
 
-/**
- *	fill_read_buffer - allocate and fill buffer from object.
- *	@dentry:	dentry pointer.
- *	@buffer:	data buffer for file.
- *
- *	Allocate @buffer->page, if it hasn't been already, then call the
- *	kobject's show() method to fill the buffer with this attribute's
- *	data.
- *	This is called only once, on the file's first read unless an error
- *	is returned.
+static bool sysfs_is_bin(struct sysfs_dirent *sd)
+{
+	return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR;
+}
+
+static struct sysfs_open_file *sysfs_of(struct file *file)
+{
+	return ((struct seq_file *)file->private_data)->private;
+}
+
+/*
+ * Determine ktype->sysfs_ops for the given sysfs_dirent.  This function
+ * must be called while holding an active reference.
  */
-static int fill_read_buffer(struct dentry *dentry, struct sysfs_buffer *buffer)
+static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
-	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
-	const struct sysfs_ops *ops = buffer->ops;
-	int ret = 0;
+	struct kobject *kobj = sd->s_parent->s_dir.kobj;
+
+	if (!sysfs_ignore_lockdep(sd))
+		lockdep_assert_held(sd);
+	return kobj->ktype ? kobj->ktype->sysfs_ops : NULL;
+}
+
+/*
+ * Reads on sysfs are handled through seq_file, which takes care of hairy
+ * details like buffering and seeking.  The following function pipes
+ * sysfs_ops->show() result through seq_file.
+ */
+static int sysfs_seq_show(struct seq_file *sf, void *v)
+{
+	struct sysfs_open_file *of = sf->private;
+	struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
+	const struct sysfs_ops *ops;
+	char *buf;
 	ssize_t count;
 
-	if (!buffer->page)
-		buffer->page = (char *) get_zeroed_page(GFP_KERNEL);
-	if (!buffer->page)
-		return -ENOMEM;
+	/* acquire buffer and ensure that it's >= PAGE_SIZE */
+	count = seq_get_buf(sf, &buf);
+	if (count < PAGE_SIZE) {
+		seq_commit(sf, -1);
+		return 0;
+	}
 
-	/* need attr_sd for attr and ops, its parent for kobj */
-	if (!sysfs_get_active(attr_sd))
+	/*
+	 * Need @of->sd for attr and ops, its parent for kobj.  @of->mutex
+	 * nests outside active ref and is just to ensure that the ops
+	 * aren't called concurrently for the same open file.
+	 */
+	mutex_lock(&of->mutex);
+	if (!sysfs_get_active(of->sd)) {
+		mutex_unlock(&of->mutex);
 		return -ENODEV;
+	}
 
-	buffer->event = atomic_read(&attr_sd->s_attr.open->event);
-	count = ops->show(kobj, attr_sd->s_attr.attr, buffer->page);
+	of->event = atomic_read(&of->sd->s_attr.open->event);
 
-	sysfs_put_active(attr_sd);
+	/*
+	 * Lookup @ops and invoke show().  Control may reach here via seq
+	 * file lseek even if @ops->show() isn't implemented.
+	 */
+	ops = sysfs_file_ops(of->sd);
+	if (ops->show)
+		count = ops->show(kobj, of->sd->s_attr.attr, buf);
+	else
+		count = 0;
+
+	sysfs_put_active(of->sd);
+	mutex_unlock(&of->mutex);
+
+	if (count < 0)
+		return count;
 
 	/*
 	 * The code works fine with PAGE_SIZE return but it's likely to
@@ -96,155 +140,389 @@ static int fill_read_buffer(struct dentry *dentry, struct sysfs_buffer *buffer)
 		/* Try to struggle along */
 		count = PAGE_SIZE - 1;
 	}
-	if (count >= 0) {
-		buffer->needs_read_fill = 0;
-		buffer->count = count;
-	} else {
-		ret = count;
-	}
-	return ret;
+	seq_commit(sf, count);
+	return 0;
 }
 
-/**
- *	sysfs_read_file - read an attribute.
- *	@file:	file pointer.
- *	@buf:	buffer to fill.
- *	@count:	number of bytes to read.
- *	@ppos:	starting offset in file.
- *
- *	Userspace wants to read an attribute file. The attribute descriptor
- *	is in the file's ->d_fsdata. The target object is in the directory's
- *	->d_fsdata.
- *
- *	We call fill_read_buffer() to allocate and fill the buffer from the
- *	object's show() method exactly once (if the read is happening from
- *	the beginning of the file). That should fill the entire buffer with
- *	all the data the object has to offer for that attribute.
- *	We then call flush_read_buffer() to copy the buffer to userspace
- *	in the increments specified.
+/*
+ * Read method for bin files.  As reading a bin file can have side-effects,
+ * the exact offset and bytes specified in read(2) call should be passed to
+ * the read callback making it difficult to use seq_file.  Implement
+ * simplistic custom buffering for bin files.
  */
-
-static ssize_t
-sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf,
+			      size_t bytes, loff_t *off)
 {
-	struct sysfs_buffer *buffer = file->private_data;
-	ssize_t retval = 0;
+	struct sysfs_open_file *of = sysfs_of(file);
+	struct bin_attribute *battr = of->sd->s_attr.bin_attr;
+	struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
+	loff_t size = file_inode(file)->i_size;
+	int count = min_t(size_t, bytes, PAGE_SIZE);
+	loff_t offs = *off;
+	char *buf;
+
+	if (!bytes)
+		return 0;
 
-	mutex_lock(&buffer->mutex);
-	if (buffer->needs_read_fill || *ppos == 0) {
-		retval = fill_read_buffer(file->f_path.dentry, buffer);
-		if (retval)
-			goto out;
+	if (size) {
+		if (offs > size)
+			return 0;
+		if (offs + count > size)
+			count = size - offs;
 	}
-	pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
-		 __func__, count, *ppos, buffer->page);
-	retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
-					 buffer->count);
-out:
-	mutex_unlock(&buffer->mutex);
-	return retval;
-}
 
-/**
- *	fill_write_buffer - copy buffer from userspace.
- *	@buffer:	data buffer for file.
- *	@buf:		data from user.
- *	@count:		number of bytes in @userbuf.
- *
- *	Allocate @buffer->page if it hasn't been already, then
- *	copy the user-supplied buffer into it.
- */
-static int fill_write_buffer(struct sysfs_buffer *buffer,
-			     const char __user *buf, size_t count)
-{
-	int error;
-
-	if (!buffer->page)
-		buffer->page = (char *)get_zeroed_page(GFP_KERNEL);
-	if (!buffer->page)
+	buf = kmalloc(count, GFP_KERNEL);
+	if (!buf)
 		return -ENOMEM;
 
-	if (count >= PAGE_SIZE)
-		count = PAGE_SIZE - 1;
-	error = copy_from_user(buffer->page, buf, count);
-	buffer->needs_read_fill = 1;
-	/* if buf is assumed to contain a string, terminate it by \0,
-	   so e.g. sscanf() can scan the string easily */
-	buffer->page[count] = 0;
-	return error ? -EFAULT : count;
-}
+	/* need of->sd for battr, its parent for kobj */
+	mutex_lock(&of->mutex);
+	if (!sysfs_get_active(of->sd)) {
+		count = -ENODEV;
+		mutex_unlock(&of->mutex);
+		goto out_free;
+	}
+
+	if (battr->read)
+		count = battr->read(file, kobj, battr, buf, offs, count);
+	else
+		count = -EIO;
 
+	sysfs_put_active(of->sd);
+	mutex_unlock(&of->mutex);
+
+	if (count < 0)
+		goto out_free;
+
+	if (copy_to_user(userbuf, buf, count)) {
+		count = -EFAULT;
+		goto out_free;
+	}
+
+	pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
+
+	*off = offs + count;
+
+ out_free:
+	kfree(buf);
+	return count;
+}
 
 /**
- *	flush_write_buffer - push buffer to kobject.
- *	@dentry:	dentry to the attribute
- *	@buffer:	data buffer for file.
- *	@count:		number of bytes
+ * flush_write_buffer - push buffer to kobject
+ * @of: open file
+ * @buf: data buffer for file
+ * @off: file offset to write to
+ * @count: number of bytes
  *
- *	Get the correct pointers for the kobject and the attribute we're
- *	dealing with, then call the store() method for the attribute,
- *	passing the buffer that we acquired in fill_write_buffer().
+ * Get the correct pointers for the kobject and the attribute we're dealing
+ * with, then call the store() method for it with @buf.
  */
-static int flush_write_buffer(struct dentry *dentry,
-			      struct sysfs_buffer *buffer, size_t count)
+static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off,
+			      size_t count)
 {
-	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
-	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
-	const struct sysfs_ops *ops = buffer->ops;
-	int rc;
+	struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
+	int rc = 0;
 
-	/* need attr_sd for attr and ops, its parent for kobj */
-	if (!sysfs_get_active(attr_sd))
+	/*
+	 * Need @of->sd for attr and ops, its parent for kobj.  @of->mutex
+	 * nests outside active ref and is just to ensure that the ops
+	 * aren't called concurrently for the same open file.
+	 */
+	mutex_lock(&of->mutex);
+	if (!sysfs_get_active(of->sd)) {
+		mutex_unlock(&of->mutex);
 		return -ENODEV;
+	}
 
-	rc = ops->store(kobj, attr_sd->s_attr.attr, buffer->page, count);
+	if (sysfs_is_bin(of->sd)) {
+		struct bin_attribute *battr = of->sd->s_attr.bin_attr;
 
-	sysfs_put_active(attr_sd);
+		rc = -EIO;
+		if (battr->write)
+			rc = battr->write(of->file, kobj, battr, buf, off,
+					  count);
+	} else {
+		const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
+
+		rc = ops->store(kobj, of->sd->s_attr.attr, buf, count);
+	}
+
+	sysfs_put_active(of->sd);
+	mutex_unlock(&of->mutex);
 
 	return rc;
 }
 
-
 /**
- *	sysfs_write_file - write an attribute.
- *	@file:	file pointer
- *	@buf:	data to write
- *	@count:	number of bytes
- *	@ppos:	starting offset
+ * sysfs_write_file - write an attribute
+ * @file: file pointer
+ * @user_buf: data to write
+ * @count: number of bytes
+ * @ppos: starting offset
+ *
+ * Copy data in from userland and pass it to the matching
+ * sysfs_ops->store() by invoking flush_write_buffer().
  *
- *	Similar to sysfs_read_file(), though working in the opposite direction.
- *	We allocate and fill the data from the user in fill_write_buffer(),
- *	then push it to the kobject in flush_write_buffer().
- *	There is no easy way for us to know if userspace is only doing a partial
- *	write, so we don't support them. We expect the entire buffer to come
- *	on the first write.
- *	Hint: if you're writing a value, first read the file, modify only the
- *	the value you're changing, then write entire buffer back.
+ * There is no easy way for us to know if userspace is only doing a partial
+ * write, so we don't support them. We expect the entire buffer to come on
+ * the first write.  Hint: if you're writing a value, first read the file,
+ * modify only the the value you're changing, then write entire buffer
+ * back.
  */
-static ssize_t sysfs_write_file(struct file *file, const char __user *buf,
+static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf,
 				size_t count, loff_t *ppos)
 {
-	struct sysfs_buffer *buffer = file->private_data;
-	ssize_t len;
+	struct sysfs_open_file *of = sysfs_of(file);
+	ssize_t len = min_t(size_t, count, PAGE_SIZE);
+	loff_t size = file_inode(file)->i_size;
+	char *buf;
+
+	if (sysfs_is_bin(of->sd) && size) {
+		if (size <= *ppos)
+			return 0;
+		len = min_t(ssize_t, len, size - *ppos);
+	}
 
-	mutex_lock(&buffer->mutex);
-	len = fill_write_buffer(buffer, buf, count);
-	if (len > 0)
-		len = flush_write_buffer(file->f_path.dentry, buffer, len);
+	if (!len)
+		return 0;
+
+	buf = kmalloc(len + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, user_buf, len)) {
+		len = -EFAULT;
+		goto out_free;
+	}
+	buf[len] = '\0';	/* guarantee string termination */
+
+	len = flush_write_buffer(of, buf, *ppos, len);
 	if (len > 0)
 		*ppos += len;
-	mutex_unlock(&buffer->mutex);
+out_free:
+	kfree(buf);
 	return len;
 }
 
+static void sysfs_bin_vma_open(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct sysfs_open_file *of = sysfs_of(file);
+
+	if (!of->vm_ops)
+		return;
+
+	if (!sysfs_get_active(of->sd))
+		return;
+
+	if (of->vm_ops->open)
+		of->vm_ops->open(vma);
+
+	sysfs_put_active(of->sd);
+}
+
+static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct file *file = vma->vm_file;
+	struct sysfs_open_file *of = sysfs_of(file);
+	int ret;
+
+	if (!of->vm_ops)
+		return VM_FAULT_SIGBUS;
+
+	if (!sysfs_get_active(of->sd))
+		return VM_FAULT_SIGBUS;
+
+	ret = VM_FAULT_SIGBUS;
+	if (of->vm_ops->fault)
+		ret = of->vm_ops->fault(vma, vmf);
+
+	sysfs_put_active(of->sd);
+	return ret;
+}
+
+static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma,
+				  struct vm_fault *vmf)
+{
+	struct file *file = vma->vm_file;
+	struct sysfs_open_file *of = sysfs_of(file);
+	int ret;
+
+	if (!of->vm_ops)
+		return VM_FAULT_SIGBUS;
+
+	if (!sysfs_get_active(of->sd))
+		return VM_FAULT_SIGBUS;
+
+	ret = 0;
+	if (of->vm_ops->page_mkwrite)
+		ret = of->vm_ops->page_mkwrite(vma, vmf);
+	else
+		file_update_time(file);
+
+	sysfs_put_active(of->sd);
+	return ret;
+}
+
+static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr,
+			    void *buf, int len, int write)
+{
+	struct file *file = vma->vm_file;
+	struct sysfs_open_file *of = sysfs_of(file);
+	int ret;
+
+	if (!of->vm_ops)
+		return -EINVAL;
+
+	if (!sysfs_get_active(of->sd))
+		return -EINVAL;
+
+	ret = -EINVAL;
+	if (of->vm_ops->access)
+		ret = of->vm_ops->access(vma, addr, buf, len, write);
+
+	sysfs_put_active(of->sd);
+	return ret;
+}
+
+#ifdef CONFIG_NUMA
+static int sysfs_bin_set_policy(struct vm_area_struct *vma,
+				struct mempolicy *new)
+{
+	struct file *file = vma->vm_file;
+	struct sysfs_open_file *of = sysfs_of(file);
+	int ret;
+
+	if (!of->vm_ops)
+		return 0;
+
+	if (!sysfs_get_active(of->sd))
+		return -EINVAL;
+
+	ret = 0;
+	if (of->vm_ops->set_policy)
+		ret = of->vm_ops->set_policy(vma, new);
+
+	sysfs_put_active(of->sd);
+	return ret;
+}
+
+static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma,
+					      unsigned long addr)
+{
+	struct file *file = vma->vm_file;
+	struct sysfs_open_file *of = sysfs_of(file);
+	struct mempolicy *pol;
+
+	if (!of->vm_ops)
+		return vma->vm_policy;
+
+	if (!sysfs_get_active(of->sd))
+		return vma->vm_policy;
+
+	pol = vma->vm_policy;
+	if (of->vm_ops->get_policy)
+		pol = of->vm_ops->get_policy(vma, addr);
+
+	sysfs_put_active(of->sd);
+	return pol;
+}
+
+static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
+			     const nodemask_t *to, unsigned long flags)
+{
+	struct file *file = vma->vm_file;
+	struct sysfs_open_file *of = sysfs_of(file);
+	int ret;
+
+	if (!of->vm_ops)
+		return 0;
+
+	if (!sysfs_get_active(of->sd))
+		return 0;
+
+	ret = 0;
+	if (of->vm_ops->migrate)
+		ret = of->vm_ops->migrate(vma, from, to, flags);
+
+	sysfs_put_active(of->sd);
+	return ret;
+}
+#endif
+
+static const struct vm_operations_struct sysfs_bin_vm_ops = {
+	.open		= sysfs_bin_vma_open,
+	.fault		= sysfs_bin_fault,
+	.page_mkwrite	= sysfs_bin_page_mkwrite,
+	.access		= sysfs_bin_access,
+#ifdef CONFIG_NUMA
+	.set_policy	= sysfs_bin_set_policy,
+	.get_policy	= sysfs_bin_get_policy,
+	.migrate	= sysfs_bin_migrate,
+#endif
+};
+
+static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct sysfs_open_file *of = sysfs_of(file);
+	struct bin_attribute *battr = of->sd->s_attr.bin_attr;
+	struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
+	int rc;
+
+	mutex_lock(&of->mutex);
+
+	/* need of->sd for battr, its parent for kobj */
+	rc = -ENODEV;
+	if (!sysfs_get_active(of->sd))
+		goto out_unlock;
+
+	if (!battr->mmap)
+		goto out_put;
+
+	rc = battr->mmap(file, kobj, battr, vma);
+	if (rc)
+		goto out_put;
+
+	/*
+	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
+	 * to satisfy versions of X which crash if the mmap fails: that
+	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
+	 */
+	if (vma->vm_file != file)
+		goto out_put;
+
+	rc = -EINVAL;
+	if (of->mmapped && of->vm_ops != vma->vm_ops)
+		goto out_put;
+
+	/*
+	 * It is not possible to successfully wrap close.
+	 * So error if someone is trying to use close.
+	 */
+	rc = -EINVAL;
+	if (vma->vm_ops && vma->vm_ops->close)
+		goto out_put;
+
+	rc = 0;
+	of->mmapped = 1;
+	of->vm_ops = vma->vm_ops;
+	vma->vm_ops = &sysfs_bin_vm_ops;
+out_put:
+	sysfs_put_active(of->sd);
+out_unlock:
+	mutex_unlock(&of->mutex);
+
+	return rc;
+}
+
 /**
  *	sysfs_get_open_dirent - get or create sysfs_open_dirent
  *	@sd: target sysfs_dirent
- *	@buffer: sysfs_buffer for this instance of open
+ *	@of: sysfs_open_file for this instance of open
  *
  *	If @sd->s_attr.open exists, increment its reference count;
- *	otherwise, create one.  @buffer is chained to the buffers
- *	list.
+ *	otherwise, create one.  @of is chained to the files list.
  *
  *	LOCKING:
  *	Kernel thread context (may sleep).
@@ -253,11 +531,12 @@ static ssize_t sysfs_write_file(struct file *file, const char __user *buf,
  *	0 on success, -errno on failure.
  */
 static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
-				 struct sysfs_buffer *buffer)
+				 struct sysfs_open_file *of)
 {
 	struct sysfs_open_dirent *od, *new_od = NULL;
 
  retry:
+	mutex_lock(&sysfs_open_file_mutex);
 	spin_lock_irq(&sysfs_open_dirent_lock);
 
 	if (!sd->s_attr.open && new_od) {
@@ -268,10 +547,11 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
 	od = sd->s_attr.open;
 	if (od) {
 		atomic_inc(&od->refcnt);
-		list_add_tail(&buffer->list, &od->buffers);
+		list_add_tail(&of->list, &od->files);
 	}
 
 	spin_unlock_irq(&sysfs_open_dirent_lock);
+	mutex_unlock(&sysfs_open_file_mutex);
 
 	if (od) {
 		kfree(new_od);
@@ -286,36 +566,40 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
 	atomic_set(&new_od->refcnt, 0);
 	atomic_set(&new_od->event, 1);
 	init_waitqueue_head(&new_od->poll);
-	INIT_LIST_HEAD(&new_od->buffers);
+	INIT_LIST_HEAD(&new_od->files);
 	goto retry;
 }
 
 /**
  *	sysfs_put_open_dirent - put sysfs_open_dirent
  *	@sd: target sysfs_dirent
- *	@buffer: associated sysfs_buffer
+ *	@of: associated sysfs_open_file
  *
- *	Put @sd->s_attr.open and unlink @buffer from the buffers list.
- *	If reference count reaches zero, disassociate and free it.
+ *	Put @sd->s_attr.open and unlink @of from the files list.  If
+ *	reference count reaches zero, disassociate and free it.
  *
  *	LOCKING:
  *	None.
  */
 static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
-				  struct sysfs_buffer *buffer)
+				  struct sysfs_open_file *of)
 {
 	struct sysfs_open_dirent *od = sd->s_attr.open;
 	unsigned long flags;
 
+	mutex_lock(&sysfs_open_file_mutex);
 	spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
 
-	list_del(&buffer->list);
+	if (of)
+		list_del(&of->list);
+
 	if (atomic_dec_and_test(&od->refcnt))
 		sd->s_attr.open = NULL;
 	else
 		od = NULL;
 
 	spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
+	mutex_unlock(&sysfs_open_file_mutex);
 
 	kfree(od);
 }
@@ -324,67 +608,81 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 {
 	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
-	struct sysfs_buffer *buffer;
-	const struct sysfs_ops *ops;
+	struct sysfs_open_file *of;
+	bool has_read, has_write;
 	int error = -EACCES;
 
 	/* need attr_sd for attr and ops, its parent for kobj */
 	if (!sysfs_get_active(attr_sd))
 		return -ENODEV;
 
-	/* every kobject with an attribute needs a ktype assigned */
-	if (kobj->ktype && kobj->ktype->sysfs_ops)
-		ops = kobj->ktype->sysfs_ops;
-	else {
-		WARN(1, KERN_ERR
-		     "missing sysfs attribute operations for kobject: %s\n",
-		     kobject_name(kobj));
-		goto err_out;
-	}
+	if (sysfs_is_bin(attr_sd)) {
+		struct bin_attribute *battr = attr_sd->s_attr.bin_attr;
 
-	/* File needs write support.
-	 * The inode's perms must say it's ok,
-	 * and we must have a store method.
-	 */
-	if (file->f_mode & FMODE_WRITE) {
-		if (!(inode->i_mode & S_IWUGO) || !ops->store)
-			goto err_out;
-	}
+		has_read = battr->read || battr->mmap;
+		has_write = battr->write || battr->mmap;
+	} else {
+		const struct sysfs_ops *ops = sysfs_file_ops(attr_sd);
 
-	/* File needs read support.
-	 * The inode's perms must say it's ok, and we there
-	 * must be a show method for it.
-	 */
-	if (file->f_mode & FMODE_READ) {
-		if (!(inode->i_mode & S_IRUGO) || !ops->show)
+		/* every kobject with an attribute needs a ktype assigned */
+		if (WARN(!ops, KERN_ERR
+			 "missing sysfs attribute operations for kobject: %s\n",
+			 kobject_name(kobj)))
 			goto err_out;
+
+		has_read = ops->show;
+		has_write = ops->store;
 	}
 
-	/* No error? Great, allocate a buffer for the file, and store it
-	 * it in file->private_data for easy access.
-	 */
+	/* check perms and supported operations */
+	if ((file->f_mode & FMODE_WRITE) &&
+	    (!(inode->i_mode & S_IWUGO) || !has_write))
+		goto err_out;
+
+	if ((file->f_mode & FMODE_READ) &&
+	    (!(inode->i_mode & S_IRUGO) || !has_read))
+		goto err_out;
+
+	/* allocate a sysfs_open_file for the file */
 	error = -ENOMEM;
-	buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
-	if (!buffer)
+	of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
+	if (!of)
 		goto err_out;
 
-	mutex_init(&buffer->mutex);
-	buffer->needs_read_fill = 1;
-	buffer->ops = ops;
-	file->private_data = buffer;
+	mutex_init(&of->mutex);
+	of->sd = attr_sd;
+	of->file = file;
 
-	/* make sure we have open dirent struct */
-	error = sysfs_get_open_dirent(attr_sd, buffer);
+	/*
+	 * Always instantiate seq_file even if read access doesn't use
+	 * seq_file or is not requested.  This unifies private data access
+	 * and readable regular files are the vast majority anyway.
+	 */
+	if (sysfs_is_bin(attr_sd))
+		error = single_open(file, NULL, of);
+	else
+		error = single_open(file, sysfs_seq_show, of);
 	if (error)
 		goto err_free;
 
+	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
+	if (file->f_mode & FMODE_WRITE)
+		file->f_mode |= FMODE_PWRITE;
+
+	/* make sure we have open dirent struct */
+	error = sysfs_get_open_dirent(attr_sd, of);
+	if (error)
+		goto err_close;
+
 	/* open succeeded, put active references */
 	sysfs_put_active(attr_sd);
 	return 0;
 
- err_free:
-	kfree(buffer);
- err_out:
+err_close:
+	single_release(inode, file);
+err_free:
+	kfree(of);
+err_out:
 	sysfs_put_active(attr_sd);
 	return error;
 }
@@ -392,17 +690,41 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 static int sysfs_release(struct inode *inode, struct file *filp)
 {
 	struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
-	struct sysfs_buffer *buffer = filp->private_data;
+	struct sysfs_open_file *of = sysfs_of(filp);
 
-	sysfs_put_open_dirent(sd, buffer);
-
-	if (buffer->page)
-		free_page((unsigned long)buffer->page);
-	kfree(buffer);
+	sysfs_put_open_dirent(sd, of);
+	single_release(inode, filp);
+	kfree(of);
 
 	return 0;
 }
 
+void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
+{
+	struct sysfs_open_dirent *od;
+	struct sysfs_open_file *of;
+
+	if (!sysfs_is_bin(sd))
+		return;
+
+	spin_lock_irq(&sysfs_open_dirent_lock);
+	od = sd->s_attr.open;
+	if (od)
+		atomic_inc(&od->refcnt);
+	spin_unlock_irq(&sysfs_open_dirent_lock);
+	if (!od)
+		return;
+
+	mutex_lock(&sysfs_open_file_mutex);
+	list_for_each_entry(of, &od->files, list) {
+		struct inode *inode = file_inode(of->file);
+		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+	}
+	mutex_unlock(&sysfs_open_file_mutex);
+
+	sysfs_put_open_dirent(sd, NULL);
+}
+
 /* Sysfs attribute files are pollable.  The idea is that you read
  * the content and then you use 'poll' or 'select' to wait for
  * the content to change.  When the content changes (assuming the
@@ -418,7 +740,7 @@ static int sysfs_release(struct inode *inode, struct file *filp)
  */
 static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
 {
-	struct sysfs_buffer *buffer = filp->private_data;
+	struct sysfs_open_file *of = sysfs_of(filp);
 	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
 	struct sysfs_open_dirent *od = attr_sd->s_attr.open;
 
@@ -430,13 +752,12 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
 
 	sysfs_put_active(attr_sd);
 
-	if (buffer->event != atomic_read(&od->event))
+	if (of->event != atomic_read(&od->event))
 		goto trigger;
 
 	return DEFAULT_POLLMASK;
 
  trigger:
-	buffer->needs_read_fill = 1;
 	return DEFAULT_POLLMASK|POLLERR|POLLPRI;
 }
 
@@ -466,9 +787,9 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
 	mutex_lock(&sysfs_mutex);
 
 	if (sd && dir)
-		sd = sysfs_find_dirent(sd, NULL, dir);
+		sd = sysfs_find_dirent(sd, dir, NULL);
 	if (sd && attr)
-		sd = sysfs_find_dirent(sd, NULL, attr);
+		sd = sysfs_find_dirent(sd, attr, NULL);
 	if (sd)
 		sysfs_notify_dirent(sd);
 
@@ -477,7 +798,7 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
 EXPORT_SYMBOL_GPL(sysfs_notify);
 
 const struct file_operations sysfs_file_operations = {
-	.read		= sysfs_read_file,
+	.read		= seq_read,
 	.write		= sysfs_write_file,
 	.llseek		= generic_file_llseek,
 	.open		= sysfs_open_file,
@@ -485,58 +806,25 @@ const struct file_operations sysfs_file_operations = {
 	.poll		= sysfs_poll,
 };
 
-static int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr,
-			 const void **pns)
-{
-	struct sysfs_dirent *dir_sd = kobj->sd;
-	const struct sysfs_ops *ops;
-	const void *ns = NULL;
-	int err;
-
-	if (!dir_sd) {
-		WARN(1, KERN_ERR "sysfs: kobject %s without dirent\n",
-			kobject_name(kobj));
-		return -ENOENT;
-	}
-
-	err = 0;
-	if (!sysfs_ns_type(dir_sd))
-		goto out;
-
-	err = -EINVAL;
-	if (!kobj->ktype)
-		goto out;
-	ops = kobj->ktype->sysfs_ops;
-	if (!ops)
-		goto out;
-	if (!ops->namespace)
-		goto out;
-
-	err = 0;
-	ns = ops->namespace(kobj, attr);
-out:
-	if (err) {
-		WARN(1, KERN_ERR
-		     "missing sysfs namespace attribute operation for kobject: %s\n",
-		     kobject_name(kobj));
-	}
-	*pns = ns;
-	return err;
-}
+const struct file_operations sysfs_bin_operations = {
+	.read		= sysfs_bin_read,
+	.write		= sysfs_write_file,
+	.llseek		= generic_file_llseek,
+	.mmap		= sysfs_bin_mmap,
+	.open		= sysfs_open_file,
+	.release	= sysfs_release,
+	.poll		= sysfs_poll,
+};
 
-int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
-			const struct attribute *attr, int type, umode_t amode)
+int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
+			   const struct attribute *attr, int type,
+			   umode_t amode, const void *ns)
 {
 	umode_t mode = (amode & S_IALLUGO) | S_IFREG;
 	struct sysfs_addrm_cxt acxt;
 	struct sysfs_dirent *sd;
-	const void *ns;
 	int rc;
 
-	rc = sysfs_attr_ns(dir_sd->s_dir.kobj, attr, &ns);
-	if (rc)
-		return rc;
-
 	sd = sysfs_new_dirent(attr->name, mode, type);
 	if (!sd)
 		return -ENOMEM;
@@ -545,8 +833,8 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
 	sd->s_attr.attr = (void *)attr;
 	sysfs_dirent_init_lockdep(sd);
 
-	sysfs_addrm_start(&acxt, dir_sd);
-	rc = sysfs_add_one(&acxt, sd);
+	sysfs_addrm_start(&acxt);
+	rc = sysfs_add_one(&acxt, sd, dir_sd);
 	sysfs_addrm_finish(&acxt);
 
 	if (rc)
@@ -559,23 +847,25 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
 int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
 		   int type)
 {
-	return sysfs_add_file_mode(dir_sd, attr, type, attr->mode);
+	return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL);
 }
 
-
 /**
- *	sysfs_create_file - create an attribute file for an object.
- *	@kobj:	object we're creating for.
- *	@attr:	attribute descriptor.
+ * sysfs_create_file_ns - create an attribute file for an object with custom ns
+ * @kobj: object we're creating for
+ * @attr: attribute descriptor
+ * @ns: namespace the new file should belong to
  */
-int sysfs_create_file(struct kobject *kobj, const struct attribute *attr)
+int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
+			 const void *ns)
 {
 	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
+	return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR,
+				      attr->mode, ns);
 
 }
-EXPORT_SYMBOL_GPL(sysfs_create_file);
+EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
 
 int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr)
 {
@@ -604,7 +894,7 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 	int error;
 
 	if (group)
-		dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
+		dir_sd = sysfs_get_dirent(kobj->sd, group);
 	else
 		dir_sd = sysfs_get(kobj->sd);
 
@@ -630,17 +920,12 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
 {
 	struct sysfs_dirent *sd;
 	struct iattr newattrs;
-	const void *ns;
 	int rc;
 
-	rc = sysfs_attr_ns(kobj, attr, &ns);
-	if (rc)
-		return rc;
-
 	mutex_lock(&sysfs_mutex);
 
 	rc = -ENOENT;
-	sd = sysfs_find_dirent(kobj->sd, ns, attr->name);
+	sd = sysfs_find_dirent(kobj->sd, attr->name, NULL);
 	if (!sd)
 		goto out;
 
@@ -655,22 +940,21 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
 /**
- *	sysfs_remove_file - remove an object attribute.
- *	@kobj:	object we're acting for.
- *	@attr:	attribute descriptor.
+ * sysfs_remove_file_ns - remove an object attribute with a custom ns tag
+ * @kobj: object we're acting for
+ * @attr: attribute descriptor
+ * @ns: namespace tag of the file to remove
  *
- *	Hash the attribute name and kill the victim.
+ * Hash the attribute name and namespace tag and kill the victim.
  */
-void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr)
+void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
+			  const void *ns)
 {
-	const void *ns;
-
-	if (sysfs_attr_ns(kobj, attr, &ns))
-		return;
+	struct sysfs_dirent *dir_sd = kobj->sd;
 
-	sysfs_hash_and_remove(kobj->sd, ns, attr->name);
+	sysfs_hash_and_remove(dir_sd, attr->name, ns);
 }
-EXPORT_SYMBOL_GPL(sysfs_remove_file);
+EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
 
 void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr)
 {
@@ -692,16 +976,42 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
 	struct sysfs_dirent *dir_sd;
 
 	if (group)
-		dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
+		dir_sd = sysfs_get_dirent(kobj->sd, group);
 	else
 		dir_sd = sysfs_get(kobj->sd);
 	if (dir_sd) {
-		sysfs_hash_and_remove(dir_sd, NULL, attr->name);
+		sysfs_hash_and_remove(dir_sd, attr->name, NULL);
 		sysfs_put(dir_sd);
 	}
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
 
+/**
+ *	sysfs_create_bin_file - create binary file for object.
+ *	@kobj:	object.
+ *	@attr:	attribute descriptor.
+ */
+int sysfs_create_bin_file(struct kobject *kobj,
+			  const struct bin_attribute *attr)
+{
+	BUG_ON(!kobj || !kobj->sd || !attr);
+
+	return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
+}
+EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
+
+/**
+ *	sysfs_remove_bin_file - remove binary file for object.
+ *	@kobj:	object.
+ *	@attr:	attribute descriptor.
+ */
+void sysfs_remove_bin_file(struct kobject *kobj,
+			   const struct bin_attribute *attr)
+{
+	sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL);
+}
+EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
+
 struct sysfs_schedule_callback_struct {
 	struct list_head	workq_list;
 	struct kobject		*kobj;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 5f92cd2f61c1..1898a10e38ce 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -26,7 +26,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
 
 	if (grp->attrs)
 		for (attr = grp->attrs; *attr; attr++)
-			sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
+			sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL);
 	if (grp->bin_attrs)
 		for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++)
 			sysfs_remove_bin_file(kobj, *bin_attr);
@@ -49,16 +49,17 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
 			 * re-adding (if required) the file.
 			 */
 			if (update)
-				sysfs_hash_and_remove(dir_sd, NULL,
-						      (*attr)->name);
+				sysfs_hash_and_remove(dir_sd, (*attr)->name,
+						      NULL);
 			if (grp->is_visible) {
 				mode = grp->is_visible(kobj, *attr, i);
 				if (!mode)
 					continue;
 			}
-			error = sysfs_add_file_mode(dir_sd, *attr,
-						    SYSFS_KOBJ_ATTR,
-						    (*attr)->mode | mode);
+			error = sysfs_add_file_mode_ns(dir_sd, *attr,
+						       SYSFS_KOBJ_ATTR,
+						       (*attr)->mode | mode,
+						       NULL);
 			if (unlikely(error))
 				break;
 		}
@@ -110,7 +111,7 @@ static int internal_create_group(struct kobject *kobj, int update,
 	error = create_files(sd, kobj, grp, update);
 	if (error) {
 		if (grp->name)
-			sysfs_remove_subdir(sd);
+			sysfs_remove(sd);
 	}
 	sysfs_put(sd);
 	return error;
@@ -206,7 +207,7 @@ void sysfs_remove_group(struct kobject *kobj,
 	struct sysfs_dirent *sd;
 
 	if (grp->name) {
-		sd = sysfs_get_dirent(dir_sd, NULL, grp->name);
+		sd = sysfs_get_dirent(dir_sd, grp->name);
 		if (!sd) {
 			WARN(!sd, KERN_WARNING
 			     "sysfs group %p not found for kobject '%s'\n",
@@ -218,7 +219,7 @@ void sysfs_remove_group(struct kobject *kobj,
 
 	remove_files(sd, kobj, grp);
 	if (grp->name)
-		sysfs_remove_subdir(sd);
+		sysfs_remove(sd);
 
 	sysfs_put(sd);
 }
@@ -261,7 +262,7 @@ int sysfs_merge_group(struct kobject *kobj,
 	struct attribute *const *attr;
 	int i;
 
-	dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
+	dir_sd = sysfs_get_dirent(kobj->sd, grp->name);
 	if (!dir_sd)
 		return -ENOENT;
 
@@ -269,7 +270,7 @@ int sysfs_merge_group(struct kobject *kobj,
 		error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
 	if (error) {
 		while (--i >= 0)
-			sysfs_hash_and_remove(dir_sd, NULL, (*--attr)->name);
+			sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL);
 	}
 	sysfs_put(dir_sd);
 
@@ -288,10 +289,10 @@ void sysfs_unmerge_group(struct kobject *kobj,
 	struct sysfs_dirent *dir_sd;
 	struct attribute *const *attr;
 
-	dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
+	dir_sd = sysfs_get_dirent(kobj->sd, grp->name);
 	if (dir_sd) {
 		for (attr = grp->attrs; *attr; ++attr)
-			sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
+			sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL);
 		sysfs_put(dir_sd);
 	}
 }
@@ -310,7 +311,7 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
 	struct sysfs_dirent *dir_sd;
 	int error = 0;
 
-	dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
+	dir_sd = sysfs_get_dirent(kobj->sd, group_name);
 	if (!dir_sd)
 		return -ENOENT;
 
@@ -332,9 +333,9 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
 {
 	struct sysfs_dirent *dir_sd;
 
-	dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
+	dir_sd = sysfs_get_dirent(kobj->sd, group_name);
 	if (dir_sd) {
-		sysfs_hash_and_remove(dir_sd, NULL, link_name);
+		sysfs_hash_and_remove(dir_sd, link_name, NULL);
 		sysfs_put(dir_sd);
 	}
 }
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 963f910c8034..1750f790af3b 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -258,9 +258,9 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 		inode->i_fop = &sysfs_file_operations;
 		break;
 	case SYSFS_KOBJ_BIN_ATTR:
-		bin_attr = sd->s_bin_attr.bin_attr;
+		bin_attr = sd->s_attr.bin_attr;
 		inode->i_size = bin_attr->size;
-		inode->i_fop = &bin_fops;
+		inode->i_fop = &sysfs_bin_operations;
 		break;
 	case SYSFS_KOBJ_LINK:
 		inode->i_op = &sysfs_symlink_inode_operations;
@@ -314,32 +314,6 @@ void sysfs_evict_inode(struct inode *inode)
 	sysfs_put(sd);
 }
 
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns,
-			  const char *name)
-{
-	struct sysfs_addrm_cxt acxt;
-	struct sysfs_dirent *sd;
-
-	if (!dir_sd) {
-		WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
-			name);
-		return -ENOENT;
-	}
-
-	sysfs_addrm_start(&acxt, dir_sd);
-
-	sd = sysfs_find_dirent(dir_sd, ns, name);
-	if (sd)
-		sysfs_remove_one(&acxt, sd);
-
-	sysfs_addrm_finish(&acxt);
-
-	if (sd)
-		return 0;
-	else
-		return -ENOENT;
-}
-
 int sysfs_permission(struct inode *inode, int mask)
 {
 	struct sysfs_dirent *sd;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 2dd4507d9edd..3ae3f1bf1a09 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -33,13 +33,15 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
 
 	BUG_ON(!name || !parent_sd);
 
-	/* target->sd can go away beneath us but is protected with
-	 * sysfs_assoc_lock.  Fetch target_sd from it.
+	/*
+	 * We don't own @target and it may be removed at any time.
+	 * Synchronize using sysfs_symlink_target_lock.  See
+	 * sysfs_remove_dir() for details.
 	 */
-	spin_lock(&sysfs_assoc_lock);
+	spin_lock(&sysfs_symlink_target_lock);
 	if (target->sd)
 		target_sd = sysfs_get(target->sd);
-	spin_unlock(&sysfs_assoc_lock);
+	spin_unlock(&sysfs_symlink_target_lock);
 
 	error = -ENOENT;
 	if (!target_sd)
@@ -52,18 +54,18 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
 
 	ns_type = sysfs_ns_type(parent_sd);
 	if (ns_type)
-		sd->s_ns = target->ktype->namespace(target);
+		sd->s_ns = target_sd->s_ns;
 	sd->s_symlink.target_sd = target_sd;
 	target_sd = NULL;	/* reference is now owned by the symlink */
 
-	sysfs_addrm_start(&acxt, parent_sd);
+	sysfs_addrm_start(&acxt);
 	/* Symlinks must be between directories with the same ns_type */
 	if (!ns_type ||
 	    (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) {
 		if (warn)
-			error = sysfs_add_one(&acxt, sd);
+			error = sysfs_add_one(&acxt, sd, parent_sd);
 		else
-			error = __sysfs_add_one(&acxt, sd);
+			error = __sysfs_add_one(&acxt, sd, parent_sd);
 	} else {
 		error = -EINVAL;
 		WARN(1, KERN_WARNING
@@ -155,11 +157,17 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
 			const char *name)
 {
 	const void *ns = NULL;
-	spin_lock(&sysfs_assoc_lock);
+
+	/*
+	 * We don't own @target and it may be removed at any time.
+	 * Synchronize using sysfs_symlink_target_lock.  See
+	 * sysfs_remove_dir() for details.
+	 */
+	spin_lock(&sysfs_symlink_target_lock);
 	if (targ->sd && sysfs_ns_type(kobj->sd))
 		ns = targ->sd->s_ns;
-	spin_unlock(&sysfs_assoc_lock);
-	sysfs_hash_and_remove(kobj->sd, ns, name);
+	spin_unlock(&sysfs_symlink_target_lock);
+	sysfs_hash_and_remove(kobj->sd, name, ns);
 }
 
 /**
@@ -176,24 +184,25 @@ void sysfs_remove_link(struct kobject *kobj, const char *name)
 	else
 		parent_sd = kobj->sd;
 
-	sysfs_hash_and_remove(parent_sd, NULL, name);
+	sysfs_hash_and_remove(parent_sd, name, NULL);
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_link);
 
 /**
- *	sysfs_rename_link - rename symlink in object's directory.
+ *	sysfs_rename_link_ns - rename symlink in object's directory.
  *	@kobj:	object we're acting for.
  *	@targ:	object we're pointing to.
  *	@old:	previous name of the symlink.
  *	@new:	new name of the symlink.
+ *	@new_ns: new namespace of the symlink.
  *
  *	A helper function for the common rename symlink idiom.
  */
-int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
-			const char *old, const char *new)
+int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
+			 const char *old, const char *new, const void *new_ns)
 {
 	struct sysfs_dirent *parent_sd, *sd = NULL;
-	const void *old_ns = NULL, *new_ns = NULL;
+	const void *old_ns = NULL;
 	int result;
 
 	if (!kobj)
@@ -205,7 +214,7 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
 		old_ns = targ->sd->s_ns;
 
 	result = -ENOENT;
-	sd = sysfs_get_dirent(parent_sd, old_ns, old);
+	sd = sysfs_get_dirent_ns(parent_sd, old, old_ns);
 	if (!sd)
 		goto out;
 
@@ -215,16 +224,13 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
 	if (sd->s_symlink.target_sd->s_dir.kobj != targ)
 		goto out;
 
-	if (sysfs_ns_type(parent_sd))
-		new_ns = targ->ktype->namespace(targ);
-
-	result = sysfs_rename(sd, parent_sd, new_ns, new);
+	result = sysfs_rename(sd, parent_sd, new, new_ns);
 
 out:
 	sysfs_put(sd);
 	return result;
 }
-EXPORT_SYMBOL_GPL(sysfs_rename_link);
+EXPORT_SYMBOL_GPL(sysfs_rename_link_ns);
 
 static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
 				 struct sysfs_dirent *target_sd, char *path)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index b6deca3e301d..0af09fbfb3f6 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -29,15 +29,13 @@ struct sysfs_elem_symlink {
 };
 
 struct sysfs_elem_attr {
-	struct attribute	*attr;
+	union {
+		struct attribute	*attr;
+		struct bin_attribute	*bin_attr;
+	};
 	struct sysfs_open_dirent *open;
 };
 
-struct sysfs_elem_bin_attr {
-	struct bin_attribute	*bin_attr;
-	struct hlist_head	buffers;
-};
-
 struct sysfs_inode_attrs {
 	struct iattr	ia_iattr;
 	void		*ia_secdata;
@@ -74,7 +72,6 @@ struct sysfs_dirent {
 		struct sysfs_elem_dir		s_dir;
 		struct sysfs_elem_symlink	s_symlink;
 		struct sysfs_elem_attr		s_attr;
-		struct sysfs_elem_bin_attr	s_bin_attr;
 	};
 
 	unsigned short		s_flags;
@@ -115,6 +112,7 @@ static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
+
 #define sysfs_dirent_init_lockdep(sd)				\
 do {								\
 	struct attribute *attr = sd->s_attr.attr;		\
@@ -124,15 +122,31 @@ do {								\
 								\
 	lockdep_init_map(&sd->dep_map, "s_active", key, 0);	\
 } while (0)
+
+/* Test for attributes that want to ignore lockdep for read-locking */
+static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
+{
+	int type = sysfs_type(sd);
+
+	return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) &&
+		sd->s_attr.attr->ignore_lockdep;
+}
+
 #else
+
 #define sysfs_dirent_init_lockdep(sd) do {} while (0)
+
+static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
+{
+	return true;
+}
+
 #endif
 
 /*
  * Context structure to be used while adding/removing nodes.
  */
 struct sysfs_addrm_cxt {
-	struct sysfs_dirent	*parent_sd;
 	struct sysfs_dirent	*removed;
 };
 
@@ -156,38 +170,37 @@ extern struct kmem_cache *sysfs_dir_cachep;
  * dir.c
  */
 extern struct mutex sysfs_mutex;
-extern spinlock_t sysfs_assoc_lock;
+extern spinlock_t sysfs_symlink_target_lock;
 extern const struct dentry_operations sysfs_dentry_ops;
 
 extern const struct file_operations sysfs_dir_operations;
 extern const struct inode_operations sysfs_dir_inode_operations;
 
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
 struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
 void sysfs_put_active(struct sysfs_dirent *sd);
-void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
-		       struct sysfs_dirent *parent_sd);
-int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
-int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
-void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
+void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt);
+void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name);
+int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
+		    struct sysfs_dirent *parent_sd);
+int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
+		  struct sysfs_dirent *parent_sd);
+void sysfs_remove(struct sysfs_dirent *sd);
+int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
+			  const void *ns);
 void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
 
 struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
-				       const void *ns,
-				       const unsigned char *name);
-struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
-				      const void *ns,
-				      const unsigned char *name);
+				       const unsigned char *name,
+				       const void *ns);
 struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
 
 void release_sysfs_dirent(struct sysfs_dirent *sd);
 
 int sysfs_create_subdir(struct kobject *kobj, const char *name,
 			struct sysfs_dirent **p_sd);
-void sysfs_remove_subdir(struct sysfs_dirent *sd);
 
 int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
-		 const void *ns, const char *new_name);
+		 const char *new_name, const void *new_ns);
 
 static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
 {
@@ -218,25 +231,21 @@ int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		  struct kstat *stat);
 int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		   size_t size, int flags);
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns,
-			  const char *name);
 int sysfs_inode_init(void);
 
 /*
  * file.c
  */
 extern const struct file_operations sysfs_file_operations;
+extern const struct file_operations sysfs_bin_operations;
 
 int sysfs_add_file(struct sysfs_dirent *dir_sd,
 		   const struct attribute *attr, int type);
 
-int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
-			const struct attribute *attr, int type, umode_t amode);
-/*
- * bin.c
- */
-extern const struct file_operations bin_fops;
-void unmap_bin_file(struct sysfs_dirent *attr_sd);
+int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
+			   const struct attribute *attr, int type,
+			   umode_t amode, const void *ns);
+void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
 
 /*
  * symlink.c
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 0957aa98b6c0..12dad188939d 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -1158,7 +1158,7 @@ xfs_dir2_sf_to_block(
 	/*
 	 * Create entry for .
 	 */
-	dep = xfs_dir3_data_dot_entry_p(hdr);
+	dep = xfs_dir3_data_dot_entry_p(mp, hdr);
 	dep->inumber = cpu_to_be64(dp->i_ino);
 	dep->namelen = 1;
 	dep->name[0] = '.';
@@ -1172,7 +1172,7 @@ xfs_dir2_sf_to_block(
 	/*
 	 * Create entry for ..
 	 */
-	dep = xfs_dir3_data_dotdot_entry_p(hdr);
+	dep = xfs_dir3_data_dotdot_entry_p(mp, hdr);
 	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
 	dep->namelen = 2;
 	dep->name[0] = dep->name[1] = '.';
@@ -1183,7 +1183,7 @@ xfs_dir2_sf_to_block(
 	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
 	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
 				(char *)dep - (char *)hdr));
-	offset = xfs_dir3_data_first_offset(hdr);
+	offset = xfs_dir3_data_first_offset(mp);
 	/*
 	 * Loop over existing entries, stuff them in.
 	 */
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index a0961a61ac1a..9cf67381adf6 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -497,69 +497,58 @@ xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
 /*
  * Offsets of . and .. in data space (always block 0)
  *
- * The macros are used for shortform directories as they have no headers to read
- * the magic number out of. Shortform directories need to know the size of the
- * data block header because the sfe embeds the block offset of the entry into
- * it so that it doesn't change when format conversion occurs. Bad Things Happen
- * if we don't follow this rule.
- *
  * XXX: there is scope for significant optimisation of the logic here. Right
  * now we are checking for "dir3 format" over and over again. Ideally we should
  * only do it once for each operation.
  */
-#define	XFS_DIR3_DATA_DOT_OFFSET(mp)	\
-	xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb))
-#define	XFS_DIR3_DATA_DOTDOT_OFFSET(mp)	\
-	(XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 1))
-#define	XFS_DIR3_DATA_FIRST_OFFSET(mp)		\
-	(XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 2))
-
 static inline xfs_dir2_data_aoff_t
-xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_dot_offset(struct xfs_mount *mp)
 {
-	return xfs_dir3_data_entry_offset(hdr);
+	return xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
 }
 
 static inline xfs_dir2_data_aoff_t
-xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_dotdot_offset(struct xfs_mount *mp)
 {
-	bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
-	return xfs_dir3_data_dot_offset(hdr) +
-		__xfs_dir3_data_entsize(dir3, 1);
+	return xfs_dir3_data_dot_offset(mp) +
+		xfs_dir3_data_entsize(mp, 1);
 }
 
 static inline xfs_dir2_data_aoff_t
-xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_first_offset(struct xfs_mount *mp)
 {
-	bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
-	return xfs_dir3_data_dotdot_offset(hdr) +
-		__xfs_dir3_data_entsize(dir3, 2);
+	return xfs_dir3_data_dotdot_offset(mp) +
+		xfs_dir3_data_entsize(mp, 2);
 }
 
 /*
  * location of . and .. in data space (always block 0)
  */
 static inline struct xfs_dir2_data_entry *
-xfs_dir3_data_dot_entry_p(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_dot_entry_p(
+	struct xfs_mount	*mp,
+	struct xfs_dir2_data_hdr *hdr)
 {
 	return (struct xfs_dir2_data_entry *)
-		((char *)hdr + xfs_dir3_data_dot_offset(hdr));
+		((char *)hdr + xfs_dir3_data_dot_offset(mp));
 }
 
 static inline struct xfs_dir2_data_entry *
-xfs_dir3_data_dotdot_entry_p(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_dotdot_entry_p(
+	struct xfs_mount	*mp,
+	struct xfs_dir2_data_hdr *hdr)
 {
 	return (struct xfs_dir2_data_entry *)
-		((char *)hdr + xfs_dir3_data_dotdot_offset(hdr));
+		((char *)hdr + xfs_dir3_data_dotdot_offset(mp));
 }
 
 static inline struct xfs_dir2_data_entry *
-xfs_dir3_data_first_entry_p(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_first_entry_p(
+	struct xfs_mount	*mp,
+	struct xfs_dir2_data_hdr *hdr)
 {
 	return (struct xfs_dir2_data_entry *)
-		((char *)hdr + xfs_dir3_data_first_offset(hdr));
+		((char *)hdr + xfs_dir3_data_first_offset(mp));
 }
 
 /*
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 8993ec17452c..8f84153e98a8 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -119,9 +119,9 @@ xfs_dir2_sf_getdents(
 	 * mp->m_dirdatablk.
 	 */
 	dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-					     XFS_DIR3_DATA_DOT_OFFSET(mp));
+					     xfs_dir3_data_dot_offset(mp));
 	dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-						XFS_DIR3_DATA_DOTDOT_OFFSET(mp));
+						xfs_dir3_data_dotdot_offset(mp));
 
 	/*
 	 * Put . entry unless we're starting past it.
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index bb6e2848f473..3ef6d402084c 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -557,7 +557,7 @@ xfs_dir2_sf_addname_hard(
 	 * to insert the new entry.
 	 * If it's going to end up at the end then oldsfep will point there.
 	 */
-	for (offset = XFS_DIR3_DATA_FIRST_OFFSET(mp),
+	for (offset = xfs_dir3_data_first_offset(mp),
 	      oldsfep = xfs_dir2_sf_firstentry(oldsfp),
 	      add_datasize = xfs_dir3_data_entsize(mp, args->namelen),
 	      eof = (char *)oldsfep == &buf[old_isize];
@@ -640,7 +640,7 @@ xfs_dir2_sf_addname_pick(
 
 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
 	size = xfs_dir3_data_entsize(mp, args->namelen);
-	offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
+	offset = xfs_dir3_data_first_offset(mp);
 	sfep = xfs_dir2_sf_firstentry(sfp);
 	holefit = 0;
 	/*
@@ -713,7 +713,7 @@ xfs_dir2_sf_check(
 	mp = dp->i_mount;
 
 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
+	offset = xfs_dir3_data_first_offset(mp);
 	ino = xfs_dir2_sf_get_parent_ino(sfp);
 	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
 
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 71520e6e5d65..1ee776d477c3 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -64,7 +64,8 @@ int xfs_dqerror_mod = 33;
 struct kmem_zone		*xfs_qm_dqtrxzone;
 static struct kmem_zone		*xfs_qm_dqzone;
 
-static struct lock_class_key xfs_dquot_other_class;
+static struct lock_class_key xfs_dquot_group_class;
+static struct lock_class_key xfs_dquot_project_class;
 
 /*
  * This is called to free all the memory associated with a dquot
@@ -703,8 +704,20 @@ xfs_qm_dqread(
 	 * Make sure group quotas have a different lock class than user
 	 * quotas.
 	 */
-	if (!(type & XFS_DQ_USER))
-		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
+	switch (type) {
+	case XFS_DQ_USER:
+		/* uses the default lock class */
+		break;
+	case XFS_DQ_GROUP:
+		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class);
+		break;
+	case XFS_DQ_PROJ:
+		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class);
+		break;
+	default:
+		ASSERT(0);
+		break;
+	}
 
 	XFS_STATS_INC(xs_qm_dquot);
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index cc179878fe41..39797490a1f1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1585,6 +1585,7 @@ xlog_recover_add_to_trans(
 		"bad number of regions (%d) in inode log format",
 				  in_f->ilf_size);
 			ASSERT(0);
+			kmem_free(ptr);
 			return XFS_ERROR(EIO);
 		}