248 files changed, 6347 insertions, 4610 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c
index e847f504a47c..1a6d08761f39 100644
--- a/fs/9p/9p.c
+++ b/fs/9p/9p.c
@@ -1,8 +1,9 @@
 /*
  *  linux/fs/9p/9p.c
  *
- *  This file contains functions 9P2000 functions
+ *  This file contains functions to perform synchronous 9P calls
  *
+ *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
@@ -33,6 +34,7 @@
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
+#include "conv.h"
 #include "mux.h"
 
 /**
@@ -46,16 +48,21 @@
 
 int
 v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
-	       char *version, struct v9fs_fcall **fcall)
+	       char *version, struct v9fs_fcall **rcp)
 {
-	struct v9fs_fcall msg;
+	int ret;
+	struct v9fs_fcall *tc;
 
 	dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
-	msg.id = TVERSION;
-	msg.params.tversion.msize = msize;
-	msg.params.tversion.version = version;
+	tc = v9fs_create_tversion(msize, version);
 
-	return v9fs_mux_rpc(v9ses, &msg, fcall);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
 }
 
 /**
@@ -71,19 +78,45 @@ v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
 
 int
 v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
-	      u32 fid, u32 afid, struct v9fs_fcall **fcall)
+	      u32 fid, u32 afid, struct v9fs_fcall **rcp)
 {
-	struct v9fs_fcall msg;
+	int ret;
+	struct v9fs_fcall* tc;
 
 	dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
 		aname, fid, afid);
-	msg.id = TATTACH;
-	msg.params.tattach.fid = fid;
-	msg.params.tattach.afid = afid;
-	msg.params.tattach.uname = uname;
-	msg.params.tattach.aname = aname;
 
-	return v9fs_mux_rpc(v9ses, &msg, fcall);
+	tc = v9fs_create_tattach(fid, afid, uname, aname);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
+static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
+	struct v9fs_fcall *rc, int err)
+{
+	int fid;
+	struct v9fs_session_info *v9ses;
+
+	if (err)
+		return;
+
+	fid = tc->params.tclunk.fid;
+	kfree(tc);
+
+	if (!rc)
+		return;
+
+	dprintk(DEBUG_9P, "tcall id %d rcall id %d\n", tc->id, rc->id);
+	v9ses = a;
+	if (rc->id == RCLUNK)
+		v9fs_put_idpool(fid, &v9ses->fidpool);
+
+	kfree(rc);
 }
 
 /**
@@ -95,16 +128,25 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
  */
 
 int
-v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
-	     struct v9fs_fcall **fcall)
+v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
 {
-	struct v9fs_fcall msg;
+	int ret;
+	struct v9fs_fcall *tc, *rc;
 
 	dprintk(DEBUG_9P, "fid %d\n", fid);
-	msg.id = TCLUNK;
-	msg.params.tclunk.fid = fid;
 
-	return v9fs_mux_rpc(v9ses, &msg, fcall);
+	rc = NULL;
+	tc = v9fs_create_tclunk(fid);
+	if (!IS_ERR(tc))
+		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
+	else
+		ret = PTR_ERR(tc);
+
+	if (ret)
+		dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
+
+	v9fs_t_clunk_cb(v9ses, tc, rc, ret);
+	return ret;
 }
 
 /**
@@ -114,14 +156,21 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
  *
  */
 
-int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
 {
-	struct v9fs_fcall msg;
+	int ret;
+	struct v9fs_fcall *tc;
+
+	dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
+
+	tc = v9fs_create_tflush(oldtag);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
 
-	dprintk(DEBUG_9P, "oldtag %d\n", tag);
-	msg.id = TFLUSH;
-	msg.params.tflush.oldtag = tag;
-	return v9fs_mux_rpc(v9ses, &msg, NULL);
+	return ret;
 }
 
 /**
@@ -133,17 +182,22 @@ int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
  */
 
 int
-v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
+v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
 {
-	struct v9fs_fcall msg;
+	int ret;
+	struct v9fs_fcall *tc;
 
 	dprintk(DEBUG_9P, "fid %d\n", fid);
-	if (fcall)
-		*fcall = NULL;
 
-	msg.id = TSTAT;
-	msg.params.tstat.fid = fid;
-	return v9fs_mux_rpc(v9ses, &msg, fcall);
+	ret = -ENOMEM;
+	tc = v9fs_create_tstat(fid);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
 }
 
 /**
@@ -157,16 +211,21 @@ v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
 
 int
 v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-	     struct v9fs_stat *stat, struct v9fs_fcall **fcall)
+	     struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
 {
-	struct v9fs_fcall msg;
+	int ret;
+	struct v9fs_fcall *tc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
 
-	dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length);
-	msg.id = TWSTAT;
-	msg.params.twstat.fid = fid;
-	msg.params.twstat.stat = stat;
+	tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
 
-	return v9fs_mux_rpc(v9ses, &msg, fcall);
+	return ret;
 }
 
 /**
@@ -183,23 +242,27 @@ v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
 
 int
 v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
-	    char *name, struct v9fs_fcall **fcall)
+	    char *name, struct v9fs_fcall **rcp)
 {
-	struct v9fs_fcall msg;
+	int ret;
+	struct v9fs_fcall *tc;
+	int nwname;
 
 	dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
-	msg.id = TWALK;
-	msg.params.twalk.fid = fid;
-	msg.params.twalk.newfid = newfid;
-
-	if (name) {
-		msg.params.twalk.nwname = 1;
-		msg.params.twalk.wnames = &name;
-	} else {
-		msg.params.twalk.nwname = 0;
-	}
-
-	return v9fs_mux_rpc(v9ses, &msg, fcall);
+
+	if (name)
+		nwname = 1;
+	else
+		nwname = 0;
+
+	tc = v9fs_create_twalk(fid, newfid, nwname, &name);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
 }
 
 /**
@@ -214,19 +277,21 @@ v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
 
 int
 v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
-	    struct v9fs_fcall **fcall)
+	    struct v9fs_fcall **rcp)
 {
-	struct v9fs_fcall msg;
-	long errorno = -1;
+	int ret;
+	struct v9fs_fcall *tc;
 
 	dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
-	msg.id = TOPEN;
-	msg.params.topen.fid = fid;
-	msg.params.topen.mode = mode;
 
-	errorno = v9fs_mux_rpc(v9ses, &msg, fcall);
+	tc = v9fs_create_topen(fid, mode);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
 
-	return errorno;
+	return ret;
 }
 
 /**
@@ -239,14 +304,21 @@ v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
 
 int
 v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
-	      struct v9fs_fcall **fcall)
+	      struct v9fs_fcall **rcp)
 {
-	struct v9fs_fcall msg;
+	int ret;
+	struct v9fs_fcall *tc;
 
 	dprintk(DEBUG_9P, "fid %d\n", fid);
-	msg.id = TREMOVE;
-	msg.params.tremove.fid = fid;
-	return v9fs_mux_rpc(v9ses, &msg, fcall);
+
+	tc = v9fs_create_tremove(fid);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
 }
 
 /**
@@ -262,20 +334,22 @@ v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
 
 int
 v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
-	      u32 perm, u8 mode, struct v9fs_fcall **fcall)
+	      u32 perm, u8 mode, struct v9fs_fcall **rcp)
 {
-	struct v9fs_fcall msg;
+	int ret;
+	struct v9fs_fcall *tc;
 
 	dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
 		fid, name, perm, mode);
 
-	msg.id = TCREATE;
-	msg.params.tcreate.fid = fid;
-	msg.params.tcreate.name = name;
-	msg.params.tcreate.perm = perm;
-	msg.params.tcreate.mode = mode;
+	tc = v9fs_create_tcreate(fid, name, perm, mode);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
 
-	return v9fs_mux_rpc(v9ses, &msg, fcall);
+	return ret;
 }
 
 /**
@@ -290,31 +364,29 @@ v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
 
 int
 v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-	    u32 count, struct v9fs_fcall **fcall)
+	    u32 count, struct v9fs_fcall **rcp)
 {
-	struct v9fs_fcall msg;
-	struct v9fs_fcall *rc = NULL;
-	long errorno = -1;
-
-	dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid,
-		(long unsigned int)offset, count);
-	msg.id = TREAD;
-	msg.params.tread.fid = fid;
-	msg.params.tread.offset = offset;
-	msg.params.tread.count = count;
-	errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
-
-	if (!errorno) {
-		errorno = rc->params.rread.count;
-		dump_data(rc->params.rread.data, rc->params.rread.count);
-	}
-
-	if (fcall)
-		*fcall = rc;
-	else
-		kfree(rc);
-
-	return errorno;
+	int ret;
+	struct v9fs_fcall *tc, *rc;
+
+	dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
+		(long long unsigned) offset, count);
+
+	tc = v9fs_create_tread(fid, offset, count);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
+		if (!ret)
+			ret = rc->params.rread.count;
+		if (rcp)
+			*rcp = rc;
+		else
+			kfree(rc);
+
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
 }
 
 /**
@@ -328,32 +400,30 @@ v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
  */
 
 int
-v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid,
-	     u64 offset, u32 count, void *data, struct v9fs_fcall **fcall)
+v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
+	const char __user *data, struct v9fs_fcall **rcp)
 {
-	struct v9fs_fcall msg;
-	struct v9fs_fcall *rc = NULL;
-	long errorno = -1;
+	int ret;
+	struct v9fs_fcall *tc, *rc;
 
-	dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
-		(unsigned long long)offset, count);
-	dump_data(data, count);
+	dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
+		(long long unsigned) offset, count);
 
-	msg.id = TWRITE;
-	msg.params.twrite.fid = fid;
-	msg.params.twrite.offset = offset;
-	msg.params.twrite.count = count;
-	msg.params.twrite.data = data;
+	tc = v9fs_create_twrite(fid, offset, count, data);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
 
-	errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
+		if (!ret)
+			ret = rc->params.rwrite.count;
+		if (rcp)
+			*rcp = rc;
+		else
+			kfree(rc);
 
-	if (!errorno)
-		errorno = rc->params.rwrite.count;
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
 
-	if (fcall)
-		*fcall = rc;
-	else
-		kfree(rc);
-
-	return errorno;
+	return ret;
 }
+
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
index f55424216be2..0cd374d94717 100644
--- a/fs/9p/9p.h
+++ b/fs/9p/9p.h
@@ -3,6 +3,7 @@
  *
  * 9P protocol definitions.
  *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
@@ -100,9 +101,18 @@ enum {
 	V9FS_QTFILE = 0x00,
 };
 
+#define V9FS_NOTAG	(u16)(~0)
+#define V9FS_NOFID	(u32)(~0)
+#define V9FS_MAXWELEM	16
+
 /* ample room for Twrite/Rread header (iounit) */
 #define V9FS_IOHDRSZ	24
 
+struct v9fs_str {
+	u16 len;
+	char *str;
+};
+
 /* qids are the unique ID for a file (like an inode */
 struct v9fs_qid {
 	u8 type;
@@ -120,6 +130,29 @@ struct v9fs_stat {
 	u32 atime;
 	u32 mtime;
 	u64 length;
+	struct v9fs_str name;
+	struct v9fs_str uid;
+	struct v9fs_str gid;
+	struct v9fs_str muid;
+	struct v9fs_str extension;	/* 9p2000.u extensions */
+	u32 n_uid;		/* 9p2000.u extensions */
+	u32 n_gid;		/* 9p2000.u extensions */
+	u32 n_muid;		/* 9p2000.u extensions */
+};
+
+/* file metadata (stat) structure used to create Twstat message
+   The is similar to v9fs_stat, but the strings don't point to
+   the same memory block and should be freed separately
+*/
+struct v9fs_wstat {
+	u16 size;
+	u16 type;
+	u32 dev;
+	struct v9fs_qid qid;
+	u32 mode;
+	u32 atime;
+	u32 mtime;
+	u64 length;
 	char *name;
 	char *uid;
 	char *gid;
@@ -128,25 +161,24 @@ struct v9fs_stat {
 	u32 n_uid;		/* 9p2000.u extensions */
 	u32 n_gid;		/* 9p2000.u extensions */
 	u32 n_muid;		/* 9p2000.u extensions */
-	char data[0];
 };
 
 /* Structures for Protocol Operations */
 
 struct Tversion {
 	u32 msize;
-	char *version;
+	struct v9fs_str version;
 };
 
 struct Rversion {
 	u32 msize;
-	char *version;
+	struct v9fs_str version;
 };
 
 struct Tauth {
 	u32 afid;
-	char *uname;
-	char *aname;
+	struct v9fs_str uname;
+	struct v9fs_str aname;
 };
 
 struct Rauth {
@@ -154,12 +186,12 @@ struct Rauth {
 };
 
 struct Rerror {
-	char *error;
+	struct v9fs_str error;
 	u32 errno;		/* 9p2000.u extension */
 };
 
 struct Tflush {
-	u32 oldtag;
+	u16 oldtag;
 };
 
 struct Rflush {
@@ -168,8 +200,8 @@ struct Rflush {
 struct Tattach {
 	u32 fid;
 	u32 afid;
-	char *uname;
-	char *aname;
+	struct v9fs_str uname;
+	struct v9fs_str aname;
 };
 
 struct Rattach {
@@ -179,13 +211,13 @@ struct Rattach {
 struct Twalk {
 	u32 fid;
 	u32 newfid;
-	u32 nwname;
-	char **wnames;
+	u16 nwname;
+	struct v9fs_str wnames[16];
 };
 
 struct Rwalk {
-	u32 nwqid;
-	struct v9fs_qid *wqids;
+	u16 nwqid;
+	struct v9fs_qid wqids[16];
 };
 
 struct Topen {
@@ -200,7 +232,7 @@ struct Ropen {
 
 struct Tcreate {
 	u32 fid;
-	char *name;
+	struct v9fs_str name;
 	u32 perm;
 	u8 mode;
 };
@@ -251,12 +283,12 @@ struct Tstat {
 };
 
 struct Rstat {
-	struct v9fs_stat *stat;
+	struct v9fs_stat stat;
 };
 
 struct Twstat {
 	u32 fid;
-	struct v9fs_stat *stat;
+	struct v9fs_stat stat;
 };
 
 struct Rwstat {
@@ -271,6 +303,7 @@ struct v9fs_fcall {
 	u32 size;
 	u8 id;
 	u16 tag;
+	void *sdata;
 
 	union {
 		struct Tversion tversion;
@@ -303,7 +336,9 @@ struct v9fs_fcall {
 	} params;
 };
 
-#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "")
+#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \
+	fcall?fcall->params.rerror.error.len:0, \
+	fcall?fcall->params.rerror.error.str:"");
 
 int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
 		   char *version, struct v9fs_fcall **rcall);
@@ -311,8 +346,7 @@ int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
 int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
 		  u32 fid, u32 afid, struct v9fs_fcall **rcall);
 
-int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
-		 struct v9fs_fcall **rcall);
+int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid);
 
 int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
 
@@ -320,7 +354,7 @@ int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
 		struct v9fs_fcall **rcall);
 
 int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-		 struct v9fs_stat *stat, struct v9fs_fcall **rcall);
+		 struct v9fs_wstat *wstat, struct v9fs_fcall **rcall);
 
 int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
 		char *name, struct v9fs_fcall **rcall);
@@ -338,4 +372,5 @@ int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
 		u64 offset, u32 count, struct v9fs_fcall **rcall);
 
 int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-		 u32 count, void *data, struct v9fs_fcall **rcall);
+		 u32 count, const char __user * data,
+		 struct v9fs_fcall **rcall);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index e4e4ffe5a7dc..3d023089707e 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -1,17 +1,17 @@
 obj-$(CONFIG_9P_FS) := 9p2000.o
 
 9p2000-objs := \
+	trans_fd.o \
+	trans_sock.o \
+	mux.o \
+	9p.o \
+	conv.o \
 	vfs_super.o \
 	vfs_inode.o \
 	vfs_file.o \
 	vfs_dir.o \
 	vfs_dentry.o \
 	error.o \
-	mux.o \
-	trans_fd.o \
-	trans_sock.o \
-	9p.o \
-	conv.o \
 	v9fs.o \
 	fid.o
 
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index 18121af99d3e..55ccfa10ee9e 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -30,7 +30,7 @@
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/idr.h>
-
+#include <asm/uaccess.h>
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
@@ -58,12 +58,15 @@ static inline int buf_check_overflow(struct cbuf *buf)
 
 static inline int buf_check_size(struct cbuf *buf, int len)
 {
-	if (buf->p+len > buf->ep) {
+	if (buf->p + len > buf->ep) {
 		if (buf->p < buf->ep) {
-			eprintk(KERN_ERR, "buffer overflow\n");
+			eprintk(KERN_ERR, "buffer overflow: want %d has %d\n",
+				len, (int)(buf->ep - buf->p));
+			dump_stack();
 			buf->p = buf->ep + 1;
-			return 0;
 		}
+
+		return 0;
 	}
 
 	return 1;
@@ -127,14 +130,6 @@ static inline void buf_put_string(struct cbuf *buf, const char *s)
 	buf_put_stringn(buf, s, strlen(s));
 }
 
-static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
-{
-	if (buf_check_size(buf, datalen)) {
-		memcpy(buf->p, data, datalen);
-		buf->p += datalen;
-	}
-}
-
 static inline u8 buf_get_int8(struct cbuf *buf)
 {
 	u8 ret = 0;
@@ -183,86 +178,37 @@ static inline u64 buf_get_int64(struct cbuf *buf)
 	return ret;
 }
 
-static inline int
-buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
-{
-	u16 len = 0;
-
-	len = buf_get_int16(buf);
-	if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) {
-		memcpy(data, buf->p, len);
-		data[len] = 0;
-		buf->p += len;
-		len++;
-	}
-
-	return len;
-}
-
-static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
-{
-	char *ret;
-	u16 len;
-
-	ret = NULL;
-	len = buf_get_int16(buf);
-
-	if (!buf_check_overflow(buf) && buf_check_size(buf, len) &&
-		buf_check_size(sbuf, len+1)) {
-
-		memcpy(sbuf->p, buf->p, len);
-		sbuf->p[len] = 0;
-		ret = sbuf->p;
-		buf->p += len;
-		sbuf->p += len + 1;
-	}
-
-	return ret;
-}
-
-static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
+static inline void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr)
 {
-	int ret = 0;
-
-	if (buf_check_size(buf, datalen)) {
-		memcpy(data, buf->p, datalen);
-		buf->p += datalen;
-		ret = datalen;
+	vstr->len = buf_get_int16(buf);
+	if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
+		vstr->str = buf->p;
+		buf->p += vstr->len;
+	} else {
+		vstr->len = 0;
+		vstr->str = NULL;
 	}
-
-	return ret;
 }
 
-static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
-				  int datalen)
+static inline void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid)
 {
-	char *ret = NULL;
-	int n = 0;
-
-	if (buf_check_size(dbuf, datalen)) {
-		n = buf_get_data(buf, dbuf->p, datalen);
-		if (n > 0) {
-			ret = dbuf->p;
-			dbuf->p += n;
-		}
-	}
-
-	return ret;
+	qid->type = buf_get_int8(bufp);
+	qid->version = buf_get_int32(bufp);
+	qid->path = buf_get_int64(bufp);
 }
 
 /**
- * v9fs_size_stat - calculate the size of a variable length stat struct
- * @v9ses: session information
+ * v9fs_size_wstat - calculate the size of a variable length stat struct
  * @stat: metadata (stat) structure
+ * @extended: non-zero if 9P2000.u
  *
  */
 
-static int v9fs_size_stat(struct v9fs_session_info *v9ses,
-			  struct v9fs_stat *stat)
+static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended)
 {
 	int size = 0;
 
-	if (stat == NULL) {
+	if (wstat == NULL) {
 		eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
 		return 0;
 	}
@@ -279,82 +225,38 @@ static int v9fs_size_stat(struct v9fs_session_info *v9ses,
 	    8 +			/* length[8] */
 	    8;			/* minimum sum of string lengths */
 
-	if (stat->name)
-		size += strlen(stat->name);
-	if (stat->uid)
-		size += strlen(stat->uid);
-	if (stat->gid)
-		size += strlen(stat->gid);
-	if (stat->muid)
-		size += strlen(stat->muid);
+	if (wstat->name)
+		size += strlen(wstat->name);
+	if (wstat->uid)
+		size += strlen(wstat->uid);
+	if (wstat->gid)
+		size += strlen(wstat->gid);
+	if (wstat->muid)
+		size += strlen(wstat->muid);
 
-	if (v9ses->extended) {
+	if (extended) {
 		size += 4 +	/* n_uid[4] */
 		    4 +		/* n_gid[4] */
 		    4 +		/* n_muid[4] */
 		    2;		/* string length of extension[4] */
-		if (stat->extension)
-			size += strlen(stat->extension);
+		if (wstat->extension)
+			size += strlen(wstat->extension);
 	}
 
 	return size;
 }
 
 /**
- * serialize_stat - safely format a stat structure for transmission
- * @v9ses: session info
- * @stat: metadata (stat) structure
- * @bufp: buffer to serialize structure into
- *
- */
-
-static int
-serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
-	       struct cbuf *bufp)
-{
-	buf_put_int16(bufp, stat->size);
-	buf_put_int16(bufp, stat->type);
-	buf_put_int32(bufp, stat->dev);
-	buf_put_int8(bufp, stat->qid.type);
-	buf_put_int32(bufp, stat->qid.version);
-	buf_put_int64(bufp, stat->qid.path);
-	buf_put_int32(bufp, stat->mode);
-	buf_put_int32(bufp, stat->atime);
-	buf_put_int32(bufp, stat->mtime);
-	buf_put_int64(bufp, stat->length);
-
-	buf_put_string(bufp, stat->name);
-	buf_put_string(bufp, stat->uid);
-	buf_put_string(bufp, stat->gid);
-	buf_put_string(bufp, stat->muid);
-
-	if (v9ses->extended) {
-		buf_put_string(bufp, stat->extension);
-		buf_put_int32(bufp, stat->n_uid);
-		buf_put_int32(bufp, stat->n_gid);
-		buf_put_int32(bufp, stat->n_muid);
-	}
-
-	if (buf_check_overflow(bufp))
-		return 0;
-
-	return stat->size;
-}
-
-/**
- * deserialize_stat - safely decode a recieved metadata (stat) structure
- * @v9ses: session info
+ * buf_get_stat - safely decode a recieved metadata (stat) structure
  * @bufp: buffer to deserialize
  * @stat: metadata (stat) structure
- * @dbufp: buffer to deserialize variable strings into
+ * @extended: non-zero if 9P2000.u
  *
  */
 
-static inline int
-deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
-		 struct v9fs_stat *stat, struct cbuf *dbufp)
+static inline void
+buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended)
 {
-
 	stat->size = buf_get_int16(bufp);
 	stat->type = buf_get_int16(bufp);
 	stat->dev = buf_get_int32(bufp);
@@ -365,282 +267,82 @@ deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
 	stat->atime = buf_get_int32(bufp);
 	stat->mtime = buf_get_int32(bufp);
 	stat->length = buf_get_int64(bufp);
-	stat->name = buf_get_stringb(bufp, dbufp);
-	stat->uid = buf_get_stringb(bufp, dbufp);
-	stat->gid = buf_get_stringb(bufp, dbufp);
-	stat->muid = buf_get_stringb(bufp, dbufp);
+	buf_get_str(bufp, &stat->name);
+	buf_get_str(bufp, &stat->uid);
+	buf_get_str(bufp, &stat->gid);
+	buf_get_str(bufp, &stat->muid);
 
-	if (v9ses->extended) {
-		stat->extension = buf_get_stringb(bufp, dbufp);
+	if (extended) {
+		buf_get_str(bufp, &stat->extension);
 		stat->n_uid = buf_get_int32(bufp);
 		stat->n_gid = buf_get_int32(bufp);
 		stat->n_muid = buf_get_int32(bufp);
 	}
-
-	if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
-		return 0;
-
-	return stat->size + 2;
-}
-
-/**
- * deserialize_statb - wrapper for decoding a received metadata structure
- * @v9ses: session info
- * @bufp: buffer to deserialize
- * @dbufp: buffer to deserialize variable strings into
- *
- */
-
-static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
-						  *v9ses, struct cbuf *bufp,
-						  struct cbuf *dbufp)
-{
-	struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat));
-
-	if (ret) {
-		int n = deserialize_stat(v9ses, bufp, ret, dbufp);
-		if (n <= 0)
-			return NULL;
-	}
-
-	return ret;
 }
 
 /**
  * v9fs_deserialize_stat - decode a received metadata structure
- * @v9ses: session info
  * @buf: buffer to deserialize
  * @buflen: length of received buffer
  * @stat: metadata structure to decode into
- * @statlen: length of destination metadata structure
+ * @extended: non-zero if 9P2000.u
  *
+ * Note: stat will point to the buf region.
  */
 
 int
-v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf,
-		      u32 buflen, struct v9fs_stat *stat, u32 statlen)
+v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
+		int extended)
 {
 	struct cbuf buffer;
 	struct cbuf *bufp = &buffer;
-	struct cbuf dbuffer;
-	struct cbuf *dbufp = &dbuffer;
+	unsigned char *p;
 
 	buf_init(bufp, buf, buflen);
-	buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat),
-		 statlen - sizeof(struct v9fs_stat));
-
-	return deserialize_stat(v9ses, bufp, stat, dbufp);
-}
-
-static inline int
-v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
-{
-	int size = 4 + 1 + 2;	/* size[4] msg[1] tag[2] */
-	int i = 0;
-
-	switch (fcall->id) {
-	default:
-		eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
-		return 0;
-	case TVERSION:		/* msize[4] version[s] */
-		size += 4 + 2 + strlen(fcall->params.tversion.version);
-		break;
-	case TAUTH:		/* afid[4] uname[s] aname[s] */
-		size += 4 + 2 + strlen(fcall->params.tauth.uname) +
-		    2 + strlen(fcall->params.tauth.aname);
-		break;
-	case TFLUSH:		/* oldtag[2] */
-		size += 2;
-		break;
-	case TATTACH:		/* fid[4] afid[4] uname[s] aname[s] */
-		size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
-		    2 + strlen(fcall->params.tattach.aname);
-		break;
-	case TWALK:		/* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
-		size += 4 + 4 + 2;
-		/* now compute total for the array of names */
-		for (i = 0; i < fcall->params.twalk.nwname; i++)
-			size += 2 + strlen(fcall->params.twalk.wnames[i]);
-		break;
-	case TOPEN:		/* fid[4] mode[1] */
-		size += 4 + 1;
-		break;
-	case TCREATE:		/* fid[4] name[s] perm[4] mode[1] */
-		size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
-		break;
-	case TREAD:		/* fid[4] offset[8] count[4] */
-		size += 4 + 8 + 4;
-		break;
-	case TWRITE:		/* fid[4] offset[8] count[4] data[count] */
-		size += 4 + 8 + 4 + fcall->params.twrite.count;
-		break;
-	case TCLUNK:		/* fid[4] */
-		size += 4;
-		break;
-	case TREMOVE:		/* fid[4] */
-		size += 4;
-		break;
-	case TSTAT:		/* fid[4] */
-		size += 4;
-		break;
-	case TWSTAT:		/* fid[4] stat[n] */
-		fcall->params.twstat.stat->size =
-		    v9fs_size_stat(v9ses, fcall->params.twstat.stat);
-		size += 4 + 2 + 2 + fcall->params.twstat.stat->size;
-	}
-	return size;
-}
-
-/*
- * v9fs_serialize_fcall - marshall fcall struct into a packet
- * @v9ses: session information
- * @fcall: structure to convert
- * @data: buffer to serialize fcall into
- * @datalen: length of buffer to serialize fcall into
- *
- */
-
-int
-v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
-		     void *data, u32 datalen)
-{
-	int i = 0;
-	struct v9fs_stat *stat = NULL;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	buf_init(bufp, data, datalen);
-
-	if (!fcall) {
-		eprintk(KERN_ERR, "no fcall\n");
-		return -EINVAL;
-	}
-
-	fcall->size = v9fs_size_fcall(v9ses, fcall);
-
-	buf_put_int32(bufp, fcall->size);
-	buf_put_int8(bufp, fcall->id);
-	buf_put_int16(bufp, fcall->tag);
-
-	dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
-		fcall->tag);
-
-	/* now encode it */
-	switch (fcall->id) {
-	default:
-		eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
-		return -EPROTO;
-	case TVERSION:
-		buf_put_int32(bufp, fcall->params.tversion.msize);
-		buf_put_string(bufp, fcall->params.tversion.version);
-		break;
-	case TAUTH:
-		buf_put_int32(bufp, fcall->params.tauth.afid);
-		buf_put_string(bufp, fcall->params.tauth.uname);
-		buf_put_string(bufp, fcall->params.tauth.aname);
-		break;
-	case TFLUSH:
-		buf_put_int16(bufp, fcall->params.tflush.oldtag);
-		break;
-	case TATTACH:
-		buf_put_int32(bufp, fcall->params.tattach.fid);
-		buf_put_int32(bufp, fcall->params.tattach.afid);
-		buf_put_string(bufp, fcall->params.tattach.uname);
-		buf_put_string(bufp, fcall->params.tattach.aname);
-		break;
-	case TWALK:
-		buf_put_int32(bufp, fcall->params.twalk.fid);
-		buf_put_int32(bufp, fcall->params.twalk.newfid);
-		buf_put_int16(bufp, fcall->params.twalk.nwname);
-		for (i = 0; i < fcall->params.twalk.nwname; i++)
-			buf_put_string(bufp, fcall->params.twalk.wnames[i]);
-		break;
-	case TOPEN:
-		buf_put_int32(bufp, fcall->params.topen.fid);
-		buf_put_int8(bufp, fcall->params.topen.mode);
-		break;
-	case TCREATE:
-		buf_put_int32(bufp, fcall->params.tcreate.fid);
-		buf_put_string(bufp, fcall->params.tcreate.name);
-		buf_put_int32(bufp, fcall->params.tcreate.perm);
-		buf_put_int8(bufp, fcall->params.tcreate.mode);
-		break;
-	case TREAD:
-		buf_put_int32(bufp, fcall->params.tread.fid);
-		buf_put_int64(bufp, fcall->params.tread.offset);
-		buf_put_int32(bufp, fcall->params.tread.count);
-		break;
-	case TWRITE:
-		buf_put_int32(bufp, fcall->params.twrite.fid);
-		buf_put_int64(bufp, fcall->params.twrite.offset);
-		buf_put_int32(bufp, fcall->params.twrite.count);
-		buf_put_data(bufp, fcall->params.twrite.data,
-			     fcall->params.twrite.count);
-		break;
-	case TCLUNK:
-		buf_put_int32(bufp, fcall->params.tclunk.fid);
-		break;
-	case TREMOVE:
-		buf_put_int32(bufp, fcall->params.tremove.fid);
-		break;
-	case TSTAT:
-		buf_put_int32(bufp, fcall->params.tstat.fid);
-		break;
-	case TWSTAT:
-		buf_put_int32(bufp, fcall->params.twstat.fid);
-		stat = fcall->params.twstat.stat;
-
-		buf_put_int16(bufp, stat->size + 2);
-		serialize_stat(v9ses, stat, bufp);
-		break;
-	}
+	p = bufp->p;
+	buf_get_stat(bufp, stat, extended);
 
 	if (buf_check_overflow(bufp))
-		return -EIO;
-
-	return fcall->size;
+		return 0;
+	else
+		return bufp->p - p;
 }
 
 /**
  * deserialize_fcall - unmarshal a response
- * @v9ses: session information
- * @msgsize: size of rcall message
  * @buf: recieved buffer
  * @buflen: length of received buffer
  * @rcall: fcall structure to populate
  * @rcalllen: length of fcall structure to populate
+ * @extended: non-zero if 9P2000.u
  *
  */
 
 int
-v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
-		       void *buf, u32 buflen, struct v9fs_fcall *rcall,
-		       int rcalllen)
+v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
+		       int extended)
 {
 
 	struct cbuf buffer;
 	struct cbuf *bufp = &buffer;
-	struct cbuf dbuffer;
-	struct cbuf *dbufp = &dbuffer;
 	int i = 0;
 
 	buf_init(bufp, buf, buflen);
-	buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
-		 rcalllen - sizeof(struct v9fs_fcall));
 
-	rcall->size = msgsize;
+	rcall->size = buf_get_int32(bufp);
 	rcall->id = buf_get_int8(bufp);
 	rcall->tag = buf_get_int16(bufp);
 
 	dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
 		rcall->tag);
+
 	switch (rcall->id) {
 	default:
 		eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
 		return -EPROTO;
 	case RVERSION:
 		rcall->params.rversion.msize = buf_get_int32(bufp);
-		rcall->params.rversion.version = buf_get_stringb(bufp, dbufp);
+		buf_get_str(bufp, &rcall->params.rversion.version);
 		break;
 	case RFLUSH:
 		break;
@@ -651,34 +353,27 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
 		break;
 	case RWALK:
 		rcall->params.rwalk.nwqid = buf_get_int16(bufp);
-		rcall->params.rwalk.wqids = buf_alloc(dbufp,
-		      rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
-		if (rcall->params.rwalk.wqids)
-			for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
-				rcall->params.rwalk.wqids[i].type =
-				    buf_get_int8(bufp);
-				rcall->params.rwalk.wqids[i].version =
-				    buf_get_int16(bufp);
-				rcall->params.rwalk.wqids[i].path =
-				    buf_get_int64(bufp);
-			}
+		if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) {
+			eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n",
+				V9FS_MAXWELEM, rcall->params.rwalk.nwqid);
+			return -EPROTO;
+		}
+
+		for (i = 0; i < rcall->params.rwalk.nwqid; i++)
+			buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
 		break;
 	case ROPEN:
-		rcall->params.ropen.qid.type = buf_get_int8(bufp);
-		rcall->params.ropen.qid.version = buf_get_int32(bufp);
-		rcall->params.ropen.qid.path = buf_get_int64(bufp);
+		buf_get_qid(bufp, &rcall->params.ropen.qid);
 		rcall->params.ropen.iounit = buf_get_int32(bufp);
 		break;
 	case RCREATE:
-		rcall->params.rcreate.qid.type = buf_get_int8(bufp);
-		rcall->params.rcreate.qid.version = buf_get_int32(bufp);
-		rcall->params.rcreate.qid.path = buf_get_int64(bufp);
+		buf_get_qid(bufp, &rcall->params.rcreate.qid);
 		rcall->params.rcreate.iounit = buf_get_int32(bufp);
 		break;
 	case RREAD:
 		rcall->params.rread.count = buf_get_int32(bufp);
-		rcall->params.rread.data = buf_get_datab(bufp, dbufp,
-			rcall->params.rread.count);
+		rcall->params.rread.data = bufp->p;
+		buf_check_size(bufp, rcall->params.rread.count);
 		break;
 	case RWRITE:
 		rcall->params.rwrite.count = buf_get_int32(bufp);
@@ -689,20 +384,443 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
 		break;
 	case RSTAT:
 		buf_get_int16(bufp);
-		rcall->params.rstat.stat =
-		    deserialize_statb(v9ses, bufp, dbufp);
+		buf_get_stat(bufp, &rcall->params.rstat.stat, extended);
 		break;
 	case RWSTAT:
 		break;
 	case RERROR:
-		rcall->params.rerror.error = buf_get_stringb(bufp, dbufp);
-		if (v9ses->extended)
+		buf_get_str(bufp, &rcall->params.rerror.error);
+		if (extended)
 			rcall->params.rerror.errno = buf_get_int16(bufp);
 		break;
 	}
 
-	if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
+	if (buf_check_overflow(bufp)) {
+		dprintk(DEBUG_ERROR, "buffer overflow\n");
 		return -EIO;
+	}
+
+	return bufp->p - bufp->sp;
+}
+
+static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p)
+{
+	*p = val;
+	buf_put_int8(bufp, val);
+}
+
+static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p)
+{
+	*p = val;
+	buf_put_int16(bufp, val);
+}
+
+static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p)
+{
+	*p = val;
+	buf_put_int32(bufp, val);
+}
+
+static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p)
+{
+	*p = val;
+	buf_put_int64(bufp, val);
+}
 
-	return rcall->size;
+static inline void
+v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str)
+{
+	if (data) {
+		str->len = strlen(data);
+		str->str = bufp->p;
+	} else {
+		str->len = 0;
+		str->str = NULL;
+	}
+
+	buf_put_stringn(bufp, data, str->len);
+}
+
+static inline int
+v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count,
+		   unsigned char **pdata)
+{
+	*pdata = buf_alloc(bufp, count);
+	return copy_from_user(*pdata, data, count);
+}
+
+static void
+v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat,
+	       struct v9fs_stat *stat, int statsz, int extended)
+{
+	v9fs_put_int16(bufp, statsz, &stat->size);
+	v9fs_put_int16(bufp, wstat->type, &stat->type);
+	v9fs_put_int32(bufp, wstat->dev, &stat->dev);
+	v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type);
+	v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version);
+	v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path);
+	v9fs_put_int32(bufp, wstat->mode, &stat->mode);
+	v9fs_put_int32(bufp, wstat->atime, &stat->atime);
+	v9fs_put_int32(bufp, wstat->mtime, &stat->mtime);
+	v9fs_put_int64(bufp, wstat->length, &stat->length);
+
+	v9fs_put_str(bufp, wstat->name, &stat->name);
+	v9fs_put_str(bufp, wstat->uid, &stat->uid);
+	v9fs_put_str(bufp, wstat->gid, &stat->gid);
+	v9fs_put_str(bufp, wstat->muid, &stat->muid);
+
+	if (extended) {
+		v9fs_put_str(bufp, wstat->extension, &stat->extension);
+		v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid);
+		v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid);
+		v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid);
+	}
+}
+
+static struct v9fs_fcall *
+v9fs_create_common(struct cbuf *bufp, u32 size, u8 id)
+{
+	struct v9fs_fcall *fc;
+
+	size += 4 + 1 + 2;	/* size[4] id[1] tag[2] */
+	fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL);
+	if (!fc)
+		return ERR_PTR(-ENOMEM);
+
+	fc->sdata = (char *)fc + sizeof(*fc);
+
+	buf_init(bufp, (char *)fc->sdata, size);
+	v9fs_put_int32(bufp, size, &fc->size);
+	v9fs_put_int8(bufp, id, &fc->id);
+	v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag);
+
+	return fc;
+}
+
+void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag)
+{
+	fc->tag = tag;
+	*(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag);
+}
+
+struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version)
+{
+	int size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 4 + 2 + strlen(version);	/* msize[4] version[s] */
+	fc = v9fs_create_common(bufp, size, TVERSION);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, msize, &fc->params.tversion.msize);
+	v9fs_put_str(bufp, version, &fc->params.tversion.version);
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
+{
+	int size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 4 + 2 + strlen(uname) + 2 + strlen(aname);	/* afid[4] uname[s] aname[s] */
+	fc = v9fs_create_common(bufp, size, TAUTH);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, afid, &fc->params.tauth.afid);
+	v9fs_put_str(bufp, uname, &fc->params.tauth.uname);
+	v9fs_put_str(bufp, aname, &fc->params.tauth.aname);
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *
+v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname)
+{
+	int size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname);	/* fid[4] afid[4] uname[s] aname[s] */
+	fc = v9fs_create_common(bufp, size, TATTACH);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, fid, &fc->params.tattach.fid);
+	v9fs_put_int32(bufp, afid, &fc->params.tattach.afid);
+	v9fs_put_str(bufp, uname, &fc->params.tattach.uname);
+	v9fs_put_str(bufp, aname, &fc->params.tattach.aname);
+
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_tflush(u16 oldtag)
+{
+	int size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 2;		/* oldtag[2] */
+	fc = v9fs_create_common(bufp, size, TFLUSH);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
+				     char **wnames)
+{
+	int i, size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	if (nwname > V9FS_MAXWELEM) {
+		dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM);
+		return NULL;
+	}
+
+	size = 4 + 4 + 2;	/* fid[4] newfid[4] nwname[2] ... */
+	for (i = 0; i < nwname; i++) {
+		size += 2 + strlen(wnames[i]);	/* wname[s] */
+	}
+
+	fc = v9fs_create_common(bufp, size, TWALK);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, fid, &fc->params.twalk.fid);
+	v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid);
+	v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname);
+	for (i = 0; i < nwname; i++) {
+		v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
+	}
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
+{
+	int size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 4 + 1;		/* fid[4] mode[1] */
+	fc = v9fs_create_common(bufp, size, TOPEN);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, fid, &fc->params.topen.fid);
+	v9fs_put_int8(bufp, mode, &fc->params.topen.mode);
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
+{
+	int size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 4 + 2 + strlen(name) + 4 + 1;	/* fid[4] name[s] perm[4] mode[1] */
+	fc = v9fs_create_common(bufp, size, TCREATE);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid);
+	v9fs_put_str(bufp, name, &fc->params.tcreate.name);
+	v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm);
+	v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode);
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count)
+{
+	int size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 4 + 8 + 4;	/* fid[4] offset[8] count[4] */
+	fc = v9fs_create_common(bufp, size, TREAD);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, fid, &fc->params.tread.fid);
+	v9fs_put_int64(bufp, offset, &fc->params.tread.offset);
+	v9fs_put_int32(bufp, count, &fc->params.tread.count);
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
+				      const char __user * data)
+{
+	int size, err;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 4 + 8 + 4 + count;	/* fid[4] offset[8] count[4] data[count] */
+	fc = v9fs_create_common(bufp, size, TWRITE);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, fid, &fc->params.twrite.fid);
+	v9fs_put_int64(bufp, offset, &fc->params.twrite.offset);
+	v9fs_put_int32(bufp, count, &fc->params.twrite.count);
+	err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data);
+	if (err) {
+		kfree(fc);
+		fc = ERR_PTR(err);
+	}
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_tclunk(u32 fid)
+{
+	int size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 4;		/* fid[4] */
+	fc = v9fs_create_common(bufp, size, TCLUNK);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid);
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_tremove(u32 fid)
+{
+	int size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 4;		/* fid[4] */
+	fc = v9fs_create_common(bufp, size, TREMOVE);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, fid, &fc->params.tremove.fid);
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_tstat(u32 fid)
+{
+	int size;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	size = 4;		/* fid[4] */
+	fc = v9fs_create_common(bufp, size, TSTAT);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, fid, &fc->params.tstat.fid);
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
+}
+
+struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
+				      int extended)
+{
+	int size, statsz;
+	struct v9fs_fcall *fc;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	statsz = v9fs_size_wstat(wstat, extended);
+	size = 4 + 2 + 2 + statsz;	/* fid[4] stat[n] */
+	fc = v9fs_create_common(bufp, size, TWSTAT);
+	if (IS_ERR(fc))
+		goto error;
+
+	v9fs_put_int32(bufp, fid, &fc->params.twstat.fid);
+	buf_put_int16(bufp, statsz + 2);
+	v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended);
+
+	if (buf_check_overflow(bufp)) {
+		kfree(fc);
+		fc = ERR_PTR(-ENOMEM);
+	}
+      error:
+	return fc;
 }
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
index ee849613c61a..26a736e4a2e7 100644
--- a/fs/9p/conv.h
+++ b/fs/9p/conv.h
@@ -1,8 +1,9 @@
 /*
  * linux/fs/9p/conv.h
  *
- * 9P protocol conversion definitions
+ * 9P protocol conversion definitions.
  *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
@@ -24,13 +25,27 @@
  *
  */
 
-int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf,
-			  u32 buflen, struct v9fs_stat *stat, u32 statlen);
-int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall,
-			 void *buf, u32 buflen);
-int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
-			   void *buf, u32 buflen, struct v9fs_fcall *rcall,
-			   int rcalllen);
+int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
+	int extended);
+int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
+	int extended);
 
-/* this one is actually in error.c right now */
-int v9fs_errstr2errno(char *errstr);
+void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag);
+
+struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version);
+struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname);
+struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname,
+	char *aname);
+struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
+struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
+	char **wnames);
+struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
+struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode);
+struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
+struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
+	const char __user *data);
+struct v9fs_fcall *v9fs_create_tclunk(u32 fid);
+struct v9fs_fcall *v9fs_create_tremove(u32 fid);
+struct v9fs_fcall *v9fs_create_tstat(u32 fid);
+struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
+	int extended);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
index 4445f06919d9..fe551032788b 100644
--- a/fs/9p/debug.h
+++ b/fs/9p/debug.h
@@ -51,16 +51,23 @@ do { \
 #if DEBUG_DUMP_PKT
 static inline void dump_data(const unsigned char *data, unsigned int datalen)
 {
-	int i, j;
-	int len = datalen;
+	int i, n;
+	char buf[5*8];
 
-	printk(KERN_DEBUG "data ");
-	for (i = 0; i < len; i += 4) {
-		for (j = 0; (j < 4) && (i + j < len); j++)
-			printk(KERN_DEBUG "%02x", data[i + j]);
-		printk(KERN_DEBUG " ");
+	n = 0;
+	i = 0;
+	while (i < datalen) {
+		n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]);
+		if (i%4 == 0)
+			n += snprintf(buf+n, sizeof(buf)-n, " ");
+
+		if (i%16 == 0) {
+			dprintk(DEBUG_ERROR, "%s\n", buf);
+			n = 0;
+		}
 	}
-	printk(KERN_DEBUG "\n");
+
+	dprintk(DEBUG_ERROR, "%s\n", buf);
 }
 #else				/* DEBUG_DUMP_PKT */
 static inline void dump_data(const unsigned char *data, unsigned int datalen)
diff --git a/fs/9p/error.c b/fs/9p/error.c
index 834cb179e388..e4b6f8f38b6f 100644
--- a/fs/9p/error.c
+++ b/fs/9p/error.c
@@ -33,7 +33,6 @@
 
 #include <linux/list.h>
 #include <linux/jhash.h>
-#include <linux/string.h>
 
 #include "debug.h"
 #include "error.h"
@@ -55,7 +54,8 @@ int v9fs_error_init(void)
 
 	/* load initial error map into hash table */
 	for (c = errmap; c->name != NULL; c++) {
-		bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ;
+		c->namelen = strlen(c->name);
+		bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
 		INIT_HLIST_NODE(&c->list);
 		hlist_add_head(&c->list, &hash_errmap[bucket]);
 	}
@@ -69,15 +69,15 @@ int v9fs_error_init(void)
  *
  */
 
-int v9fs_errstr2errno(char *errstr)
+int v9fs_errstr2errno(char *errstr, int len)
 {
 	int errno = 0;
 	struct hlist_node *p = NULL;
 	struct errormap *c = NULL;
-	int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ;
+	int bucket = jhash(errstr, len, 0) % ERRHASHSZ;
 
 	hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
-		if (!strcmp(c->name, errstr)) {
+		if (c->namelen==len && !memcmp(c->name, errstr, len)) {
 			errno = c->val;
 			break;
 		}
diff --git a/fs/9p/error.h b/fs/9p/error.h
index 78f89acf7c9a..a9794e85fe51 100644
--- a/fs/9p/error.h
+++ b/fs/9p/error.h
@@ -36,6 +36,7 @@ struct errormap {
 	char *name;
 	int val;
 
+	int namelen;
 	struct hlist_node list;
 };
 
@@ -175,4 +176,3 @@ static struct errormap errmap[] = {
 };
 
 extern int v9fs_error_init(void);
-extern int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index d95f8626d170..eda449778fa5 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -31,9 +31,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "transport.h"
-#include "mux.h"
-#include "conv.h"
 #include "fid.h"
 
 /**
@@ -164,7 +161,7 @@ static struct v9fs_fid *v9fs_fid_walk_up(struct dentry *dentry)
 	return v9fs_fid_create(dentry, v9ses, fidnum, 0);
 
 clunk_fid:
-	v9fs_t_clunk(v9ses, fidnum, NULL);
+	v9fs_t_clunk(v9ses, fidnum);
 	return ERR_PTR(err);
 }
 
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 8835b576f744..945cb368d451 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -4,7 +4,7 @@
  * Protocol Multiplexer
  *
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
+ *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -28,448 +28,943 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/poll.h>
 #include <linux/kthread.h>
 #include <linux/idr.h>
 
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
-#include "transport.h"
 #include "conv.h"
+#include "transport.h"
 #include "mux.h"
 
+#define ERREQFLUSH	1
+#define SCHED_TIMEOUT	10
+#define MAXPOLLWADDR	2
+
+enum {
+	Rworksched = 1,		/* read work scheduled or running */
+	Rpending = 2,		/* can read */
+	Wworksched = 4,		/* write work scheduled or running */
+	Wpending = 8,		/* can write */
+};
+
+struct v9fs_mux_poll_task;
+
+struct v9fs_req {
+	int tag;
+	struct v9fs_fcall *tcall;
+	struct v9fs_fcall *rcall;
+	int err;
+	v9fs_mux_req_callback cb;
+	void *cba;
+	struct list_head req_list;
+};
+
+struct v9fs_mux_data {
+	spinlock_t lock;
+	struct list_head mux_list;
+	struct v9fs_mux_poll_task *poll_task;
+	int msize;
+	unsigned char *extended;
+	struct v9fs_transport *trans;
+	struct v9fs_idpool tidpool;
+	int err;
+	wait_queue_head_t equeue;
+	struct list_head req_list;
+	struct list_head unsent_req_list;
+	struct v9fs_fcall *rcall;
+	int rpos;
+	char *rbuf;
+	int wpos;
+	int wsize;
+	char *wbuf;
+	wait_queue_t poll_wait[MAXPOLLWADDR];
+	wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
+	poll_table pt;
+	struct work_struct rq;
+	struct work_struct wq;
+	unsigned long wsched;
+};
+
+struct v9fs_mux_poll_task {
+	struct task_struct *task;
+	struct list_head mux_list;
+	int muxnum;
+};
+
+struct v9fs_mux_rpc {
+	struct v9fs_mux_data *m;
+	struct v9fs_req *req;
+	int err;
+	struct v9fs_fcall *rcall;
+	wait_queue_head_t wqueue;
+};
+
+static int v9fs_poll_proc(void *);
+static void v9fs_read_work(void *);
+static void v9fs_write_work(void *);
+static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
+			  poll_table * p);
+static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
+static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16);
+
+static DECLARE_MUTEX(v9fs_mux_task_lock);
+static struct workqueue_struct *v9fs_mux_wq;
+
+static int v9fs_mux_num;
+static int v9fs_mux_poll_task_num;
+static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100];
+
+int v9fs_mux_global_init(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++)
+		v9fs_mux_poll_tasks[i].task = NULL;
+
+	v9fs_mux_wq = create_workqueue("v9fs");
+	if (!v9fs_mux_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void v9fs_mux_global_exit(void)
+{
+	destroy_workqueue(v9fs_mux_wq);
+}
+
 /**
- * dprintcond - print condition of session info
- * @v9ses: session info structure
- * @req: RPC request structure
+ * v9fs_mux_calc_poll_procs - calculates the number of polling procs
+ * based on the number of mounted v9fs filesystems.
  *
+ * The current implementation returns sqrt of the number of mounts.
  */
+inline int v9fs_mux_calc_poll_procs(int muxnum)
+{
+	int n;
+
+	if (v9fs_mux_poll_task_num)
+		n = muxnum / v9fs_mux_poll_task_num +
+		    (muxnum % v9fs_mux_poll_task_num ? 1 : 0);
+	else
+		n = 1;
+
+	if (n > ARRAY_SIZE(v9fs_mux_poll_tasks))
+		n = ARRAY_SIZE(v9fs_mux_poll_tasks);
+
+	return n;
+}
 
-static inline int
-dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
 {
-	dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status,
-		req->rcall);
+	int i, n;
+	struct v9fs_mux_poll_task *vpt, *vptlast;
+	struct task_struct *pproc;
+
+	dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num,
+		v9fs_mux_poll_task_num);
+	up(&v9fs_mux_task_lock);
+
+	n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1);
+	if (n > v9fs_mux_poll_task_num) {
+		for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
+			if (v9fs_mux_poll_tasks[i].task == NULL) {
+				vpt = &v9fs_mux_poll_tasks[i];
+				dprintk(DEBUG_MUX, "create proc %p\n", vpt);
+				pproc = kthread_create(v9fs_poll_proc, vpt,
+						   "v9fs-poll");
+
+				if (!IS_ERR(pproc)) {
+					vpt->task = pproc;
+					INIT_LIST_HEAD(&vpt->mux_list);
+					vpt->muxnum = 0;
+					v9fs_mux_poll_task_num++;
+					wake_up_process(vpt->task);
+				}
+				break;
+			}
+		}
+
+		if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks))
+			dprintk(DEBUG_ERROR, "warning: no free poll slots\n");
+	}
+
+	n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num +
+	    ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0);
+
+	vptlast = NULL;
+	for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
+		vpt = &v9fs_mux_poll_tasks[i];
+		if (vpt->task != NULL) {
+			vptlast = vpt;
+			if (vpt->muxnum < n) {
+				dprintk(DEBUG_MUX, "put in proc %d\n", i);
+				list_add(&m->mux_list, &vpt->mux_list);
+				vpt->muxnum++;
+				m->poll_task = vpt;
+				memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
+				init_poll_funcptr(&m->pt, v9fs_pollwait);
+				break;
+			}
+		}
+	}
+
+	if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) {
+		if (vptlast == NULL)
+			return -ENOMEM;
+
+		dprintk(DEBUG_MUX, "put in proc %d\n", i);
+		list_add(&m->mux_list, &vptlast->mux_list);
+		vptlast->muxnum++;
+		m->poll_task = vptlast;
+		memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
+		init_poll_funcptr(&m->pt, v9fs_pollwait);
+	}
+
+	v9fs_mux_num++;
+	down(&v9fs_mux_task_lock);
+
 	return 0;
 }
 
+static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
+{
+	int i;
+	struct v9fs_mux_poll_task *vpt;
+
+	up(&v9fs_mux_task_lock);
+	vpt = m->poll_task;
+	list_del(&m->mux_list);
+	for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
+		if (m->poll_waddr[i] != NULL) {
+			remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
+			m->poll_waddr[i] = NULL;
+		}
+	}
+	vpt->muxnum--;
+	if (!vpt->muxnum) {
+		dprintk(DEBUG_MUX, "destroy proc %p\n", vpt);
+		send_sig(SIGKILL, vpt->task, 1);
+		vpt->task = NULL;
+		v9fs_mux_poll_task_num--;
+	}
+	v9fs_mux_num--;
+	down(&v9fs_mux_task_lock);
+}
+
 /**
- * xread - force read of a certain number of bytes
- * @v9ses: session info structure
- * @ptr: pointer to buffer
- * @sz: number of bytes to read
+ * v9fs_mux_init - allocate and initialize the per-session mux data
+ * Creates the polling task if this is the first session.
  *
- * Chuck Cranor CS-533 project1
+ * @trans - transport structure
+ * @msize - maximum message size
+ * @extended - pointer to the extended flag
  */
-
-static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz)
+struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
+				    unsigned char *extended)
 {
-	int rd = 0;
-	int ret = 0;
-	while (rd < sz) {
-		ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd);
-		if (ret <= 0) {
-			dprintk(DEBUG_ERROR, "xread errno %d\n", ret);
-			return ret;
+	int i, n;
+	struct v9fs_mux_data *m, *mtmp;
+
+	dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize);
+	m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL);
+	if (!m)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&m->lock);
+	INIT_LIST_HEAD(&m->mux_list);
+	m->msize = msize;
+	m->extended = extended;
+	m->trans = trans;
+	idr_init(&m->tidpool.pool);
+	init_MUTEX(&m->tidpool.lock);
+	m->err = 0;
+	init_waitqueue_head(&m->equeue);
+	INIT_LIST_HEAD(&m->req_list);
+	INIT_LIST_HEAD(&m->unsent_req_list);
+	m->rcall = NULL;
+	m->rpos = 0;
+	m->rbuf = NULL;
+	m->wpos = m->wsize = 0;
+	m->wbuf = NULL;
+	INIT_WORK(&m->rq, v9fs_read_work, m);
+	INIT_WORK(&m->wq, v9fs_write_work, m);
+	m->wsched = 0;
+	memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
+	m->poll_task = NULL;
+	n = v9fs_mux_poll_start(m);
+	if (n)
+		return ERR_PTR(n);
+
+	n = trans->poll(trans, &m->pt);
+	if (n & POLLIN) {
+		dprintk(DEBUG_MUX, "mux %p can read\n", m);
+		set_bit(Rpending, &m->wsched);
+	}
+
+	if (n & POLLOUT) {
+		dprintk(DEBUG_MUX, "mux %p can write\n", m);
+		set_bit(Wpending, &m->wsched);
+	}
+
+	for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
+		if (IS_ERR(m->poll_waddr[i])) {
+			v9fs_mux_poll_stop(m);
+			mtmp = (void *)m->poll_waddr;	/* the error code */
+			kfree(m);
+			m = mtmp;
+			break;
 		}
-		rd += ret;
-		ptr += ret;
 	}
-	return (rd);
+
+	return m;
 }
 
 /**
- * read_message - read a full 9P2000 fcall packet
- * @v9ses: session info structure
- * @rcall: fcall structure to read into
- * @rcalllen: size of fcall buffer
- *
+ * v9fs_mux_destroy - cancels all pending requests and frees mux resources
  */
+void v9fs_mux_destroy(struct v9fs_mux_data *m)
+{
+	dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m,
+		m->mux_list.prev, m->mux_list.next);
+	v9fs_mux_cancel(m, -ECONNRESET);
+
+	if (!list_empty(&m->req_list)) {
+		/* wait until all processes waiting on this session exit */
+		dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n",
+			m);
+		wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
+		dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m,
+			list_empty(&m->req_list));
+	}
+
+	v9fs_mux_poll_stop(m);
+	m->trans = NULL;
+
+	kfree(m);
+}
 
-static int
-read_message(struct v9fs_session_info *v9ses,
-	     struct v9fs_fcall *rcall, int rcalllen)
+/**
+ * v9fs_pollwait - called by files poll operation to add v9fs-poll task
+ * 	to files wait queue
+ */
+static void
+v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
+	      poll_table * p)
 {
-	unsigned char buf[4];
-	void *data;
-	int size = 0;
-	int res = 0;
-
-	res = xread(v9ses, buf, sizeof(buf));
-	if (res < 0) {
-		dprintk(DEBUG_ERROR,
-			"Reading of count field failed returned: %d\n", res);
-		return res;
+	int i;
+	struct v9fs_mux_data *m;
+
+	m = container_of(p, struct v9fs_mux_data, pt);
+	for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
+		if (m->poll_waddr[i] == NULL)
+			break;
+
+	if (i >= ARRAY_SIZE(m->poll_waddr)) {
+		dprintk(DEBUG_ERROR, "not enough wait_address slots\n");
+		return;
 	}
 
-	if (res < 4) {
-		dprintk(DEBUG_ERROR,
-			"Reading of count field failed returned: %d\n", res);
-		return -EIO;
+	m->poll_waddr[i] = wait_address;
+
+	if (!wait_address) {
+		dprintk(DEBUG_ERROR, "no wait_address\n");
+		m->poll_waddr[i] = ERR_PTR(-EIO);
+		return;
 	}
 
-	size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
-	dprintk(DEBUG_MUX, "got a packet count: %d\n", size);
+	init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
+	add_wait_queue(wait_address, &m->poll_wait[i]);
+}
+
+/**
+ * v9fs_poll_mux - polls a mux and schedules read or write works if necessary
+ */
+static inline void v9fs_poll_mux(struct v9fs_mux_data *m)
+{
+	int n;
 
-	/* adjust for the four bytes of size */
-	size -= 4;
+	if (m->err < 0)
+		return;
 
-	if (size > v9ses->maxdata) {
-		dprintk(DEBUG_ERROR, "packet too big: %d\n", size);
-		return -E2BIG;
+	n = m->trans->poll(m->trans, NULL);
+	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
+		dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n);
+		if (n >= 0)
+			n = -ECONNRESET;
+		v9fs_mux_cancel(m, n);
 	}
 
-	data = kmalloc(size, GFP_KERNEL);
-	if (!data) {
-		eprintk(KERN_WARNING, "out of memory\n");
-		return -ENOMEM;
+	if (n & POLLIN) {
+		set_bit(Rpending, &m->wsched);
+		dprintk(DEBUG_MUX, "mux %p can read\n", m);
+		if (!test_and_set_bit(Rworksched, &m->wsched)) {
+			dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
+			queue_work(v9fs_mux_wq, &m->rq);
+		}
 	}
 
-	res = xread(v9ses, data, size);
-	if (res < size) {
-		dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n",
-			res);
-		kfree(data);
-		return res;
+	if (n & POLLOUT) {
+		set_bit(Wpending, &m->wsched);
+		dprintk(DEBUG_MUX, "mux %p can write\n", m);
+		if ((m->wsize || !list_empty(&m->unsent_req_list))
+		    && !test_and_set_bit(Wworksched, &m->wsched)) {
+			dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
+			queue_work(v9fs_mux_wq, &m->wq);
+		}
 	}
+}
+
+/**
+ * v9fs_poll_proc - polls all v9fs transports for new events and queues
+ * 	the appropriate work to the work queue
+ */
+static int v9fs_poll_proc(void *a)
+{
+	struct v9fs_mux_data *m, *mtmp;
+	struct v9fs_mux_poll_task *vpt;
 
-	/* we now have an in-memory string that is the reply.
-	 * deserialize it. There is very little to go wrong at this point
-	 * save for v9fs_alloc errors.
-	 */
-	res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata,
-				     rcall, rcalllen);
+	vpt = a;
+	dprintk(DEBUG_MUX, "start %p %p\n", current, vpt);
+	allow_signal(SIGKILL);
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (signal_pending(current))
+			break;
 
-	kfree(data);
+		list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
+			v9fs_poll_mux(m);
+		}
 
-	if (res < 0)
-		return res;
+		dprintk(DEBUG_MUX, "sleeping...\n");
+		schedule_timeout(SCHED_TIMEOUT * HZ);
+	}
 
+	__set_current_state(TASK_RUNNING);
+	dprintk(DEBUG_MUX, "finish\n");
 	return 0;
 }
 
 /**
- * v9fs_recv - receive an RPC response for a particular tag
- * @v9ses: session info structure
- * @req: RPC request structure
- *
+ * v9fs_write_work - called when a transport can send some data
  */
-
-static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+static void v9fs_write_work(void *a)
 {
-	int ret = 0;
+	int n, err;
+	struct v9fs_mux_data *m;
+	struct v9fs_req *req;
 
-	dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag);
-	ret = wait_event_interruptible(v9ses->read_wait,
-		       ((v9ses->transport->status != Connected) ||
-			(req->rcall != 0) || (req->err < 0) ||
-			dprintcond(v9ses, req)));
+	m = a;
 
-	dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall);
+	if (m->err < 0) {
+		clear_bit(Wworksched, &m->wsched);
+		return;
+	}
 
-	spin_lock(&v9ses->muxlock);
-	list_del(&req->next);
-	spin_unlock(&v9ses->muxlock);
+	if (!m->wsize) {
+		if (list_empty(&m->unsent_req_list)) {
+			clear_bit(Wworksched, &m->wsched);
+			return;
+		}
 
-	if (req->err < 0)
-		return req->err;
+		spin_lock(&m->lock);
+		req =
+		    list_entry(m->unsent_req_list.next, struct v9fs_req,
+			       req_list);
+		list_move_tail(&req->req_list, &m->req_list);
+		m->wbuf = req->tcall->sdata;
+		m->wsize = req->tcall->size;
+		m->wpos = 0;
+		dump_data(m->wbuf, m->wsize);
+		spin_unlock(&m->lock);
+	}
 
-	if (v9ses->transport->status == Disconnected)
-		return -ECONNRESET;
+	dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize);
+	clear_bit(Wpending, &m->wsched);
+	err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
+	dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
+	if (err == -EAGAIN) {
+		clear_bit(Wworksched, &m->wsched);
+		return;
+	}
 
-	return ret;
-}
+	if (err <= 0)
+		goto error;
 
-/**
- * v9fs_send - send a 9P request
- * @v9ses: session info structure
- * @req: RPC request to send
- *
- */
+	m->wpos += err;
+	if (m->wpos == m->wsize)
+		m->wpos = m->wsize = 0;
+
+	if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
+		if (test_and_clear_bit(Wpending, &m->wsched))
+			n = POLLOUT;
+		else
+			n = m->trans->poll(m->trans, NULL);
+
+		if (n & POLLOUT) {
+			dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
+			queue_work(v9fs_mux_wq, &m->wq);
+		} else
+			clear_bit(Wworksched, &m->wsched);
+	} else
+		clear_bit(Wworksched, &m->wsched);
+
+	return;
 
-static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+      error:
+	v9fs_mux_cancel(m, err);
+	clear_bit(Wworksched, &m->wsched);
+}
+
+static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
 {
-	int ret = -1;
-	void *data = NULL;
-	struct v9fs_fcall *tcall = req->tcall;
+	int ecode, tag;
+	struct v9fs_str *ename;
 
-	data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
+	tag = req->tag;
+	if (req->rcall->id == RERROR && !req->err) {
+		ecode = req->rcall->params.rerror.errno;
+		ename = &req->rcall->params.rerror.error;
 
-	tcall->size = 0;	/* enforce size recalculation */
-	ret =
-	    v9fs_serialize_fcall(v9ses, tcall, data,
-				 v9ses->maxdata + V9FS_IOHDRSZ);
-	if (ret < 0)
-		goto free_data;
+		dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str);
 
-	spin_lock(&v9ses->muxlock);
-	list_add(&req->next, &v9ses->mux_fcalls);
-	spin_unlock(&v9ses->muxlock);
+		if (*m->extended)
+			req->err = -ecode;
 
-	dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag,
-		tcall->size);
-	ret = v9ses->transport->write(v9ses->transport, data, tcall->size);
+		if (!req->err) {
+			req->err = v9fs_errstr2errno(ename->str, ename->len);
 
-	if (ret != tcall->size) {
-		spin_lock(&v9ses->muxlock);
-		list_del(&req->next);
-		kfree(req->rcall);
+			if (!req->err) {	/* string match failed */
+				PRINT_FCALL_ERROR("unknown error", req->rcall);
+			}
+
+			if (!req->err)
+				req->err = -ESERVERFAULT;
+		}
+	} else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
+		dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n",
+			req->tcall->id + 1, req->rcall->id);
+		if (!req->err)
+			req->err = -EIO;
+	}
 
-		spin_unlock(&v9ses->muxlock);
-		if (ret >= 0)
-			ret = -EREMOTEIO;
+	if (req->cb && req->err != ERREQFLUSH) {
+		dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n",
+			req->tcall, req->rcall);
+
+		(*req->cb) (req->cba, req->tcall, req->rcall, req->err);
+		req->cb = NULL;
 	} else
-		ret = 0;
+		kfree(req->rcall);
 
-      free_data:
-	kfree(data);
-	return ret;
+	v9fs_mux_put_tag(m, tag);
+
+	wake_up(&m->equeue);
+	kfree(req);
 }
 
 /**
- * v9fs_mux_rpc - send a request, receive a response
- * @v9ses: session info structure
- * @tcall: fcall to send
- * @rcall: buffer to place response into
- *
+ * v9fs_read_work - called when there is some data to be read from a transport
  */
-
-long
-v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
-	     struct v9fs_fcall **rcall)
+static void v9fs_read_work(void *a)
 {
-	int tid = -1;
-	struct v9fs_fcall *fcall = NULL;
-	struct v9fs_rpcreq req;
-	int ret = -1;
-
-	if (!v9ses)
-		return -EINVAL;
-
-	if (!v9ses->transport || v9ses->transport->status != Connected)
-		return -EIO;
+	int n, err;
+	struct v9fs_mux_data *m;
+	struct v9fs_req *req, *rptr, *rreq;
+	struct v9fs_fcall *rcall;
+	char *rbuf;
+
+	m = a;
+
+	if (m->err < 0)
+		return;
+
+	rcall = NULL;
+	dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
+
+	if (!m->rcall) {
+		m->rcall =
+		    kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL);
+		if (!m->rcall) {
+			err = -ENOMEM;
+			goto error;
+		}
 
-	if (rcall)
-		*rcall = NULL;
+		m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
+		m->rpos = 0;
+	}
 
-	if (tcall->id != TVERSION) {
-		tid = v9fs_get_idpool(&v9ses->tidpool);
-		if (tid < 0)
-			return -ENOMEM;
+	clear_bit(Rpending, &m->wsched);
+	err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
+	dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err);
+	if (err == -EAGAIN) {
+		clear_bit(Rworksched, &m->wsched);
+		return;
 	}
 
-	tcall->tag = tid;
+	if (err <= 0)
+		goto error;
 
-	req.tcall = tcall;
-	req.err = 0;
-	req.rcall = NULL;
+	m->rpos += err;
+	while (m->rpos > 4) {
+		n = le32_to_cpu(*(__le32 *) m->rbuf);
+		if (n >= m->msize) {
+			dprintk(DEBUG_ERROR,
+				"requested packet size too big: %d\n", n);
+			err = -EIO;
+			goto error;
+		}
 
-	ret = v9fs_send(v9ses, &req);
+		if (m->rpos < n)
+			break;
 
-	if (ret < 0) {
-		if (tcall->id != TVERSION)
-			v9fs_put_idpool(tid, &v9ses->tidpool);
-		dprintk(DEBUG_MUX, "error %d\n", ret);
-		return ret;
-	}
+		dump_data(m->rbuf, n);
+		err =
+		    v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended);
+		if (err < 0) {
+			goto error;
+		}
+
+		rcall = m->rcall;
+		rbuf = m->rbuf;
+		if (m->rpos > n) {
+			m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize,
+					   GFP_KERNEL);
+			if (!m->rcall) {
+				err = -ENOMEM;
+				goto error;
+			}
 
-	ret = v9fs_recv(v9ses, &req);
-
-	fcall = req.rcall;
-
-	dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret);
-	if (ret == -ERESTARTSYS) {
-		if (v9ses->transport->status != Disconnected
-		    && tcall->id != TFLUSH) {
-			unsigned long flags;
-
-			dprintk(DEBUG_MUX, "flushing the tag: %d\n",
-				tcall->tag);
-			clear_thread_flag(TIF_SIGPENDING);
-			v9fs_t_flush(v9ses, tcall->tag);
-			spin_lock_irqsave(&current->sighand->siglock, flags);
-			recalc_sigpending();
-			spin_unlock_irqrestore(&current->sighand->siglock,
-					       flags);
-			dprintk(DEBUG_MUX, "flushing done\n");
+			m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
+			memmove(m->rbuf, rbuf + n, m->rpos - n);
+			m->rpos -= n;
+		} else {
+			m->rcall = NULL;
+			m->rbuf = NULL;
+			m->rpos = 0;
 		}
 
-		goto release_req;
-	} else if (ret < 0)
-		goto release_req;
-
-	if (!fcall)
-		ret = -EIO;
-	else {
-		if (fcall->id == RERROR) {
-			ret = v9fs_errstr2errno(fcall->params.rerror.error);
-			if (ret == 0) {	/* string match failed */
-				if (fcall->params.rerror.errno)
-					ret = -(fcall->params.rerror.errno);
-				else
-					ret = -ESERVERFAULT;
+		dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id,
+			rcall->tag);
+
+		req = NULL;
+		spin_lock(&m->lock);
+		list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
+			if (rreq->tag == rcall->tag) {
+				req = rreq;
+				req->rcall = rcall;
+				list_del(&req->req_list);
+				spin_unlock(&m->lock);
+				process_request(m, req);
+				break;
 			}
-		} else if (fcall->id != tcall->id + 1) {
-			dprintk(DEBUG_ERROR,
-				"fcall mismatch: expected %d, got %d\n",
-				tcall->id + 1, fcall->id);
-			ret = -EIO;
+
+		}
+
+		if (!req) {
+			spin_unlock(&m->lock);
+			if (err >= 0 && rcall->id != RFLUSH)
+				dprintk(DEBUG_ERROR,
+					"unexpected response mux %p id %d tag %d\n",
+					m, rcall->id, rcall->tag);
+			kfree(rcall);
 		}
 	}
 
-      release_req:
-	if (tcall->id != TVERSION)
-		v9fs_put_idpool(tid, &v9ses->tidpool);
-	if (rcall)
-		*rcall = fcall;
-	else
-		kfree(fcall);
+	if (!list_empty(&m->req_list)) {
+		if (test_and_clear_bit(Rpending, &m->wsched))
+			n = POLLIN;
+		else
+			n = m->trans->poll(m->trans, NULL);
+
+		if (n & POLLIN) {
+			dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
+			queue_work(v9fs_mux_wq, &m->rq);
+		} else
+			clear_bit(Rworksched, &m->wsched);
+	} else
+		clear_bit(Rworksched, &m->wsched);
+
+	return;
 
-	return ret;
+      error:
+	v9fs_mux_cancel(m, err);
+	clear_bit(Rworksched, &m->wsched);
 }
 
 /**
- * v9fs_mux_cancel_requests - cancels all pending requests
+ * v9fs_send_request - send 9P request
+ * The function can sleep until the request is scheduled for sending.
+ * The function can be interrupted. Return from the function is not
+ * a guarantee that the request is sent succesfully. Can return errors
+ * that can be retrieved by PTR_ERR macros.
  *
- * @v9ses: session info structure
- * @err: error code to return to the requests
+ * @m: mux data
+ * @tc: request to be sent
+ * @cb: callback function to call when response is received
+ * @cba: parameter to pass to the callback function
  */
-void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err)
+static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
+					  struct v9fs_fcall *tc,
+					  v9fs_mux_req_callback cb, void *cba)
 {
-	struct v9fs_rpcreq *rptr;
-	struct v9fs_rpcreq *rreq;
+	int n;
+	struct v9fs_req *req;
 
-	dprintk(DEBUG_MUX, " %d\n", err);
-	spin_lock(&v9ses->muxlock);
-	list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
-		rreq->err = err;
-	}
-	spin_unlock(&v9ses->muxlock);
-	wake_up_all(&v9ses->read_wait);
-}
+	dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
+		tc, tc->id);
+	if (m->err < 0)
+		return ERR_PTR(m->err);
 
-/**
- * v9fs_recvproc - kproc to handle demultiplexing responses
- * @data: session info structure
- *
- */
+	req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL);
+	if (!req)
+		return ERR_PTR(-ENOMEM);
 
-static int v9fs_recvproc(void *data)
-{
-	struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data;
-	struct v9fs_fcall *rcall = NULL;
-	struct v9fs_rpcreq *rptr;
-	struct v9fs_rpcreq *req;
-	struct v9fs_rpcreq *rreq;
-	int err = 0;
+	if (tc->id == TVERSION)
+		n = V9FS_NOTAG;
+	else
+		n = v9fs_mux_get_tag(m);
 
-	allow_signal(SIGKILL);
-	set_current_state(TASK_INTERRUPTIBLE);
-	complete(&v9ses->proccmpl);
-	while (!kthread_should_stop() && err >= 0) {
-		req = rptr = rreq = NULL;
-
-		rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
-		if (!rcall) {
-			eprintk(KERN_ERR, "no memory for buffers\n");
-			break;
-		}
+	if (n < 0)
+		return ERR_PTR(-ENOMEM);
 
-		err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ);
-		spin_lock(&v9ses->muxlock);
-		if (err < 0) {
-			list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
-				rreq->err = err;
-			}
-			if(err != -ERESTARTSYS)
-				eprintk(KERN_ERR,
-					"Transport error while reading message %d\n", err);
-		} else {
-			list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
-				if (rreq->tcall->tag == rcall->tag) {
-					req = rreq;
-					req->rcall = rcall;
-					break;
-				}
-			}
-		}
+	v9fs_set_tag(tc, n);
 
-		if (req && (req->tcall->id == TFLUSH)) {
-			struct v9fs_rpcreq *treq = NULL;
-			list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) {
-				if (treq->tcall->tag ==
-				    req->tcall->params.tflush.oldtag) {
-					list_del(&rptr->next);
-					kfree(treq->rcall);
-					break;
-				}
+	req->tag = n;
+	req->tcall = tc;
+	req->rcall = NULL;
+	req->err = 0;
+	req->cb = cb;
+	req->cba = cba;
+
+	spin_lock(&m->lock);
+	list_add_tail(&req->req_list, &m->unsent_req_list);
+	spin_unlock(&m->lock);
+
+	if (test_and_clear_bit(Wpending, &m->wsched))
+		n = POLLOUT;
+	else
+		n = m->trans->poll(m->trans, NULL);
+
+	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
+		queue_work(v9fs_mux_wq, &m->wq);
+
+	return req;
+}
+
+static inline void
+v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc,
+		  int err)
+{
+	v9fs_mux_req_callback cb;
+	int tag;
+	struct v9fs_mux_data *m;
+	struct v9fs_req *req, *rptr;
+
+	m = a;
+	dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc,
+		rc, err, tc->params.tflush.oldtag);
+
+	spin_lock(&m->lock);
+	cb = NULL;
+	tag = tc->params.tflush.oldtag;
+	list_for_each_entry_safe(req, rptr, &m->req_list, req_list) {
+		if (req->tag == tag) {
+			list_del(&req->req_list);
+			if (req->cb) {
+				cb = req->cb;
+				req->cb = NULL;
+				spin_unlock(&m->lock);
+				(*cb) (req->cba, req->tcall, req->rcall,
+				       req->err);
 			}
+			kfree(req);
+			wake_up(&m->equeue);
+			break;
 		}
+	}
 
-		spin_unlock(&v9ses->muxlock);
+	if (!cb)
+		spin_unlock(&m->lock);
 
-		if (!req) {
-			if (err >= 0)
-				dprintk(DEBUG_ERROR,
-					"unexpected response: id %d tag %d\n",
-					rcall->id, rcall->tag);
+	v9fs_mux_put_tag(m, tag);
+	kfree(tc);
+	kfree(rc);
+}
 
-			kfree(rcall);
-		}
+static void
+v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
+{
+	struct v9fs_fcall *fc;
 
-		wake_up_all(&v9ses->read_wait);
-		set_current_state(TASK_INTERRUPTIBLE);
+	dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
+
+	fc = v9fs_create_tflush(req->tag);
+	v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
+}
+
+static void
+v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err)
+{
+	struct v9fs_mux_rpc *r;
+
+	if (err == ERREQFLUSH) {
+		dprintk(DEBUG_MUX, "err req flush\n");
+		return;
 	}
 
-	v9ses->transport->close(v9ses->transport);
+	r = a;
+	dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req,
+		tc, rc, err);
+	r->rcall = rc;
+	r->err = err;
+	wake_up(&r->wqueue);
+}
 
-	/* Inform all pending processes about the failure */
-	wake_up_all(&v9ses->read_wait);
+/**
+ * v9fs_mux_rpc - sends 9P request and waits until a response is available.
+ *	The function can be interrupted.
+ * @m: mux data
+ * @tc: request to be sent
+ * @rc: pointer where a pointer to the response is stored
+ */
+int
+v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
+	     struct v9fs_fcall **rc)
+{
+	int err;
+	unsigned long flags;
+	struct v9fs_req *req;
+	struct v9fs_mux_rpc r;
+
+	r.err = 0;
+	r.rcall = NULL;
+	r.m = m;
+	init_waitqueue_head(&r.wqueue);
+
+	if (rc)
+		*rc = NULL;
+
+	req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		dprintk(DEBUG_MUX, "error %d\n", err);
+		return PTR_ERR(req);
+	}
 
-	if (signal_pending(current))
-		complete(&v9ses->proccmpl);
+	r.req = req;
+	dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc,
+		req->tag, &r, req);
+	err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
+	if (r.err < 0)
+		err = r.err;
+
+	if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
+		spin_lock(&m->lock);
+		req->tcall = NULL;
+		req->err = ERREQFLUSH;
+		spin_unlock(&m->lock);
+
+		clear_thread_flag(TIF_SIGPENDING);
+		v9fs_mux_flush_request(m, req);
+		spin_lock_irqsave(&current->sighand->siglock, flags);
+		recalc_sigpending();
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	}
 
-	dprintk(DEBUG_MUX, "recvproc: end\n");
-	v9ses->recvproc = NULL;
+	if (!err) {
+		if (r.rcall)
+			dprintk(DEBUG_MUX, "got response id %d tag %d\n",
+				r.rcall->id, r.rcall->tag);
+
+		if (rc)
+			*rc = r.rcall;
+		else
+			kfree(r.rcall);
+	} else {
+		kfree(r.rcall);
+		dprintk(DEBUG_MUX, "got error %d\n", err);
+		if (err > 0)
+			err = -EIO;
+	}
 
-	return err >= 0;
+	return err;
 }
 
 /**
- * v9fs_mux_init - initialize multiplexer (spawn kproc)
- * @v9ses: session info structure
- * @dev_name: mount device information (to create unique kproc)
- *
+ * v9fs_mux_rpcnb - sends 9P request without waiting for response.
+ * @m: mux data
+ * @tc: request to be sent
+ * @cb: callback function to be called when response arrives
+ * @cba: value to pass to the callback function
  */
+int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
+		   v9fs_mux_req_callback cb, void *a)
+{
+	int err;
+	struct v9fs_req *req;
+
+	req = v9fs_send_request(m, tc, cb, a);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		dprintk(DEBUG_MUX, "error %d\n", err);
+		return PTR_ERR(req);
+	}
+
+	dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
+	return 0;
+}
 
-int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name)
+/**
+ * v9fs_mux_cancel - cancel all pending requests with error
+ * @m: mux data
+ * @err: error code
+ */
+void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
 {
-	char procname[60];
-
-	strncpy(procname, dev_name, sizeof(procname));
-	procname[sizeof(procname) - 1] = 0;
-
-	init_waitqueue_head(&v9ses->read_wait);
-	init_completion(&v9ses->fcread);
-	init_completion(&v9ses->proccmpl);
-	spin_lock_init(&v9ses->muxlock);
-	INIT_LIST_HEAD(&v9ses->mux_fcalls);
-	v9ses->recvproc = NULL;
-	v9ses->curfcall = NULL;
-
-	v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
-					 "v9fs_recvproc %s", procname);
-
-	if (IS_ERR(v9ses->recvproc)) {
-		eprintk(KERN_ERR, "cannot create receiving thread\n");
-		v9fs_session_close(v9ses);
-		return -ECONNABORTED;
+	struct v9fs_req *req, *rtmp;
+	LIST_HEAD(cancel_list);
+
+	dprintk(DEBUG_MUX, "mux %p err %d\n", m, err);
+	m->err = err;
+	spin_lock(&m->lock);
+	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
+		list_move(&req->req_list, &cancel_list);
 	}
+	spin_unlock(&m->lock);
 
-	wake_up_process(v9ses->recvproc);
-	wait_for_completion(&v9ses->proccmpl);
+	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
+		list_del(&req->req_list);
+		if (!req->err)
+			req->err = err;
 
-	return 0;
+		if (req->cb)
+			(*req->cb) (req->cba, req->tcall, req->rcall, req->err);
+		else
+			kfree(req->rcall);
+
+		kfree(req);
+	}
+
+	wake_up(&m->equeue);
+}
+
+static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
+{
+	int tag;
+
+	tag = v9fs_get_idpool(&m->tidpool);
+	if (tag < 0)
+		return V9FS_NOTAG;
+	else
+		return (u16) tag;
+}
+
+static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
+{
+	if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tidpool))
+		v9fs_put_idpool(tag, &m->tidpool);
 }
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index 4994cb10badf..9473b84f24b2 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -3,6 +3,7 @@
  *
  * Multiplexer Definitions
  *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
@@ -23,19 +24,35 @@
  *
  */
 
-/* structure to manage each RPC transaction */
+struct v9fs_mux_data;
 
-struct v9fs_rpcreq {
-	struct v9fs_fcall *tcall;
-	struct v9fs_fcall *rcall;
-	int err;	/* error code if response failed */
+/**
+ * v9fs_mux_req_callback - callback function that is called when the
+ * response of a request is received. The callback is called from
+ * a workqueue and shouldn't block.
+ *
+ * @a - the pointer that was specified when the request was send to be
+ *      passed to the callback
+ * @tc - request call
+ * @rc - response call
+ * @err - error code (non-zero if error occured)
+ */
+typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc,
+	struct v9fs_fcall *rc, int err);
+
+int v9fs_mux_global_init(void);
+void v9fs_mux_global_exit(void);
 
-	/* XXX - could we put scatter/gather buffers here? */
+struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
+	unsigned char *extended);
+void v9fs_mux_destroy(struct v9fs_mux_data *);
 
-	struct list_head next;
-};
+int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc);
+struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m);
+int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc);
+int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
+	v9fs_mux_req_callback cb, void *a);
 
-int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name);
-long v9fs_mux_rpc(struct v9fs_session_info *v9ses,
-		  struct v9fs_fcall *tcall, struct v9fs_fcall **rcall);
-void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
+void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush);
+void v9fs_mux_cancel(struct v9fs_mux_data *m, int err);
+int v9fs_errstr2errno(char *errstr, int len);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
index 63b58ce98ff4..1a28ef97a3d1 100644
--- a/fs/9p/trans_fd.c
+++ b/fs/9p/trans_fd.c
@@ -3,6 +3,7 @@
  *
  * File Descriptor Transport Layer
  *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
@@ -106,9 +107,6 @@ v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
 		return -ENOPROTOOPT;
 	}
 
-	sema_init(&trans->writelock, 1);
-	sema_init(&trans->readlock, 1);
-
 	ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
 
 	if (!ts)
@@ -148,12 +146,12 @@ static void v9fs_fd_close(struct v9fs_transport *trans)
 	if (!trans)
 		return;
 
-	trans->status = Disconnected;
-	ts = trans->priv;
+	ts = xchg(&trans->priv, NULL);
 
 	if (!ts)
 		return;
 
+	trans->status = Disconnected;
 	if (ts->in_file)
 		fput(ts->in_file);
 
@@ -163,10 +161,55 @@ static void v9fs_fd_close(struct v9fs_transport *trans)
 	kfree(ts);
 }
 
+static unsigned int
+v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
+{
+	int ret, n;
+	struct v9fs_trans_fd *ts;
+	mm_segment_t oldfs;
+
+	if (!trans)
+		return -EIO;
+
+	ts = trans->priv;
+	if (trans->status != Connected || !ts)
+		return -EIO;
+
+	oldfs = get_fs();
+	set_fs(get_ds());
+
+	if (!ts->in_file->f_op || !ts->in_file->f_op->poll) {
+		ret = -EIO;
+		goto end;
+	}
+
+	ret = ts->in_file->f_op->poll(ts->in_file, pt);
+
+	if (ts->out_file != ts->in_file) {
+		if (!ts->out_file->f_op || !ts->out_file->f_op->poll) {
+			ret = -EIO;
+			goto end;
+		}
+
+		n = ts->out_file->f_op->poll(ts->out_file, pt);
+
+		ret &= ~POLLOUT;
+		n &= ~POLLIN;
+
+		ret |= n;
+	}
+
+end:
+	set_fs(oldfs);
+	return ret;
+}
+
+
 struct v9fs_transport v9fs_trans_fd = {
 	.init = v9fs_fd_init,
 	.write = v9fs_fd_send,
 	.read = v9fs_fd_recv,
 	.close = v9fs_fd_close,
+	.poll = v9fs_fd_poll,
 };
 
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
index 6a9a75d40f73..44e830697acb 100644
--- a/fs/9p/trans_sock.c
+++ b/fs/9p/trans_sock.c
@@ -3,6 +3,7 @@
  *
  * Socket Transport Layer
  *
+ *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
  *  Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
@@ -36,6 +37,7 @@
 #include <asm/uaccess.h>
 #include <linux/inet.h>
 #include <linux/idr.h>
+#include <linux/file.h>
 
 #include "debug.h"
 #include "v9fs.h"
@@ -45,6 +47,7 @@
 
 struct v9fs_trans_sock {
 	struct socket *s;
+	struct file *filp;
 };
 
 /**
@@ -57,41 +60,26 @@ struct v9fs_trans_sock {
 
 static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
 {
-	struct msghdr msg;
-	struct kvec iov;
-	int result;
-	mm_segment_t oldfs;
-	struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+	int ret;
+	struct v9fs_trans_sock *ts;
 
-	if (trans->status == Disconnected)
+	if (!trans || trans->status == Disconnected) {
+		dprintk(DEBUG_ERROR, "disconnected ...\n");
 		return -EREMOTEIO;
+	}
 
-	result = -EINVAL;
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-
-	iov.iov_base = v;
-	iov.iov_len = len;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_iovlen = 1;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_namelen = 0;
-	msg.msg_flags = MSG_NOSIGNAL;
+	ts = trans->priv;
 
-	result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0);
+	if (!(ts->filp->f_flags & O_NONBLOCK))
+		dprintk(DEBUG_ERROR, "blocking read ...\n");
 
-	dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state);
-	set_fs(oldfs);
-
-	if (result <= 0) {
-		if (result != -ERESTARTSYS)
+	ret = kernel_read(ts->filp, ts->filp->f_pos, v, len);
+	if (ret <= 0) {
+		if (ret != -ERESTARTSYS && ret != -EAGAIN)
 			trans->status = Disconnected;
 	}
 
-	return result;
+	return ret;
 }
 
 /**
@@ -104,40 +92,72 @@ static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
 
 static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
 {
-	struct kvec iov;
-	struct msghdr msg;
-	int result = -1;
+	int ret;
 	mm_segment_t oldfs;
-	struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+	struct v9fs_trans_sock *ts;
 
-	dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len);
-	dump_data(v, len);
+	if (!trans || trans->status == Disconnected) {
+		dprintk(DEBUG_ERROR, "disconnected ...\n");
+		return -EREMOTEIO;
+	}
+
+	ts = trans->priv;
+	if (!ts) {
+		dprintk(DEBUG_ERROR, "no transport ...\n");
+		return -EREMOTEIO;
+	}
 
-	down(&trans->writelock);
+	if (!(ts->filp->f_flags & O_NONBLOCK))
+		dprintk(DEBUG_ERROR, "blocking write ...\n");
 
 	oldfs = get_fs();
 	set_fs(get_ds());
-	iov.iov_base = v;
-	iov.iov_len = len;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_iovlen = 1;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_namelen = 0;
-	msg.msg_flags = MSG_NOSIGNAL;
-	result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
+	ret = vfs_write(ts->filp, (void __user *)v, len, &ts->filp->f_pos);
 	set_fs(oldfs);
 
-	if (result < 0) {
-		if (result != -ERESTARTSYS)
+	if (ret < 0) {
+		if (ret != -ERESTARTSYS)
 			trans->status = Disconnected;
 	}
 
-	up(&trans->writelock);
-	return result;
+	return ret;
+}
+
+static unsigned int v9fs_sock_poll(struct v9fs_transport *trans,
+	struct poll_table_struct *pt) {
+
+	int ret;
+	struct v9fs_trans_sock *ts;
+	mm_segment_t oldfs;
+
+	if (!trans) {
+		dprintk(DEBUG_ERROR, "no transport\n");
+		return -EIO;
+	}
+
+	ts = trans->priv;
+	if (trans->status != Connected || !ts) {
+		dprintk(DEBUG_ERROR, "transport disconnected: %d\n", trans->status);
+		return -EIO;
+	}
+
+	oldfs = get_fs();
+	set_fs(get_ds());
+
+	if (!ts->filp->f_op || !ts->filp->f_op->poll) {
+		dprintk(DEBUG_ERROR, "no poll operation\n");
+		ret = -EIO;
+		goto end;
+	}
+
+	ret = ts->filp->f_op->poll(ts->filp, pt);
+
+end:
+	set_fs(oldfs);
+	return ret;
 }
 
+
 /**
  * v9fs_tcp_init - initialize TCP socket
  * @v9ses: session information
@@ -154,9 +174,9 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
 	int rc = 0;
 	struct v9fs_trans_sock *ts = NULL;
 	struct v9fs_transport *trans = v9ses->transport;
+	int fd;
 
-	sema_init(&trans->writelock, 1);
-	sema_init(&trans->readlock, 1);
+	trans->status = Disconnected;
 
 	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
 
@@ -165,6 +185,7 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
 
 	trans->priv = ts;
 	ts->s = NULL;
+	ts->filp = NULL;
 
 	if (!addr)
 		return -EINVAL;
@@ -185,7 +206,18 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
 		return rc;
 	}
 	csocket->sk->sk_allocation = GFP_NOIO;
+
+	fd = sock_map_fd(csocket);
+	if (fd < 0) {
+		sock_release(csocket);
+		kfree(ts);
+		trans->priv = NULL;
+		return fd;
+	}
+
 	ts->s = csocket;
+	ts->filp = fget(fd);
+	ts->filp->f_flags |= O_NONBLOCK;
 	trans->status = Connected;
 
 	return 0;
@@ -203,7 +235,7 @@ static int
 v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
 	       char *data)
 {
-	int rc;
+	int rc, fd;
 	struct socket *csocket;
 	struct sockaddr_un sun_server;
 	struct v9fs_transport *trans;
@@ -213,6 +245,8 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
 	csocket = NULL;
 	trans = v9ses->transport;
 
+	trans->status = Disconnected;
+
 	if (strlen(dev_name) > UNIX_PATH_MAX) {
 		eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
 			dev_name);
@@ -225,9 +259,7 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
 
 	trans->priv = ts;
 	ts->s = NULL;
-
-	sema_init(&trans->writelock, 1);
-	sema_init(&trans->readlock, 1);
+	ts->filp = NULL;
 
 	sun_server.sun_family = PF_UNIX;
 	strcpy(sun_server.sun_path, dev_name);
@@ -241,7 +273,18 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
 		return rc;
 	}
 	csocket->sk->sk_allocation = GFP_NOIO;
+
+	fd = sock_map_fd(csocket);
+	if (fd < 0) {
+		sock_release(csocket);
+		kfree(ts);
+		trans->priv = NULL;
+		return fd;
+	}
+
 	ts->s = csocket;
+	ts->filp = fget(fd);
+	ts->filp->f_flags |= O_NONBLOCK;
 	trans->status = Connected;
 
 	return 0;
@@ -262,12 +305,11 @@ static void v9fs_sock_close(struct v9fs_transport *trans)
 
 	ts = trans->priv;
 
-	if ((ts) && (ts->s)) {
-		dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s);
-		sock_release(ts->s);
+	if ((ts) && (ts->filp)) {
+		fput(ts->filp);
+		ts->filp = NULL;
 		ts->s = NULL;
 		trans->status = Disconnected;
-		dprintk(DEBUG_TRANS, "socket closed\n");
 	}
 
 	kfree(ts);
@@ -280,6 +322,7 @@ struct v9fs_transport v9fs_trans_tcp = {
 	.write = v9fs_sock_send,
 	.read = v9fs_sock_recv,
 	.close = v9fs_sock_close,
+	.poll = v9fs_sock_poll,
 };
 
 struct v9fs_transport v9fs_trans_unix = {
@@ -287,4 +330,5 @@ struct v9fs_transport v9fs_trans_unix = {
 	.write = v9fs_sock_send,
 	.read = v9fs_sock_recv,
 	.close = v9fs_sock_close,
+	.poll = v9fs_sock_poll,
 };
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
index 9e9cd418efd5..91fcdb94b361 100644
--- a/fs/9p/transport.h
+++ b/fs/9p/transport.h
@@ -3,6 +3,7 @@
  *
  * Transport Definition
  *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
@@ -31,14 +32,13 @@ enum v9fs_transport_status {
 
 struct v9fs_transport {
 	enum v9fs_transport_status status;
-	struct semaphore writelock;
-	struct semaphore readlock;
 	void *priv;
 
 	int (*init) (struct v9fs_session_info *, const char *, char *);
 	int (*write) (struct v9fs_transport *, void *, int);
 	int (*read) (struct v9fs_transport *, void *, int);
 	void (*close) (struct v9fs_transport *);
+	unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *);
 };
 
 extern struct v9fs_transport v9fs_trans_tcp;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 418c3743fdee..5250c428fc1f 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -37,7 +37,6 @@
 #include "v9fs_vfs.h"
 #include "transport.h"
 #include "mux.h"
-#include "conv.h"
 
 /* TODO: sysfs or debugfs interface */
 int v9fs_debug_level = 0;	/* feature-rific global debug level  */
@@ -213,7 +212,8 @@ retry:
 		return -1;
 	}
 
-	error = idr_get_new(&p->pool, NULL, &i);
+	/* no need to store exactly p, we just need something non-null */
+	error = idr_get_new(&p->pool, p, &i);
 	up(&p->lock);
 
 	if (error == -EAGAIN)
@@ -243,6 +243,16 @@ void v9fs_put_idpool(int id, struct v9fs_idpool *p)
 }
 
 /**
+ * v9fs_check_idpool - check if the specified id is available
+ * @id - id to check
+ * @p - pool
+ */
+int v9fs_check_idpool(int id, struct v9fs_idpool *p)
+{
+	return idr_find(&p->pool, id) != NULL;
+}
+
+/**
  * v9fs_session_init - initialize session
  * @v9ses: session information structure
  * @dev_name: device being mounted
@@ -259,6 +269,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 	int n = 0;
 	int newfid = -1;
 	int retval = -EINVAL;
+	struct v9fs_str *version;
 
 	v9ses->name = __getname();
 	if (!v9ses->name)
@@ -281,9 +292,6 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 	/* id pools that are session-dependent: FIDs and TIDs */
 	idr_init(&v9ses->fidpool.pool);
 	init_MUTEX(&v9ses->fidpool.lock);
-	idr_init(&v9ses->tidpool.pool);
-	init_MUTEX(&v9ses->tidpool.lock);
-
 
 	switch (v9ses->proto) {
 	case PROTO_TCP:
@@ -320,7 +328,12 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 	v9ses->shutdown = 0;
 	v9ses->session_hung = 0;
 
-	if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) {
+	v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ,
+		&v9ses->extended);
+
+	if (IS_ERR(v9ses->mux)) {
+		retval = PTR_ERR(v9ses->mux);
+		v9ses->mux = NULL;
 		dprintk(DEBUG_ERROR, "problem initializing mux\n");
 		goto SessCleanUp;
 	}
@@ -339,13 +352,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 			goto FreeFcall;
 		}
 
-		/* Really should check for 9P1 and report error */
-		if (!strcmp(fcall->params.rversion.version, "9P2000.u")) {
+		version = &fcall->params.rversion.version;
+		if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) {
 			dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
 			v9ses->extended = 1;
-		} else {
+		} else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) {
 			dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
 			v9ses->extended = 0;
+		} else {
+			retval = -EREMOTEIO;
+			goto FreeFcall;
 		}
 
 		n = fcall->params.rversion.msize;
@@ -381,7 +397,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 	}
 
 	if (v9ses->afid != ~0) {
-		if (v9fs_t_clunk(v9ses, v9ses->afid, NULL))
+		if (v9fs_t_clunk(v9ses, v9ses->afid))
 			dprintk(DEBUG_ERROR, "clunk failed\n");
 	}
 
@@ -403,13 +419,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 
 void v9fs_session_close(struct v9fs_session_info *v9ses)
 {
-	if (v9ses->recvproc) {
-		send_sig(SIGKILL, v9ses->recvproc, 1);
-		wait_for_completion(&v9ses->proccmpl);
+	if (v9ses->mux) {
+		v9fs_mux_destroy(v9ses->mux);
+		v9ses->mux = NULL;
 	}
 
-	if (v9ses->transport)
+	if (v9ses->transport) {
 		v9ses->transport->close(v9ses->transport);
+		kfree(v9ses->transport);
+		v9ses->transport = NULL;
+	}
 
 	__putname(v9ses->name);
 	__putname(v9ses->remotename);
@@ -420,8 +439,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
  * 	and cancel all pending requests.
  */
 void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
+	dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses);
 	v9ses->transport->status = Disconnected;
-	v9fs_mux_cancel_requests(v9ses, -EIO);
+	v9fs_mux_cancel(v9ses->mux, -EIO);
 }
 
 extern int v9fs_error_init(void);
@@ -433,11 +453,17 @@ extern int v9fs_error_init(void);
 
 static int __init init_v9fs(void)
 {
+	int ret;
+
 	v9fs_error_init();
 
 	printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
 
-	return register_filesystem(&v9fs_fs_type);
+	ret = v9fs_mux_global_init();
+	if (!ret)
+		ret = register_filesystem(&v9fs_fs_type);
+
+	return ret;
 }
 
 /**
@@ -447,6 +473,7 @@ static int __init init_v9fs(void)
 
 static void __exit exit_v9fs(void)
 {
+	v9fs_mux_global_exit();
 	unregister_filesystem(&v9fs_fs_type);
 }
 
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 45dcef42bdd6..f337da7a0eec 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -57,24 +57,14 @@ struct v9fs_session_info {
 
 	/* book keeping */
 	struct v9fs_idpool fidpool;	/* The FID pool for file descriptors */
-	struct v9fs_idpool tidpool;	/* The TID pool for transactions ids */
 
-	/* transport information */
 	struct v9fs_transport *transport;
+	struct v9fs_mux_data *mux;
 
 	int inprogress;		/* session in progress => true */
 	int shutdown;		/* session shutting down. no more attaches. */
 	unsigned char session_hung;
-
-	/* mux private data */
-	struct v9fs_fcall *curfcall;
-	wait_queue_head_t read_wait;
-	struct completion fcread;
-	struct completion proccmpl;
-	struct task_struct *recvproc;
-
-	spinlock_t muxlock;
-	struct list_head mux_fcalls;
+	struct dentry *debugfs_dir;
 };
 
 /* possible values of ->proto */
@@ -84,11 +74,14 @@ enum {
 	PROTO_FD,
 };
 
+extern struct dentry *v9fs_debugfs_root;
+
 int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
 struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
 void v9fs_session_close(struct v9fs_session_info *v9ses);
 int v9fs_get_idpool(struct v9fs_idpool *p);
 void v9fs_put_idpool(int id, struct v9fs_idpool *p);
+int v9fs_check_idpool(int id, struct v9fs_idpool *p);
 void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 
 #define V9FS_MAGIC 0x01021997
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 2f2cea7ee3e7..c78502ad00ed 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -45,9 +45,8 @@ extern struct dentry_operations v9fs_dentry_operations;
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
 ino_t v9fs_qid2ino(struct v9fs_qid *qid);
-void v9fs_mistat2inode(struct v9fs_stat *, struct inode *,
-		       struct super_block *);
+void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
-void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat);
+void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat);
 void v9fs_dentry_release(struct dentry *);
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index a6aa947de0f9..2dd806dac9f1 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -40,7 +40,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "conv.h"
 #include "fid.h"
 
 /**
@@ -95,24 +94,22 @@ static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
 
 void v9fs_dentry_release(struct dentry *dentry)
 {
+	int err;
+
 	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
 
 	if (dentry->d_fsdata != NULL) {
 		struct list_head *fid_list = dentry->d_fsdata;
 		struct v9fs_fid *temp = NULL;
 		struct v9fs_fid *current_fid = NULL;
-		struct v9fs_fcall *fcall = NULL;
 
 		list_for_each_entry_safe(current_fid, temp, fid_list, list) {
-			if (v9fs_t_clunk
-			    (current_fid->v9ses, current_fid->fid, &fcall))
-				dprintk(DEBUG_ERROR, "clunk failed: %s\n",
-					FCALL_ERROR(fcall));
+			err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid);
 
-			v9fs_put_idpool(current_fid->fid,
-					&current_fid->v9ses->fidpool);
+			if (err < 0)
+				dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n",
+					err, dentry->d_iname);
 
-			kfree(fcall);
 			v9fs_fid_destroy(current_fid);
 		}
 
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 57a43b8feef5..ae6d032b9b59 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -37,8 +37,8 @@
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
-#include "v9fs_vfs.h"
 #include "conv.h"
+#include "v9fs_vfs.h"
 #include "fid.h"
 
 /**
@@ -74,20 +74,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
 	struct v9fs_fid *file = filp->private_data;
-	unsigned int i, n;
+	unsigned int i, n, s;
 	int fid = -1;
 	int ret = 0;
-	struct v9fs_stat *mi = NULL;
+	struct v9fs_stat stat;
 	int over = 0;
 
 	dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
 
 	fid = file->fid;
 
-	mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
-	if (!mi)
-		return -ENOMEM;
-
 	if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
 		kfree(file->rdir_fcall);
 		file->rdir_fcall = NULL;
@@ -97,20 +93,20 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		n = file->rdir_fcall->params.rread.count;
 		i = file->rdir_fpos;
 		while (i < n) {
-			int s = v9fs_deserialize_stat(v9ses,
-				  file->rdir_fcall->params.rread.data + i,
-			          n - i, mi, v9ses->maxdata);
+			s = v9fs_deserialize_stat(
+				file->rdir_fcall->params.rread.data + i,
+				n - i, &stat, v9ses->extended);
 
 			if (s == 0) {
 				dprintk(DEBUG_ERROR,
-					"error while deserializing mistat\n");
+					"error while deserializing stat\n");
 				ret = -EIO;
 				goto FreeStructs;
 			}
 
-			over = filldir(dirent, mi->name, strlen(mi->name),
-				    filp->f_pos, v9fs_qid2ino(&mi->qid),
-				    dt_type(mi));
+			over = filldir(dirent, stat.name.str, stat.name.len,
+				    filp->f_pos, v9fs_qid2ino(&stat.qid),
+				    dt_type(&stat));
 
 			if (over) {
 				file->rdir_fpos = i;
@@ -130,7 +126,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 	while (!over) {
 		ret = v9fs_t_read(v9ses, fid, filp->f_pos,
-					    v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
+			v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
 		if (ret < 0) {
 			dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
 				ret, fcall);
@@ -141,19 +137,18 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		n = ret;
 		i = 0;
 		while (i < n) {
-			int s = v9fs_deserialize_stat(v9ses,
-			          fcall->params.rread.data + i, n - i, mi,
-			          v9ses->maxdata);
+			s = v9fs_deserialize_stat(fcall->params.rread.data + i,
+				n - i, &stat, v9ses->extended);
 
 			if (s == 0) {
 				dprintk(DEBUG_ERROR,
-					"error while deserializing mistat\n");
+					"error while deserializing stat\n");
 				return -EIO;
 			}
 
-			over = filldir(dirent, mi->name, strlen(mi->name),
-				    filp->f_pos, v9fs_qid2ino(&mi->qid),
-				    dt_type(mi));
+			over = filldir(dirent, stat.name.str, stat.name.len,
+				    filp->f_pos, v9fs_qid2ino(&stat.qid),
+				    dt_type(&stat));
 
 			if (over) {
 				file->rdir_fcall = fcall;
@@ -172,7 +167,6 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
       FreeStructs:
 	kfree(fcall);
-	kfree(mi);
 	return ret;
 }
 
@@ -193,18 +187,15 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
 		fid->fid);
 	fidnum = fid->fid;
 
-	filemap_fdatawrite(inode->i_mapping);
-	filemap_fdatawait(inode->i_mapping);
+	filemap_write_and_wait(inode->i_mapping);
 
 	if (fidnum >= 0) {
 		dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
 			fid->fid);
 
-		if (v9fs_t_clunk(v9ses, fidnum, NULL))
+		if (v9fs_t_clunk(v9ses, fidnum))
 			dprintk(DEBUG_ERROR, "clunk failed\n");
 
-		v9fs_put_idpool(fid->fid, &v9ses->fidpool);
-
 		kfree(fid->rdir_fcall);
 		kfree(fid);
 
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 89c849da8504..6852f0eb96ed 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -32,6 +32,7 @@
 #include <linux/string.h>
 #include <linux/smp_lock.h>
 #include <linux/inet.h>
+#include <linux/version.h>
 #include <linux/list.h>
 #include <asm/uaccess.h>
 #include <linux/idr.h>
@@ -117,9 +118,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 
 		result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
 		if (result < 0) {
-			dprintk(DEBUG_ERROR,
-				"open failed, open_mode 0x%x: %s\n", open_mode,
-				FCALL_ERROR(fcall));
+			PRINT_FCALL_ERROR("open failed", fcall);
 			kfree(fcall);
 			return result;
 		}
@@ -165,8 +164,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
 		return -ENOLCK;
 
 	if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
-		filemap_fdatawrite(inode->i_mapping);
-		filemap_fdatawait(inode->i_mapping);
+		filemap_write_and_wait(inode->i_mapping);
 		invalidate_inode_pages(&inode->i_data);
 	}
 
@@ -257,7 +255,6 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	int result = -EIO;
 	int rsize = 0;
 	int total = 0;
-	char *buf;
 
 	dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
 		(int)*offset);
@@ -265,28 +262,14 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	if (v9fid->iounit != 0 && rsize > v9fid->iounit)
 		rsize = v9fid->iounit;
 
-	buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
 	do {
 		if (count < rsize)
 			rsize = count;
 
-		result = copy_from_user(buf, data, rsize);
-		if (result) {
-			dprintk(DEBUG_ERROR, "Problem copying from user\n");
-			kfree(buf);
-			return -EFAULT;
-		}
-
-		dump_data(buf, rsize);
-		result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall);
+		result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall);
 		if (result < 0) {
-			eprintk(KERN_ERR, "error while writing: %s(%d)\n",
-				FCALL_ERROR(fcall), result);
+			PRINT_FCALL_ERROR("error while writing", fcall);
 			kfree(fcall);
-			kfree(buf);
 			return result;
 		} else
 			*offset += result;
@@ -306,7 +289,6 @@ v9fs_file_write(struct file *filp, const char __user * data,
 		total += result;
 	} while (count);
 
-	kfree(buf);
 	return total;
 }
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 0ea965c3bb7d..a17b28854288 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -40,7 +40,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "conv.h"
 #include "fid.h"
 
 static struct inode_operations v9fs_dir_inode_operations;
@@ -127,100 +126,32 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 }
 
 /**
- * v9fs_blank_mistat - helper function to setup a 9P stat structure
+ * v9fs_blank_wstat - helper function to setup a 9P stat structure
  * @v9ses: 9P session info (for determining extended mode)
- * @mistat: structure to initialize
+ * @wstat: structure to initialize
  *
  */
 
 static void
-v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
+v9fs_blank_wstat(struct v9fs_wstat *wstat)
 {
-	mistat->type = ~0;
-	mistat->dev = ~0;
-	mistat->qid.type = ~0;
-	mistat->qid.version = ~0;
-	*((long long *)&mistat->qid.path) = ~0;
-	mistat->mode = ~0;
-	mistat->atime = ~0;
-	mistat->mtime = ~0;
-	mistat->length = ~0;
-	mistat->name = mistat->data;
-	mistat->uid = mistat->data;
-	mistat->gid = mistat->data;
-	mistat->muid = mistat->data;
-	if (v9ses->extended) {
-		mistat->n_uid = ~0;
-		mistat->n_gid = ~0;
-		mistat->n_muid = ~0;
-		mistat->extension = mistat->data;
-	}
-	*mistat->data = 0;
-}
-
-/**
- * v9fs_mistat2unix - convert mistat to unix stat
- * @mistat: Plan 9 metadata (mistat) structure
- * @buf: unix metadata (stat) structure to populate
- * @sb: superblock
- *
- */
-
-static void
-v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
-		 struct super_block *sb)
-{
-	struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL;
-
-	buf->st_nlink = 1;
-
-	buf->st_atime = mistat->atime;
-	buf->st_mtime = mistat->mtime;
-	buf->st_ctime = mistat->mtime;
-
-	buf->st_uid = (unsigned short)-1;
-	buf->st_gid = (unsigned short)-1;
-
-	if (v9ses && v9ses->extended) {
-		/* TODO: string to uid mapping via user-space daemon */
-		if (mistat->n_uid != -1)
-			sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
-
-		if (mistat->n_gid != -1)
-			sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid);
-	}
-
-	if (buf->st_uid == (unsigned short)-1)
-		buf->st_uid = v9ses->uid;
-	if (buf->st_gid == (unsigned short)-1)
-		buf->st_gid = v9ses->gid;
-
-	buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
-	if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
-		char type = 0;
-		int major = -1;
-		int minor = -1;
-		sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
-		switch (type) {
-		case 'c':
-			buf->st_mode &= ~S_IFBLK;
-			buf->st_mode |= S_IFCHR;
-			break;
-		case 'b':
-			break;
-		default:
-			dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
-				type, mistat->extension);
-		};
-		buf->st_rdev = MKDEV(major, minor);
-	} else
-		buf->st_rdev = 0;
-
-	buf->st_size = mistat->length;
-
-	buf->st_blksize = sb->s_blocksize;
-	buf->st_blocks =
-	    (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
+	wstat->type = ~0;
+	wstat->dev = ~0;
+	wstat->qid.type = ~0;
+	wstat->qid.version = ~0;
+	*((long long *)&wstat->qid.path) = ~0;
+	wstat->mode = ~0;
+	wstat->atime = ~0;
+	wstat->mtime = ~0;
+	wstat->length = ~0;
+	wstat->name = NULL;
+	wstat->uid = NULL;
+	wstat->gid = NULL;
+	wstat->muid = NULL;
+	wstat->n_uid = ~0;
+	wstat->n_gid = ~0;
+	wstat->n_muid = ~0;
+	wstat->extension = NULL;
 }
 
 /**
@@ -312,12 +243,12 @@ v9fs_create(struct inode *dir,
 	struct inode *file_inode = NULL;
 	struct v9fs_fcall *fcall = NULL;
 	struct v9fs_qid qid;
-	struct stat newstat;
 	int dirfidnum = -1;
 	long newfid = -1;
 	int result = 0;
 	unsigned int iounit = 0;
 	int wfidno = -1;
+	int err;
 
 	perm = unixmode2p9mode(v9ses, perm);
 
@@ -349,57 +280,64 @@ v9fs_create(struct inode *dir,
 
 	result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
 	if (result < 0) {
-		dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+		PRINT_FCALL_ERROR("clone error", fcall);
 		v9fs_put_idpool(newfid, &v9ses->fidpool);
 		newfid = -1;
 		goto CleanUpFid;
 	}
 
 	kfree(fcall);
+	fcall = NULL;
 
 	result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
 			       perm, open_mode, &fcall);
 	if (result < 0) {
-		dprintk(DEBUG_ERROR, "create fails: %s(%d)\n",
-			FCALL_ERROR(fcall), result);
-
+		PRINT_FCALL_ERROR("create fails", fcall);
 		goto CleanUpFid;
 	}
 
 	iounit = fcall->params.rcreate.iounit;
 	qid = fcall->params.rcreate.qid;
 	kfree(fcall);
+	fcall = NULL;
 
-	fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1);
-	dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate);
-	if (!fid) {
-		result = -ENOMEM;
-		goto CleanUpFid;
-	}
+	if (!(perm&V9FS_DMDIR)) {
+		fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1);
+		dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate);
+		if (!fid) {
+			result = -ENOMEM;
+			goto CleanUpFid;
+		}
 
-	fid->qid = qid;
-	fid->iounit = iounit;
+		fid->qid = qid;
+		fid->iounit = iounit;
+	} else {
+		err = v9fs_t_clunk(v9ses, newfid);
+		newfid = -1;
+		if (err < 0)
+			dprintk(DEBUG_ERROR, "clunk for mkdir failed: %d\n", err);
+	}
 
 	/* walk to the newly created file and put the fid in the dentry */
 	wfidno = v9fs_get_idpool(&v9ses->fidpool);
-	if (newfid < 0) {
+	if (wfidno < 0) {
 		eprintk(KERN_WARNING, "no free fids available\n");
 		return -ENOSPC;
 	}
 
 	result = v9fs_t_walk(v9ses, dirfidnum, wfidno,
-		(char *) file_dentry->d_name.name, NULL);
+		(char *) file_dentry->d_name.name, &fcall);
 	if (result < 0) {
-		dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+		PRINT_FCALL_ERROR("clone error", fcall);
 		v9fs_put_idpool(wfidno, &v9ses->fidpool);
 		wfidno = -1;
 		goto CleanUpFid;
 	}
+	kfree(fcall);
+	fcall = NULL;
 
 	if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) {
-		if (!v9fs_t_clunk(v9ses, newfid, &fcall)) {
-			v9fs_put_idpool(wfidno, &v9ses->fidpool);
-		}
+		v9fs_put_idpool(wfidno, &v9ses->fidpool);
 
 		goto CleanUpFid;
 	}
@@ -409,62 +347,43 @@ v9fs_create(struct inode *dir,
 	    (perm & V9FS_DMDEVICE))
 		return 0;
 
-	result = v9fs_t_stat(v9ses, newfid, &fcall);
+	result = v9fs_t_stat(v9ses, wfidno, &fcall);
 	if (result < 0) {
-		dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall),
-			result);
+		PRINT_FCALL_ERROR("stat error", fcall);
 		goto CleanUpFid;
 	}
 
-	v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
 
-	file_inode = v9fs_get_inode(sb, newstat.st_mode);
+	file_inode = v9fs_get_inode(sb,
+		p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode));
+
 	if ((!file_inode) || IS_ERR(file_inode)) {
 		dprintk(DEBUG_ERROR, "create inode failed\n");
 		result = -EBADF;
 		goto CleanUpFid;
 	}
 
-	v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb);
+	v9fs_stat2inode(&fcall->params.rstat.stat, file_inode, sb);
 	kfree(fcall);
 	fcall = NULL;
 	file_dentry->d_op = &v9fs_dentry_operations;
 	d_instantiate(file_dentry, file_inode);
 
-	if (perm & V9FS_DMDIR) {
-		if (!v9fs_t_clunk(v9ses, newfid, &fcall))
-			v9fs_put_idpool(newfid, &v9ses->fidpool);
-		else
-			dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
-				FCALL_ERROR(fcall));
-		kfree(fcall);
-		fid->fidopen = 0;
-		fid->fidcreate = 0;
-		d_drop(file_dentry);
-	}
-
 	return 0;
 
       CleanUpFid:
 	kfree(fcall);
+	fcall = NULL;
 
 	if (newfid >= 0) {
-		if (!v9fs_t_clunk(v9ses, newfid, &fcall))
-			v9fs_put_idpool(newfid, &v9ses->fidpool);
-		else
-			dprintk(DEBUG_ERROR, "clunk failed: %s\n",
-				FCALL_ERROR(fcall));
-
-		kfree(fcall);
+ 		err = v9fs_t_clunk(v9ses, newfid);
+ 		if (err < 0)
+ 			dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
 	}
 	if (wfidno >= 0) {
-		if (!v9fs_t_clunk(v9ses, wfidno, &fcall))
-			v9fs_put_idpool(wfidno, &v9ses->fidpool);
-		else
-			dprintk(DEBUG_ERROR, "clunk failed: %s\n",
-				FCALL_ERROR(fcall));
-
-		kfree(fcall);
+ 		err = v9fs_t_clunk(v9ses, wfidno);
+ 		if (err < 0)
+ 			dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
 	}
 	return result;
 }
@@ -509,10 +428,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 	}
 
 	result = v9fs_t_remove(v9ses, fid, &fcall);
-	if (result < 0)
-		dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n",
-			FCALL_ERROR(fcall), result);
-	else {
+	if (result < 0) {
+		PRINT_FCALL_ERROR("remove fails", fcall);
+	} else {
 		v9fs_put_idpool(fid, &v9ses->fidpool);
 		v9fs_fid_destroy(v9fid);
 	}
@@ -567,7 +485,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct v9fs_fid *fid;
 	struct inode *inode;
 	struct v9fs_fcall *fcall = NULL;
-	struct stat newstat;
 	int dirfidnum = -1;
 	int newfid = -1;
 	int result = 0;
@@ -620,8 +537,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 		goto FreeFcall;
 	}
 
-	v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
-	inode = v9fs_get_inode(sb, newstat.st_mode);
+	inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses,
+		fcall->params.rstat.stat.mode));
 
 	if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
 		eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
@@ -631,7 +548,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 		goto FreeFcall;
 	}
 
-	inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+	inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid);
 
 	fid = v9fs_fid_create(dentry, v9ses, newfid, 0);
 	if (fid == NULL) {
@@ -640,10 +557,10 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 		goto FreeFcall;
 	}
 
-	fid->qid = fcall->params.rstat.stat->qid;
+	fid->qid = fcall->params.rstat.stat.qid;
 
 	dentry->d_op = &v9fs_dentry_operations;
-	v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb);
+	v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
 
 	d_add(dentry, inode);
 	kfree(fcall);
@@ -699,7 +616,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	    v9fs_fid_lookup(old_dentry->d_parent);
 	struct v9fs_fid *newdirfid =
 	    v9fs_fid_lookup(new_dentry->d_parent);
-	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	struct v9fs_wstat wstat;
 	struct v9fs_fcall *fcall = NULL;
 	int fid = -1;
 	int olddirfidnum = -1;
@@ -708,9 +625,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	dprintk(DEBUG_VFS, "\n");
 
-	if (!mistat)
-		return -ENOMEM;
-
 	if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
 		dprintk(DEBUG_ERROR, "problem with arguments\n");
 		return -EBADF;
@@ -734,33 +648,22 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto FreeFcallnBail;
 	}
 
-	v9fs_blank_mistat(v9ses, mistat);
+	v9fs_blank_wstat(&wstat);
+	wstat.muid = v9ses->name;
+	wstat.name = (char *) new_dentry->d_name.name;
 
-	strcpy(mistat->data + 1, v9ses->name);
-	mistat->name = mistat->data + 1 + strlen(v9ses->name);
-
-	if (new_dentry->d_name.len >
-	    (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
-		dprintk(DEBUG_ERROR, "new name too long\n");
-		goto FreeFcallnBail;
-	}
-
-	strcpy(mistat->name, new_dentry->d_name.name);
-	retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
+	retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall);
 
       FreeFcallnBail:
-	kfree(mistat);
-
 	if (retval < 0)
-		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-			FCALL_ERROR(fcall));
+		PRINT_FCALL_ERROR("wstat error", fcall);
 
 	kfree(fcall);
 	return retval;
 }
 
 /**
- * v9fs_vfs_getattr - retreive file metadata
+ * v9fs_vfs_getattr - retrieve file metadata
  * @mnt - mount information
  * @dentry - file to get attributes on
  * @stat - metadata structure to populate
@@ -788,7 +691,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	if (err < 0)
 		dprintk(DEBUG_ERROR, "stat error\n");
 	else {
-		v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode,
+		v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode,
 				  dentry->d_inode->i_sb);
 		generic_fillattr(dentry->d_inode, stat);
 	}
@@ -809,57 +712,44 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
 	struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
 	struct v9fs_fcall *fcall = NULL;
-	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	struct v9fs_wstat wstat;
 	int res = -EPERM;
 
 	dprintk(DEBUG_VFS, "\n");
 
-	if (!mistat)
-		return -ENOMEM;
-
 	if (!fid) {
 		dprintk(DEBUG_ERROR,
 			"Couldn't find fid associated with dentry\n");
 		return -EBADF;
 	}
 
-	v9fs_blank_mistat(v9ses, mistat);
+	v9fs_blank_wstat(&wstat);
 	if (iattr->ia_valid & ATTR_MODE)
-		mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode);
+		wstat.mode = unixmode2p9mode(v9ses, iattr->ia_mode);
 
 	if (iattr->ia_valid & ATTR_MTIME)
-		mistat->mtime = iattr->ia_mtime.tv_sec;
+		wstat.mtime = iattr->ia_mtime.tv_sec;
 
 	if (iattr->ia_valid & ATTR_ATIME)
-		mistat->atime = iattr->ia_atime.tv_sec;
+		wstat.atime = iattr->ia_atime.tv_sec;
 
 	if (iattr->ia_valid & ATTR_SIZE)
-		mistat->length = iattr->ia_size;
+		wstat.length = iattr->ia_size;
 
 	if (v9ses->extended) {
-		char *ptr = mistat->data+1;
-
-		if (iattr->ia_valid & ATTR_UID) {
-			mistat->uid = ptr;
-			ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
-			mistat->n_uid = iattr->ia_uid;
-		}
+		if (iattr->ia_valid & ATTR_UID)
+			wstat.n_uid = iattr->ia_uid;
 
-		if (iattr->ia_valid & ATTR_GID) {
-			mistat->gid = ptr;
-			ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
-			mistat->n_gid = iattr->ia_gid;
-		}
+		if (iattr->ia_valid & ATTR_GID)
+			wstat.n_gid = iattr->ia_gid;
 	}
 
-	res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall);
+	res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
 
 	if (res < 0)
-		dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall));
+		PRINT_FCALL_ERROR("wstat error", fcall);
 
-	kfree(mistat);
 	kfree(fcall);
-
 	if (res >= 0)
 		res = inode_setattr(dentry->d_inode, iattr);
 
@@ -867,51 +757,47 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 }
 
 /**
- * v9fs_mistat2inode - populate an inode structure with mistat info
- * @mistat: Plan 9 metadata (mistat) structure
+ * v9fs_stat2inode - populate an inode structure with mistat info
+ * @stat: Plan 9 metadata (mistat) structure
  * @inode: inode to populate
  * @sb: superblock of filesystem
  *
  */
 
 void
-v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
-		  struct super_block *sb)
+v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
+	struct super_block *sb)
 {
+	int n;
+	char ext[32];
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
 
 	inode->i_nlink = 1;
 
-	inode->i_atime.tv_sec = mistat->atime;
-	inode->i_mtime.tv_sec = mistat->mtime;
-	inode->i_ctime.tv_sec = mistat->mtime;
+	inode->i_atime.tv_sec = stat->atime;
+	inode->i_mtime.tv_sec = stat->mtime;
+	inode->i_ctime.tv_sec = stat->mtime;
 
-	inode->i_uid = -1;
-	inode->i_gid = -1;
+	inode->i_uid = v9ses->uid;
+	inode->i_gid = v9ses->gid;
 
 	if (v9ses->extended) {
-		/* TODO: string to uid mapping via user-space daemon */
-		inode->i_uid = mistat->n_uid;
-		inode->i_gid = mistat->n_gid;
-
-		if (mistat->n_uid == -1)
-			sscanf(mistat->uid, "%x", &inode->i_uid);
-
-		if (mistat->n_gid == -1)
-			sscanf(mistat->gid, "%x", &inode->i_gid);
+		inode->i_uid = stat->n_uid;
+		inode->i_gid = stat->n_gid;
 	}
 
-	if (inode->i_uid == -1)
-		inode->i_uid = v9ses->uid;
-	if (inode->i_gid == -1)
-		inode->i_gid = v9ses->gid;
-
-	inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
+	inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
 	if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
 		char type = 0;
 		int major = -1;
 		int minor = -1;
-		sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+
+		n = stat->extension.len;
+		if (n > sizeof(ext)-1)
+			n = sizeof(ext)-1;
+		memmove(ext, stat->extension.str, n);
+		ext[n] = 0;
+		sscanf(ext, "%c %u %u", &type, &major, &minor);
 		switch (type) {
 		case 'c':
 			inode->i_mode &= ~S_IFBLK;
@@ -920,14 +806,14 @@ v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
 		case 'b':
 			break;
 		default:
-			dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
-				type, mistat->extension);
+			dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n",
+				type, stat->extension.len, stat->extension.str);
 		};
 		inode->i_rdev = MKDEV(major, minor);
 	} else
 		inode->i_rdev = 0;
 
-	inode->i_size = mistat->length;
+	inode->i_size = stat->length;
 
 	inode->i_blksize = sb->s_blocksize;
 	inode->i_blocks =
@@ -955,71 +841,6 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid)
 }
 
 /**
- * v9fs_vfs_symlink - helper function to create symlinks
- * @dir: directory inode containing symlink
- * @dentry: dentry for symlink
- * @symname: symlink data
- *
- * See 9P2000.u RFC for more information
- *
- */
-
-static int
-v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
-{
-	int retval = -EPERM;
-	struct v9fs_fid *newfid;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-	struct v9fs_fcall *fcall = NULL;
-	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
-
-	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
-		symname);
-
-	if (!mistat)
-		return -ENOMEM;
-
-	if (!v9ses->extended) {
-		dprintk(DEBUG_ERROR, "not extended\n");
-		goto FreeFcall;
-	}
-
-	/* issue a create */
-	retval = v9fs_create(dir, dentry, S_IFLNK, 0);
-	if (retval != 0)
-		goto FreeFcall;
-
-	newfid = v9fs_fid_lookup(dentry);
-
-	/* issue a twstat */
-	v9fs_blank_mistat(v9ses, mistat);
-	strcpy(mistat->data + 1, symname);
-	mistat->extension = mistat->data + 1;
-	retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
-	if (retval < 0) {
-		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-			FCALL_ERROR(fcall));
-		goto FreeFcall;
-	}
-
-	kfree(fcall);
-
-	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
-		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
-			FCALL_ERROR(fcall));
-		goto FreeFcall;
-	}
-
-	d_drop(dentry);		/* FID - will this also clunk? */
-
-      FreeFcall:
-	kfree(mistat);
-	kfree(fcall);
-
-	return retval;
-}
-
-/**
  * v9fs_readlink - read a symlink's location (internal version)
  * @dentry: dentry for symlink
  * @buffer: buffer to load symlink location into
@@ -1058,16 +879,17 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 	if (!fcall)
 		return -EIO;
 
-	if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) {
+	if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) {
 		retval = -EINVAL;
 		goto FreeFcall;
 	}
 
 	/* copy extension buffer into buffer */
-	if (strlen(fcall->params.rstat.stat->extension) < buflen)
-		buflen = strlen(fcall->params.rstat.stat->extension);
+	if (fcall->params.rstat.stat.extension.len < buflen)
+		buflen = fcall->params.rstat.stat.extension.len;
 
-	memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1);
+	memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
+	buffer[buflen-1] = 0;
 
 	retval = buflen;
 
@@ -1157,6 +979,77 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
 		__putname(s);
 }
 
+static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
+	int mode, const char *extension)
+{
+	int err, retval;
+	struct v9fs_session_info *v9ses;
+	struct v9fs_fcall *fcall;
+	struct v9fs_fid *fid;
+	struct v9fs_wstat wstat;
+
+	v9ses = v9fs_inode2v9ses(dir);
+	retval = -EPERM;
+	fcall = NULL;
+
+	if (!v9ses->extended) {
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto free_mem;
+	}
+
+	/* issue a create */
+	retval = v9fs_create(dir, dentry, mode, 0);
+	if (retval != 0)
+		goto free_mem;
+
+	fid = v9fs_fid_get_created(dentry);
+	if (!fid) {
+		dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
+		goto free_mem;
+	}
+
+	/* issue a Twstat */
+	v9fs_blank_wstat(&wstat);
+	wstat.muid = v9ses->name;
+	wstat.extension = (char *) extension;
+	retval = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
+	if (retval < 0) {
+		PRINT_FCALL_ERROR("wstat error", fcall);
+		goto free_mem;
+	}
+
+	err = v9fs_t_clunk(v9ses, fid->fid);
+	if (err < 0) {
+		dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
+		goto free_mem;
+	}
+
+	d_drop(dentry);		/* FID - will this also clunk? */
+
+free_mem:
+	kfree(fcall);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_symlink - helper function to create symlinks
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * See 9P2000.u RFC for more information
+ *
+ */
+
+static int
+v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+		symname);
+
+	return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
+}
+
 /**
  * v9fs_vfs_link - create a hardlink
  * @old_dentry: dentry for file to link to
@@ -1173,64 +1066,24 @@ static int
 v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 	      struct dentry *dentry)
 {
-	int retval = -EPERM;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-	struct v9fs_fcall *fcall = NULL;
-	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
-	struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
-	struct v9fs_fid *newfid = NULL;
-	char *symname = __getname();
+	int retval;
+	struct v9fs_fid *oldfid;
+	char *name;
 
 	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
 		old_dentry->d_name.name);
 
-	if (!v9ses->extended) {
-		dprintk(DEBUG_ERROR, "not extended\n");
-		goto FreeMem;
-	}
-
-	/* get fid of old_dentry */
-	sprintf(symname, "hardlink(%d)\n", oldfid->fid);
-
-	/* issue a create */
-	retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
-	if (retval != 0)
-		goto FreeMem;
-
-	newfid = v9fs_fid_lookup(dentry);
-	if (!newfid) {
-		dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
-		goto FreeMem;
-	}
-
-	/* issue a twstat */
-	v9fs_blank_mistat(v9ses, mistat);
-	strcpy(mistat->data + 1, symname);
-	mistat->extension = mistat->data + 1;
-	retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
-	if (retval < 0) {
-		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-			FCALL_ERROR(fcall));
-		goto FreeMem;
-	}
-
-	kfree(fcall);
-
-	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
-		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
-			FCALL_ERROR(fcall));
-		goto FreeMem;
+	oldfid = v9fs_fid_lookup(old_dentry);
+	if (!oldfid) {
+		dprintk(DEBUG_ERROR, "can't find oldfid\n");
+		return -EPERM;
 	}
 
-	d_drop(dentry);		/* FID - will this also clunk? */
-
-	kfree(fcall);
-	fcall = NULL;
+	name = __getname();
+	sprintf(name, "hardlink(%d)\n", oldfid->fid);
+	retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
+	__putname(name);
 
-      FreeMem:
-	kfree(mistat);
-	kfree(fcall);
-	__putname(symname);
 	return retval;
 }
 
@@ -1246,82 +1099,30 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 static int
 v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 {
-	int retval = -EPERM;
-	struct v9fs_fid *newfid;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-	struct v9fs_fcall *fcall = NULL;
-	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
-	char *symname = __getname();
+	int retval;
+	char *name;
 
 	dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
 		dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
 
-	if (!mistat)
-		return -ENOMEM;
-
-	if (!new_valid_dev(rdev)) {
-		retval = -EINVAL;
-		goto FreeMem;
-	}
-
-	if (!v9ses->extended) {
-		dprintk(DEBUG_ERROR, "not extended\n");
-		goto FreeMem;
-	}
-
-	/* issue a create */
-	retval = v9fs_create(dir, dentry, mode, 0);
-
-	if (retval != 0)
-		goto FreeMem;
-
-	newfid = v9fs_fid_lookup(dentry);
-	if (!newfid) {
-		dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
-		retval = -EINVAL;
-		goto FreeMem;
-	}
+	if (!new_valid_dev(rdev))
+		return -EINVAL;
 
+	name = __getname();
 	/* build extension */
 	if (S_ISBLK(mode))
-		sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev));
+		sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
 	else if (S_ISCHR(mode))
-		sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev));
+		sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
 	else if (S_ISFIFO(mode))
-		;	/* DO NOTHING */
+		*name = 0;
 	else {
-		retval = -EINVAL;
-		goto FreeMem;
-	}
-
-	if (!S_ISFIFO(mode)) {
-		/* issue a twstat */
-		v9fs_blank_mistat(v9ses, mistat);
-		strcpy(mistat->data + 1, symname);
-		mistat->extension = mistat->data + 1;
-		retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
-		if (retval < 0) {
-			dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-				FCALL_ERROR(fcall));
-			goto FreeMem;
-		}
+		__putname(name);
+		return -EINVAL;
 	}
 
-	/* need to update dcache so we show up */
-	kfree(fcall);
-
-	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
-		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
-			FCALL_ERROR(fcall));
-		goto FreeMem;
-	}
-
-	d_drop(dentry);		/* FID - will this also clunk? */
-
-      FreeMem:
-	kfree(mistat);
-	kfree(fcall);
-	__putname(symname);
+	retval = v9fs_vfs_mkspecial(dir, dentry, mode, name);
+	__putname(name);
 
 	return retval;
 }
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 82c5b0084079..2c4fa75be025 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -44,7 +44,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "conv.h"
 #include "fid.h"
 
 static void v9fs_clear_inode(struct inode *);
@@ -92,7 +91,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 	sb->s_op = &v9fs_super_ops;
 
 	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
-	    MS_NODIRATIME | MS_NOATIME;
+	    MS_NOATIME;
 }
 
 /**
@@ -123,12 +122,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type
 
 	dprintk(DEBUG_VFS, " \n");
 
-	v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL);
+	v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
 	if (!v9ses)
 		return ERR_PTR(-ENOMEM);
 
 	if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
 		dprintk(DEBUG_ERROR, "problem initiating session\n");
+		kfree(v9ses);
 		return ERR_PTR(newfid);
 	}
 
@@ -157,7 +157,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type
 	stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
 	if (stat_result < 0) {
 		dprintk(DEBUG_ERROR, "stat error\n");
-		v9fs_t_clunk(v9ses, newfid, NULL);
+		v9fs_t_clunk(v9ses, newfid);
 		v9fs_put_idpool(newfid, &v9ses->fidpool);
 	} else {
 		/* Setup the Root Inode */
@@ -167,10 +167,10 @@ static struct super_block *v9fs_get_sb(struct file_system_type
 			goto put_back_sb;
 		}
 
-		root_fid->qid = fcall->params.rstat.stat->qid;
+		root_fid->qid = fcall->params.rstat.stat.qid;
 		root->d_inode->i_ino =
-		    v9fs_qid2ino(&fcall->params.rstat.stat->qid);
-		v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb);
+		    v9fs_qid2ino(&fcall->params.rstat.stat.qid);
+		v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb);
 	}
 
 	kfree(fcall);
diff --git a/fs/Kconfig b/fs/Kconfig
index 382e3b2883d5..ef78e3a42d32 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -798,7 +798,7 @@ config PROC_KCORE
 
 config PROC_VMCORE
         bool "/proc/vmcore support (EXPERIMENTAL)"
-        depends on PROC_FS && EMBEDDED && EXPERIMENTAL && CRASH_DUMP
+        depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP
         help
         Exports the dump image of crashed kernel in ELF format.
 
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 175b2e8177c1..f3d3d81eb7e9 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -1,6 +1,6 @@
 config BINFMT_ELF
 	bool "Kernel support for ELF binaries"
-	depends on MMU
+	depends on MMU && (BROKEN || !FRV)
 	default y
 	---help---
 	  ELF (Executable and Linkable Format) is a format for libraries and
diff --git a/fs/Makefile b/fs/Makefile
index 73676111ebbe..1db711319c80 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,11 +10,11 @@ obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o pnode.o
+		ioprio.o pnode.o drop_caches.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
-obj-$(CONFIG_COMPAT)		+= compat.o
+obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
 
 nfsd-$(CONFIG_NFSD)		:= nfsctl.o
 obj-y				+= $(nfsd-y) $(nfsd-m)
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 9ebe881c6786..44d439cb69f4 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -244,10 +244,10 @@ affs_put_inode(struct inode *inode)
 	pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
 	affs_free_prealloc(inode);
 	if (atomic_read(&inode->i_count) == 1) {
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 		if (inode->i_size != AFFS_I(inode)->mmu_private)
 			affs_truncate(inode);
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 	}
 }
 
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 6682d6d7f294..5c61c24dab2a 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -137,7 +137,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
 #endif
 
 	/* determine how many magic numbers there should be in this page */
-	latter = dir->i_size - (page->index << PAGE_CACHE_SHIFT);
+	latter = dir->i_size - page_offset(page);
 	if (latter >= PAGE_SIZE)
 		qty = PAGE_SIZE;
 	else
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
index 1e691889c4c9..bfdcf19ba3f3 100644
--- a/fs/afs/volume.h
+++ b/fs/afs/volume.h
@@ -18,8 +18,6 @@
 #include "kafsasyncd.h"
 #include "cache.h"
 
-#define __packed __attribute__((packed))
-
 typedef enum {
 	AFS_VLUPD_SLEEP,		/* sleeping waiting for update timer to fire */
 	AFS_VLUPD_PENDING,		/* on pending queue */
@@ -115,7 +113,7 @@ struct afs_volume
 	struct cachefs_cookie	*cache;		/* caching cookie */
 #endif
 	afs_volid_t		vid;		/* volume ID */
-	afs_voltype_t __packed	type;		/* type of volume */
+	afs_voltype_t		type;		/* type of volume */
 	char			type_force;	/* force volume type (suppress R/O -> R/W) */
 	unsigned short		nservers;	/* number of server slots filled */
 	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
diff --git a/fs/aio.c b/fs/aio.c
index 5a28b69ad223..aec2b1916d1b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,7 +29,6 @@
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
-#include <linux/rcuref.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -514,7 +513,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	/* Must be done under the lock to serialise against cancellation.
 	 * Call this aio_fput as it duplicates fput via the fput_work.
 	 */
-	if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
+	if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
diff --git a/fs/attr.c b/fs/attr.c
index 67bcd9b14ea5..b34732506f1d 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -67,20 +67,12 @@ EXPORT_SYMBOL(inode_change_ok);
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
 	unsigned int ia_valid = attr->ia_valid;
-	int error = 0;
-
-	if (ia_valid & ATTR_SIZE) {
-		if (attr->ia_size != i_size_read(inode)) {
-			error = vmtruncate(inode, attr->ia_size);
-			if (error || (ia_valid == ATTR_SIZE))
-				goto out;
-		} else {
-			/*
-			 * We skipped the truncate but must still update
-			 * timestamps
-			 */
-			ia_valid |= ATTR_MTIME|ATTR_CTIME;
-		}
+
+	if (ia_valid & ATTR_SIZE &&
+	    attr->ia_size != i_size_read(inode)) {
+		int error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
 	}
 
 	if (ia_valid & ATTR_UID)
@@ -104,8 +96,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
 		inode->i_mode = mode;
 	}
 	mark_inode_dirty(inode);
-out:
-	return error;
+
+	return 0;
 }
 EXPORT_SYMBOL(inode_setattr);
 
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index a1ab1c0ed215..808134a5a2fa 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -229,9 +229,9 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr
 	dentry->d_flags |= DCACHE_AUTOFS_PENDING;
 	d_add(dentry, NULL);
 
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 	autofs_revalidate(dentry, nd);
-	down(&dir->i_sem);
+	mutex_lock(&dir->i_mutex);
 
 	/*
 	 * If we are still pending, check if we had to handle
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index fca83e28edcf..385bed09b0d8 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -209,7 +209,7 @@ static inline int simple_empty_nolock(struct dentry *dentry)
 	struct dentry *child;
 	int ret = 0;
 
-	list_for_each_entry(child, &dentry->d_subdirs, d_child)
+	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
 		if (simple_positive(child))
 			goto out;
 	ret = 1;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index feb6ac427d05..dc39589df165 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -105,7 +105,7 @@ repeat:
 	next = this_parent->d_subdirs.next;
 resume:
 	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
 		if (!simple_positive(dentry)) {
@@ -138,7 +138,7 @@ resume:
 	}
 
 	if (this_parent != top) {
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -163,7 +163,7 @@ repeat:
 	next = this_parent->d_subdirs.next;
 resume:
 	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
 		if (!simple_positive(dentry)) {
@@ -199,7 +199,7 @@ cont:
 	}
 
 	if (this_parent != parent) {
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -238,7 +238,7 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 	/* On exit from the loop expire is set to a dgot dentry
 	 * to expire or it's NULL */
 	while ( next != &root->d_subdirs ) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
 		if ( !simple_positive(dentry) ) {
@@ -302,7 +302,7 @@ next:
 			expired, (int)expired->d_name.len, expired->d_name.name);
 		spin_lock(&dcache_lock);
 		list_del(&expired->d_parent->d_subdirs);
-		list_add(&expired->d_parent->d_subdirs, &expired->d_child);
+		list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
 		spin_unlock(&dcache_lock);
 		return expired;
 	}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 818b37be5153..2d3082854a29 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -91,7 +91,7 @@ repeat:
 	next = this_parent->d_subdirs.next;
 resume:
 	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - don`t care */
 		if (!simple_positive(dentry)) {
@@ -117,7 +117,7 @@ resume:
 	if (this_parent != sbi->root) {
 		struct dentry *dentry = this_parent;
 
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		spin_unlock(&dcache_lock);
 		DPRINTK("parent dentry %p %.*s",
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2a771ec66956..14aa70282e8c 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -86,7 +86,7 @@ static int autofs4_root_readdir(struct file *file, void *dirent,
 
 /* Update usage from here to top of tree, so that scan of
    top-level directories will give a useful result */
-static void autofs4_update_usage(struct dentry *dentry)
+static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct dentry *top = dentry->d_sb->s_root;
 
@@ -95,7 +95,7 @@ static void autofs4_update_usage(struct dentry *dentry)
 		struct autofs_info *ino = autofs4_dentry_ino(dentry);
 
 		if (ino) {
-			update_atime(dentry->d_inode);
+			touch_atime(mnt, dentry);
 			ino->last_used = jiffies;
 		}
 	}
@@ -143,7 +143,8 @@ static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t f
 			}
 
 			while(1) {
-				struct dentry *de = list_entry(list, struct dentry, d_child);
+				struct dentry *de = list_entry(list,
+						struct dentry, d_u.d_child);
 
 				if (!d_unhashed(de) && de->d_inode) {
 					spin_unlock(&dcache_lock);
@@ -288,10 +289,10 @@ out:
 	return autofs4_dcache_readdir(file, dirent, filldir);
 }
 
-static int try_to_fill_dentry(struct dentry *dentry, 
-			      struct super_block *sb,
-			      struct autofs_sb_info *sbi, int flags)
+static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags)
 {
+	struct super_block *sb = mnt->mnt_sb;
+	struct autofs_sb_info *sbi = autofs4_sbi(sb);
 	struct autofs_info *de_info = autofs4_dentry_ino(dentry);
 	int status = 0;
 
@@ -366,7 +367,7 @@ static int try_to_fill_dentry(struct dentry *dentry,
 	/* We don't update the usages for the autofs daemon itself, this
 	   is necessary for recursive autofs mounts */
 	if (!autofs4_oz_mode(sbi))
-		autofs4_update_usage(dentry);
+		autofs4_update_usage(mnt, dentry);
 
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
@@ -391,7 +392,7 @@ static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd)
 	/* Pending dentry */
 	if (autofs4_ispending(dentry)) {
 		if (!oz_mode)
-			status = try_to_fill_dentry(dentry, dir->i_sb, sbi, flags);
+			status = try_to_fill_dentry(nd->mnt, dentry, flags);
 		return status;
 	}
 
@@ -408,14 +409,14 @@ static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd)
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 		spin_unlock(&dcache_lock);
 		if (!oz_mode)
-			status = try_to_fill_dentry(dentry, dir->i_sb, sbi, flags);
+			status = try_to_fill_dentry(nd->mnt, dentry, flags);
 		return status;
 	}
 	spin_unlock(&dcache_lock);
 
 	/* Update the usage list */
 	if (!oz_mode)
-		autofs4_update_usage(dentry);
+		autofs4_update_usage(nd->mnt, dentry);
 
 	return 1;
 }
@@ -488,9 +489,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	d_add(dentry, NULL);
 
 	if (dentry->d_op && dentry->d_op->d_revalidate) {
-		up(&dir->i_sem);
+		mutex_unlock(&dir->i_mutex);
 		(dentry->d_op->d_revalidate)(dentry, nd);
-		down(&dir->i_sem);
+		mutex_lock(&dir->i_mutex);
 	}
 
 	/*
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 72011826f0cb..f312103434d4 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -33,8 +33,6 @@ static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
 static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file);
 
-extern void dump_thread(struct pt_regs *, struct user *);
-
 static struct linux_binfmt aout_format = {
 	.module		= THIS_MODULE,
 	.load_binary	= load_aout_binary,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f36f2210204f..a4f6f57d91aa 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -58,7 +58,7 @@ extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
  * If we don't support core dumping, then supply a NULL so we
  * don't even try.
  */
-#ifdef USE_ELF_CORE_DUMP
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
 static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file);
 #else
 #define elf_core_dump	NULL
@@ -288,11 +288,17 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
 			struct elf_phdr *eppnt, int prot, int type)
 {
 	unsigned long map_addr;
+	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
 
 	down_write(&current->mm->mmap_sem);
-	map_addr = do_mmap(filep, ELF_PAGESTART(addr),
-			   eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type,
-			   eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
+	/* mmap() will return -EINVAL if given a zero size, but a
+	 * segment with zero filesize is perfectly valid */
+	if (eppnt->p_filesz + pageoffset)
+		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
+				   eppnt->p_filesz + pageoffset, prot, type,
+				   eppnt->p_offset - pageoffset);
+	else
+		map_addr = ELF_PAGESTART(addr);
 	up_write(&current->mm->mmap_sem);
 	return(map_addr);
 }
@@ -616,7 +622,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 				goto out_free_file;
 
 			retval = -ENOMEM;
-			elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz,
+			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
 							   GFP_KERNEL);
 			if (!elf_interpreter)
 				goto out_free_file;
@@ -1107,7 +1113,7 @@ out:
  * Note that some platforms still use traditional core dumps and not
  * the ELF core dump.  Each platform can select it as appropriate.
  */
-#ifdef USE_ELF_CORE_DUMP
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
 
 /*
  * ELF core dumper
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index e0344f69c79d..5b3076e8ee90 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -187,7 +187,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 				goto error;
 
 			/* read the name of the interpreter into memory */
-			interpreter_name = (char *) kmalloc(phdr->p_filesz, GFP_KERNEL);
+			interpreter_name = kmalloc(phdr->p_filesz, GFP_KERNEL);
 			if (!interpreter_name)
 				goto error;
 
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 9d6625829b99..b72dc31a0970 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -77,8 +77,6 @@ static int load_flat_shared_library(int id, struct lib_info *p);
 static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
 static int flat_core_dump(long signr, struct pt_regs * regs, struct file *file);
 
-extern void dump_thread(struct pt_regs *, struct user *);
-
 static struct linux_binfmt flat_format = {
 	.module		= THIS_MODULE,
 	.load_binary	= load_flat_binary,
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 2568eb41cb3a..9ccc7d8275b8 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -588,11 +588,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 		case 2: set_bit(Enabled, &e->flags);
 			break;
 		case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root);
-			down(&root->d_inode->i_sem);
+			mutex_lock(&root->d_inode->i_mutex);
 
 			kill_node(e);
 
-			up(&root->d_inode->i_sem);
+			mutex_unlock(&root->d_inode->i_mutex);
 			dput(root);
 			break;
 		default: return res;
@@ -622,7 +622,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 		return PTR_ERR(e);
 
 	root = dget(sb->s_root);
-	down(&root->d_inode->i_sem);
+	mutex_lock(&root->d_inode->i_mutex);
 	dentry = lookup_one_len(e->name, root, strlen(e->name));
 	err = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
@@ -658,7 +658,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 out2:
 	dput(dentry);
 out:
-	up(&root->d_inode->i_sem);
+	mutex_unlock(&root->d_inode->i_mutex);
 	dput(root);
 
 	if (err) {
@@ -703,12 +703,12 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
 		case 1: enabled = 0; break;
 		case 2: enabled = 1; break;
 		case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root);
-			down(&root->d_inode->i_sem);
+			mutex_lock(&root->d_inode->i_mutex);
 
 			while (!list_empty(&entries))
 				kill_node(list_entry(entries.next, Node, list));
 
-			up(&root->d_inode->i_sem);
+			mutex_unlock(&root->d_inode->i_mutex);
 			dput(root);
 		default: return res;
 	}
diff --git a/fs/bio.c b/fs/bio.c
index 38d3e8023a07..7b3069589951 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -126,6 +126,7 @@ static void bio_fs_destructor(struct bio *bio)
 inline void bio_init(struct bio *bio)
 {
 	bio->bi_next = NULL;
+	bio->bi_bdev = NULL;
 	bio->bi_flags = 1 << BIO_UPTODATE;
 	bio->bi_rw = 0;
 	bio->bi_vcnt = 0;
@@ -325,10 +326,31 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page
 	if (unlikely(bio_flagged(bio, BIO_CLONED)))
 		return 0;
 
-	if (bio->bi_vcnt >= bio->bi_max_vecs)
+	if (((bio->bi_size + len) >> 9) > max_sectors)
 		return 0;
 
-	if (((bio->bi_size + len) >> 9) > max_sectors)
+	/*
+	 * For filesystems with a blocksize smaller than the pagesize
+	 * we will often be called with the same page as last time and
+	 * a consecutive offset.  Optimize this special case.
+	 */
+	if (bio->bi_vcnt > 0) {
+		struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+		if (page == prev->bv_page &&
+		    offset == prev->bv_offset + prev->bv_len) {
+			prev->bv_len += len;
+			if (q->merge_bvec_fn &&
+			    q->merge_bvec_fn(q, bio, prev) < len) {
+				prev->bv_len -= len;
+				return 0;
+			}
+
+			goto done;
+		}
+	}
+
+	if (bio->bi_vcnt >= bio->bi_max_vecs)
 		return 0;
 
 	/*
@@ -382,6 +404,7 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page
 	bio->bi_vcnt++;
 	bio->bi_phys_segments++;
 	bio->bi_hw_segments++;
+ done:
 	bio->bi_size += len;
 	return len;
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e0df94c37b7e..6e50346fb1ee 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -202,7 +202,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 	loff_t size;
 	loff_t retval;
 
-	down(&bd_inode->i_sem);
+	mutex_lock(&bd_inode->i_mutex);
 	size = i_size_read(bd_inode);
 
 	switch (origin) {
@@ -219,7 +219,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 		}
 		retval = offset;
 	}
-	up(&bd_inode->i_sem);
+	mutex_unlock(&bd_inode->i_mutex);
 	return retval;
 }
 	
diff --git a/fs/buffer.c b/fs/buffer.c
index 5287be18633b..6466bc8a3dc7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -153,14 +153,8 @@ int sync_blockdev(struct block_device *bdev)
 {
 	int ret = 0;
 
-	if (bdev) {
-		int err;
-
-		ret = filemap_fdatawrite(bdev->bd_inode->i_mapping);
-		err = filemap_fdatawait(bdev->bd_inode->i_mapping);
-		if (!ret)
-			ret = err;
-	}
+	if (bdev)
+		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
 	return ret;
 }
 EXPORT_SYMBOL(sync_blockdev);
@@ -358,11 +352,11 @@ static long do_fsync(unsigned int fd, int datasync)
 	 * We need to protect against concurrent writers,
 	 * which could cause livelocks in fsync_buffers_list
 	 */
-	down(&mapping->host->i_sem);
+	mutex_lock(&mapping->host->i_mutex);
 	err = file->f_op->fsync(file, file->f_dentry, datasync);
 	if (!ret)
 		ret = err;
-	up(&mapping->host->i_sem);
+	mutex_unlock(&mapping->host->i_mutex);
 	err = filemap_fdatawait(mapping);
 	if (!ret)
 		ret = err;
@@ -1768,7 +1762,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 * handle that here by just cleaning them.
 	 */
 
-	block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
 	head = page_buffers(page);
 	bh = head;
 
@@ -2160,11 +2154,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
  * truncates.  Uses prepare/commit_write to allow the filesystem to
  * deal with the hole.  
  */
-int generic_cont_expand(struct inode *inode, loff_t size)
+static int __generic_cont_expand(struct inode *inode, loff_t size,
+				 pgoff_t index, unsigned int offset)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
-	unsigned long index, offset, limit;
+	unsigned long limit;
 	int err;
 
 	err = -EFBIG;
@@ -2176,24 +2171,24 @@ int generic_cont_expand(struct inode *inode, loff_t size)
 	if (size > inode->i_sb->s_maxbytes)
 		goto out;
 
-	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
-
-	/* ugh.  in prepare/commit_write, if from==to==start of block, we 
-	** skip the prepare.  make sure we never send an offset for the start
-	** of a block
-	*/
-	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-		offset++;
-	}
-	index = size >> PAGE_CACHE_SHIFT;
 	err = -ENOMEM;
 	page = grab_cache_page(mapping, index);
 	if (!page)
 		goto out;
 	err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
-	if (!err) {
-		err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+	if (err) {
+		/*
+		 * ->prepare_write() may have instantiated a few blocks
+		 * outside i_size.  Trim these off again.
+		 */
+		unlock_page(page);
+		page_cache_release(page);
+		vmtruncate(inode, inode->i_size);
+		goto out;
 	}
+
+	err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+
 	unlock_page(page);
 	page_cache_release(page);
 	if (err > 0)
@@ -2202,6 +2197,36 @@ out:
 	return err;
 }
 
+int generic_cont_expand(struct inode *inode, loff_t size)
+{
+	pgoff_t index;
+	unsigned int offset;
+
+	offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
+
+	/* ugh.  in prepare/commit_write, if from==to==start of block, we
+	** skip the prepare.  make sure we never send an offset for the start
+	** of a block
+	*/
+	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
+		/* caller must handle this extra byte. */
+		offset++;
+	}
+	index = size >> PAGE_CACHE_SHIFT;
+
+	return __generic_cont_expand(inode, size, index, offset);
+}
+
+int generic_cont_expand_simple(struct inode *inode, loff_t size)
+{
+	loff_t pos = size - 1;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
+
+	/* prepare/commit_write can handle even if from==to==start of block. */
+	return __generic_cont_expand(inode, size, index, offset);
+}
+
 /*
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
@@ -2313,7 +2338,7 @@ int generic_commit_write(struct file *file, struct page *page,
 	__block_commit_write(inode,page,from,to);
 	/*
 	 * No need to use i_size_read() here, the i_size
-	 * cannot change under us because we hold i_sem.
+	 * cannot change under us because we hold i_mutex.
 	 */
 	if (pos > inode->i_size) {
 		i_size_write(inode, pos);
@@ -2610,7 +2635,7 @@ int block_truncate_page(struct address_space *mapping,
 	pgoff_t index = from >> PAGE_CACHE_SHIFT;
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
 	unsigned blocksize;
-	pgoff_t iblock;
+	sector_t iblock;
 	unsigned length, pos;
 	struct inode *inode = mapping->host;
 	struct page *page;
@@ -2626,7 +2651,7 @@ int block_truncate_page(struct address_space *mapping,
 		return 0;
 
 	length = blocksize - length;
-	iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
 	
 	page = grab_cache_page(mapping, index);
 	err = -ENOMEM;
@@ -3145,6 +3170,7 @@ EXPORT_SYMBOL(fsync_bdev);
 EXPORT_SYMBOL(generic_block_bmap);
 EXPORT_SYMBOL(generic_commit_write);
 EXPORT_SYMBOL(generic_cont_expand);
+EXPORT_SYMBOL(generic_cont_expand_simple);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(invalidate_bdev);
 EXPORT_SYMBOL(ll_rw_block);
diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h
index decd138f14d4..da2ad5b451ac 100644
--- a/fs/cifs/cifs_uniupr.h
+++ b/fs/cifs/cifs_uniupr.h
@@ -242,7 +242,7 @@ static signed char UniCaseRangeLff20[27] = {
 /*
  * Lower Case Range
  */
-const static struct UniCaseRange CifsUniLowerRange[] = {
+static const struct UniCaseRange CifsUniLowerRange[] = {
 	0x0380, 0x03ab, UniCaseRangeL0380,
 	0x0400, 0x042f, UniCaseRangeL0400,
 	0x0490, 0x04cb, UniCaseRangeL0490,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2a13a2bac8f1..e10213b7541e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -860,9 +860,9 @@ static int cifs_oplock_thread(void * dummyarg)
 				DeleteOplockQEntry(oplock_item);
 				/* can not grab inode sem here since it would
 				deadlock when oplock received on delete 
-				since vfs_unlink holds the i_sem across
+				since vfs_unlink holds the i_mutex across
 				the call */
-				/* down(&inode->i_sem);*/
+				/* mutex_lock(&inode->i_mutex);*/
 				if (S_ISREG(inode->i_mode)) {
 					rc = filemap_fdatawrite(inode->i_mapping);
 					if(CIFS_I(inode)->clientCanCacheRead == 0) {
@@ -871,7 +871,7 @@ static int cifs_oplock_thread(void * dummyarg)
 					}
 				} else
 					rc = 0;
-				/* up(&inode->i_sem);*/
+				/* mutex_unlock(&inode->i_mutex);*/
 				if (rc)
 					CIFS_I(inode)->write_behind_rc = rc;
 				cFYI(1,("Oplock flush inode %p rc %d",inode,rc));
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 14a1c72ced92..5ade53d7bca8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -127,8 +127,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
 		if (file->f_dentry->d_inode->i_mapping) {
 		/* BB no need to lock inode until after invalidate
 		   since namei code should already have it locked? */
-			filemap_fdatawrite(file->f_dentry->d_inode->i_mapping);
-			filemap_fdatawait(file->f_dentry->d_inode->i_mapping);
+			filemap_write_and_wait(file->f_dentry->d_inode->i_mapping);
 		}
 		cFYI(1, ("invalidating remote inode since open detected it "
 			 "changed"));
@@ -419,8 +418,7 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
 		pCifsInode = CIFS_I(inode);
 		if (pCifsInode) {
 			if (can_flush) {
-				filemap_fdatawrite(inode->i_mapping);
-				filemap_fdatawait(inode->i_mapping);
+				filemap_write_and_wait(inode->i_mapping);
 			/* temporarily disable caching while we
 			   go to server to get inode info */
 				pCifsInode->clientCanCacheAll = FALSE;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 411c1f7f84da..3ebce9430f4a 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1040,9 +1040,9 @@ int cifs_revalidate(struct dentry *direntry)
 	}
 
 	/* can not grab this sem since kernel filesys locking documentation
-	   indicates i_sem may be taken by the kernel on lookup and rename
-	   which could deadlock if we grab the i_sem here as well */
-/*	down(&direntry->d_inode->i_sem);*/
+	   indicates i_mutex may be taken by the kernel on lookup and rename
+	   which could deadlock if we grab the i_mutex here as well */
+/*	mutex_lock(&direntry->d_inode->i_mutex);*/
 	/* need to write out dirty pages here  */
 	if (direntry->d_inode->i_mapping) {
 		/* do we need to lock inode until after invalidate completes
@@ -1066,7 +1066,7 @@ int cifs_revalidate(struct dentry *direntry)
 			}
 		}
 	}
-/*	up(&direntry->d_inode->i_sem); */
+/*	mutex_unlock(&direntry->d_inode->i_mutex); */
 	
 	kfree(full_path);
 	FreeXid(xid);
@@ -1148,8 +1148,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	/* BB check if we need to refresh inode from server now ? BB */
 
 	/* need to flush data before changing file size on server */
-	filemap_fdatawrite(direntry->d_inode->i_mapping);
-	filemap_fdatawait(direntry->d_inode->i_mapping);
+	filemap_write_and_wait(direntry->d_inode->i_mapping);
 
 	if (attrs->ia_valid & ATTR_SIZE) {
 		/* To avoid spurious oplock breaks from server, in the case of
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 80072fd9b7fa..c607d923350a 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
 	spin_lock(&dcache_lock);
 	list_for_each(child, &parent->d_subdirs)
 	{
-		de = list_entry(child, struct dentry, d_child);
+		de = list_entry(child, struct dentry, d_u.d_child);
 		/* don't know what to do with negative dentries */
 		if ( ! de->d_inode ) 
 			continue;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 2391766e9c7c..8f1a517f8b4e 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -453,7 +453,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
 	coda_vfs_stat.readdir++;
 
 	host_inode = host_file->f_dentry->d_inode;
-	down(&host_inode->i_sem);
+	mutex_lock(&host_inode->i_mutex);
 	host_file->f_pos = coda_file->f_pos;
 
 	if (!host_file->f_op->readdir) {
@@ -475,7 +475,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
 	}
 out:
 	coda_file->f_pos = host_file->f_pos;
-	up(&host_inode->i_sem);
+	mutex_unlock(&host_inode->i_mutex);
 
 	return ret;
 }
diff --git a/fs/coda/file.c b/fs/coda/file.c
index e6bc022568f3..30b4630bd735 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -77,14 +77,14 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
 		return -EINVAL;
 
 	host_inode = host_file->f_dentry->d_inode;
-	down(&coda_inode->i_sem);
+	mutex_lock(&coda_inode->i_mutex);
 
 	ret = host_file->f_op->write(host_file, buf, count, ppos);
 
 	coda_inode->i_size = host_inode->i_size;
 	coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
 	coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
-	up(&coda_inode->i_sem);
+	mutex_unlock(&coda_inode->i_mutex);
 
 	return ret;
 }
@@ -272,9 +272,9 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 	if (host_file->f_op && host_file->f_op->fsync) {
 		host_dentry = host_file->f_dentry;
 		host_inode = host_dentry->d_inode;
-		down(&host_inode->i_sem);
+		mutex_lock(&host_inode->i_mutex);
 		err = host_file->f_op->fsync(host_file, host_dentry, datasync);
-		up(&host_inode->i_sem);
+		mutex_unlock(&host_inode->i_mutex);
 	}
 
 	if ( !err && !datasync ) {
diff --git a/fs/compat.c b/fs/compat.c
index 55ac0324aaf1..271b75d1597f 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -494,9 +494,21 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
 		ret = sys_fcntl(fd, cmd, (unsigned long)&f);
 		set_fs(old_fs);
 		if (cmd == F_GETLK && ret == 0) {
-			if ((f.l_start >= COMPAT_OFF_T_MAX) ||
-			    ((f.l_start + f.l_len) > COMPAT_OFF_T_MAX))
+			/* GETLK was successfule and we need to return the data...
+			 * but it needs to fit in the compat structure.
+			 * l_start shouldn't be too big, unless the original
+			 * start + end is greater than COMPAT_OFF_T_MAX, in which
+			 * case the app was asking for trouble, so we return
+			 * -EOVERFLOW in that case.
+			 * l_len could be too big, in which case we just truncate it,
+			 * and only allow the app to see that part of the conflicting
+			 * lock that might make sense to it anyway
+			 */
+
+			if (f.l_start > COMPAT_OFF_T_MAX)
 				ret = -EOVERFLOW;
+			if (f.l_len > COMPAT_OFF_T_MAX)
+				f.l_len = COMPAT_OFF_T_MAX;
 			if (ret == 0)
 				ret = put_compat_flock(&f, compat_ptr(arg));
 		}
@@ -515,9 +527,11 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
 				(unsigned long)&f);
 		set_fs(old_fs);
 		if (cmd == F_GETLK64 && ret == 0) {
-			if ((f.l_start >= COMPAT_LOFF_T_MAX) ||
-			    ((f.l_start + f.l_len) > COMPAT_LOFF_T_MAX))
+			/* need to return lock information - see above for commentary */
+			if (f.l_start > COMPAT_LOFF_T_MAX)
 				ret = -EOVERFLOW;
+			if (f.l_len > COMPAT_LOFF_T_MAX)
+				f.l_len = COMPAT_LOFF_T_MAX;
 			if (ret == 0)
 				ret = put_compat_flock64(&f, compat_ptr(arg));
 		}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 43a2508ac696..890bc30fbe20 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -10,7 +10,6 @@
  * ioctls.
  */
 
-#ifdef INCLUDES
 #include <linux/config.h>
 #include <linux/types.h>
 #include <linux/compat.h>
@@ -81,13 +80,9 @@
 #include <linux/capi.h>
 
 #include <scsi/scsi.h>
-/* Ugly hack. */
-#undef __KERNEL__
 #include <scsi/scsi_ioctl.h>
-#define __KERNEL__
 #include <scsi/sg.h>
 
-#include <asm/types.h>
 #include <asm/uaccess.h>
 #include <linux/ethtool.h>
 #include <linux/mii.h>
@@ -95,7 +90,6 @@
 #include <linux/watchdog.h>
 #include <linux/dm-ioctl.h>
 
-#include <asm/module.h>
 #include <linux/soundcard.h>
 #include <linux/lp.h>
 #include <linux/ppdev.h>
@@ -128,11 +122,6 @@
 #include <linux/dvb/frontend.h>
 #include <linux/dvb/video.h>
 
-#undef INCLUDES
-#endif
-
-#ifdef CODE
-
 /* Aiee. Someone does not find a difference between int and long */
 #define EXT2_IOC32_GETFLAGS               _IOR('f', 1, int)
 #define EXT2_IOC32_SETFLAGS               _IOW('f', 2, int)
@@ -148,6 +137,12 @@
 #define EXT2_IOC32_GETVERSION             _IOR('v', 1, int)
 #define EXT2_IOC32_SETVERSION             _IOW('v', 2, int)
 
+static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
+			      unsigned long arg, struct file *f)
+{
+	return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
+}
+
 static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	mm_segment_t old_fs = get_fs();
@@ -207,244 +202,6 @@ static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
 }
 
-struct video_tuner32 {
-	compat_int_t tuner;
-	char name[32];
-	compat_ulong_t rangelow, rangehigh;
-	u32 flags;	/* It is really u32 in videodev.h */
-	u16 mode, signal;
-};
-
-static int get_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up)
-{
-	int i;
-
-	if(get_user(kp->tuner, &up->tuner))
-		return -EFAULT;
-	for(i = 0; i < 32; i++)
-		__get_user(kp->name[i], &up->name[i]);
-	__get_user(kp->rangelow, &up->rangelow);
-	__get_user(kp->rangehigh, &up->rangehigh);
-	__get_user(kp->flags, &up->flags);
-	__get_user(kp->mode, &up->mode);
-	__get_user(kp->signal, &up->signal);
-	return 0;
-}
-
-static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up)
-{
-	int i;
-
-	if(put_user(kp->tuner, &up->tuner))
-		return -EFAULT;
-	for(i = 0; i < 32; i++)
-		__put_user(kp->name[i], &up->name[i]);
-	__put_user(kp->rangelow, &up->rangelow);
-	__put_user(kp->rangehigh, &up->rangehigh);
-	__put_user(kp->flags, &up->flags);
-	__put_user(kp->mode, &up->mode);
-	__put_user(kp->signal, &up->signal);
-	return 0;
-}
-
-struct video_buffer32 {
-	compat_caddr_t base;
-	compat_int_t height, width, depth, bytesperline;
-};
-
-static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up)
-{
-	u32 tmp;
-
-	if (get_user(tmp, &up->base))
-		return -EFAULT;
-
-	/* This is actually a physical address stored
-	 * as a void pointer.
-	 */
-	kp->base = (void *)(unsigned long) tmp;
-
-	__get_user(kp->height, &up->height);
-	__get_user(kp->width, &up->width);
-	__get_user(kp->depth, &up->depth);
-	__get_user(kp->bytesperline, &up->bytesperline);
-	return 0;
-}
-
-static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up)
-{
-	u32 tmp = (u32)((unsigned long)kp->base);
-
-	if(put_user(tmp, &up->base))
-		return -EFAULT;
-	__put_user(kp->height, &up->height);
-	__put_user(kp->width, &up->width);
-	__put_user(kp->depth, &up->depth);
-	__put_user(kp->bytesperline, &up->bytesperline);
-	return 0;
-}
-
-struct video_clip32 {
-	s32 x, y, width, height;	/* Its really s32 in videodev.h */
-	compat_caddr_t next;
-};
-
-struct video_window32 {
-	u32 x, y, width, height, chromakey, flags;
-	compat_caddr_t clips;
-	compat_int_t clipcount;
-};
-
-/* You get back everything except the clips... */
-static int put_video_window32(struct video_window *kp, struct video_window32 __user *up)
-{
-	if(put_user(kp->x, &up->x))
-		return -EFAULT;
-	__put_user(kp->y, &up->y);
-	__put_user(kp->width, &up->width);
-	__put_user(kp->height, &up->height);
-	__put_user(kp->chromakey, &up->chromakey);
-	__put_user(kp->flags, &up->flags);
-	__put_user(kp->clipcount, &up->clipcount);
-	return 0;
-}
-
-#define VIDIOCGTUNER32		_IOWR('v',4, struct video_tuner32)
-#define VIDIOCSTUNER32		_IOW('v',5, struct video_tuner32)
-#define VIDIOCGWIN32		_IOR('v',9, struct video_window32)
-#define VIDIOCSWIN32		_IOW('v',10, struct video_window32)
-#define VIDIOCGFBUF32		_IOR('v',11, struct video_buffer32)
-#define VIDIOCSFBUF32		_IOW('v',12, struct video_buffer32)
-#define VIDIOCGFREQ32		_IOR('v',14, u32)
-#define VIDIOCSFREQ32		_IOW('v',15, u32)
-
-enum {
-	MaxClips = (~0U-sizeof(struct video_window))/sizeof(struct video_clip)
-};
-
-static int do_set_window(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-	struct video_window32 __user *up = compat_ptr(arg);
-	struct video_window __user *vw;
-	struct video_clip __user *p;
-	int nclips;
-	u32 n;
-
-	if (get_user(nclips, &up->clipcount))
-		return -EFAULT;
-
-	/* Peculiar interface... */
-	if (nclips < 0)
-		nclips = VIDEO_CLIPMAP_SIZE;
-
-	if (nclips > MaxClips)
-		return -ENOMEM;
-
-	vw = compat_alloc_user_space(sizeof(struct video_window) +
-				    nclips * sizeof(struct video_clip));
-
-	p = nclips ? (struct video_clip __user *)(vw + 1) : NULL;
-
-	if (get_user(n, &up->x) || put_user(n, &vw->x) ||
-	    get_user(n, &up->y) || put_user(n, &vw->y) ||
-	    get_user(n, &up->width) || put_user(n, &vw->width) ||
-	    get_user(n, &up->height) || put_user(n, &vw->height) ||
-	    get_user(n, &up->chromakey) || put_user(n, &vw->chromakey) ||
-	    get_user(n, &up->flags) || put_user(n, &vw->flags) ||
-	    get_user(n, &up->clipcount) || put_user(n, &vw->clipcount) ||
-	    get_user(n, &up->clips) || put_user(p, &vw->clips))
-		return -EFAULT;
-
-	if (nclips) {
-		struct video_clip32 __user *u = compat_ptr(n);
-		int i;
-		if (!u)
-			return -EINVAL;
-		for (i = 0; i < nclips; i++, u++, p++) {
-			s32 v;
-			if (get_user(v, &u->x) ||
-			    put_user(v, &p->x) ||
-			    get_user(v, &u->y) ||
-			    put_user(v, &p->y) ||
-			    get_user(v, &u->width) ||
-			    put_user(v, &p->width) ||
-			    get_user(v, &u->height) ||
-			    put_user(v, &p->height) ||
-			    put_user(NULL, &p->next))
-				return -EFAULT;
-		}
-	}
-
-	return sys_ioctl(fd, VIDIOCSWIN, (unsigned long)p);
-}
-
-static int do_video_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-	union {
-		struct video_tuner vt;
-		struct video_buffer vb;
-		struct video_window vw;
-		unsigned long vx;
-	} karg;
-	mm_segment_t old_fs = get_fs();
-	void __user *up = compat_ptr(arg);
-	int err = 0;
-
-	/* First, convert the command. */
-	switch(cmd) {
-	case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break;
-	case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break;
-	case VIDIOCGWIN32: cmd = VIDIOCGWIN; break;
-	case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break;
-	case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break;
-	case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break;
-	case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break;
-	};
-
-	switch(cmd) {
-	case VIDIOCSTUNER:
-	case VIDIOCGTUNER:
-		err = get_video_tuner32(&karg.vt, up);
-		break;
-
-	case VIDIOCSFBUF:
-		err = get_video_buffer32(&karg.vb, up);
-		break;
-
-	case VIDIOCSFREQ:
-		err = get_user(karg.vx, (u32 __user *)up);
-		break;
-	};
-	if(err)
-		goto out;
-
-	set_fs(KERNEL_DS);
-	err = sys_ioctl(fd, cmd, (unsigned long)&karg);
-	set_fs(old_fs);
-
-	if(err == 0) {
-		switch(cmd) {
-		case VIDIOCGTUNER:
-			err = put_video_tuner32(&karg.vt, up);
-			break;
-
-		case VIDIOCGWIN:
-			err = put_video_window32(&karg.vw, up);
-			break;
-
-		case VIDIOCGFBUF:
-			err = put_video_buffer32(&karg.vb, up);
-			break;
-
-		case VIDIOCGFREQ:
-			err = put_user(((u32)karg.vx), (u32 __user *)up);
-			break;
-		};
-	}
-out:
-	return err;
-}
-
 struct compat_dmx_event {
 	dmx_event_t	event;
 	compat_time_t	timeStamp;
@@ -2713,6 +2470,49 @@ static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg
 	return -EINVAL;
 }
 
+#define RTC_IRQP_READ32		_IOR('p', 0x0b, compat_ulong_t)
+#define RTC_IRQP_SET32		_IOW('p', 0x0c, compat_ulong_t)
+#define RTC_EPOCH_READ32	_IOR('p', 0x0d, compat_ulong_t)
+#define RTC_EPOCH_SET32		_IOW('p', 0x0e, compat_ulong_t)
+
+static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
+{
+	mm_segment_t oldfs = get_fs();
+	compat_ulong_t val32;
+	unsigned long kval;
+	int ret;
+
+	switch (cmd) {
+	case RTC_IRQP_READ32:
+	case RTC_EPOCH_READ32:
+		set_fs(KERNEL_DS);
+		ret = sys_ioctl(fd, (cmd == RTC_IRQP_READ32) ?
+					RTC_IRQP_READ : RTC_EPOCH_READ,
+					(unsigned long)&kval);
+		set_fs(oldfs);
+		if (ret)
+			return ret;
+		val32 = kval;
+		return put_user(val32, (unsigned int __user *)arg);
+	case RTC_IRQP_SET32:
+	case RTC_EPOCH_SET32:
+		ret = get_user(val32, (unsigned int __user *)arg);
+		if (ret)
+			return ret;
+		kval = val32;
+
+		set_fs(KERNEL_DS);
+		ret = sys_ioctl(fd, (cmd == RTC_IRQP_SET32) ?
+				RTC_IRQP_SET : RTC_EPOCH_SET,
+				(unsigned long)&kval);
+		set_fs(oldfs);
+		return ret;
+	default:
+		/* unreached */
+		return -ENOIOCTLCMD;
+	}
+}
+
 #if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE)
 struct ncp_ioctl_request_32 {
 	u32 function;
@@ -2900,10 +2700,20 @@ static int do_ncp_setprivatedata(unsigned int fd, unsigned int cmd, unsigned lon
 }
 #endif
 
-#undef CODE
-#endif
+#define HANDLE_IOCTL(cmd,handler) \
+	{ (cmd), (ioctl_trans_handler_t)(handler) },
+
+/* pointer to compatible structure or no argument */
+#define COMPATIBLE_IOCTL(cmd) \
+	{ (cmd), do_ioctl32_pointer },
+
+/* argument is an unsigned long integer, not a pointer */
+#define ULONG_IOCTL(cmd) \
+	{ (cmd), (ioctl_trans_handler_t)sys_ioctl },
 
-#ifdef DECLARES
+
+struct ioctl_trans ioctl_start[] = {
+#include <linux/compat_ioctl.h>
 HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob)
 HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob)
 #ifdef CONFIG_NET
@@ -3015,14 +2825,6 @@ COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD)
 #ifdef CONFIG_JBD_DEBUG
 HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl)
 #endif
-HANDLE_IOCTL(VIDIOCGTUNER32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCSTUNER32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCGWIN32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCSWIN32, do_set_window)
-HANDLE_IOCTL(VIDIOCGFBUF32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCSFBUF32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCGFREQ32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCSFREQ32, do_video_ioctl)
 /* One SMB ioctl needs translations. */
 #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
 HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
@@ -3104,6 +2906,10 @@ HANDLE_IOCTL(SIOCSIWENCODE, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
 HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
+HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl)
+HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl)
+HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl)
+HANDLE_IOCTL(RTC_EPOCH_SET32, rtc_ioctl)
 
 #if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE)
 HANDLE_IOCTL(NCP_IOC_NCPREQUEST_32, do_ncp_ncprequest)
@@ -3120,6 +2926,6 @@ HANDLE_IOCTL(DMX_GET_EVENT, do_dmx_get_event)
 HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event)
 HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture)
 HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette)
+};
 
-#undef DECLARES
-#endif
+int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index e48b539243a1..b668ec61527e 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -288,10 +288,10 @@ static struct dentry * configfs_lookup(struct inode *dir,
 
 /*
  * Only subdirectories count here.  Files (CONFIGFS_NOT_PINNED) are
- * attributes and are removed by rmdir().  We recurse, taking i_sem
+ * attributes and are removed by rmdir().  We recurse, taking i_mutex
  * on all children that are candidates for default detach.  If the
  * result is clean, then configfs_detach_group() will handle dropping
- * i_sem.  If there is an error, the caller will clean up the i_sem
+ * i_mutex.  If there is an error, the caller will clean up the i_mutex
  * holders via configfs_detach_rollback().
  */
 static int configfs_detach_prep(struct dentry *dentry)
@@ -309,8 +309,8 @@ static int configfs_detach_prep(struct dentry *dentry)
 		if (sd->s_type & CONFIGFS_NOT_PINNED)
 			continue;
 		if (sd->s_type & CONFIGFS_USET_DEFAULT) {
-			down(&sd->s_dentry->d_inode->i_sem);
-			/* Mark that we've taken i_sem */
+			mutex_lock(&sd->s_dentry->d_inode->i_mutex);
+			/* Mark that we've taken i_mutex */
 			sd->s_type |= CONFIGFS_USET_DROPPING;
 
 			ret = configfs_detach_prep(sd->s_dentry);
@@ -327,7 +327,7 @@ out:
 }
 
 /*
- * Walk the tree, dropping i_sem wherever CONFIGFS_USET_DROPPING is
+ * Walk the tree, dropping i_mutex wherever CONFIGFS_USET_DROPPING is
  * set.
  */
 static void configfs_detach_rollback(struct dentry *dentry)
@@ -341,7 +341,7 @@ static void configfs_detach_rollback(struct dentry *dentry)
 
 			if (sd->s_type & CONFIGFS_USET_DROPPING) {
 				sd->s_type &= ~CONFIGFS_USET_DROPPING;
-				up(&sd->s_dentry->d_inode->i_sem);
+				mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
 			}
 		}
 	}
@@ -424,11 +424,11 @@ static void detach_groups(struct config_group *group)
 
 		/*
 		 * From rmdir/unregister, a configfs_detach_prep() pass
-		 * has taken our i_sem for us.  Drop it.
+		 * has taken our i_mutex for us.  Drop it.
 		 * From mkdir/register cleanup, there is no sem held.
 		 */
 		if (sd->s_type & CONFIGFS_USET_DROPPING)
-			up(&child->d_inode->i_sem);
+			mutex_unlock(&child->d_inode->i_mutex);
 
 		d_delete(child);
 		dput(child);
@@ -493,11 +493,11 @@ static int populate_groups(struct config_group *group)
 		/* FYI, we're faking mkdir here
 		 * I'm not sure we need this semaphore, as we're called
 		 * from our parent's mkdir.  That holds our parent's
-		 * i_sem, so afaik lookup cannot continue through our
+		 * i_mutex, so afaik lookup cannot continue through our
 		 * parent to find us, let alone mess with our tree.
-		 * That said, taking our i_sem is closer to mkdir
+		 * That said, taking our i_mutex is closer to mkdir
 		 * emulation, and shouldn't hurt. */
-		down(&dentry->d_inode->i_sem);
+		mutex_lock(&dentry->d_inode->i_mutex);
 
 		for (i = 0; group->default_groups[i]; i++) {
 			new_group = group->default_groups[i];
@@ -507,7 +507,7 @@ static int populate_groups(struct config_group *group)
 				break;
 		}
 
-		up(&dentry->d_inode->i_sem);
+		mutex_unlock(&dentry->d_inode->i_mutex);
 	}
 
 	if (ret)
@@ -856,7 +856,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
 	down_write(&configfs_rename_sem);
 	parent = item->parent->dentry;
 
-	down(&parent->d_inode->i_sem);
+	mutex_lock(&parent->d_inode->i_mutex);
 
 	new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
 	if (!IS_ERR(new_dentry)) {
@@ -872,7 +872,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
 			error = -EEXIST;
 		dput(new_dentry);
 	}
-	up(&parent->d_inode->i_sem);
+	mutex_unlock(&parent->d_inode->i_mutex);
 	up_write(&configfs_rename_sem);
 
 	return error;
@@ -884,9 +884,9 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
 	struct dentry * dentry = file->f_dentry;
 	struct configfs_dirent * parent_sd = dentry->d_fsdata;
 
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	file->private_data = configfs_new_dirent(parent_sd, NULL);
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	return file->private_data ? 0 : -ENOMEM;
 
@@ -897,9 +897,9 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
 	struct dentry * dentry = file->f_dentry;
 	struct configfs_dirent * cursor = file->private_data;
 
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	list_del_init(&cursor->s_sibling);
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	release_configfs_dirent(cursor);
 
@@ -975,7 +975,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
 {
 	struct dentry * dentry = file->f_dentry;
 
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	switch (origin) {
 		case 1:
 			offset += file->f_pos;
@@ -983,7 +983,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
 			if (offset >= 0)
 				break;
 		default:
-			up(&file->f_dentry->d_inode->i_sem);
+			mutex_unlock(&file->f_dentry->d_inode->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
@@ -1007,7 +1007,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
 			list_add_tail(&cursor->s_sibling, p);
 		}
 	}
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 	return offset;
 }
 
@@ -1037,7 +1037,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 	sd = configfs_sb->s_root->d_fsdata;
 	link_group(to_config_group(sd->s_element), group);
 
-	down(&configfs_sb->s_root->d_inode->i_sem);
+	mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
 
 	name.name = group->cg_item.ci_name;
 	name.len = strlen(name.name);
@@ -1057,7 +1057,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 	else
 		d_delete(dentry);
 
-	up(&configfs_sb->s_root->d_inode->i_sem);
+	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
 
 	if (dentry) {
 	    dput(dentry);
@@ -1079,18 +1079,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
 		return;
 	}
 
-	down(&configfs_sb->s_root->d_inode->i_sem);
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	if (configfs_detach_prep(dentry)) {
 		printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
 	}
 	configfs_detach_group(&group->cg_item);
 	dentry->d_inode->i_flags |= S_DEAD;
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	d_delete(dentry);
 
-	up(&configfs_sb->s_root->d_inode->i_sem);
+	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
 
 	dput(dentry);
 
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index af1ffc9a15c0..c26cd61f13af 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -336,9 +336,9 @@ int configfs_add_file(struct dentry * dir, const struct configfs_attribute * att
 	umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
 	int error = 0;
 
-	down(&dir->d_inode->i_sem);
+	mutex_lock(&dir->d_inode->i_mutex);
 	error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
-	up(&dir->d_inode->i_sem);
+	mutex_unlock(&dir->d_inode->i_mutex);
 
 	return error;
 }
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 6b274c6d428f..6577c588de9d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -122,7 +122,7 @@ const unsigned char * configfs_get_name(struct configfs_dirent *sd)
 
 /*
  * Unhashes the dentry corresponding to given configfs_dirent
- * Called with parent inode's i_sem held.
+ * Called with parent inode's i_mutex held.
  */
 void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
 {
@@ -145,7 +145,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
 	struct configfs_dirent * sd;
 	struct configfs_dirent * parent_sd = dir->d_fsdata;
 
-	down(&dir->d_inode->i_sem);
+	mutex_lock(&dir->d_inode->i_mutex);
 	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
 		if (!sd->s_element)
 			continue;
@@ -156,7 +156,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
 			break;
 		}
 	}
-	up(&dir->d_inode->i_sem);
+	mutex_unlock(&dir->d_inode->i_mutex);
 }
 
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 17e439138681..134d6775183f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -71,7 +71,7 @@ struct dentry_stat_t dentry_stat = {
 
 static void d_callback(struct rcu_head *head)
 {
-	struct dentry * dentry = container_of(head, struct dentry, d_rcu);
+	struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
 
 	if (dname_external(dentry))
 		kfree(dentry->d_name.name);
@@ -86,7 +86,7 @@ static void d_free(struct dentry *dentry)
 {
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
- 	call_rcu(&dentry->d_rcu, d_callback);
+ 	call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 
 /*
@@ -193,7 +193,7 @@ kill_it: {
   			list_del(&dentry->d_lru);
   			dentry_stat.nr_unused--;
   		}
-  		list_del(&dentry->d_child);
+  		list_del(&dentry->d_u.d_child);
 		dentry_stat.nr_dentry--;	/* For d_free, below */
 		/*drops the locks, at that point nobody can reach this dentry */
 		dentry_iput(dentry);
@@ -367,7 +367,7 @@ static inline void prune_one_dentry(struct dentry * dentry)
 	struct dentry * parent;
 
 	__d_drop(dentry);
-	list_del(&dentry->d_child);
+	list_del(&dentry->d_u.d_child);
 	dentry_stat.nr_dentry--;	/* For d_free, below */
 	dentry_iput(dentry);
 	parent = dentry->d_parent;
@@ -518,7 +518,7 @@ repeat:
 resume:
 	while (next != &this_parent->d_subdirs) {
 		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 		/* Have we found a mount point ? */
 		if (d_mountpoint(dentry))
@@ -532,7 +532,7 @@ resume:
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
-		next = this_parent->d_child.next; 
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -569,7 +569,7 @@ repeat:
 resume:
 	while (next != &this_parent->d_subdirs) {
 		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 
 		if (!list_empty(&dentry->d_lru)) {
@@ -610,7 +610,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found);
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
-		next = this_parent->d_child.next; 
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 #ifdef DCACHE_DEBUG
 printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
@@ -753,12 +753,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 		dentry->d_parent = dget(parent);
 		dentry->d_sb = parent->d_sb;
 	} else {
-		INIT_LIST_HEAD(&dentry->d_child);
+		INIT_LIST_HEAD(&dentry->d_u.d_child);
 	}
 
 	spin_lock(&dcache_lock);
 	if (parent)
-		list_add(&dentry->d_child, &parent->d_subdirs);
+		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
 	dentry_stat.nr_dentry++;
 	spin_unlock(&dcache_lock);
 
@@ -808,10 +808,14 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
  *
  * Fill in inode information in the entry. On success, it returns NULL.
  * If an unhashed alias of "entry" already exists, then we return the
- * aliased dentry instead.
+ * aliased dentry instead and drop one reference to inode.
  *
  * Note that in order to avoid conflicts with rename() etc, the caller
  * had better be holding the parent directory semaphore.
+ *
+ * This also assumes that the inode count has been incremented
+ * (or otherwise set) by the caller to indicate that it is now
+ * in use by the dcache.
  */
 struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 {
@@ -838,6 +842,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 		dget_locked(alias);
 		spin_unlock(&dcache_lock);
 		BUG_ON(!d_unhashed(alias));
+		iput(inode);
 		return alias;
 	}
 	list_add(&entry->d_alias, &inode->i_dentry);
@@ -1310,8 +1315,8 @@ already_unhashed:
 	/* Unhash the target: dput() will then get rid of it */
 	__d_drop(target);
 
-	list_del(&dentry->d_child);
-	list_del(&target->d_child);
+	list_del(&dentry->d_u.d_child);
+	list_del(&target->d_u.d_child);
 
 	/* Switch the names.. */
 	switch_names(dentry, target);
@@ -1322,15 +1327,15 @@ already_unhashed:
 	if (IS_ROOT(dentry)) {
 		dentry->d_parent = target->d_parent;
 		target->d_parent = target;
-		INIT_LIST_HEAD(&target->d_child);
+		INIT_LIST_HEAD(&target->d_u.d_child);
 	} else {
 		do_switch(dentry->d_parent, target->d_parent);
 
 		/* And add them back to the (new) parent lists */
-		list_add(&target->d_child, &target->d_parent->d_subdirs);
+		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
 	}
 
-	list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
+	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
 	spin_unlock(&target->d_lock);
 	spin_unlock(&dentry->d_lock);
 	write_sequnlock(&rename_lock);
@@ -1568,7 +1573,7 @@ repeat:
 resume:
 	while (next != &this_parent->d_subdirs) {
 		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 		if (d_unhashed(dentry)||!dentry->d_inode)
 			continue;
@@ -1579,7 +1584,7 @@ resume:
 		atomic_dec(&dentry->d_count);
 	}
 	if (this_parent != root) {
-		next = this_parent->d_child.next; 
+		next = this_parent->d_u.d_child.next;
 		atomic_dec(&this_parent->d_count);
 		this_parent = this_parent->d_parent;
 		goto resume;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a86ac4aeaedb..d4f1a2cddd47 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -146,7 +146,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 	}
 
 	*dentry = NULL;
-	down(&parent->d_inode->i_sem);
+	mutex_lock(&parent->d_inode->i_mutex);
 	*dentry = lookup_one_len(name, parent, strlen(name));
 	if (!IS_ERR(dentry)) {
 		if ((mode & S_IFMT) == S_IFDIR)
@@ -155,7 +155,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 			error = debugfs_create(parent->d_inode, *dentry, mode);
 	} else
 		error = PTR_ERR(dentry);
-	up(&parent->d_inode->i_sem);
+	mutex_unlock(&parent->d_inode->i_mutex);
 
 	return error;
 }
@@ -273,7 +273,7 @@ void debugfs_remove(struct dentry *dentry)
 	if (!parent || !parent->d_inode)
 		return;
 
-	down(&parent->d_inode->i_sem);
+	mutex_lock(&parent->d_inode->i_mutex);
 	if (debugfs_positive(dentry)) {
 		if (dentry->d_inode) {
 			if (S_ISDIR(dentry->d_inode->i_mode))
@@ -283,7 +283,7 @@ void debugfs_remove(struct dentry *dentry)
 		dput(dentry);
 		}
 	}
-	up(&parent->d_inode->i_sem);
+	mutex_unlock(&parent->d_inode->i_mutex);
 	simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
 EXPORT_SYMBOL_GPL(debugfs_remove);
diff --git a/fs/devfs/base.c b/fs/devfs/base.c
index 1274422a5384..b621521e09d4 100644
--- a/fs/devfs/base.c
+++ b/fs/devfs/base.c
@@ -2162,27 +2162,27 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd)
 	 *
 	 * make sure that
 	 *   d_instantiate always runs under lock
-	 *   we release i_sem lock before going to sleep
+	 *   we release i_mutex lock before going to sleep
 	 *
 	 * unfortunately sometimes d_revalidate is called with
-	 * and sometimes without i_sem lock held. The following checks
+	 * and sometimes without i_mutex lock held. The following checks
 	 * attempt to deduce when we need to add (and drop resp.) lock
 	 * here. This relies on current (2.6.2) calling coventions:
 	 *
-	 *   lookup_hash is always run under i_sem and is passing NULL
+	 *   lookup_hash is always run under i_mutex and is passing NULL
 	 *   as nd
 	 *
-	 *   open(...,O_CREATE,...) calls _lookup_hash under i_sem
+	 *   open(...,O_CREATE,...) calls _lookup_hash under i_mutex
 	 *   and sets flags to LOOKUP_OPEN|LOOKUP_CREATE
 	 *
 	 *   all other invocations of ->d_revalidate seem to happen
-	 *   outside of i_sem
+	 *   outside of i_mutex
 	 */
 	need_lock = nd &&
 	    (!(nd->flags & LOOKUP_CREATE) || (nd->flags & LOOKUP_PARENT));
 
 	if (need_lock)
-		down(&dir->i_sem);
+		mutex_lock(&dir->i_mutex);
 
 	if (is_devfsd_or_child(fs_info)) {
 		devfs_handle_t de = lookup_info->de;
@@ -2221,9 +2221,9 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd)
 		add_wait_queue(&lookup_info->wait_queue, &wait);
 		read_unlock(&parent->u.dir.lock);
 		/* at this point it is always (hopefully) locked */
-		up(&dir->i_sem);
+		mutex_unlock(&dir->i_mutex);
 		schedule();
-		down(&dir->i_sem);
+		mutex_lock(&dir->i_mutex);
 		/*
 		 * This does not need nor should remove wait from wait_queue.
 		 * Wait queue head is never reused - nothing is ever added to it
@@ -2238,7 +2238,7 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd)
 
       out:
 	if (need_lock)
-		up(&dir->i_sem);
+		mutex_unlock(&dir->i_mutex);
 	return 1;
 }				/*  End Function devfs_d_revalidate_wait  */
 
@@ -2284,9 +2284,9 @@ static struct dentry *devfs_lookup(struct inode *dir, struct dentry *dentry,
 	/*  Unlock directory semaphore, which will release any waiters. They
 	   will get the hashed dentry, and may be forced to wait for
 	   revalidation  */
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 	wait_for_devfsd_finished(fs_info);	/*  If I'm not devfsd, must wait  */
-	down(&dir->i_sem);	/*  Grab it again because them's the rules  */
+	mutex_lock(&dir->i_mutex);	/*  Grab it again because them's the rules  */
 	de = lookup_info.de;
 	/*  If someone else has been so kind as to make the inode, we go home
 	   early  */
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f2be44d4491f..bfb8a230bac9 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -130,7 +130,7 @@ static struct dentry *get_node(int num)
 {
 	char s[12];
 	struct dentry *root = devpts_root;
-	down(&root->d_inode->i_sem);
+	mutex_lock(&root->d_inode->i_mutex);
 	return lookup_one_len(s, root, sprintf(s, "%d", num));
 }
 
@@ -161,7 +161,7 @@ int devpts_pty_new(struct tty_struct *tty)
 	if (!IS_ERR(dentry) && !dentry->d_inode)
 		d_instantiate(dentry, inode);
 
-	up(&devpts_root->d_inode->i_sem);
+	mutex_unlock(&devpts_root->d_inode->i_mutex);
 
 	return 0;
 }
@@ -178,7 +178,7 @@ struct tty_struct *devpts_get_tty(int number)
 		dput(dentry);
 	}
 
-	up(&devpts_root->d_inode->i_sem);
+	mutex_unlock(&devpts_root->d_inode->i_mutex);
 
 	return tty;
 }
@@ -196,7 +196,7 @@ void devpts_pty_kill(int number)
 		}
 		dput(dentry);
 	}
-	up(&devpts_root->d_inode->i_sem);
+	mutex_unlock(&devpts_root->d_inode->i_mutex);
 }
 
 static int __init init_devpts_fs(void)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3931e7f1e6bf..30dbbd1df511 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -56,7 +56,7 @@
  * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
  * This determines whether we need to do the fancy locking which prevents
  * direct-IO from being able to read uninitialised disk blocks.  If its zero
- * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_sem is
+ * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
  * not held for the entire direct write (taken briefly, initially, during a
  * direct read though, but its never held for the duration of a direct-IO).
  */
@@ -930,7 +930,7 @@ out:
 }
 
 /*
- * Releases both i_sem and i_alloc_sem
+ * Releases both i_mutex and i_alloc_sem
  */
 static ssize_t
 direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 
@@ -1062,11 +1062,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 
 	/*
 	 * All block lookups have been performed. For READ requests
-	 * we can let i_sem go now that its achieved its purpose
+	 * we can let i_mutex go now that its achieved its purpose
 	 * of protecting us from looking up uninitialized blocks.
 	 */
 	if ((rw == READ) && (dio->lock_type == DIO_LOCKING))
-		up(&dio->inode->i_sem);
+		mutex_unlock(&dio->inode->i_mutex);
 
 	/*
 	 * OK, all BIOs are submitted, so we can decrement bio_count to truly
@@ -1145,18 +1145,18 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
  * The locking rules are governed by the dio_lock_type parameter.
  *
  * DIO_NO_LOCKING (no locking, for raw block device access)
- * For writes, i_sem is not held on entry; it is never taken.
+ * For writes, i_mutex is not held on entry; it is never taken.
  *
  * DIO_LOCKING (simple locking for regular files)
- * For writes we are called under i_sem and return with i_sem held, even though
+ * For writes we are called under i_mutex and return with i_mutex held, even though
  * it is internally dropped.
- * For reads, i_sem is not held on entry, but it is taken and dropped before
+ * For reads, i_mutex is not held on entry, but it is taken and dropped before
  * returning.
  *
  * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
  *	uninitialised data, allowing parallel direct readers and writers)
- * For writes we are called without i_sem, return without it, never touch it.
- * For reads, i_sem is held on entry and will be released before returning.
+ * For writes we are called without i_mutex, return without it, never touch it.
+ * For reads, i_mutex is held on entry and will be released before returning.
  *
  * Additional i_alloc_sem locking requirements described inline below.
  */
@@ -1214,11 +1214,11 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	 * For block device access DIO_NO_LOCKING is used,
 	 *	neither readers nor writers do any locking at all
 	 * For regular files using DIO_LOCKING,
-	 *	readers need to grab i_sem and i_alloc_sem
-	 *	writers need to grab i_alloc_sem only (i_sem is already held)
+	 *	readers need to grab i_mutex and i_alloc_sem
+	 *	writers need to grab i_alloc_sem only (i_mutex is already held)
 	 * For regular files using DIO_OWN_LOCKING,
 	 *	neither readers nor writers take any locks here
-	 *	(i_sem is already held and release for writers here)
+	 *	(i_mutex is already held and release for writers here)
 	 */
 	dio->lock_type = dio_lock_type;
 	if (dio_lock_type != DIO_NO_LOCKING) {
@@ -1228,7 +1228,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 
 			mapping = iocb->ki_filp->f_mapping;
 			if (dio_lock_type != DIO_OWN_LOCKING) {
-				down(&inode->i_sem);
+				mutex_lock(&inode->i_mutex);
 				reader_with_isem = 1;
 			}
 
@@ -1240,7 +1240,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 			}
 
 			if (dio_lock_type == DIO_OWN_LOCKING) {
-				up(&inode->i_sem);
+				mutex_unlock(&inode->i_mutex);
 				reader_with_isem = 0;
 			}
 		}
@@ -1266,7 +1266,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 
 out:
 	if (reader_with_isem)
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 	if (rw & WRITE)
 		current->flags &= ~PF_SYNCWRITE;
 	return retval;
diff --git a/fs/dquot.c b/fs/dquot.c
index 2a62b3dc20ec..cb6d5bfbdfd5 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -100,7 +100,7 @@
  * operation is just reading pointers from inode (or not using them at all) the
  * read lock is enough. If pointers are altered function must hold write lock
  * (these locking rules also apply for S_NOQUOTA flag in the inode - note that
- * for altering the flag i_sem is also needed).  If operation is holding
+ * for altering the flag i_mutex is also needed).  If operation is holding
  * reference to dquot in other way (e.g. quotactl ops) it must be guarded by
  * dqonoff_sem.
  * This locking assures that:
@@ -117,9 +117,9 @@
  * spinlock to internal buffers before writing.
  *
  * Lock ordering (including related VFS locks) is the following:
- *   i_sem > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem >
+ *   i_mutex > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem >
  *   > dquot->dq_lock > dqio_sem
- * i_sem on quota files is special (it's below dqio_sem)
+ * i_mutex on quota files is special (it's below dqio_sem)
  */
 
 static DEFINE_SPINLOCK(dq_list_lock);
@@ -1369,11 +1369,11 @@ int vfs_quota_off(struct super_block *sb, int type)
 			/* If quota was reenabled in the meantime, we have
 			 * nothing to do */
 			if (!sb_has_quota_enabled(sb, cnt)) {
-				down(&toputinode[cnt]->i_sem);
+				mutex_lock(&toputinode[cnt]->i_mutex);
 				toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
 				  S_NOATIME | S_NOQUOTA);
 				truncate_inode_pages(&toputinode[cnt]->i_data, 0);
-				up(&toputinode[cnt]->i_sem);
+				mutex_unlock(&toputinode[cnt]->i_mutex);
 				mark_inode_dirty(toputinode[cnt]);
 				iput(toputinode[cnt]);
 			}
@@ -1417,7 +1417,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
 	write_inode_now(inode, 1);
 	/* And now flush the block cache so that kernel sees the changes */
 	invalidate_bdev(sb->s_bdev, 0);
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	down(&dqopt->dqonoff_sem);
 	if (sb_has_quota_enabled(sb, type)) {
 		error = -EBUSY;
@@ -1449,7 +1449,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
 		goto out_file_init;
 	}
 	up(&dqopt->dqio_sem);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	set_enable_flags(dqopt, type);
 
 	add_dquot_ref(sb, type);
@@ -1470,7 +1470,7 @@ out_lock:
 		inode->i_flags |= oldflags;
 		up_write(&dqopt->dqptr_sem);
 	}
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 out_fmt:
 	put_quota_format(fmt);
 
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
new file mode 100644
index 000000000000..4e4762389bdc
--- /dev/null
+++ b/fs/drop_caches.c
@@ -0,0 +1,68 @@
+/*
+ * Implement the manual drop-all-pagecache function
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/writeback.h>
+#include <linux/sysctl.h>
+#include <linux/gfp.h>
+
+/* A global variable is a bit ugly, but it keeps the code simple */
+int sysctl_drop_caches;
+
+static void drop_pagecache_sb(struct super_block *sb)
+{
+	struct inode *inode;
+
+	spin_lock(&inode_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		if (inode->i_state & (I_FREEING|I_WILL_FREE))
+			continue;
+		invalidate_inode_pages(inode->i_mapping);
+	}
+	spin_unlock(&inode_lock);
+}
+
+void drop_pagecache(void)
+{
+	struct super_block *sb;
+
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root)
+			drop_pagecache_sb(sb);
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
+	}
+	spin_unlock(&sb_lock);
+}
+
+void drop_slab(void)
+{
+	int nr_objects;
+
+	do {
+		nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+	} while (nr_objects > 10);
+}
+
+int drop_caches_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (write) {
+		if (sysctl_drop_caches & 1)
+			drop_pagecache();
+		if (sysctl_drop_caches & 2)
+			drop_slab();
+	}
+	return 0;
+}
diff --git a/fs/exec.c b/fs/exec.c
index 22533cce0611..b5bcf1aae0ab 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -324,7 +324,7 @@ void install_arg_page(struct vm_area_struct *vma,
 	lru_cache_add_active(page);
 	set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
 					page, vma->vm_page_prot))));
-	page_add_anon_rmap(page, vma, address);
+	page_add_new_anon_rmap(page, vma, address);
 	pte_unmap_unlock(pte, ptl);
 
 	/* no need for flush_tlb */
@@ -632,10 +632,10 @@ static inline int de_thread(struct task_struct *tsk)
 		 * synchronize with any firing (by calling del_timer_sync)
 		 * before we can safely let the old group leader die.
 		 */
-		sig->real_timer.data = (unsigned long)current;
+		sig->real_timer.data = current;
 		spin_unlock_irq(lock);
-		if (del_timer_sync(&sig->real_timer))
-			add_timer(&sig->real_timer);
+		if (hrtimer_cancel(&sig->real_timer))
+			hrtimer_restart(&sig->real_timer);
 		spin_lock_irq(lock);
 	}
 	while (atomic_read(&sig->count) > count) {
@@ -760,7 +760,7 @@ no_thread_group:
 		spin_lock(&oldsighand->siglock);
 		spin_lock(&newsighand->siglock);
 
-		current->sighand = newsighand;
+		rcu_assign_pointer(current->sighand, newsighand);
 		recalc_sigpending();
 
 		spin_unlock(&newsighand->siglock);
@@ -768,7 +768,7 @@ no_thread_group:
 		write_unlock_irq(&tasklist_lock);
 
 		if (atomic_dec_and_test(&oldsighand->count))
-			kmem_cache_free(sighand_cachep, oldsighand);
+			sighand_free(oldsighand);
 	}
 
 	BUG_ON(!thread_group_leader(current));
@@ -1462,6 +1462,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
 		current->signal->flags = SIGNAL_GROUP_EXIT;
 		current->signal->group_exit_code = exit_code;
+		current->signal->group_stop_count = 0;
 		retval = 0;
 	}
 	spin_unlock_irq(&current->sighand->siglock);
@@ -1477,7 +1478,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	 * Clear any false indication of pending signals that might
 	 * be seen by the filesystem code called to write the core file.
 	 */
-	current->signal->group_stop_count = 0;
 	clear_thread_flag(TIF_SIGPENDING);
 
 	if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
@@ -1505,7 +1505,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 		goto close_fail;
 	if (!file->f_op->write)
 		goto close_fail;
-	if (do_truncate(file->f_dentry, 0, file) != 0)
+	if (do_truncate(file->f_dentry, 0, 0, file) != 0)
 		goto close_fail;
 
 	retval = binfmt->core_dump(signr, regs, file);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index c49d6254379a..5bfe40085fbc 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -177,9 +177,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
 			struct dentry *ppd;
 			struct dentry *npd;
 
-			down(&pd->d_inode->i_sem);
+			mutex_lock(&pd->d_inode->i_mutex);
 			ppd = CALL(nops,get_parent)(pd);
-			up(&pd->d_inode->i_sem);
+			mutex_unlock(&pd->d_inode->i_mutex);
 
 			if (IS_ERR(ppd)) {
 				err = PTR_ERR(ppd);
@@ -201,9 +201,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
 				break;
 			}
 			dprintk("find_exported_dentry: found name: %s\n", nbuf);
-			down(&ppd->d_inode->i_sem);
+			mutex_lock(&ppd->d_inode->i_mutex);
 			npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
-			up(&ppd->d_inode->i_sem);
+			mutex_unlock(&ppd->d_inode->i_mutex);
 			if (IS_ERR(npd)) {
 				err = PTR_ERR(npd);
 				dprintk("find_exported_dentry: lookup failed: %d\n", err);
@@ -242,9 +242,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
 		struct dentry *nresult;
 		err = CALL(nops,get_name)(target_dir, nbuf, result);
 		if (!err) {
-			down(&target_dir->d_inode->i_sem);
+			mutex_lock(&target_dir->d_inode->i_mutex);
 			nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf));
-			up(&target_dir->d_inode->i_sem);
+			mutex_unlock(&target_dir->d_inode->i_mutex);
 			if (!IS_ERR(nresult)) {
 				if (nresult->d_inode) {
 					dput(result);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 6af2f4130290..239133d01d91 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -149,7 +149,7 @@ ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 }
 
 /*
- * inode->i_sem: don't care
+ * inode->i_mutex: don't care
  */
 static struct posix_acl *
 ext2_get_acl(struct inode *inode, int type)
@@ -211,7 +211,7 @@ ext2_get_acl(struct inode *inode, int type)
 }
 
 /*
- * inode->i_sem: down
+ * inode->i_mutex: down
  */
 static int
 ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
@@ -301,8 +301,8 @@ ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
 /*
  * Initialize the ACLs of a new inode. Called from ext2_new_inode.
  *
- * dir->i_sem: down
- * inode->i_sem: up (access to inode is still exclusive)
+ * dir->i_mutex: down
+ * inode->i_mutex: up (access to inode is still exclusive)
  */
 int
 ext2_init_acl(struct inode *inode, struct inode *dir)
@@ -361,7 +361,7 @@ cleanup:
  * for directories) are added. There are no more bits available in the
  * file mode.
  *
- * inode->i_sem: down
+ * inode->i_mutex: down
  */
 int
 ext2_acl_chmod(struct inode *inode)
diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c
index 20145b74623f..e9983a0dd396 100644
--- a/fs/ext2/bitmap.c
+++ b/fs/ext2/bitmap.c
@@ -7,8 +7,12 @@
  * Universite Pierre et Marie Curie (Paris VI)
  */
 
+#ifdef EXT2FS_DEBUG
+
 #include <linux/buffer_head.h>
 
+#include "ext2.h"
+
 static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
 
 unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
@@ -23,3 +27,6 @@ unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
 			nibblemap[(map->b_data[i] >> 4) & 0xf];
 	return (sum);
 }
+
+#endif  /*  EXT2FS_DEBUG  */
+
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index e977f8566d14..00de0a7312a2 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -53,7 +53,7 @@ struct ext2_inode_info {
 #ifdef CONFIG_EXT2_FS_XATTR
 	/*
 	 * Extended attributes can be read independently of the main file
-	 * data. Taking i_sem even when reading would cause contention
+	 * data. Taking i_mutex even when reading would cause contention
 	 * between readers of EAs and writers of regular file data, so
 	 * instead we synchronize on xattr_sem when reading or changing
 	 * EAs.
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 522fa70dd8ea..8d6819846fc9 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1152,7 +1152,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 	struct buffer_head tmp_bh;
 	struct buffer_head *bh;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
@@ -1189,7 +1189,7 @@ out:
 	inode->i_version++;
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	mark_inode_dirty(inode);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return len - towrite;
 }
 
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 0099462d4271..a2ca3107d475 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -325,7 +325,7 @@ cleanup:
 /*
  * Inode operation listxattr()
  *
- * dentry->d_inode->i_sem: don't care
+ * dentry->d_inode->i_mutex: don't care
  */
 ssize_t
 ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -389,10 +389,6 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
 	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
 		  name_index, name, value, (long)value_len);
 
-	if (IS_RDONLY(inode))
-		return -EROFS;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		return -EPERM;
 	if (value == NULL)
 		value_len = 0;
 	if (name == NULL)
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 52b30ee6a25f..2c072bfea23b 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -38,8 +38,6 @@ ext2_xattr_trusted_get(struct inode *inode, const char *name,
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
 	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name,
 			      buffer, size);
 }
@@ -50,8 +48,6 @@ ext2_xattr_trusted_set(struct inode *inode, const char *name,
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
 	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name,
 			      value, size, flags);
 }
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 0c03ea131a94..f383e7c3a7b5 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -35,16 +35,10 @@ static int
 ext2_xattr_user_get(struct inode *inode, const char *name,
 		    void *buffer, size_t size)
 {
-	int error;
-
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 	if (!test_opt(inode->i_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	error = permission(inode, MAY_READ, NULL);
-	if (error)
-		return error;
-
 	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size);
 }
 
@@ -52,18 +46,10 @@ static int
 ext2_xattr_user_set(struct inode *inode, const char *name,
 		    const void *value, size_t size, int flags)
 {
-	int error;
-
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 	if (!test_opt(inode->i_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	if ( !S_ISREG(inode->i_mode) &&
-	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-		return -EPERM;
-	error = permission(inode, MAY_WRITE, NULL);
-	if (error)
-		return error;
 
 	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name,
 			      value, size, flags);
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 3ac38266fc9e..9ed132c96034 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -152,7 +152,7 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 /*
  * Inode operation get_posix_acl().
  *
- * inode->i_sem: don't care
+ * inode->i_mutex: don't care
  */
 static struct posix_acl *
 ext3_get_acl(struct inode *inode, int type)
@@ -216,7 +216,7 @@ ext3_get_acl(struct inode *inode, int type)
 /*
  * Set the access or default ACL of an inode.
  *
- * inode->i_sem: down unless called from ext3_new_inode
+ * inode->i_mutex: down unless called from ext3_new_inode
  */
 static int
 ext3_set_acl(handle_t *handle, struct inode *inode, int type,
@@ -306,8 +306,8 @@ ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
 /*
  * Initialize the ACLs of a new inode. Called from ext3_new_inode.
  *
- * dir->i_sem: down
- * inode->i_sem: up (access to inode is still exclusive)
+ * dir->i_mutex: down
+ * inode->i_mutex: up (access to inode is still exclusive)
  */
 int
 ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
@@ -368,7 +368,7 @@ cleanup:
  * for directories) are added. There are no more bits available in the
  * file mode.
  *
- * inode->i_sem: down
+ * inode->i_mutex: down
  */
 int
 ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index ae1148c24c53..c6393fb4c35a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -20,8 +20,6 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 
-#include "bitmap.h"
-
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index 5b4ba3e246e6..cb16b4c5d5df 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -7,8 +7,11 @@
  * Universite Pierre et Marie Curie (Paris VI)
  */
 
+#ifdef EXT3FS_DEBUG
+
 #include <linux/buffer_head.h>
-#include "bitmap.h"
+
+#include "ext3_fs.h"
 
 static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
 
@@ -24,3 +27,6 @@ unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
 			nibblemap[(map->b_data[i] >> 4) & 0xf];
 	return (sum);
 }
+
+#endif  /*  EXT3FS_DEBUG  */
+
diff --git a/fs/ext3/bitmap.h b/fs/ext3/bitmap.h
deleted file mode 100644
index 6ee503a6bb4e..000000000000
--- a/fs/ext3/bitmap.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*  linux/fs/ext3/bitmap.c
- *
- * Copyright (C) 2005 Simtec Electronics
- *	Ben Dooks <ben@simtec.co.uk>
- *
-*/
-
-extern unsigned long ext3_count_free (struct buffer_head *, unsigned int );
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 9e4a24376210..dc826464f313 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -26,7 +26,6 @@
 
 #include <asm/byteorder.h>
 
-#include "bitmap.h"
 #include "xattr.h"
 #include "acl.h"
 
@@ -651,7 +650,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 	/* Error cases - e2fsck has already cleaned up for us */
 	if (ino > max_ino) {
 		ext3_warning(sb, __FUNCTION__,
-			     "bad orphan ino %lu!  e2fsck was run?\n", ino);
+			     "bad orphan ino %lu!  e2fsck was run?", ino);
 		goto out;
 	}
 
@@ -660,7 +659,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 	bitmap_bh = read_inode_bitmap(sb, block_group);
 	if (!bitmap_bh) {
 		ext3_warning(sb, __FUNCTION__,
-			     "inode bitmap error for orphan %lu\n", ino);
+			     "inode bitmap error for orphan %lu", ino);
 		goto out;
 	}
 
@@ -672,7 +671,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 			!(inode = iget(sb, ino)) || is_bad_inode(inode) ||
 			NEXT_ORPHAN(inode) > max_ino) {
 		ext3_warning(sb, __FUNCTION__,
-			     "bad orphan inode %lu!  e2fsck was run?\n", ino);
+			     "bad orphan inode %lu!  e2fsck was run?", ino);
 		printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
 		       bit, (unsigned long long)bitmap_bh->b_blocknr,
 		       ext3_test_bit(bit, bitmap_bh->b_data));
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b3c690a3b54a..af193a304ee5 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1476,7 +1476,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 		if (levels && (dx_get_count(frames->entries) ==
 			       dx_get_limit(frames->entries))) {
 			ext3_warning(sb, __FUNCTION__,
-				     "Directory index full!\n");
+				     "Directory index full!");
 			err = -ENOSPC;
 			goto cleanup;
 		}
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 6104ad310507..1041dab6de2f 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -31,7 +31,7 @@ static int verify_group_input(struct super_block *sb,
 	unsigned start = le32_to_cpu(es->s_blocks_count);
 	unsigned end = start + input->blocks_count;
 	unsigned group = input->group;
-	unsigned itend = input->inode_table + EXT3_SB(sb)->s_itb_per_group;
+	unsigned itend = input->inode_table + sbi->s_itb_per_group;
 	unsigned overhead = ext3_bg_has_super(sb, group) ?
 		(1 + ext3_bg_num_gdb(sb, group) +
 		 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
@@ -340,7 +340,7 @@ static int verify_reserved_gdb(struct super_block *sb,
 	while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
 		if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
 			ext3_warning(sb, __FUNCTION__,
-				     "reserved GDT %ld missing grp %d (%ld)\n",
+				     "reserved GDT %ld missing grp %d (%ld)",
 				     blk, grp,
 				     grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
 			return -EINVAL;
@@ -393,7 +393,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	if (EXT3_SB(sb)->s_sbh->b_blocknr !=
 	    le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
 		ext3_warning(sb, __FUNCTION__,
-			"won't resize using backup superblock at %llu\n",
+			"won't resize using backup superblock at %llu",
 			(unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
 		return -EPERM;
 	}
@@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	data = (__u32 *)dind->b_data;
 	if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
 		ext3_warning(sb, __FUNCTION__,
-			     "new group %u GDT block %lu not reserved\n",
+			     "new group %u GDT block %lu not reserved",
 			     input->group, gdblock);
 		err = -EINVAL;
 		goto exit_dind;
@@ -540,7 +540,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	for (res = 0; res < reserved_gdb; res++, blk++) {
 		if (le32_to_cpu(*data) != blk) {
 			ext3_warning(sb, __FUNCTION__,
-				     "reserved block %lu not at offset %ld\n",
+				     "reserved block %lu not at offset %ld",
 				     blk, (long)(data - (__u32 *)dind->b_data));
 			err = -EINVAL;
 			goto exit_bh;
@@ -683,7 +683,7 @@ exit_err:
 	if (err) {
 		ext3_warning(sb, __FUNCTION__,
 			     "can't update backup for group %d (err %d), "
-			     "forcing fsck on next reboot\n", group, err);
+			     "forcing fsck on next reboot", group, err);
 		sbi->s_mount_state &= ~EXT3_VALID_FS;
 		sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS);
 		mark_buffer_dirty(sbi->s_sbh);
@@ -722,7 +722,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
 					EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
 		ext3_warning(sb, __FUNCTION__,
-			     "Can't resize non-sparse filesystem further\n");
+			     "Can't resize non-sparse filesystem further");
 		return -EPERM;
 	}
 
@@ -730,13 +730,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 		if (!EXT3_HAS_COMPAT_FEATURE(sb,
 					     EXT3_FEATURE_COMPAT_RESIZE_INODE)){
 			ext3_warning(sb, __FUNCTION__,
-				     "No reserved GDT blocks, can't resize\n");
+				     "No reserved GDT blocks, can't resize");
 			return -EPERM;
 		}
 		inode = iget(sb, EXT3_RESIZE_INO);
 		if (!inode || is_bad_inode(inode)) {
 			ext3_warning(sb, __FUNCTION__,
-				     "Error opening resize inode\n");
+				     "Error opening resize inode");
 			iput(inode);
 			return -ENOENT;
 		}
@@ -764,9 +764,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	}
 
 	lock_super(sb);
-	if (input->group != EXT3_SB(sb)->s_groups_count) {
+	if (input->group != sbi->s_groups_count) {
 		ext3_warning(sb, __FUNCTION__,
-			     "multiple resizers run on filesystem!\n");
+			     "multiple resizers run on filesystem!");
 		err = -EBUSY;
 		goto exit_journal;
 	}
@@ -799,7 +799,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	 * data.  So we need to be careful to set all of the relevant
 	 * group descriptor data etc. *before* we enable the group.
 	 *
-	 * The key field here is EXT3_SB(sb)->s_groups_count: as long as
+	 * The key field here is sbi->s_groups_count: as long as
 	 * that retains its old value, nobody is going to access the new
 	 * group.
 	 *
@@ -859,7 +859,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	smp_wmb();
 
 	/* Update the global fs size fields */
-	EXT3_SB(sb)->s_groups_count++;
+	sbi->s_groups_count++;
 
 	ext3_journal_dirty_metadata(handle, primary);
 
@@ -874,7 +874,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	percpu_counter_mod(&sbi->s_freeinodes_counter,
 			   EXT3_INODES_PER_GROUP(sb));
 
-	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
 	sb->s_dirt = 1;
 
 exit_journal:
@@ -937,7 +937,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 
 	if (last == 0) {
 		ext3_warning(sb, __FUNCTION__,
-			     "need to use ext2online to resize further\n");
+			     "need to use ext2online to resize further");
 		return -EPERM;
 	}
 
@@ -973,7 +973,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 	lock_super(sb);
 	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
 		ext3_warning(sb, __FUNCTION__,
-			     "multiple resizers run on filesystem!\n");
+			     "multiple resizers run on filesystem!");
 		err = -EBUSY;
 		goto exit_put;
 	}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 4e6730622d90..56bf76586019 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -43,7 +43,8 @@
 #include "acl.h"
 #include "namei.h"
 
-static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
+static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
+			     unsigned long journal_devnum);
 static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
 			       int);
 static void ext3_commit_super (struct super_block * sb,
@@ -628,7 +629,7 @@ enum {
 	Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
 	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
 	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
-	Opt_commit, Opt_journal_update, Opt_journal_inum,
+	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
@@ -666,6 +667,7 @@ static match_table_t tokens = {
 	{Opt_commit, "commit=%u"},
 	{Opt_journal_update, "journal=update"},
 	{Opt_journal_inum, "journal=%u"},
+	{Opt_journal_dev, "journal_dev=%u"},
 	{Opt_abort, "abort"},
 	{Opt_data_journal, "data=journal"},
 	{Opt_data_ordered, "data=ordered"},
@@ -705,8 +707,9 @@ static unsigned long get_sb_block(void **data)
 	return sb_block;
 }
 
-static int parse_options (char * options, struct super_block *sb,
-			  unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
+static int parse_options (char *options, struct super_block *sb,
+			  unsigned long *inum, unsigned long *journal_devnum,
+			  unsigned long *n_blocks_count, int is_remount)
 {
 	struct ext3_sb_info *sbi = EXT3_SB(sb);
 	char * p;
@@ -839,6 +842,16 @@ static int parse_options (char * options, struct super_block *sb,
 				return 0;
 			*inum = option;
 			break;
+		case Opt_journal_dev:
+			if (is_remount) {
+				printk(KERN_ERR "EXT3-fs: cannot specify "
+				       "journal on remount\n");
+				return 0;
+			}
+			if (match_int(&args[0], &option))
+				return 0;
+			*journal_devnum = option;
+			break;
 		case Opt_noload:
 			set_opt (sbi->s_mount_opt, NOLOAD);
 			break;
@@ -1331,6 +1344,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	unsigned long logic_sb_block;
 	unsigned long offset = 0;
 	unsigned long journal_inum = 0;
+	unsigned long journal_devnum = 0;
 	unsigned long def_mount_opts;
 	struct inode *root;
 	int blocksize;
@@ -1411,7 +1425,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 
 	set_opt(sbi->s_mount_opt, RESERVATION);
 
-	if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
+	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
+			    NULL, 0))
 		goto failed_mount;
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -1622,7 +1637,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	 */
 	if (!test_opt(sb, NOLOAD) &&
 	    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
-		if (ext3_load_journal(sb, es))
+		if (ext3_load_journal(sb, es, journal_devnum))
 			goto failed_mount2;
 	} else if (journal_inum) {
 		if (ext3_create_journal(sb, es, journal_inum))
@@ -1902,15 +1917,24 @@ out_bdev:
 	return NULL;
 }
 
-static int ext3_load_journal(struct super_block * sb,
-			     struct ext3_super_block * es)
+static int ext3_load_journal(struct super_block *sb,
+			     struct ext3_super_block *es,
+			     unsigned long journal_devnum)
 {
 	journal_t *journal;
 	int journal_inum = le32_to_cpu(es->s_journal_inum);
-	dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
+	dev_t journal_dev;
 	int err = 0;
 	int really_read_only;
 
+	if (journal_devnum &&
+	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+		printk(KERN_INFO "EXT3-fs: external journal device major/minor "
+			"numbers have changed\n");
+		journal_dev = new_decode_dev(journal_devnum);
+	} else
+		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
+
 	really_read_only = bdev_read_only(sb->s_bdev);
 
 	/*
@@ -1969,6 +1993,16 @@ static int ext3_load_journal(struct super_block * sb,
 
 	EXT3_SB(sb)->s_journal = journal;
 	ext3_clear_journal_err(sb, es);
+
+	if (journal_devnum &&
+	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+		es->s_journal_dev = cpu_to_le32(journal_devnum);
+		sb->s_dirt = 1;
+
+		/* Make sure we flush the recovery flag to disk. */
+		ext3_commit_super(sb, es, 1);
+	}
+
 	return 0;
 }
 
@@ -2116,7 +2150,7 @@ int ext3_force_commit(struct super_block *sb)
 
 static void ext3_write_super (struct super_block * sb)
 {
-	if (down_trylock(&sb->s_lock) == 0)
+	if (mutex_trylock(&sb->s_lock) != 0)
 		BUG();
 	sb->s_dirt = 0;
 }
@@ -2197,7 +2231,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 	/*
 	 * Allow the "check" option to be passed as a remount option.
 	 */
-	if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) {
+	if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
 		err = -EINVAL;
 		goto restore_opts;
 	}
@@ -2567,7 +2601,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
@@ -2610,7 +2644,7 @@ out:
 	inode->i_version++;
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	ext3_mark_inode_dirty(handle, inode);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return len - towrite;
 }
 
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 430de9f63be3..e8d60bf6b7df 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -140,7 +140,7 @@ ext3_xattr_handler(int name_index)
 /*
  * Inode operation listxattr()
  *
- * dentry->d_inode->i_sem: don't care
+ * dentry->d_inode->i_mutex: don't care
  */
 ssize_t
 ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -946,10 +946,6 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 	};
 	int error;
 
-	if (IS_RDONLY(inode))
-		return -EROFS;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		return -EPERM;
 	if (!name)
 		return -EINVAL;
 	if (strlen(name) > 255)
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index f68bfd1cf519..7c693c94f14d 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -39,8 +39,6 @@ ext3_xattr_trusted_get(struct inode *inode, const char *name,
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
 	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
 			      buffer, size);
 }
@@ -51,8 +49,6 @@ ext3_xattr_trusted_set(struct inode *inode, const char *name,
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
 	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name,
 			      value, size, flags);
 }
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index e907cae7a07c..a85a0a17c4fd 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -37,16 +37,10 @@ static int
 ext3_xattr_user_get(struct inode *inode, const char *name,
 		    void *buffer, size_t size)
 {
-	int error;
-
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 	if (!test_opt(inode->i_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	error = permission(inode, MAY_READ, NULL);
-	if (error)
-		return error;
-
 	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size);
 }
 
@@ -54,19 +48,10 @@ static int
 ext3_xattr_user_set(struct inode *inode, const char *name,
 		    const void *value, size_t size, int flags)
 {
-	int error;
-
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 	if (!test_opt(inode->i_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	if ( !S_ISREG(inode->i_mode) &&
-	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-		return -EPERM;
-	error = permission(inode, MAY_WRITE, NULL);
-	if (error)
-		return error;
-
 	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name,
 			      value, size, flags);
 }
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 77c24fcf712a..1acc941245fb 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -295,7 +295,8 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
 	return dclus;
 }
 
-int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
+int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
+	     unsigned long *mapped_blocks)
 {
 	struct super_block *sb = inode->i_sb;
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -303,9 +304,12 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
 	int cluster, offset;
 
 	*phys = 0;
+	*mapped_blocks = 0;
 	if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) {
-		if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits))
+		if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) {
 			*phys = sector + sbi->dir_start;
+			*mapped_blocks = 1;
+		}
 		return 0;
 	}
 	last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1))
@@ -318,7 +322,11 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
 	cluster = fat_bmap_cluster(inode, cluster);
 	if (cluster < 0)
 		return cluster;
-	else if (cluster)
+	else if (cluster) {
 		*phys = fat_clus_to_blknr(sbi, cluster) + offset;
+		*mapped_blocks = sbi->sec_per_clus - offset;
+		if (*mapped_blocks > last_block - sector)
+			*mapped_blocks = last_block - sector;
+	}
 	return 0;
 }
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ba824964b9bb..db0de5c621c7 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -45,8 +45,8 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock,
 	if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
 		return;
 
-	bh = sb_getblk(sb, phys);
-	if (bh && !buffer_uptodate(bh)) {
+	bh = sb_find_get_block(sb, phys);
+	if (bh == NULL || !buffer_uptodate(bh)) {
 		for (sec = 0; sec < sbi->sec_per_clus; sec++)
 			sb_breadahead(sb, phys + sec);
 	}
@@ -68,8 +68,8 @@ static int fat__get_entry(struct inode *dir, loff_t *pos,
 {
 	struct super_block *sb = dir->i_sb;
 	sector_t phys, iblock;
-	int offset;
-	int err;
+	unsigned long mapped_blocks;
+	int err, offset;
 
 next:
 	if (*bh)
@@ -77,7 +77,7 @@ next:
 
 	*bh = NULL;
 	iblock = *pos >> sb->s_blocksize_bits;
-	err = fat_bmap(dir, iblock, &phys);
+	err = fat_bmap(dir, iblock, &phys, &mapped_blocks);
 	if (err || !phys)
 		return -1;	/* beyond EOF or error */
 
@@ -418,7 +418,7 @@ EODir:
 	return err;
 }
 
-EXPORT_SYMBOL(fat_search_long);
+EXPORT_SYMBOL_GPL(fat_search_long);
 
 struct fat_ioctl_filldir_callback {
 	struct dirent __user *dirent;
@@ -729,13 +729,13 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
 
 	buf.dirent = d1;
 	buf.result = 0;
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	ret = -ENOENT;
 	if (!IS_DEADDIR(inode)) {
 		ret = __fat_readdir(inode, filp, &buf, fat_ioctl_filldir,
 				    short_only, both);
 	}
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	if (ret >= 0)
 		ret = buf.result;
 	return ret;
@@ -780,7 +780,7 @@ int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
 	return -ENOENT;
 }
 
-EXPORT_SYMBOL(fat_get_dotdot_entry);
+EXPORT_SYMBOL_GPL(fat_get_dotdot_entry);
 
 /* See if directory is empty */
 int fat_dir_empty(struct inode *dir)
@@ -803,7 +803,7 @@ int fat_dir_empty(struct inode *dir)
 	return result;
 }
 
-EXPORT_SYMBOL(fat_dir_empty);
+EXPORT_SYMBOL_GPL(fat_dir_empty);
 
 /*
  * fat_subdirs counts the number of sub-directories of dir. It can be run
@@ -849,7 +849,7 @@ int fat_scan(struct inode *dir, const unsigned char *name,
 	return -ENOENT;
 }
 
-EXPORT_SYMBOL(fat_scan);
+EXPORT_SYMBOL_GPL(fat_scan);
 
 static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
 {
@@ -936,7 +936,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
 	return 0;
 }
 
-EXPORT_SYMBOL(fat_remove_entries);
+EXPORT_SYMBOL_GPL(fat_remove_entries);
 
 static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
 			      struct buffer_head **bhs, int nr_bhs)
@@ -1048,7 +1048,7 @@ error:
 	return err;
 }
 
-EXPORT_SYMBOL(fat_alloc_new_dir);
+EXPORT_SYMBOL_GPL(fat_alloc_new_dir);
 
 static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
 			       int *nr_cluster, struct msdos_dir_entry **de,
@@ -1264,4 +1264,4 @@ error_remove:
 	return err;
 }
 
-EXPORT_SYMBOL(fat_add_entries);
+EXPORT_SYMBOL_GPL(fat_add_entries);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 4164cd54c4d1..a1a9e0451217 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -476,6 +476,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
 				sbi->prev_free = entry;
 				if (sbi->free_clusters != -1)
 					sbi->free_clusters--;
+				sb->s_dirt = 1;
 
 				cluster[idx_clus] = entry;
 				idx_clus++;
@@ -496,6 +497,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
 
 	/* Couldn't allocate the free entries */
 	sbi->free_clusters = 0;
+	sb->s_dirt = 1;
 	err = -ENOSPC;
 
 out:
@@ -509,7 +511,6 @@ out:
 	}
 	for (i = 0; i < nr_bhs; i++)
 		brelse(bhs[i]);
-	fat_clusters_flush(sb);
 
 	if (err && idx_clus)
 		fat_free_clusters(inode, cluster[0]);
@@ -542,8 +543,10 @@ int fat_free_clusters(struct inode *inode, int cluster)
 		}
 
 		ops->ent_put(&fatent, FAT_ENT_FREE);
-		if (sbi->free_clusters != -1)
+		if (sbi->free_clusters != -1) {
 			sbi->free_clusters++;
+			sb->s_dirt = 1;
+		}
 
 		if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) {
 			if (sb->s_flags & MS_SYNCHRONOUS) {
@@ -578,7 +581,7 @@ error:
 	return err;
 }
 
-EXPORT_SYMBOL(fat_free_clusters);
+EXPORT_SYMBOL_GPL(fat_free_clusters);
 
 int fat_count_free_clusters(struct super_block *sb)
 {
@@ -605,6 +608,7 @@ int fat_count_free_clusters(struct super_block *sb)
 		} while (fat_ent_next(sbi, &fatent));
 	}
 	sbi->free_clusters = free;
+	sb->s_dirt = 1;
 	fatent_brelse(&fatent);
 out:
 	unlock_fat(sbi);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7134403d5be2..d30876cf35f5 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -11,6 +11,7 @@
 #include <linux/msdos_fs.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 
 int fat_generic_ioctl(struct inode *inode, struct file *filp,
 		      unsigned int cmd, unsigned long arg)
@@ -40,7 +41,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
 		if (err)
 			return err;
 
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 
 		if (IS_RDONLY(inode)) {
 			err = -EROFS;
@@ -102,7 +103,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
 		MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED;
 		mark_inode_dirty(inode);
 	up:
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 		return err;
 	}
 	default:
@@ -124,6 +125,24 @@ struct file_operations fat_file_operations = {
 	.sendfile	= generic_file_sendfile,
 };
 
+static int fat_cont_expand(struct inode *inode, loff_t size)
+{
+	struct address_space *mapping = inode->i_mapping;
+	loff_t start = inode->i_size, count = size - inode->i_size;
+	int err;
+
+	err = generic_cont_expand_simple(inode, size);
+	if (err)
+		goto out;
+
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
+	mark_inode_dirty(inode);
+	if (IS_SYNC(inode))
+		err = sync_page_range_nolock(inode, mapping, start, count);
+out:
+	return err;
+}
+
 int fat_notify_change(struct dentry *dentry, struct iattr *attr)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -132,11 +151,17 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr)
 
 	lock_kernel();
 
-	/* FAT cannot truncate to a longer file */
+	/*
+	 * Expand the file. Since inode_setattr() updates ->i_size
+	 * before calling the ->truncate(), but FAT needs to fill the
+	 * hole before it.
+	 */
 	if (attr->ia_valid & ATTR_SIZE) {
 		if (attr->ia_size > inode->i_size) {
-			error = -EPERM;
-			goto out;
+			error = fat_cont_expand(inode, attr->ia_size);
+			if (error || attr->ia_valid == ATTR_SIZE)
+				goto out;
+			attr->ia_valid &= ~ATTR_SIZE;
 		}
 	}
 
@@ -173,7 +198,7 @@ out:
 	return error;
 }
 
-EXPORT_SYMBOL(fat_notify_change);
+EXPORT_SYMBOL_GPL(fat_notify_change);
 
 /* Free all clusters after the skip'th cluster. */
 static int fat_free(struct inode *inode, int skip)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a0f9b9fe1307..e7f4aa7fc686 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -18,10 +18,12 @@
 #include <linux/seq_file.h>
 #include <linux/msdos_fs.h>
 #include <linux/pagemap.h>
+#include <linux/mpage.h>
 #include <linux/buffer_head.h>
 #include <linux/mount.h>
 #include <linux/vfs.h>
 #include <linux/parser.h>
+#include <linux/uio.h>
 #include <asm/unaligned.h>
 
 #ifndef CONFIG_FAT_DEFAULT_IOCHARSET
@@ -48,51 +50,97 @@ static int fat_add_cluster(struct inode *inode)
 	return err;
 }
 
-static int fat_get_block(struct inode *inode, sector_t iblock,
-			 struct buffer_head *bh_result, int create)
+static int __fat_get_blocks(struct inode *inode, sector_t iblock,
+			    unsigned long *max_blocks,
+			    struct buffer_head *bh_result, int create)
 {
 	struct super_block *sb = inode->i_sb;
+	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 	sector_t phys;
-	int err;
+	unsigned long mapped_blocks;
+	int err, offset;
 
-	err = fat_bmap(inode, iblock, &phys);
+	err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
 	if (err)
 		return err;
 	if (phys) {
 		map_bh(bh_result, sb, phys);
+		*max_blocks = min(mapped_blocks, *max_blocks);
 		return 0;
 	}
 	if (!create)
 		return 0;
+
 	if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
 		fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)",
 			     MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
 		return -EIO;
 	}
-	if (!((unsigned long)iblock & (MSDOS_SB(sb)->sec_per_clus - 1))) {
+
+	offset = (unsigned long)iblock & (sbi->sec_per_clus - 1);
+	if (!offset) {
+		/* TODO: multiple cluster allocation would be desirable. */
 		err = fat_add_cluster(inode);
 		if (err)
 			return err;
 	}
-	MSDOS_I(inode)->mmu_private += sb->s_blocksize;
-	err = fat_bmap(inode, iblock, &phys);
+	/* available blocks on this cluster */
+	mapped_blocks = sbi->sec_per_clus - offset;
+
+	*max_blocks = min(mapped_blocks, *max_blocks);
+	MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
+
+	err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
 	if (err)
 		return err;
-	if (!phys)
-		BUG();
+	BUG_ON(!phys);
+	BUG_ON(*max_blocks != mapped_blocks);
 	set_buffer_new(bh_result);
 	map_bh(bh_result, sb, phys);
 	return 0;
 }
 
+static int fat_get_blocks(struct inode *inode, sector_t iblock,
+			  unsigned long max_blocks,
+			  struct buffer_head *bh_result, int create)
+{
+	struct super_block *sb = inode->i_sb;
+	int err;
+
+	err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
+	if (err)
+		return err;
+	bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+	return 0;
+}
+
+static int fat_get_block(struct inode *inode, sector_t iblock,
+			 struct buffer_head *bh_result, int create)
+{
+	unsigned long max_blocks = 1;
+	return __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
+}
+
 static int fat_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, fat_get_block, wbc);
 }
 
+static int fat_writepages(struct address_space *mapping,
+			  struct writeback_control *wbc)
+{
+	return mpage_writepages(mapping, wbc, fat_get_block);
+}
+
 static int fat_readpage(struct file *file, struct page *page)
 {
-	return block_read_full_page(page, fat_get_block);
+	return mpage_readpage(page, fat_get_block);
+}
+
+static int fat_readpages(struct file *file, struct address_space *mapping,
+			 struct list_head *pages, unsigned nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
 }
 
 static int fat_prepare_write(struct file *file, struct page *page,
@@ -115,6 +163,34 @@ static int fat_commit_write(struct file *file, struct page *page,
 	return err;
 }
 
+static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
+			     const struct iovec *iov,
+			     loff_t offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+
+	if (rw == WRITE) {
+		/*
+		 * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(),
+		 * so we need to update the ->mmu_private to block boundary.
+		 *
+		 * But we must fill the remaining area or hole by nul for
+		 * updating ->mmu_private.
+		 */
+		loff_t size = offset + iov_length(iov, nr_segs);
+		if (MSDOS_I(inode)->mmu_private < size)
+			return -EINVAL;
+	}
+
+	/*
+	 * FAT need to use the DIO_LOCKING for avoiding the race
+	 * condition of fat_get_block() and ->truncate().
+	 */
+	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+				  offset, nr_segs, fat_get_blocks, NULL);
+}
+
 static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping, block, fat_get_block);
@@ -122,10 +198,13 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 
 static struct address_space_operations fat_aops = {
 	.readpage	= fat_readpage,
+	.readpages	= fat_readpages,
 	.writepage	= fat_writepage,
+	.writepages	= fat_writepages,
 	.sync_page	= block_sync_page,
 	.prepare_write	= fat_prepare_write,
 	.commit_write	= fat_commit_write,
+	.direct_IO	= fat_direct_IO,
 	.bmap		= _fat_bmap
 };
 
@@ -182,7 +261,7 @@ void fat_attach(struct inode *inode, loff_t i_pos)
 	spin_unlock(&sbi->inode_hash_lock);
 }
 
-EXPORT_SYMBOL(fat_attach);
+EXPORT_SYMBOL_GPL(fat_attach);
 
 void fat_detach(struct inode *inode)
 {
@@ -193,7 +272,7 @@ void fat_detach(struct inode *inode)
 	spin_unlock(&sbi->inode_hash_lock);
 }
 
-EXPORT_SYMBOL(fat_detach);
+EXPORT_SYMBOL_GPL(fat_detach);
 
 struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
 {
@@ -347,7 +426,7 @@ out:
 	return inode;
 }
 
-EXPORT_SYMBOL(fat_build_inode);
+EXPORT_SYMBOL_GPL(fat_build_inode);
 
 static void fat_delete_inode(struct inode *inode)
 {
@@ -374,12 +453,17 @@ static void fat_clear_inode(struct inode *inode)
 	unlock_kernel();
 }
 
-static void fat_put_super(struct super_block *sb)
+static void fat_write_super(struct super_block *sb)
 {
-	struct msdos_sb_info *sbi = MSDOS_SB(sb);
+	sb->s_dirt = 0;
 
 	if (!(sb->s_flags & MS_RDONLY))
 		fat_clusters_flush(sb);
+}
+
+static void fat_put_super(struct super_block *sb)
+{
+	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 
 	if (sbi->nls_disk) {
 		unload_nls(sbi->nls_disk);
@@ -537,7 +621,7 @@ int fat_sync_inode(struct inode *inode)
 	return fat_write_inode(inode, 1);
 }
 
-EXPORT_SYMBOL(fat_sync_inode);
+EXPORT_SYMBOL_GPL(fat_sync_inode);
 
 static int fat_show_options(struct seq_file *m, struct vfsmount *mnt);
 static struct super_operations fat_sops = {
@@ -546,6 +630,7 @@ static struct super_operations fat_sops = {
 	.write_inode	= fat_write_inode,
 	.delete_inode	= fat_delete_inode,
 	.put_super	= fat_put_super,
+	.write_super	= fat_write_super,
 	.statfs		= fat_statfs,
 	.clear_inode	= fat_clear_inode,
 	.remount_fs	= fat_remount,
@@ -1347,7 +1432,7 @@ out_fail:
 	return error;
 }
 
-EXPORT_SYMBOL(fat_fill_super);
+EXPORT_SYMBOL_GPL(fat_fill_super);
 
 int __init fat_cache_init(void);
 void fat_cache_destroy(void);
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 2a0df2122f5d..32fb0a3f1da4 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -33,7 +33,7 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...)
 	}
 }
 
-EXPORT_SYMBOL(fat_fs_panic);
+EXPORT_SYMBOL_GPL(fat_fs_panic);
 
 /* Flushes the number of free clusters on FAT32 */
 /* XXX: Need to write one per FSINFO block.  Currently only writes 1 */
@@ -67,8 +67,6 @@ void fat_clusters_flush(struct super_block *sb)
 		if (sbi->prev_free != -1)
 			fsinfo->next_cluster = cpu_to_le32(sbi->prev_free);
 		mark_buffer_dirty(bh);
-		if (sb->s_flags & MS_SYNCHRONOUS)
-			sync_dirty_buffer(bh);
 	}
 	brelse(bh);
 }
@@ -194,7 +192,7 @@ void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
 	*date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9));
 }
 
-EXPORT_SYMBOL(fat_date_unix2dos);
+EXPORT_SYMBOL_GPL(fat_date_unix2dos);
 
 int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
 {
@@ -222,4 +220,4 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
 	return err;
 }
 
-EXPORT_SYMBOL(fat_sync_bhs);
+EXPORT_SYMBOL_GPL(fat_sync_bhs);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 863b46e0d78a..9903bde475f2 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -457,11 +457,11 @@ static void send_sigio_to_task(struct task_struct *p,
 			else
 				si.si_band = band_table[reason - POLL_IN];
 			si.si_fd    = fd;
-			if (!send_group_sig_info(fown->signum, &si, p))
+			if (!group_send_sig_info(fown->signum, &si, p))
 				break;
 		/* fall-through: fall back on the old plain SIGIO signal */
 		case 0:
-			send_group_sig_info(SIGIO, SEND_SIG_PRIV, p);
+			group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
 	}
 }
 
@@ -495,7 +495,7 @@ static void send_sigurg_to_task(struct task_struct *p,
                                 struct fown_struct *fown)
 {
 	if (sigio_perm(p, fown, SIGURG))
-		send_group_sig_info(SIGURG, SEND_SIG_PRIV, p);
+		group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
 }
 
 int send_sigurg(struct fown_struct *fown)
diff --git a/fs/fifo.c b/fs/fifo.c
index 5455916241f0..923371b753ab 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -35,7 +35,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	int ret;
 
 	ret = -ERESTARTSYS;
-	if (down_interruptible(PIPE_SEM(*inode)))
+	if (mutex_lock_interruptible(PIPE_MUTEX(*inode)))
 		goto err_nolock_nocleanup;
 
 	if (!inode->i_pipe) {
@@ -119,7 +119,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	}
 
 	/* Ok! */
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 	return 0;
 
 err_rd:
@@ -139,7 +139,7 @@ err:
 		free_pipe_info(inode);
 
 err_nocleanup:
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 
 err_nolock_nocleanup:
 	return ret;
diff --git a/fs/file_table.c b/fs/file_table.c
index c3a5e2fd663b..6142250104a6 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -117,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp);
 
 void fastcall fput(struct file *file)
 {
-	if (rcuref_dec_and_test(&file->f_count))
+	if (atomic_dec_and_test(&file->f_count))
 		__fput(file);
 }
 
@@ -166,7 +166,7 @@ struct file fastcall *fget(unsigned int fd)
 	rcu_read_lock();
 	file = fcheck_files(files, fd);
 	if (file) {
-		if (!rcuref_inc_lf(&file->f_count)) {
+		if (!atomic_inc_not_zero(&file->f_count)) {
 			/* File object ref couldn't be taken */
 			rcu_read_unlock();
 			return NULL;
@@ -198,7 +198,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
 		rcu_read_lock();
 		file = fcheck_files(files, fd);
 		if (file) {
-			if (rcuref_inc_lf(&file->f_count))
+			if (atomic_inc_not_zero(&file->f_count))
 				*fput_needed = 1;
 			else
 				/* Didn't get the reference, someone's freed */
@@ -213,7 +213,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
 
 void put_filp(struct file *file)
 {
-	if (rcuref_dec_and_test(&file->f_count)) {
+	if (atomic_dec_and_test(&file->f_count)) {
 		security_file_free(file);
 		file_kill(file);
 		file_free(file);
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index d0401dc68d41..6f5df1700e95 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -99,8 +99,8 @@ static int
 vxfs_immed_readpage(struct file *fp, struct page *pp)
 {
 	struct vxfs_inode_info	*vip = VXFS_INO(pp->mapping->host);
-	u_int64_t		offset = pp->index << PAGE_CACHE_SHIFT;
-	caddr_t			kaddr;
+	u_int64_t	offset = (u_int64_t)pp->index << PAGE_CACHE_SHIFT;
+	caddr_t		kaddr;
 
 	kaddr = kmap(pp);
 	memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8f873e621f41..e08ab4702d97 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -148,6 +148,26 @@ void fuse_release_background(struct fuse_req *req)
 	spin_unlock(&fuse_lock);
 }
 
+static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+	int i;
+	struct fuse_init_out *arg = &req->misc.init_out;
+
+	if (arg->major != FUSE_KERNEL_VERSION)
+		fc->conn_error = 1;
+	else {
+		fc->minor = arg->minor;
+		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
+	}
+
+	/* After INIT reply is received other requests can go
+	   out.  So do (FUSE_MAX_OUTSTANDING - 1) number of
+	   up()s on outstanding_sem.  The last up() is done in
+	   fuse_putback_request() */
+	for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
+		up(&fc->outstanding_sem);
+}
+
 /*
  * This function is called when a request is finished.  Either a reply
  * has arrived or it was interrupted (and not yet sent) or some error
@@ -172,19 +192,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 		up_read(&fc->sbput_sem);
 	}
 	wake_up(&req->waitq);
-	if (req->in.h.opcode == FUSE_INIT) {
-		int i;
-
-		if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
-			fc->conn_error = 1;
-
-		/* After INIT reply is received other requests can go
-		   out.  So do (FUSE_MAX_OUTSTANDING - 1) number of
-		   up()s on outstanding_sem.  The last up() is done in
-		   fuse_putback_request() */
-		for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
-			up(&fc->outstanding_sem);
-	} else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) {
+	if (req->in.h.opcode == FUSE_INIT)
+		process_init_reply(fc, req);
+	else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) {
 		/* Special case for failed iget in CREATE */
 		u64 nodeid = req->in.h.nodeid;
 		__fuse_get_request(req);
@@ -357,7 +367,7 @@ void fuse_send_init(struct fuse_conn *fc)
 	/* This is called from fuse_read_super() so there's guaranteed
 	   to be a request available */
 	struct fuse_req *req = do_get_request(fc);
-	struct fuse_init_in_out *arg = &req->misc.init_in_out;
+	struct fuse_init_in *arg = &req->misc.init_in;
 	arg->major = FUSE_KERNEL_VERSION;
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
 	req->in.h.opcode = FUSE_INIT;
@@ -365,8 +375,12 @@ void fuse_send_init(struct fuse_conn *fc)
 	req->in.args[0].size = sizeof(*arg);
 	req->in.args[0].value = arg;
 	req->out.numargs = 1;
-	req->out.args[0].size = sizeof(*arg);
-	req->out.args[0].value = arg;
+	/* Variable length arguement used for backward compatibility
+	   with interface version < 7.5.  Rest of init_out is zeroed
+	   by do_get_request(), so a short reply is not a problem */
+	req->out.argvar = 1;
+	req->out.args[0].size = sizeof(struct fuse_init_out);
+	req->out.args[0].value = &req->misc.init_out;
 	request_send_background(fc, req);
 }
 
@@ -615,6 +629,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
 	struct fuse_copy_state cs;
 	unsigned reqsize;
 
+ restart:
 	spin_lock(&fuse_lock);
 	fc = file->private_data;
 	err = -EPERM;
@@ -630,20 +645,25 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
 
 	req = list_entry(fc->pending.next, struct fuse_req, list);
 	list_del_init(&req->list);
-	spin_unlock(&fuse_lock);
 
 	in = &req->in;
-	reqsize = req->in.h.len;
-	fuse_copy_init(&cs, 1, req, iov, nr_segs);
-	err = -EINVAL;
-	if (iov_length(iov, nr_segs) >= reqsize) {
-		err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
-		if (!err)
-			err = fuse_copy_args(&cs, in->numargs, in->argpages,
-					     (struct fuse_arg *) in->args, 0);
+	reqsize = in->h.len;
+	/* If request is too large, reply with an error and restart the read */
+	if (iov_length(iov, nr_segs) < reqsize) {
+		req->out.h.error = -EIO;
+		/* SETXATTR is special, since it may contain too large data */
+		if (in->h.opcode == FUSE_SETXATTR)
+			req->out.h.error = -E2BIG;
+		request_end(fc, req);
+		goto restart;
 	}
+	spin_unlock(&fuse_lock);
+	fuse_copy_init(&cs, 1, req, iov, nr_segs);
+	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
+	if (!err)
+		err = fuse_copy_args(&cs, in->numargs, in->argpages,
+				     (struct fuse_arg *) in->args, 0);
 	fuse_copy_finish(&cs);
-
 	spin_lock(&fuse_lock);
 	req->locked = 0;
 	if (!err && req->interrupted)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 51f5da652771..417bcee466f6 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -13,8 +13,16 @@
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/namei.h>
-#include <linux/mount.h>
 
+/*
+ * FUSE caches dentries and attributes with separate timeout.  The
+ * time in jiffies until the dentry/attributes are valid is stored in
+ * dentry->d_time and fuse_inode->i_time respectively.
+ */
+
+/*
+ * Calculate the time in jiffies until a dentry/attributes are valid
+ */
 static inline unsigned long time_to_jiffies(unsigned long sec,
 					    unsigned long nsec)
 {
@@ -22,6 +30,50 @@ static inline unsigned long time_to_jiffies(unsigned long sec,
 	return jiffies + timespec_to_jiffies(&ts);
 }
 
+/*
+ * Set dentry and possibly attribute timeouts from the lookup/mk*
+ * replies
+ */
+static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o)
+{
+	entry->d_time = time_to_jiffies(o->entry_valid, o->entry_valid_nsec);
+	if (entry->d_inode)
+		get_fuse_inode(entry->d_inode)->i_time =
+			time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
+}
+
+/*
+ * Mark the attributes as stale, so that at the next call to
+ * ->getattr() they will be fetched from userspace
+ */
+void fuse_invalidate_attr(struct inode *inode)
+{
+	get_fuse_inode(inode)->i_time = jiffies - 1;
+}
+
+/*
+ * Just mark the entry as stale, so that a next attempt to look it up
+ * will result in a new lookup call to userspace
+ *
+ * This is called when a dentry is about to become negative and the
+ * timeout is unknown (unlink, rmdir, rename and in some cases
+ * lookup)
+ */
+static void fuse_invalidate_entry_cache(struct dentry *entry)
+{
+	entry->d_time = jiffies - 1;
+}
+
+/*
+ * Same as fuse_invalidate_entry_cache(), but also try to remove the
+ * dentry from the hash
+ */
+static void fuse_invalidate_entry(struct dentry *entry)
+{
+	d_invalidate(entry);
+	fuse_invalidate_entry_cache(entry);
+}
+
 static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
 			     struct dentry *entry,
 			     struct fuse_entry_out *outarg)
@@ -37,17 +89,34 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
 	req->out.args[0].value = outarg;
 }
 
+/*
+ * Check whether the dentry is still valid
+ *
+ * If the entry validity timeout has expired and the dentry is
+ * positive, try to redo the lookup.  If the lookup results in a
+ * different inode, then let the VFS invalidate the dentry and redo
+ * the lookup once more.  If the lookup results in the same inode,
+ * then refresh the attributes, timeouts and mark the dentry valid.
+ */
 static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 {
-	if (!entry->d_inode || is_bad_inode(entry->d_inode))
+	struct inode *inode = entry->d_inode;
+
+	if (inode && is_bad_inode(inode))
 		return 0;
 	else if (time_after(jiffies, entry->d_time)) {
 		int err;
 		struct fuse_entry_out outarg;
-		struct inode *inode = entry->d_inode;
-		struct fuse_inode *fi = get_fuse_inode(inode);
-		struct fuse_conn *fc = get_fuse_conn(inode);
-		struct fuse_req *req = fuse_get_request(fc);
+		struct fuse_conn *fc;
+		struct fuse_req *req;
+
+		/* Doesn't hurt to "reset" the validity timeout */
+		fuse_invalidate_entry_cache(entry);
+		if (!inode)
+			return 0;
+
+		fc = get_fuse_conn(inode);
+		req = fuse_get_request(fc);
 		if (!req)
 			return 0;
 
@@ -55,6 +124,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		request_send(fc, req);
 		err = req->out.h.error;
 		if (!err) {
+			struct fuse_inode *fi = get_fuse_inode(inode);
 			if (outarg.nodeid != get_node_id(inode)) {
 				fuse_send_forget(fc, req, outarg.nodeid, 1);
 				return 0;
@@ -66,18 +136,18 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 			return 0;
 
 		fuse_change_attributes(inode, &outarg.attr);
-		entry->d_time = time_to_jiffies(outarg.entry_valid,
-						outarg.entry_valid_nsec);
-		fi->i_time = time_to_jiffies(outarg.attr_valid,
-					     outarg.attr_valid_nsec);
+		fuse_change_timeout(entry, &outarg);
 	}
 	return 1;
 }
 
+/*
+ * Check if there's already a hashed alias of this directory inode.
+ * If yes, then lookup and mkdir must not create a new alias.
+ */
 static int dir_alias(struct inode *inode)
 {
 	if (S_ISDIR(inode->i_mode)) {
-		/* Don't allow creating an alias to a directory  */
 		struct dentry *alias = d_find_alias(inode);
 		if (alias) {
 			dput(alias);
@@ -96,8 +166,14 @@ static struct dentry_operations fuse_dentry_operations = {
 	.d_revalidate	= fuse_dentry_revalidate,
 };
 
-static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
-			    struct inode **inodep)
+static inline int valid_mode(int m)
+{
+	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
+		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
+}
+
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+				  struct nameidata *nd)
 {
 	int err;
 	struct fuse_entry_out outarg;
@@ -106,53 +182,49 @@ static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
 	struct fuse_req *req;
 
 	if (entry->d_name.len > FUSE_NAME_MAX)
-		return -ENAMETOOLONG;
+		return ERR_PTR(-ENAMETOOLONG);
 
 	req = fuse_get_request(fc);
 	if (!req)
-		return -EINTR;
+		return ERR_PTR(-EINTR);
 
 	fuse_lookup_init(req, dir, entry, &outarg);
 	request_send(fc, req);
 	err = req->out.h.error;
-	if (!err && invalid_nodeid(outarg.nodeid))
+	if (!err && ((outarg.nodeid && invalid_nodeid(outarg.nodeid)) ||
+		     !valid_mode(outarg.attr.mode)))
 		err = -EIO;
-	if (!err) {
+	if (!err && outarg.nodeid) {
 		inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
 				  &outarg.attr);
 		if (!inode) {
 			fuse_send_forget(fc, req, outarg.nodeid, 1);
-			return -ENOMEM;
+			return ERR_PTR(-ENOMEM);
 		}
 	}
 	fuse_put_request(fc, req);
 	if (err && err != -ENOENT)
-		return err;
+		return ERR_PTR(err);
 
-	if (inode) {
-		struct fuse_inode *fi = get_fuse_inode(inode);
-		entry->d_time =	time_to_jiffies(outarg.entry_valid,
-						outarg.entry_valid_nsec);
-		fi->i_time = time_to_jiffies(outarg.attr_valid,
-					     outarg.attr_valid_nsec);
+	if (inode && dir_alias(inode)) {
+		iput(inode);
+		return ERR_PTR(-EIO);
 	}
-
+	d_add(entry, inode);
 	entry->d_op = &fuse_dentry_operations;
-	*inodep = inode;
-	return 0;
-}
-
-void fuse_invalidate_attr(struct inode *inode)
-{
-	get_fuse_inode(inode)->i_time = jiffies - 1;
-}
-
-static void fuse_invalidate_entry(struct dentry *entry)
-{
-	d_invalidate(entry);
-	entry->d_time = jiffies - 1;
+	if (!err)
+		fuse_change_timeout(entry, &outarg);
+	else
+		fuse_invalidate_entry_cache(entry);
+	return NULL;
 }
 
+/*
+ * Atomic create+open operation
+ *
+ * If the filesystem doesn't support this, then fall back to separate
+ * 'mknod' + 'open' requests.
+ */
 static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 			    struct nameidata *nd)
 {
@@ -163,7 +235,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	struct fuse_open_in inarg;
 	struct fuse_open_out outopen;
 	struct fuse_entry_out outentry;
-	struct fuse_inode *fi;
 	struct fuse_file *ff;
 	struct file *file;
 	int flags = nd->intent.open.flags - 1;
@@ -172,10 +243,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	if (fc->no_create)
 		goto out;
 
-	err = -ENAMETOOLONG;
-	if (entry->d_name.len > FUSE_NAME_MAX)
-		goto out;
-
 	err = -EINTR;
 	req = fuse_get_request(fc);
 	if (!req)
@@ -220,17 +287,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	if (!inode) {
 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
 		ff->fh = outopen.fh;
+		/* Special release, with inode = NULL, this will
+		   trigger a 'forget' request when the release is
+		   complete */
 		fuse_send_release(fc, ff, outentry.nodeid, NULL, flags, 0);
 		goto out_put_request;
 	}
 	fuse_put_request(fc, req);
-	entry->d_time =	time_to_jiffies(outentry.entry_valid,
-					outentry.entry_valid_nsec);
-	fi = get_fuse_inode(inode);
-	fi->i_time = time_to_jiffies(outentry.attr_valid,
-				     outentry.attr_valid_nsec);
-
 	d_instantiate(entry, inode);
+	fuse_change_timeout(entry, &outentry);
 	file = lookup_instantiate_filp(nd, entry, generic_file_open);
 	if (IS_ERR(file)) {
 		ff->fh = outopen.fh;
@@ -248,13 +313,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	return err;
 }
 
+/*
+ * Code shared between mknod, mkdir, symlink and link
+ */
 static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 			    struct inode *dir, struct dentry *entry,
 			    int mode)
 {
 	struct fuse_entry_out outarg;
 	struct inode *inode;
-	struct fuse_inode *fi;
 	int err;
 
 	req->in.h.nodeid = get_node_id(dir);
@@ -268,10 +335,13 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 		fuse_put_request(fc, req);
 		return err;
 	}
-	if (invalid_nodeid(outarg.nodeid)) {
-		fuse_put_request(fc, req);
-		return -EIO;
-	}
+	err = -EIO;
+	if (invalid_nodeid(outarg.nodeid))
+		goto out_put_request;
+
+	if ((outarg.attr.mode ^ mode) & S_IFMT)
+		goto out_put_request;
+
 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
 			  &outarg.attr);
 	if (!inode) {
@@ -280,22 +350,19 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 	}
 	fuse_put_request(fc, req);
 
-	/* Don't allow userspace to do really stupid things... */
-	if (((inode->i_mode ^ mode) & S_IFMT) || dir_alias(inode)) {
+	if (dir_alias(inode)) {
 		iput(inode);
 		return -EIO;
 	}
 
-	entry->d_time = time_to_jiffies(outarg.entry_valid,
-					outarg.entry_valid_nsec);
-
-	fi = get_fuse_inode(inode);
-	fi->i_time = time_to_jiffies(outarg.attr_valid,
-				     outarg.attr_valid_nsec);
-
 	d_instantiate(entry, inode);
+	fuse_change_timeout(entry, &outarg);
 	fuse_invalidate_attr(dir);
 	return 0;
+
+ out_put_request:
+	fuse_put_request(fc, req);
+	return err;
 }
 
 static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
@@ -355,12 +422,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
 {
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	unsigned len = strlen(link) + 1;
-	struct fuse_req *req;
-
-	if (len > FUSE_SYMLINK_MAX)
-		return -ENAMETOOLONG;
-
-	req = fuse_get_request(fc);
+	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
 		return -EINTR;
 
@@ -399,6 +461,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
 		inode->i_nlink = 0;
 		fuse_invalidate_attr(inode);
 		fuse_invalidate_attr(dir);
+		fuse_invalidate_entry_cache(entry);
 	} else if (err == -EINTR)
 		fuse_invalidate_entry(entry);
 	return err;
@@ -424,6 +487,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
 	if (!err) {
 		entry->d_inode->i_nlink = 0;
 		fuse_invalidate_attr(dir);
+		fuse_invalidate_entry_cache(entry);
 	} else if (err == -EINTR)
 		fuse_invalidate_entry(entry);
 	return err;
@@ -459,6 +523,10 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
 		fuse_invalidate_attr(olddir);
 		if (olddir != newdir)
 			fuse_invalidate_attr(newdir);
+
+		/* newent will end up negative */
+		if (newent->d_inode)
+			fuse_invalidate_entry_cache(newent);
 	} else if (err == -EINTR) {
 		/* If request was interrupted, DEITY only knows if the
 		   rename actually took place.  If the invalidation
@@ -566,6 +634,15 @@ static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
 	return 0;
 }
 
+/*
+ * Check whether the inode attributes are still valid
+ *
+ * If the attribute validity timeout has expired, then fetch the fresh
+ * attributes with a 'getattr' request
+ *
+ * I'm not sure why cached attributes are never returned for the root
+ * inode, this is probably being too cautious.
+ */
 static int fuse_revalidate(struct dentry *entry)
 {
 	struct inode *inode = entry->d_inode;
@@ -613,6 +690,19 @@ static int fuse_access(struct inode *inode, int mask)
 	return err;
 }
 
+/*
+ * Check permission.  The two basic access models of FUSE are:
+ *
+ * 1) Local access checking ('default_permissions' mount option) based
+ * on file mode.  This is the plain old disk filesystem permission
+ * modell.
+ *
+ * 2) "Remote" access checking, where server is responsible for
+ * checking permission in each inode operation.  An exception to this
+ * is if ->permission() was invoked from sys_access() in which case an
+ * access request is sent.  Execute permission is still checked
+ * locally based on file mode.
+ */
 static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
@@ -631,14 +721,10 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
 				err = generic_permission(inode, mask, NULL);
 		}
 
-		/* FIXME: Need some mechanism to revoke permissions:
-		   currently if the filesystem suddenly changes the
-		   file mode, we will not be informed about it, and
-		   continue to allow access to the file/directory.
-
-		   This is actually not so grave, since the user can
-		   simply keep access to the file/directory anyway by
-		   keeping it open... */
+		/* Note: the opposite of the above test does not
+		   exist.  So if permissions are revoked this won't be
+		   noticed immediately, only after the attribute
+		   timeout has expired */
 
 		return err;
 	} else {
@@ -691,7 +777,12 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
 	struct page *page;
 	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req = fuse_get_request(fc);
+	struct fuse_req *req;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	req = fuse_get_request(fc);
 	if (!req)
 		return -EINTR;
 
@@ -806,6 +897,15 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
 	}
 }
 
+/*
+ * Set attributes, and at the same time refresh them.
+ *
+ * Truncation is slightly complicated, because the 'truncate' request
+ * may fail, in which case we don't want to touch the mapping.
+ * vmtruncate() doesn't allow for this case.  So do the rlimit
+ * checking by hand and call vmtruncate() only after the file has
+ * actually been truncated.
+ */
 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 {
 	struct inode *inode = entry->d_inode;
@@ -883,23 +983,6 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
 	return err;
 }
 
-static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
-				  struct nameidata *nd)
-{
-	struct inode *inode;
-	int err;
-
-	err = fuse_lookup_iget(dir, entry, &inode);
-	if (err)
-		return ERR_PTR(err);
-	if (inode && dir_alias(inode)) {
-		iput(inode);
-		return ERR_PTR(-EIO);
-	}
-	d_add(entry, inode);
-	return NULL;
-}
-
 static int fuse_setxattr(struct dentry *entry, const char *name,
 			 const void *value, size_t size, int flags)
 {
@@ -909,9 +992,6 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
 	struct fuse_setxattr_in inarg;
 	int err;
 
-	if (size > FUSE_XATTR_SIZE_MAX)
-		return -E2BIG;
-
 	if (fc->no_setxattr)
 		return -EOPNOTSUPP;
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2ca86141d13a..63d2980df5c9 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -163,6 +163,9 @@ static int fuse_flush(struct file *file)
 	struct fuse_flush_in inarg;
 	int err;
 
+	if (is_bad_inode(inode))
+		return -EIO;
+
 	if (fc->no_flush)
 		return 0;
 
@@ -199,6 +202,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
 	struct fuse_fsync_in inarg;
 	int err;
 
+	if (is_bad_inode(inode))
+		return -EIO;
+
 	if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
 		return 0;
 
@@ -272,16 +278,22 @@ static int fuse_readpage(struct file *file, struct page *page)
 {
 	struct inode *inode = page->mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT;
-	struct fuse_req *req = fuse_get_request(fc);
-	int err = -EINTR;
+	struct fuse_req *req;
+	int err;
+
+	err = -EIO;
+	if (is_bad_inode(inode))
+		goto out;
+
+	err = -EINTR;
+	req = fuse_get_request(fc);
 	if (!req)
 		goto out;
 
 	req->out.page_zeroing = 1;
 	req->num_pages = 1;
 	req->pages[0] = page;
-	fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE);
+	fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (!err)
@@ -295,7 +307,7 @@ static int fuse_readpage(struct file *file, struct page *page)
 static int fuse_send_readpages(struct fuse_req *req, struct file *file,
 			       struct inode *inode)
 {
-	loff_t pos = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT;
+	loff_t pos = page_offset(req->pages[0]);
 	size_t count = req->num_pages << PAGE_CACHE_SHIFT;
 	unsigned i;
 	req->out.page_zeroing = 1;
@@ -345,6 +357,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_readpages_data data;
 	int err;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
 	data.file = file;
 	data.inode = inode;
 	data.req = fuse_get_request(fc);
@@ -402,8 +418,13 @@ static int fuse_commit_write(struct file *file, struct page *page,
 	unsigned count = to - offset;
 	struct inode *inode = page->mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset;
-	struct fuse_req *req = fuse_get_request(fc);
+	loff_t pos = page_offset(page) + offset;
+	struct fuse_req *req;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	req = fuse_get_request(fc);
 	if (!req)
 		return -EINTR;
 
@@ -454,7 +475,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
 
 	nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
 	npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	npages = min(npages, FUSE_MAX_PAGES_PER_REQ);
+	npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ);
 	down_read(&current->mm->mmap_sem);
 	npages = get_user_pages(current, current->mm, user_addr, npages, write,
 				0, req->pages, NULL);
@@ -475,12 +496,16 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 	size_t nmax = write ? fc->max_write : fc->max_read;
 	loff_t pos = *ppos;
 	ssize_t res = 0;
-	struct fuse_req *req = fuse_get_request(fc);
+	struct fuse_req *req;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	req = fuse_get_request(fc);
 	if (!req)
 		return -EINTR;
 
 	while (count) {
-		size_t tmp;
 		size_t nres;
 		size_t nbytes = min(count, nmax);
 		int err = fuse_get_user_pages(req, buf, nbytes, !write);
@@ -488,8 +513,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 			res = err;
 			break;
 		}
-		tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset;
-		nbytes = min(nbytes, tmp);
+		nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
+		nbytes = min(count, nbytes);
 		if (write)
 			nres = fuse_send_write(req, file, inode, pos, nbytes);
 		else
@@ -535,9 +560,9 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
 	struct inode *inode = file->f_dentry->d_inode;
 	ssize_t res;
 	/* Don't allow parallel writes to the same file */
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	res = fuse_direct_io(file, buf, count, ppos, 1);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return res;
 }
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 0ea5301f86be..74c8d098a14a 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,9 @@
 /** If more requests are outstanding, then the operation will block */
 #define FUSE_MAX_OUTSTANDING 10
 
+/** It could be as large as PATH_MAX, but would that have any uses? */
+#define FUSE_NAME_MAX 1024
+
 /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
     module will check permissions based on the file mode.  Otherwise no
     permission checking is done in the kernel */
@@ -108,9 +111,6 @@ struct fuse_out {
 	struct fuse_arg args[3];
 };
 
-struct fuse_req;
-struct fuse_conn;
-
 /**
  * A request to the client
  */
@@ -159,7 +159,8 @@ struct fuse_req {
 	union {
 		struct fuse_forget_in forget_in;
 		struct fuse_release_in release_in;
-		struct fuse_init_in_out init_in_out;
+		struct fuse_init_in init_in;
+		struct fuse_init_out init_out;
 	} misc;
 
 	/** page vector */
@@ -272,6 +273,9 @@ struct fuse_conn {
 	/** Is create not implemented by fs? */
 	unsigned no_create : 1;
 
+	/** Negotiated minor version */
+	unsigned minor;
+
 	/** Backing dev info */
 	struct backing_dev_info bdi;
 };
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e69a546844d0..04c80cc957a3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -135,12 +135,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
 		fuse_init_common(inode);
 		init_special_inode(inode, inode->i_mode,
 				   new_decode_dev(attr->rdev));
-	} else {
-		/* Don't let user create weird files */
-		inode->i_mode = S_IFREG;
-		fuse_init_common(inode);
-		fuse_init_file_inode(inode);
-	}
+	} else
+		BUG();
 }
 
 static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
@@ -218,6 +214,7 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr
 {
 	stbuf->f_type    = FUSE_SUPER_MAGIC;
 	stbuf->f_bsize   = attr->bsize;
+	stbuf->f_frsize  = attr->frsize;
 	stbuf->f_blocks  = attr->blocks;
 	stbuf->f_bfree   = attr->bfree;
 	stbuf->f_bavail  = attr->bavail;
@@ -238,10 +235,12 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
 	if (!req)
 		return -EINTR;
 
+	memset(&outarg, 0, sizeof(outarg));
 	req->in.numargs = 0;
 	req->in.h.opcode = FUSE_STATFS;
 	req->out.numargs = 1;
-	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].size =
+		fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
 	req->out.args[0].value = &outarg;
 	request_send(fc, req);
 	err = req->out.h.error;
@@ -482,7 +481,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	fc->max_read = d.max_read;
 	if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
 		fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
-	fc->max_write = FUSE_MAX_IN / 2;
 
 	err = -ENOMEM;
 	root = get_root_inode(sb, d.rootmode);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d499393a8ae7..050a49276499 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -547,13 +547,13 @@ static int hfs_file_release(struct inode *inode, struct file *file)
 	if (atomic_read(&file->f_count) != 0)
 		return 0;
 	if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 		hfs_file_truncate(inode);
 		//if (inode->i_flags & S_DEAD) {
 		//	hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
 		//	hfs_delete_inode(inode);
 		//}
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 	}
 	return 0;
 }
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index c7d316455fa0..9fb51632303c 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -29,7 +29,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
 		return size;
 
 	dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len);
-	down(&HFSPLUS_SB(sb).alloc_file->i_sem);
+	mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
 	mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
 	page = read_cache_page(mapping, offset / PAGE_CACHE_BITS,
 			       (filler_t *)mapping->a_ops->readpage, NULL);
@@ -143,7 +143,7 @@ done:
 	sb->s_dirt = 1;
 	dprint(DBG_BITMAP, "-> %u,%u\n", start, *max);
 out:
-	up(&HFSPLUS_SB(sb).alloc_file->i_sem);
+	mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
 	return start;
 }
 
@@ -164,7 +164,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
 	if ((offset + count) > HFSPLUS_SB(sb).total_blocks)
 		return -2;
 
-	down(&HFSPLUS_SB(sb).alloc_file->i_sem);
+	mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
 	mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
 	pnr = offset / PAGE_CACHE_BITS;
 	page = read_cache_page(mapping, pnr, (filler_t *)mapping->a_ops->readpage, NULL);
@@ -215,7 +215,7 @@ out:
 	kunmap(page);
 	HFSPLUS_SB(sb).free_blocks += len;
 	sb->s_dirt = 1;
-	up(&HFSPLUS_SB(sb).alloc_file->i_sem);
+	mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
 
 	return 0;
 }
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index df16fcbff3fb..0fa1ab6250bf 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -143,9 +143,6 @@ struct hfsplus_sb_info {
 
 	unsigned long flags;
 
-	atomic_t inode_cnt;
-	u32 last_inode_cnt;
-
 	struct hlist_head rsrc_inodes;
 };
 
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index fc98583cf045..7acff6c5464f 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -182,11 +182,6 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
 	igrab(dir);
 	hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes);
 	mark_inode_dirty(inode);
-	{
-	void hfsplus_inode_check(struct super_block *sb);
-	atomic_inc(&HFSPLUS_SB(sb).inode_cnt);
-	hfsplus_inode_check(sb);
-	}
 out:
 	d_add(dentry, inode);
 	return NULL;
@@ -276,13 +271,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
 	if (atomic_read(&file->f_count) != 0)
 		return 0;
 	if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) {
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 		hfsplus_file_truncate(inode);
 		if (inode->i_flags & S_DEAD) {
 			hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
 			hfsplus_delete_inode(inode);
 		}
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 	}
 	return 0;
 }
@@ -317,11 +312,6 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
 	if (!inode)
 		return NULL;
 
-	{
-	void hfsplus_inode_check(struct super_block *sb);
-	atomic_inc(&HFSPLUS_SB(sb).inode_cnt);
-	hfsplus_inode_check(sb);
-	}
 	inode->i_ino = HFSPLUS_SB(sb).next_cnid++;
 	inode->i_mode = mode;
 	inode->i_uid = current->fsuid;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 8093351bd7c3..d791780def50 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -22,29 +22,12 @@ static void hfsplus_destroy_inode(struct inode *inode);
 
 #include "hfsplus_fs.h"
 
-void hfsplus_inode_check(struct super_block *sb)
-{
-#if 0
-	u32 cnt = atomic_read(&HFSPLUS_SB(sb).inode_cnt);
-	u32 last_cnt = HFSPLUS_SB(sb).last_inode_cnt;
-
-	if (cnt <= (last_cnt / 2) ||
-	    cnt >= (last_cnt * 2)) {
-		HFSPLUS_SB(sb).last_inode_cnt = cnt;
-		printk("inode_check: %u,%u,%u\n", cnt, last_cnt,
-			HFSPLUS_SB(sb).cat_tree ? HFSPLUS_SB(sb).cat_tree->node_hash_cnt : 0);
-	}
-#endif
-}
-
 static void hfsplus_read_inode(struct inode *inode)
 {
 	struct hfs_find_data fd;
 	struct hfsplus_vh *vhdr;
 	int err;
 
-	atomic_inc(&HFSPLUS_SB(inode->i_sb).inode_cnt);
-	hfsplus_inode_check(inode->i_sb);
 	INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
 	init_MUTEX(&HFSPLUS_I(inode).extents_lock);
 	HFSPLUS_I(inode).flags = 0;
@@ -155,12 +138,10 @@ static int hfsplus_write_inode(struct inode *inode, int unused)
 static void hfsplus_clear_inode(struct inode *inode)
 {
 	dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino);
-	atomic_dec(&HFSPLUS_SB(inode->i_sb).inode_cnt);
 	if (HFSPLUS_IS_RSRC(inode)) {
 		HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL;
 		iput(HFSPLUS_I(inode).rsrc_inode);
 	}
-	hfsplus_inode_check(inode->i_sb);
 }
 
 static void hfsplus_write_super(struct super_block *sb)
@@ -320,7 +301,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 	/* temporarily use utf8 to correctly find the hidden dir below */
 	nls = sbi->nls;
 	sbi->nls = load_nls("utf8");
-	if (!nls) {
+	if (!sbi->nls) {
 		printk("HFS+: unable to load nls for utf8\n");
 		err = -EINVAL;
 		goto cleanup;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 0217c3a04441..5591f9623aa2 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -32,19 +32,19 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
 
 	/*printk("dir lseek\n");*/
 	if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
-	down(&i->i_sem);
+	mutex_lock(&i->i_mutex);
 	pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1;
 	while (pos != new_off) {
 		if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh);
 		else goto fail;
 		if (pos == 12) goto fail;
 	}
-	up(&i->i_sem);
+	mutex_unlock(&i->i_mutex);
 ok:
 	unlock_kernel();
 	return filp->f_pos = new_off;
 fail:
-	up(&i->i_sem);
+	mutex_unlock(&i->i_mutex);
 	/*printk("illegal lseek: %016llx\n", new_off);*/
 	unlock_kernel();
 	return -ESPIPE;
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 52930915bad8..a44dc5897399 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -171,12 +171,12 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
 
 	err = -ENOMEM;
 	parent = HPPFS_I(ino)->proc_dentry;
-	down(&parent->d_inode->i_sem);
+	mutex_lock(&parent->d_inode->i_mutex);
 	proc_dentry = d_lookup(parent, &dentry->d_name);
 	if(proc_dentry == NULL){
 		proc_dentry = d_alloc(parent, &dentry->d_name);
 		if(proc_dentry == NULL){
-			up(&parent->d_inode->i_sem);
+			mutex_unlock(&parent->d_inode->i_mutex);
 			goto out;
 		}
 		new = (*parent->d_inode->i_op->lookup)(parent->d_inode,
@@ -186,7 +186,7 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
 			proc_dentry = new;
 		}
 	}
-	up(&parent->d_inode->i_sem);
+	mutex_unlock(&parent->d_inode->i_mutex);
 
 	if(IS_ERR(proc_dentry))
 		return(proc_dentry);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8c1cef3bb677..ff1b7d108bd0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -100,9 +100,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	loff_t len, vma_len;
 	int ret;
 
-	if ((vma->vm_flags & (VM_MAYSHARE | VM_WRITE)) == VM_WRITE)
-		return -EINVAL;
-
 	if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1))
 		return -EINVAL;
 
@@ -121,7 +118,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	file_accessed(file);
 	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
 	vma->vm_ops = &hugetlb_vm_ops;
@@ -136,7 +133,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (inode->i_size < len)
 		inode->i_size = len;
 out:
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 
 	return ret;
 }
diff --git a/fs/inode.c b/fs/inode.c
index d8d04bd72b59..108138d4e909 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -22,6 +22,7 @@
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
 #include <linux/inotify.h>
+#include <linux/mount.h>
 
 /*
  * This is needed for the following functions:
@@ -192,7 +193,7 @@ void inode_init_once(struct inode *inode)
 	INIT_HLIST_NODE(&inode->i_hash);
 	INIT_LIST_HEAD(&inode->i_dentry);
 	INIT_LIST_HEAD(&inode->i_devices);
-	sema_init(&inode->i_sem, 1);
+	mutex_init(&inode->i_mutex);
 	init_rwsem(&inode->i_alloc_sem);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	rwlock_init(&inode->i_data.tree_lock);
@@ -770,7 +771,7 @@ EXPORT_SYMBOL(igrab);
  *
  * Note, @test is called with the inode_lock held, so can't sleep.
  */
-static inline struct inode *ifind(struct super_block *sb,
+static struct inode *ifind(struct super_block *sb,
 		struct hlist_head *head, int (*test)(struct inode *, void *),
 		void *data, const int wait)
 {
@@ -804,7 +805,7 @@ static inline struct inode *ifind(struct super_block *sb,
  *
  * Otherwise NULL is returned.
  */
-static inline struct inode *ifind_fast(struct super_block *sb,
+static struct inode *ifind_fast(struct super_block *sb,
 		struct hlist_head *head, unsigned long ino)
 {
 	struct inode *inode;
@@ -1176,22 +1177,33 @@ sector_t bmap(struct inode * inode, sector_t block)
 EXPORT_SYMBOL(bmap);
 
 /**
- *	update_atime	-	update the access time
+ *	touch_atime	-	update the access time
+ *	@mnt: mount the inode is accessed on
  *	@inode: inode accessed
  *
  *	Update the accessed time on an inode and mark it for writeback.
  *	This function automatically handles read only file systems and media,
  *	as well as the "noatime" flag and inode specific "noatime" markers.
  */
-void update_atime(struct inode *inode)
+void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
 {
+	struct inode *inode = dentry->d_inode;
 	struct timespec now;
 
-	if (IS_NOATIME(inode))
+	if (IS_RDONLY(inode))
 		return;
-	if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode))
+
+	if ((inode->i_flags & S_NOATIME) ||
+	    (inode->i_sb->s_flags & MS_NOATIME) ||
+	    ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
 		return;
-	if (IS_RDONLY(inode))
+
+	/*
+	 * We may have a NULL vfsmount when coming from NFSD
+	 */
+	if (mnt &&
+	    ((mnt->mnt_flags & MNT_NOATIME) ||
+	     ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))))
 		return;
 
 	now = current_fs_time(inode->i_sb);
@@ -1201,19 +1213,23 @@ void update_atime(struct inode *inode)
 	}
 }
 
-EXPORT_SYMBOL(update_atime);
+EXPORT_SYMBOL(touch_atime);
 
 /**
- *	inode_update_time	-	update mtime and ctime time
- *	@inode: inode accessed
- *	@ctime_too: update ctime too
+ *	file_update_time	-	update mtime and ctime time
+ *	@file: file accessed
  *
- *	Update the mtime time on an inode and mark it for writeback.
- *	When ctime_too is specified update the ctime too.
+ *	Update the mtime and ctime members of an inode and mark the inode
+ *	for writeback.  Note that this function is meant exclusively for
+ *	usage in the file write path of filesystems, and filesystems may
+ *	choose to explicitly ignore update via this function with the
+ *	S_NOCTIME inode flag, e.g. for network filesystem where these
+ *	timestamps are handled by the server.
  */
 
-void inode_update_time(struct inode *inode, int ctime_too)
+void file_update_time(struct file *file)
 {
+	struct inode *inode = file->f_dentry->d_inode;
 	struct timespec now;
 	int sync_it = 0;
 
@@ -1227,16 +1243,15 @@ void inode_update_time(struct inode *inode, int ctime_too)
 		sync_it = 1;
 	inode->i_mtime = now;
 
-	if (ctime_too) {
-		if (!timespec_equal(&inode->i_ctime, &now))
-			sync_it = 1;
-		inode->i_ctime = now;
-	}
+	if (!timespec_equal(&inode->i_ctime, &now))
+		sync_it = 1;
+	inode->i_ctime = now;
+
 	if (sync_it)
 		mark_inode_dirty_sync(inode);
 }
 
-EXPORT_SYMBOL(inode_update_time);
+EXPORT_SYMBOL(file_update_time);
 
 int inode_needs_sync(struct inode *inode)
 {
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 014a51fd00d7..cb3cef525c3b 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,29 +24,75 @@
 #include <linux/slab.h>
 
 /*
- * Unlink a buffer from a transaction. 
+ * Unlink a buffer from a transaction checkpoint list.
  *
  * Called with j_list_lock held.
  */
 
-static inline void __buffer_unlink(struct journal_head *jh)
+static void __buffer_unlink_first(struct journal_head *jh)
 {
 	transaction_t *transaction;
 
 	transaction = jh->b_cp_transaction;
-	jh->b_cp_transaction = NULL;
 
 	jh->b_cpnext->b_cpprev = jh->b_cpprev;
 	jh->b_cpprev->b_cpnext = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh)
+	if (transaction->t_checkpoint_list == jh) {
 		transaction->t_checkpoint_list = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh)
-		transaction->t_checkpoint_list = NULL;
+		if (transaction->t_checkpoint_list == jh)
+			transaction->t_checkpoint_list = NULL;
+	}
+}
+
+/*
+ * Unlink a buffer from a transaction checkpoint(io) list.
+ *
+ * Called with j_list_lock held.
+ */
+
+static inline void __buffer_unlink(struct journal_head *jh)
+{
+	transaction_t *transaction;
+
+	transaction = jh->b_cp_transaction;
+
+	__buffer_unlink_first(jh);
+	if (transaction->t_checkpoint_io_list == jh) {
+		transaction->t_checkpoint_io_list = jh->b_cpnext;
+		if (transaction->t_checkpoint_io_list == jh)
+			transaction->t_checkpoint_io_list = NULL;
+	}
+}
+
+/*
+ * Move a buffer from the checkpoint list to the checkpoint io list
+ *
+ * Called with j_list_lock held
+ */
+
+static inline void __buffer_relink_io(struct journal_head *jh)
+{
+	transaction_t *transaction;
+
+	transaction = jh->b_cp_transaction;
+	__buffer_unlink_first(jh);
+
+	if (!transaction->t_checkpoint_io_list) {
+		jh->b_cpnext = jh->b_cpprev = jh;
+	} else {
+		jh->b_cpnext = transaction->t_checkpoint_io_list;
+		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
+		jh->b_cpprev->b_cpnext = jh;
+		jh->b_cpnext->b_cpprev = jh;
+	}
+	transaction->t_checkpoint_io_list = jh;
 }
 
 /*
  * Try to release a checkpointed buffer from its transaction.
- * Returns 1 if we released it.
+ * Returns 1 if we released it and 2 if we also released the
+ * whole transaction.
+ *
  * Requires j_list_lock
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
@@ -57,12 +103,11 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		__journal_remove_checkpoint(jh);
+		ret = __journal_remove_checkpoint(jh) + 1;
 		jbd_unlock_bh_state(bh);
 		journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
-		ret = 1;
 	} else {
 		jbd_unlock_bh_state(bh);
 	}
@@ -117,83 +162,53 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
 }
 
 /*
- * Clean up a transaction's checkpoint list.  
- *
- * We wait for any pending IO to complete and make sure any clean
- * buffers are removed from the transaction. 
- *
- * Return 1 if we performed any actions which might have destroyed the
- * checkpoint.  (journal_remove_checkpoint() deletes the transaction when
- * the last checkpoint buffer is cleansed)
+ * Clean up transaction's list of buffers submitted for io.
+ * We wait for any pending IO to complete and remove any clean
+ * buffers. Note that we take the buffers in the opposite ordering
+ * from the one in which they were submitted for IO.
  *
  * Called with j_list_lock held.
  */
-static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
+
+static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
 {
-	struct journal_head *jh, *next_jh, *last_jh;
+	struct journal_head *jh;
 	struct buffer_head *bh;
-	int ret = 0;
-
-	assert_spin_locked(&journal->j_list_lock);
-	jh = transaction->t_checkpoint_list;
-	if (!jh)
-		return 0;
-
-	last_jh = jh->b_cpprev;
-	next_jh = jh;
-	do {
-		jh = next_jh;
+	tid_t this_tid;
+	int released = 0;
+
+	this_tid = transaction->t_tid;
+restart:
+	/* Didn't somebody clean up the transaction in the meanwhile */
+	if (journal->j_checkpoint_transactions != transaction ||
+		transaction->t_tid != this_tid)
+		return;
+	while (!released && transaction->t_checkpoint_io_list) {
+		jh = transaction->t_checkpoint_io_list;
 		bh = jh2bh(jh);
+		if (!jbd_trylock_bh_state(bh)) {
+			jbd_sync_bh(journal, bh);
+			spin_lock(&journal->j_list_lock);
+			goto restart;
+		}
 		if (buffer_locked(bh)) {
 			atomic_inc(&bh->b_count);
 			spin_unlock(&journal->j_list_lock);
+			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
 			/* the journal_head may have gone by now */
 			BUFFER_TRACE(bh, "brelse");
 			__brelse(bh);
-			goto out_return_1;
-		}
-
-		/*
-		 * This is foul
-		 */
-		if (!jbd_trylock_bh_state(bh)) {
-			jbd_sync_bh(journal, bh);
-			goto out_return_1;
+			spin_lock(&journal->j_list_lock);
+			goto restart;
 		}
-
-		if (jh->b_transaction != NULL) {
-			transaction_t *t = jh->b_transaction;
-			tid_t tid = t->t_tid;
-
-			spin_unlock(&journal->j_list_lock);
-			jbd_unlock_bh_state(bh);
-			log_start_commit(journal, tid);
-			log_wait_commit(journal, tid);
-			goto out_return_1;
-		}
-
 		/*
-		 * AKPM: I think the buffer_jbddirty test is redundant - it
-		 * shouldn't have NULL b_transaction?
+		 * Now in whatever state the buffer currently is, we know that
+		 * it has been written out and so we can drop it from the list
 		 */
-		next_jh = jh->b_cpnext;
-		if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
-			BUFFER_TRACE(bh, "remove from checkpoint");
-			__journal_remove_checkpoint(jh);
-			jbd_unlock_bh_state(bh);
-			journal_remove_journal_head(bh);
-			__brelse(bh);
-			ret = 1;
-		} else {
-			jbd_unlock_bh_state(bh);
-		}
-	} while (jh != last_jh);
-
-	return ret;
-out_return_1:
-	spin_lock(&journal->j_list_lock);
-	return 1;
+		released = __journal_remove_checkpoint(jh);
+		jbd_unlock_bh_state(bh);
+	}
 }
 
 #define NR_BATCH	64
@@ -203,9 +218,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 {
 	int i;
 
-	spin_unlock(&journal->j_list_lock);
 	ll_rw_block(SWRITE, *batch_count, bhs);
-	spin_lock(&journal->j_list_lock);
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = bhs[i];
 		clear_buffer_jwrite(bh);
@@ -221,19 +234,46 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  * Return 1 if something happened which requires us to abort the current
  * scan of the checkpoint list.  
  *
- * Called with j_list_lock held.
+ * Called with j_list_lock held and drops it if 1 is returned
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
-static int __flush_buffer(journal_t *journal, struct journal_head *jh,
-			struct buffer_head **bhs, int *batch_count,
-			int *drop_count)
+static int __process_buffer(journal_t *journal, struct journal_head *jh,
+			struct buffer_head **bhs, int *batch_count)
 {
 	struct buffer_head *bh = jh2bh(jh);
 	int ret = 0;
 
-	if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
-		J_ASSERT_JH(jh, jh->b_transaction == NULL);
+	if (buffer_locked(bh)) {
+		get_bh(bh);
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		wait_on_buffer(bh);
+		/* the journal_head may have gone by now */
+		BUFFER_TRACE(bh, "brelse");
+		put_bh(bh);
+		ret = 1;
+	}
+	else if (jh->b_transaction != NULL) {
+		transaction_t *t = jh->b_transaction;
+		tid_t tid = t->t_tid;
 
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		log_start_commit(journal, tid);
+		log_wait_commit(journal, tid);
+		ret = 1;
+	}
+	else if (!buffer_dirty(bh)) {
+		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
+		BUFFER_TRACE(bh, "remove from checkpoint");
+		__journal_remove_checkpoint(jh);
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		journal_remove_journal_head(bh);
+		put_bh(bh);
+		ret = 1;
+	}
+	else {
 		/*
 		 * Important: we are about to write the buffer, and
 		 * possibly block, while still holding the journal lock.
@@ -246,45 +286,30 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
 		J_ASSERT_BH(bh, !buffer_jwrite(bh));
 		set_buffer_jwrite(bh);
 		bhs[*batch_count] = bh;
+		__buffer_relink_io(jh);
 		jbd_unlock_bh_state(bh);
 		(*batch_count)++;
 		if (*batch_count == NR_BATCH) {
+			spin_unlock(&journal->j_list_lock);
 			__flush_batch(journal, bhs, batch_count);
 			ret = 1;
 		}
-	} else {
-		int last_buffer = 0;
-		if (jh->b_cpnext == jh) {
-			/* We may be about to drop the transaction.  Tell the
-			 * caller that the lists have changed.
-			 */
-			last_buffer = 1;
-		}
-		if (__try_to_free_cp_buf(jh)) {
-			(*drop_count)++;
-			ret = last_buffer;
-		}
 	}
 	return ret;
 }
 
 /*
- * Perform an actual checkpoint.  We don't write out only enough to
- * satisfy the current blocked requests: rather we submit a reasonably
- * sized chunk of the outstanding data to disk at once for
- * efficiency.  __log_wait_for_space() will retry if we didn't free enough.
+ * Perform an actual checkpoint. We take the first transaction on the
+ * list of transactions to be checkpointed and send all its buffers
+ * to disk. We submit larger chunks of data at once.
  * 
- * However, we _do_ take into account the amount requested so that once
- * the IO has been queued, we can return as soon as enough of it has
- * completed to disk.  
- *
  * The journal should be locked before calling this function.
  */
 int log_do_checkpoint(journal_t *journal)
 {
+	transaction_t *transaction;
+	tid_t this_tid;
 	int result;
-	int batch_count = 0;
-	struct buffer_head *bhs[NR_BATCH];
 
 	jbd_debug(1, "Start checkpoint\n");
 
@@ -299,79 +324,70 @@ int log_do_checkpoint(journal_t *journal)
 		return result;
 
 	/*
-	 * OK, we need to start writing disk blocks.  Try to free up a
-	 * quarter of the log in a single checkpoint if we can.
+	 * OK, we need to start writing disk blocks.  Take one transaction
+	 * and write it.
 	 */
+	spin_lock(&journal->j_list_lock);
+	if (!journal->j_checkpoint_transactions)
+		goto out;
+	transaction = journal->j_checkpoint_transactions;
+	this_tid = transaction->t_tid;
+restart:
 	/*
-	 * AKPM: check this code.  I had a feeling a while back that it
-	 * degenerates into a busy loop at unmount time.
+	 * If someone cleaned up this transaction while we slept, we're
+	 * done (maybe it's a new transaction, but it fell at the same
+	 * address).
 	 */
-	spin_lock(&journal->j_list_lock);
-	while (journal->j_checkpoint_transactions) {
-		transaction_t *transaction;
-		struct journal_head *jh, *last_jh, *next_jh;
-		int drop_count = 0;
-		int cleanup_ret, retry = 0;
-		tid_t this_tid;
-
-		transaction = journal->j_checkpoint_transactions;
-		this_tid = transaction->t_tid;
-		jh = transaction->t_checkpoint_list;
-		last_jh = jh->b_cpprev;
-		next_jh = jh;
-		do {
+ 	if (journal->j_checkpoint_transactions == transaction ||
+			transaction->t_tid == this_tid) {
+		int batch_count = 0;
+		struct buffer_head *bhs[NR_BATCH];
+		struct journal_head *jh;
+		int retry = 0;
+
+		while (!retry && transaction->t_checkpoint_list) {
 			struct buffer_head *bh;
 
-			jh = next_jh;
-			next_jh = jh->b_cpnext;
+			jh = transaction->t_checkpoint_list;
 			bh = jh2bh(jh);
 			if (!jbd_trylock_bh_state(bh)) {
 				jbd_sync_bh(journal, bh);
-				spin_lock(&journal->j_list_lock);
 				retry = 1;
 				break;
 			}
-			retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
-			if (cond_resched_lock(&journal->j_list_lock)) {
+			retry = __process_buffer(journal, jh, bhs,
+						&batch_count);
+			if (!retry &&
+			    lock_need_resched(&journal->j_list_lock)) {
+				spin_unlock(&journal->j_list_lock);
 				retry = 1;
 				break;
 			}
-		} while (jh != last_jh && !retry);
+		}
 
 		if (batch_count) {
+			if (!retry) {
+				spin_unlock(&journal->j_list_lock);
+				retry = 1;
+			}
 			__flush_batch(journal, bhs, &batch_count);
-			retry = 1;
 		}
 
+		if (retry) {
+			spin_lock(&journal->j_list_lock);
+			goto restart;
+		}
 		/*
-		 * If someone cleaned up this transaction while we slept, we're
-		 * done
-		 */
-		if (journal->j_checkpoint_transactions != transaction)
-			break;
-		if (retry)
-			continue;
-		/*
-		 * Maybe it's a new transaction, but it fell at the same
-		 * address
-		 */
-		if (transaction->t_tid != this_tid)
-			continue;
-		/*
-		 * We have walked the whole transaction list without
-		 * finding anything to write to disk.  We had better be
-		 * able to make some progress or we are in trouble. 
+		 * Now we have cleaned up the first transaction's checkpoint
+		 * list.  Let's clean up the second one.
 		 */
-		cleanup_ret = __cleanup_transaction(journal, transaction);
-		J_ASSERT(drop_count != 0 || cleanup_ret != 0);
-		if (journal->j_checkpoint_transactions != transaction)
-			break;
+		__wait_cp_io(journal, transaction);
 	}
+out:
 	spin_unlock(&journal->j_list_lock);
 	result = cleanup_journal_tail(journal);
 	if (result < 0)
 		return result;
-
 	return 0;
 }
 
@@ -456,52 +472,91 @@ int cleanup_journal_tail(journal_t *journal)
 /* Checkpoint list management */
 
 /*
+ * journal_clean_one_cp_list
+ *
+ * Find all the written-back checkpoint buffers in the given list and release them.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ * Returns number of bufers reaped (for debug)
+ */
+
+static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
+{
+	struct journal_head *last_jh;
+	struct journal_head *next_jh = jh;
+	int ret, freed = 0;
+
+	*released = 0;
+	if (!jh)
+		return 0;
+
+ 	last_jh = jh->b_cpprev;
+	do {
+		jh = next_jh;
+		next_jh = jh->b_cpnext;
+		/* Use trylock because of the ranking */
+		if (jbd_trylock_bh_state(jh2bh(jh))) {
+			ret = __try_to_free_cp_buf(jh);
+			if (ret) {
+				freed++;
+				if (ret == 2) {
+					*released = 1;
+					return freed;
+				}
+			}
+		}
+		/*
+		 * This function only frees up some memory if possible so we
+		 * dont have an obligation to finish processing. Bail out if
+		 * preemption requested:
+		 */
+		if (need_resched())
+			return freed;
+	} while (jh != last_jh);
+
+	return freed;
+}
+
+/*
  * journal_clean_checkpoint_list
  *
  * Find all the written-back checkpoint buffers in the journal and release them.
  *
  * Called with the journal locked.
  * Called with j_list_lock held.
- * Returns number of bufers reaped (for debug)
+ * Returns number of buffers reaped (for debug)
  */
 
 int __journal_clean_checkpoint_list(journal_t *journal)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
-	int ret = 0;
+	int ret = 0, released;
 
 	transaction = journal->j_checkpoint_transactions;
-	if (transaction == 0)
+	if (!transaction)
 		goto out;
 
 	last_transaction = transaction->t_cpprev;
 	next_transaction = transaction;
 	do {
-		struct journal_head *jh;
-
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		jh = transaction->t_checkpoint_list;
-		if (jh) {
-			struct journal_head *last_jh = jh->b_cpprev;
-			struct journal_head *next_jh = jh;
-
-			do {
-				jh = next_jh;
-				next_jh = jh->b_cpnext;
-				/* Use trylock because of the ranknig */
-				if (jbd_trylock_bh_state(jh2bh(jh)))
-					ret += __try_to_free_cp_buf(jh);
-				/*
-				 * This function only frees up some memory
-				 * if possible so we dont have an obligation
-				 * to finish processing. Bail out if preemption
-				 * requested:
-				 */
-				if (need_resched())
-					goto out;
-			} while (jh != last_jh);
-		}
+		ret += journal_clean_one_cp_list(transaction->
+				t_checkpoint_list, &released);
+		if (need_resched())
+			goto out;
+		if (released)
+			continue;
+		/*
+		 * It is essential that we are as careful as in the case of
+		 * t_checkpoint_list with removing the buffer from the list as
+		 * we can possibly see not yet submitted buffers on io_list
+		 */
+		ret += journal_clean_one_cp_list(transaction->
+				t_checkpoint_io_list, &released);
+		if (need_resched())
+			goto out;
 	} while (transaction != last_transaction);
 out:
 	return ret;
@@ -516,18 +571,22 @@ out:
  * buffer updates committed in that transaction have safely been stored
  * elsewhere on disk.  To achieve this, all of the buffers in a
  * transaction need to be maintained on the transaction's checkpoint
- * list until they have been rewritten, at which point this function is
+ * lists until they have been rewritten, at which point this function is
  * called to remove the buffer from the existing transaction's
- * checkpoint list.  
+ * checkpoint lists.
+ *
+ * The function returns 1 if it frees the transaction, 0 otherwise.
  *
  * This function is called with the journal locked.
  * This function is called with j_list_lock held.
+ * This function is called with jbd_lock_bh_state(jh2bh(jh))
  */
 
-void __journal_remove_checkpoint(struct journal_head *jh)
+int __journal_remove_checkpoint(struct journal_head *jh)
 {
 	transaction_t *transaction;
 	journal_t *journal;
+	int ret = 0;
 
 	JBUFFER_TRACE(jh, "entry");
 
@@ -538,8 +597,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
 	journal = transaction->t_journal;
 
 	__buffer_unlink(jh);
+	jh->b_cp_transaction = NULL;
 
-	if (transaction->t_checkpoint_list != NULL)
+	if (transaction->t_checkpoint_list != NULL ||
+	    transaction->t_checkpoint_io_list != NULL)
 		goto out;
 	JBUFFER_TRACE(jh, "transaction has no more buffers");
 
@@ -565,8 +626,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
 	/* Just in case anybody was waiting for more transactions to be
            checkpointed... */
 	wake_up(&journal->j_wait_logspace);
+	ret = 1;
 out:
 	JBUFFER_TRACE(jh, "exit");
+	return ret;
 }
 
 /*
@@ -628,6 +691,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 	J_ASSERT(transaction->t_shadow_list == NULL);
 	J_ASSERT(transaction->t_log_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_list == NULL);
+	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 	J_ASSERT(transaction->t_updates == 0);
 	J_ASSERT(journal->j_committing_transaction != transaction);
 	J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 3dcc6d2162cb..fc3855a1aef3 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -757,7 +757,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page)
 
 	read_len = 0;
 	result = 0;
-	offset = page->index << PAGE_CACHE_SHIFT;
+	offset = page_offset(page);
 
 	kmap(page);
 	buf = page_address(page);
@@ -1415,7 +1415,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
 	 * This will never trigger with sane page sizes.  leave it in
 	 * anyway, since I'm thinking about how to merge larger writes
 	 * (the current idea is to poke a thread that does the actual
-	 * I/O and starts by doing a down(&inode->i_sem).  then we
+	 * I/O and starts by doing a mutex_lock(&inode->i_mutex).  then we
 	 * would need to get the page cache pages and have a list of
 	 * I/O requests and do write-merging here.
 	 * -- prumpf
@@ -1545,7 +1545,7 @@ jffs_commit_write(struct file *filp, struct page *page,
 {
        void *addr = page_address(page) + from;
        /* XXX: PAGE_CACHE_SHIFT or PAGE_SHIFT */
-       loff_t pos = (page->index<<PAGE_CACHE_SHIFT) + from;
+       loff_t pos = page_offset(page) + from;
 
        return jffs_file_write(filp, addr, to-from, &pos);
 } /* jffs_commit_write() */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 68000a50ceb6..2967b7393415 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -302,8 +302,7 @@ int dbSync(struct inode *ipbmap)
 	/*
 	 * write out dirty pages of bmap
 	 */
-	filemap_fdatawrite(ipbmap->i_mapping);
-	filemap_fdatawait(ipbmap->i_mapping);
+	filemap_write_and_wait(ipbmap->i_mapping);
 
 	diWriteSpecial(ipbmap, 0);
 
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 28201b194f53..31b4aa13dd4b 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -265,8 +265,7 @@ int diSync(struct inode *ipimap)
 	/*
 	 * write out dirty pages of imap
 	 */
-	filemap_fdatawrite(ipimap->i_mapping);
-	filemap_fdatawait(ipimap->i_mapping);
+	filemap_write_and_wait(ipimap->i_mapping);
 
 	diWriteSpecial(ipimap, 0);
 
@@ -565,8 +564,7 @@ void diFreeSpecial(struct inode *ip)
 		jfs_err("diFreeSpecial called with NULL ip!");
 		return;
 	}
-	filemap_fdatawrite(ip->i_mapping);
-	filemap_fdatawait(ip->i_mapping);
+	filemap_write_and_wait(ip->i_mapping);
 	truncate_inode_pages(ip->i_mapping, 0);
 	iput(ip);
 }
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index c0fd7b3eadc6..dc21a5bd54d4 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -58,7 +58,7 @@ struct jfs_inode_info {
 	/*
 	 * rdwrlock serializes xtree between reads & writes and synchronizes
 	 * changes to special inodes.  It's use would be redundant on
-	 * directories since the i_sem taken in the VFS is sufficient.
+	 * directories since the i_mutex taken in the VFS is sufficient.
 	 */
 	struct rw_semaphore rdwrlock;
 	/*
@@ -68,7 +68,7 @@ struct jfs_inode_info {
 	 * inode is blocked in txBegin or TxBeginAnon
 	 */
 	struct semaphore commit_sem;
-	/* xattr_sem allows us to access the xattrs without taking i_sem */
+	/* xattr_sem allows us to access the xattrs without taking i_mutex */
 	struct rw_semaphore xattr_sem;
 	lid_t	xtlid;		/* lid of xtree lock on directory */
 #ifdef CONFIG_JFS_POSIX_ACL
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index b660c93c92de..2ddb6b892bcf 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1231,10 +1231,8 @@ int txCommit(tid_t tid,		/* transaction identifier */
 		 * when we don't need to worry about it at all.
 		 *
 		 * if ((!S_ISDIR(ip->i_mode))
-		 *    && (tblk->flag & COMMIT_DELETE) == 0) {
-		 *	filemap_fdatawrite(ip->i_mapping);
-		 *	filemap_fdatawait(ip->i_mapping);
-		 * }
+		 *    && (tblk->flag & COMMIT_DELETE) == 0)
+		 *	filemap_write_and_wait(ip->i_mapping);
 		 */
 
 		/*
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index 5cf91785b541..21eaf7ac0fcb 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -108,8 +108,7 @@ int jfs_umount(struct super_block *sb)
 	 * Make sure all metadata makes it to disk before we mark
 	 * the superblock as clean
 	 */
-	filemap_fdatawrite(sbi->direct_inode->i_mapping);
-	filemap_fdatawait(sbi->direct_inode->i_mapping);
+	filemap_write_and_wait(sbi->direct_inode->i_mapping);
 
 	/*
 	 * ensure all file system file pages are propagated to their
@@ -161,8 +160,7 @@ int jfs_umount_rw(struct super_block *sb)
 	 * mark the superblock clean before everything is flushed to
 	 * disk.
 	 */
-	filemap_fdatawrite(sbi->direct_inode->i_mapping);
-	filemap_fdatawait(sbi->direct_inode->i_mapping);
+	filemap_write_and_wait(sbi->direct_inode->i_mapping);
 
 	updateSuper(sb, FM_CLEAN);
 
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index c6dc254d3253..45180361871c 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -376,8 +376,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	 * by txCommit();
 	 */
 	filemap_fdatawait(ipbmap->i_mapping);
-	filemap_fdatawrite(ipbmap->i_mapping);
-	filemap_fdatawait(ipbmap->i_mapping);
+	filemap_write_and_wait(ipbmap->i_mapping);
 	diWriteSpecial(ipbmap, 0);
 
 	newPage = nPages;	/* first new page number */
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4226af3ea91b..8d31f1336431 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -502,8 +502,7 @@ out_no_rw:
 		jfs_err("jfs_umount failed with return code %d", rc);
 	}
 out_mount_failed:
-	filemap_fdatawrite(sbi->direct_inode->i_mapping);
-	filemap_fdatawait(sbi->direct_inode->i_mapping);
+	filemap_write_and_wait(sbi->direct_inode->i_mapping);
 	truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
 	make_bad_inode(sbi->direct_inode);
 	iput(sbi->direct_inode);
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 23aa5066b5a4..952da5f917cd 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -83,21 +83,6 @@ struct ea_buffer {
 #define EA_NEW		0x0004
 #define EA_MALLOC	0x0008
 
-/* Namespaces */
-#define XATTR_SYSTEM_PREFIX "system."
-#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1)
-
-#define XATTR_USER_PREFIX "user."
-#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1)
-
-#define XATTR_OS2_PREFIX "os2."
-#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1)
-
-/* XATTR_SECURITY_PREFIX is defined in include/linux/xattr.h */
-#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1)
-
-#define XATTR_TRUSTED_PREFIX "trusted."
-#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1)
 
 /*
  * These three routines are used to recognize on-disk extended attributes
@@ -773,36 +758,23 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 static int can_set_xattr(struct inode *inode, const char *name,
 			 const void *value, size_t value_len)
 {
-	if (IS_RDONLY(inode))
-		return -EROFS;
-
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		return -EPERM;
-
-	if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
-		/*
-		 * "system.*"
-		 */
+	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 		return can_set_system_xattr(inode, name, value, value_len);
 
-	if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0)
-		return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
-
-#ifdef CONFIG_JFS_SECURITY
-	if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)
-	    == 0)
-		return 0;	/* Leave it to the security module */
-#endif
-		
-	if((strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) != 0) &&
-	   (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) != 0))
+	/*
+	 * Don't allow setting an attribute in an unknown namespace.
+	 */
+	if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
+	    strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
+	    strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+	    strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
 		return -EOPNOTSUPP;
 
 	if (!S_ISREG(inode->i_mode) &&
 	    (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX))
 		return -EPERM;
 
-	return permission(inode, MAY_WRITE, NULL);
+	return 0;
 }
 
 int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name,
@@ -972,22 +944,6 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	return rc;
 }
 
-static int can_get_xattr(struct inode *inode, const char *name)
-{
-#ifdef CONFIG_JFS_SECURITY
-	if(strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0)
-		return 0;
-#endif
-
-	if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0)
-		return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
-
-	if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
-		return 0;
-
-	return permission(inode, MAY_READ, NULL);
-}
-
 ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
 		       size_t buf_size)
 {
@@ -998,12 +954,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
 	ssize_t size;
 	int namelen = strlen(name);
 	char *os2name = NULL;
-	int rc;
 	char *value;
 
-	if ((rc = can_get_xattr(inode, name)))
-		return rc;
-
 	if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
 		os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
 				  GFP_KERNEL);
diff --git a/fs/libfs.c b/fs/libfs.c
index 58101dff2c66..63c020e6589e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -74,7 +74,7 @@ int dcache_dir_close(struct inode *inode, struct file *file)
 
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 {
-	down(&file->f_dentry->d_inode->i_sem);
+	mutex_lock(&file->f_dentry->d_inode->i_mutex);
 	switch (origin) {
 		case 1:
 			offset += file->f_pos;
@@ -82,7 +82,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 			if (offset >= 0)
 				break;
 		default:
-			up(&file->f_dentry->d_inode->i_sem);
+			mutex_unlock(&file->f_dentry->d_inode->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
@@ -93,20 +93,20 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 			loff_t n = file->f_pos - 2;
 
 			spin_lock(&dcache_lock);
-			list_del(&cursor->d_child);
+			list_del(&cursor->d_u.d_child);
 			p = file->f_dentry->d_subdirs.next;
 			while (n && p != &file->f_dentry->d_subdirs) {
 				struct dentry *next;
-				next = list_entry(p, struct dentry, d_child);
+				next = list_entry(p, struct dentry, d_u.d_child);
 				if (!d_unhashed(next) && next->d_inode)
 					n--;
 				p = p->next;
 			}
-			list_add_tail(&cursor->d_child, p);
+			list_add_tail(&cursor->d_u.d_child, p);
 			spin_unlock(&dcache_lock);
 		}
 	}
-	up(&file->f_dentry->d_inode->i_sem);
+	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
 	return offset;
 }
 
@@ -126,7 +126,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	struct dentry *dentry = filp->f_dentry;
 	struct dentry *cursor = filp->private_data;
-	struct list_head *p, *q = &cursor->d_child;
+	struct list_head *p, *q = &cursor->d_u.d_child;
 	ino_t ino;
 	int i = filp->f_pos;
 
@@ -153,7 +153,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			}
 			for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
 				struct dentry *next;
-				next = list_entry(p, struct dentry, d_child);
+				next = list_entry(p, struct dentry, d_u.d_child);
 				if (d_unhashed(next) || !next->d_inode)
 					continue;
 
@@ -261,7 +261,7 @@ int simple_empty(struct dentry *dentry)
 	int ret = 0;
 
 	spin_lock(&dcache_lock);
-	list_for_each_entry(child, &dentry->d_subdirs, d_child)
+	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
 		if (simple_positive(child))
 			goto out;
 	ret = 1;
@@ -356,7 +356,7 @@ int simple_commit_write(struct file *file, struct page *page,
 
 	/*
 	 * No need to use i_size_read() here, the i_size
-	 * cannot change under us because we hold the i_sem.
+	 * cannot change under us because we hold the i_mutex.
 	 */
 	if (pos > inode->i_size)
 		i_size_write(inode, pos);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index c5a33648e9fd..145524039577 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -26,11 +26,12 @@
 static int	nlmclnt_test(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_unlock(struct nlm_rqst *, struct file_lock *);
-static void	nlmclnt_unlock_callback(struct rpc_task *);
-static void	nlmclnt_cancel_callback(struct rpc_task *);
 static int	nlm_stat_to_errno(u32 stat);
 static void	nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host);
 
+static const struct rpc_call_ops nlmclnt_unlock_ops;
+static const struct rpc_call_ops nlmclnt_cancel_ops;
+
 /*
  * Cookie counter for NLM requests
  */
@@ -221,8 +222,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 			goto done;
 		}
 		clnt->cl_softrtry = nfssrv->client->cl_softrtry;
-		clnt->cl_intr     = nfssrv->client->cl_intr;
-		clnt->cl_chatty   = nfssrv->client->cl_chatty;
+		clnt->cl_intr = nfssrv->client->cl_intr;
 	}
 
 	/* Keep the old signal mask */
@@ -399,8 +399,7 @@ in_grace_period:
 /*
  * Generic NLM call, async version.
  */
-int
-nlmsvc_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
+int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
@@ -419,13 +418,12 @@ nlmsvc_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
 	msg.rpc_proc = &clnt->cl_procinfo[proc];
 
         /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req);
+        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
 
 	return status;
 }
 
-static int
-nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
+static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
@@ -448,7 +446,7 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
 	/* Increment host refcount */
 	nlm_get_host(host);
         /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req);
+        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
 	if (status < 0)
 		nlm_release_host(host);
 	return status;
@@ -664,7 +662,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 
 	if (req->a_flags & RPC_TASK_ASYNC) {
 		status = nlmclnt_async_call(req, NLMPROC_UNLOCK,
-					nlmclnt_unlock_callback);
+					&nlmclnt_unlock_ops);
 		/* Hrmf... Do the unlock early since locks_remove_posix()
 		 * really expects us to free the lock synchronously */
 		do_vfs_lock(fl);
@@ -692,10 +690,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	return -ENOLCK;
 }
 
-static void
-nlmclnt_unlock_callback(struct rpc_task *task)
+static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst	*req = (struct nlm_rqst *) task->tk_calldata;
+	struct nlm_rqst	*req = data;
 	int		status = req->a_res.status;
 
 	if (RPC_ASSASSINATED(task))
@@ -722,6 +719,10 @@ die:
 	rpc_restart_call(task);
 }
 
+static const struct rpc_call_ops nlmclnt_unlock_ops = {
+	.rpc_call_done = nlmclnt_unlock_callback,
+};
+
 /*
  * Cancel a blocked lock request.
  * We always use an async RPC call for this in order not to hang a
@@ -750,8 +751,7 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl)
 
 	nlmclnt_setlockargs(req, fl);
 
-	status = nlmclnt_async_call(req, NLMPROC_CANCEL,
-					nlmclnt_cancel_callback);
+	status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 	if (status < 0) {
 		nlmclnt_release_lockargs(req);
 		kfree(req);
@@ -765,10 +765,9 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl)
 	return status;
 }
 
-static void
-nlmclnt_cancel_callback(struct rpc_task *task)
+static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst	*req = (struct nlm_rqst *) task->tk_calldata;
+	struct nlm_rqst	*req = data;
 
 	if (RPC_ASSASSINATED(task))
 		goto die;
@@ -807,6 +806,10 @@ retry_cancel:
 	rpc_delay(task, 30 * HZ);
 }
 
+static const struct rpc_call_ops nlmclnt_cancel_ops = {
+	.rpc_call_done = nlmclnt_cancel_callback,
+};
+
 /*
  * Convert an NLM status code to a generic kernel errno
  */
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index c4c8601096e0..82f7a0b1d8ae 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -177,7 +177,7 @@ nlm_bind_host(struct nlm_host *host)
 	if ((clnt = host->h_rpcclnt) != NULL) {
 		xprt = clnt->cl_xprt;
 		if (time_after_eq(jiffies, host->h_nextrebind)) {
-			clnt->cl_port = 0;
+			rpc_force_rebind(clnt);
 			host->h_nextrebind = jiffies + NLM_HOST_REBIND;
 			dprintk("lockd: next rebind in %ld jiffies\n",
 					host->h_nextrebind - jiffies);
@@ -217,7 +217,7 @@ nlm_rebind_host(struct nlm_host *host)
 {
 	dprintk("lockd: rebind host %s\n", host->h_name);
 	if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) {
-		host->h_rpcclnt->cl_port = 0;
+		rpc_force_rebind(host->h_rpcclnt);
 		host->h_nextrebind = jiffies + NLM_HOST_REBIND;
 	}
 }
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 2d144abe84ad..0edc03e67966 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -123,7 +123,6 @@ nsm_create(void)
 	if (IS_ERR(clnt))
 		goto out_err;
 	clnt->cl_softrtry = 1;
-	clnt->cl_chatty   = 1;
 	clnt->cl_oneshot  = 1;
 	return clnt;
 
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 12a857c29e25..71a30b416d1a 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -178,6 +178,8 @@ lockd(struct svc_rqst *rqstp)
 
 	}
 
+	flush_signals(current);
+
 	/*
 	 * Check whether there's a new lockd process before
 	 * shutting down the hosts and clearing the slot.
@@ -192,8 +194,6 @@ lockd(struct svc_rqst *rqstp)
 			"lockd: new process, skipping host shutdown\n");
 	wake_up(&lockd_exit);
 
-	flush_signals(current);
-
 	/* Exit the RPC thread */
 	svc_exit_thread(rqstp);
 
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 489670e21769..4063095d849e 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -22,7 +22,8 @@
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
 static u32	nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *);
-static void	nlm4svc_callback_exit(struct rpc_task *);
+
+static const struct rpc_call_ops nlm4svc_callback_ops;
 
 /*
  * Obtain client and file from arguments
@@ -470,7 +471,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 }
 
 
-
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -494,7 +494,7 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	call->a_host  = host;
 	memcpy(&call->a_args, resp, sizeof(*resp));
 
-	if (nlmsvc_async_call(call, proc, nlm4svc_callback_exit) < 0)
+	if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0)
 		goto error;
 
 	return rpc_success;
@@ -504,10 +504,9 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	return rpc_system_err;
 }
 
-static void
-nlm4svc_callback_exit(struct rpc_task *task)
+static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst	*call = (struct nlm_rqst *) task->tk_calldata;
+	struct nlm_rqst	*call = data;
 
 	if (task->tk_status < 0) {
 		dprintk("lockd: %4d callback failed (errno = %d)\n",
@@ -517,6 +516,10 @@ nlm4svc_callback_exit(struct rpc_task *task)
 	kfree(call);
 }
 
+static const struct rpc_call_ops nlm4svc_callback_ops = {
+	.rpc_call_done = nlm4svc_callback_exit,
+};
+
 /*
  * NLM Server procedures.
  */
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 49f959796b66..9cfced65d4a2 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -41,7 +41,8 @@
 
 static void	nlmsvc_insert_block(struct nlm_block *block, unsigned long);
 static int	nlmsvc_remove_block(struct nlm_block *block);
-static void	nlmsvc_grant_callback(struct rpc_task *task);
+
+static const struct rpc_call_ops nlmsvc_grant_ops;
 
 /*
  * The list of blocked locks to retry
@@ -226,31 +227,27 @@ failed:
  * It is the caller's responsibility to check whether the file
  * can be closed hereafter.
  */
-static void
+static int
 nlmsvc_delete_block(struct nlm_block *block, int unlock)
 {
 	struct file_lock	*fl = &block->b_call.a_args.lock.fl;
 	struct nlm_file		*file = block->b_file;
 	struct nlm_block	**bp;
+	int status = 0;
 
 	dprintk("lockd: deleting block %p...\n", block);
 
 	/* Remove block from list */
 	nlmsvc_remove_block(block);
-	if (fl->fl_next)
-		posix_unblock_lock(file->f_file, fl);
-	if (unlock) {
-		fl->fl_type = F_UNLCK;
-		posix_lock_file(file->f_file, fl);
-		block->b_granted = 0;
-	}
+	if (unlock)
+		status = posix_unblock_lock(file->f_file, fl);
 
 	/* If the block is in the middle of a GRANT callback,
 	 * don't kill it yet. */
 	if (block->b_incall) {
 		nlmsvc_insert_block(block, NLM_NEVER);
 		block->b_done = 1;
-		return;
+		return status;
 	}
 
 	/* Remove block from file's list of blocks */
@@ -265,6 +262,7 @@ nlmsvc_delete_block(struct nlm_block *block, int unlock)
 		nlm_release_host(block->b_host);
 	nlmclnt_freegrantargs(&block->b_call);
 	kfree(block);
+	return status;
 }
 
 /*
@@ -275,6 +273,7 @@ int
 nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
 {
 	struct nlm_block	*block, *next;
+	/* XXX: Will everything get cleaned up if we don't unlock here? */
 
 	down(&file->f_sema);
 	for (block = file->f_blocks; block; block = next) {
@@ -444,6 +443,7 @@ u32
 nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 {
 	struct nlm_block	*block;
+	int status = 0;
 
 	dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
 				file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -454,9 +454,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 
 	down(&file->f_sema);
 	if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
-		nlmsvc_delete_block(block, 1);
+		status = nlmsvc_delete_block(block, 1);
 	up(&file->f_sema);
-	return nlm_granted;
+	return status ? nlm_lck_denied : nlm_granted;
 }
 
 /*
@@ -562,7 +562,7 @@ callback:
 	/* Call the client */
 	nlm_get_host(block->b_call.a_host);
 	if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
-						nlmsvc_grant_callback) < 0)
+						&nlmsvc_grant_ops) < 0)
 		nlm_release_host(block->b_call.a_host);
 	up(&file->f_sema);
 }
@@ -575,10 +575,9 @@ callback:
  * chain once more in order to have it removed by lockd itself (which can
  * then sleep on the file semaphore without disrupting e.g. the nfs client).
  */
-static void
-nlmsvc_grant_callback(struct rpc_task *task)
+static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst		*call = (struct nlm_rqst *) task->tk_calldata;
+	struct nlm_rqst		*call = data;
 	struct nlm_block	*block;
 	unsigned long		timeout;
 	struct sockaddr_in	*peer_addr = RPC_PEERADDR(task->tk_client);
@@ -614,6 +613,10 @@ nlmsvc_grant_callback(struct rpc_task *task)
 	nlm_release_host(call->a_host);
 }
 
+static const struct rpc_call_ops nlmsvc_grant_ops = {
+	.rpc_call_done = nlmsvc_grant_callback,
+};
+
 /*
  * We received a GRANT_RES callback. Try to find the corresponding
  * block.
@@ -633,11 +636,12 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
 
 	file->f_count++;
 	down(&file->f_sema);
-	if ((block = nlmsvc_find_block(cookie,&rqstp->rq_addr)) != NULL) {
+	block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
+	if (block) {
 		if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
 			/* Try again in a couple of seconds */
 			nlmsvc_insert_block(block, 10 * HZ);
-			block = NULL;
+			up(&file->f_sema);
 		} else {
 			/* Lock is now held by client, or has been rejected.
 			 * In both cases, the block should be removed. */
@@ -648,8 +652,6 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
 				nlmsvc_delete_block(block, 1);
 		}
 	}
-	if (!block)
-		up(&file->f_sema);
 	nlm_release_file(file);
 }
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 757e344cf200..3bc437e0cf5b 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -23,7 +23,8 @@
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
 static u32	nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *);
-static void	nlmsvc_callback_exit(struct rpc_task *);
+
+static const struct rpc_call_ops nlmsvc_callback_ops;
 
 #ifdef CONFIG_LOCKD_V4
 static u32
@@ -518,7 +519,7 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	call->a_host  = host;
 	memcpy(&call->a_args, resp, sizeof(*resp));
 
-	if (nlmsvc_async_call(call, proc, nlmsvc_callback_exit) < 0)
+	if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0)
 		goto error;
 
 	return rpc_success;
@@ -528,10 +529,9 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	return rpc_system_err;
 }
 
-static void
-nlmsvc_callback_exit(struct rpc_task *task)
+static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst	*call = (struct nlm_rqst *) task->tk_calldata;
+	struct nlm_rqst	*call = data;
 
 	if (task->tk_status < 0) {
 		dprintk("lockd: %4d callback failed (errno = %d)\n",
@@ -541,6 +541,10 @@ nlmsvc_callback_exit(struct rpc_task *task)
 	kfree(call);
 }
 
+static const struct rpc_call_ops nlmsvc_callback_ops = {
+	.rpc_call_done = nlmsvc_callback_exit,
+};
+
 /*
  * NLM Server procedures.
  */
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index ae4d6b426c62..fdcf105a5303 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -354,7 +354,9 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp)
 		return 0;
 	argp->state = ntohl(*p++);
 	/* Preserve the address in network byte order */
-	argp->addr = *p++;
+	argp->addr  = *p++;
+	argp->vers  = *p++;
+	argp->proto = *p++;
 	return xdr_argsize_check(rqstp, p);
 }
 
diff --git a/fs/locks.c b/fs/locks.c
index 250ef53d25ef..909eab8fb1d0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -154,7 +154,7 @@ static struct file_lock *locks_alloc_lock(void)
 }
 
 /* Free a lock which is not in use. */
-static inline void locks_free_lock(struct file_lock *fl)
+static void locks_free_lock(struct file_lock *fl)
 {
 	if (fl == NULL) {
 		BUG();
@@ -475,8 +475,7 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
 /*
  * Check whether two locks have the same owner.
  */
-static inline int
-posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
+static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 {
 	if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner)
 		return fl2->fl_lmops == fl1->fl_lmops &&
@@ -487,7 +486,7 @@ posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 /* Remove waiter from blocker's block list.
  * When blocker ends up pointing to itself then the list is empty.
  */
-static inline void __locks_delete_block(struct file_lock *waiter)
+static void __locks_delete_block(struct file_lock *waiter)
 {
 	list_del_init(&waiter->fl_block);
 	list_del_init(&waiter->fl_link);
@@ -1958,22 +1957,18 @@ EXPORT_SYMBOL(posix_block_lock);
  *
  *	lockd needs to block waiting for locks.
  */
-void
+int
 posix_unblock_lock(struct file *filp, struct file_lock *waiter)
 {
-	/* 
-	 * A remote machine may cancel the lock request after it's been
-	 * granted locally.  If that happens, we need to delete the lock.
-	 */
+	int status = 0;
+
 	lock_kernel();
-	if (waiter->fl_next) {
+	if (waiter->fl_next)
 		__locks_delete_block(waiter);
-		unlock_kernel();
-	} else {
-		unlock_kernel();
-		waiter->fl_type = F_UNLCK;
-		posix_lock_file(filp, waiter);
-	}
+	else
+		status = -ENOENT;
+	unlock_kernel();
+	return status;
 }
 
 EXPORT_SYMBOL(posix_unblock_lock);
diff --git a/fs/mpage.c b/fs/mpage.c
index f1d2d02bd4c8..e431cb3878d6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -184,7 +184,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 	if (page_has_buffers(page))
 		goto confused;
 
-	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
+	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
 	last_block = (i_size_read(inode) + blocksize - 1) >> blkbits;
 
 	bh.b_page = page;
@@ -466,7 +466,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
 	 * The page has no buffers: map it to disk
 	 */
 	BUG_ON(!PageUptodate(page));
-	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
+	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
 	last_block = (i_size - 1) >> blkbits;
 	map_bh.b_page = page;
 	for (page_block = 0; page_block < blocks_per_page; ) {
diff --git a/fs/namei.c b/fs/namei.c
index 6dbbd42d8b95..0a8f073435af 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -438,7 +438,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
 	struct dentry * result;
 	struct inode *dir = parent->d_inode;
 
-	down(&dir->i_sem);
+	mutex_lock(&dir->i_mutex);
 	/*
 	 * First re-do the cached lookup just in case it was created
 	 * while we waited for the directory semaphore..
@@ -464,7 +464,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
 			else
 				result = dentry;
 		}
-		up(&dir->i_sem);
+		mutex_unlock(&dir->i_mutex);
 		return result;
 	}
 
@@ -472,7 +472,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
 	 * Uhhuh! Nasty case: the cache was re-populated while
 	 * we waited on the semaphore. Need to revalidate.
 	 */
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 	if (result->d_op && result->d_op->d_revalidate) {
 		if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
 			dput(result);
@@ -1366,7 +1366,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 	struct dentry *p;
 
 	if (p1 == p2) {
-		down(&p1->d_inode->i_sem);
+		mutex_lock(&p1->d_inode->i_mutex);
 		return NULL;
 	}
 
@@ -1374,30 +1374,30 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 
 	for (p = p1; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p2) {
-			down(&p2->d_inode->i_sem);
-			down(&p1->d_inode->i_sem);
+			mutex_lock(&p2->d_inode->i_mutex);
+			mutex_lock(&p1->d_inode->i_mutex);
 			return p;
 		}
 	}
 
 	for (p = p2; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p1) {
-			down(&p1->d_inode->i_sem);
-			down(&p2->d_inode->i_sem);
+			mutex_lock(&p1->d_inode->i_mutex);
+			mutex_lock(&p2->d_inode->i_mutex);
 			return p;
 		}
 	}
 
-	down(&p1->d_inode->i_sem);
-	down(&p2->d_inode->i_sem);
+	mutex_lock(&p1->d_inode->i_mutex);
+	mutex_lock(&p2->d_inode->i_mutex);
 	return NULL;
 }
 
 void unlock_rename(struct dentry *p1, struct dentry *p2)
 {
-	up(&p1->d_inode->i_sem);
+	mutex_unlock(&p1->d_inode->i_mutex);
 	if (p1 != p2) {
-		up(&p2->d_inode->i_sem);
+		mutex_unlock(&p2->d_inode->i_mutex);
 		up(&p1->d_inode->i_sb->s_vfs_rename_sem);
 	}
 }
@@ -1491,7 +1491,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 		if (!error) {
 			DQUOT_INIT(inode);
 			
-			error = do_truncate(dentry, 0, NULL);
+			error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
 		}
 		put_write_access(inode);
 		if (error)
@@ -1563,14 +1563,14 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 
 	dir = nd->dentry;
 	nd->flags &= ~LOOKUP_PARENT;
-	down(&dir->d_inode->i_sem);
+	mutex_lock(&dir->d_inode->i_mutex);
 	path.dentry = lookup_hash(nd);
 	path.mnt = nd->mnt;
 
 do_last:
 	error = PTR_ERR(path.dentry);
 	if (IS_ERR(path.dentry)) {
-		up(&dir->d_inode->i_sem);
+		mutex_unlock(&dir->d_inode->i_mutex);
 		goto exit;
 	}
 
@@ -1579,7 +1579,7 @@ do_last:
 		if (!IS_POSIXACL(dir->d_inode))
 			mode &= ~current->fs->umask;
 		error = vfs_create(dir->d_inode, path.dentry, mode, nd);
-		up(&dir->d_inode->i_sem);
+		mutex_unlock(&dir->d_inode->i_mutex);
 		dput(nd->dentry);
 		nd->dentry = path.dentry;
 		if (error)
@@ -1593,7 +1593,7 @@ do_last:
 	/*
 	 * It already exists.
 	 */
-	up(&dir->d_inode->i_sem);
+	mutex_unlock(&dir->d_inode->i_mutex);
 
 	error = -EEXIST;
 	if (flag & O_EXCL)
@@ -1665,7 +1665,7 @@ do_link:
 		goto exit;
 	}
 	dir = nd->dentry;
-	down(&dir->d_inode->i_sem);
+	mutex_lock(&dir->d_inode->i_mutex);
 	path.dentry = lookup_hash(nd);
 	path.mnt = nd->mnt;
 	__putname(nd->last.name);
@@ -1680,13 +1680,13 @@ do_link:
  * Simple function to lookup and return a dentry and create it
  * if it doesn't exist.  Is SMP-safe.
  *
- * Returns with nd->dentry->d_inode->i_sem locked.
+ * Returns with nd->dentry->d_inode->i_mutex locked.
  */
 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
 	struct dentry *dentry = ERR_PTR(-EEXIST);
 
-	down(&nd->dentry->d_inode->i_sem);
+	mutex_lock(&nd->dentry->d_inode->i_mutex);
 	/*
 	 * Yucky last component or no last component at all?
 	 * (foo/., foo/.., /////)
@@ -1784,7 +1784,7 @@ asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev)
 		}
 		dput(dentry);
 	}
-	up(&nd.dentry->d_inode->i_sem);
+	mutex_unlock(&nd.dentry->d_inode->i_mutex);
 	path_release(&nd);
 out:
 	putname(tmp);
@@ -1836,7 +1836,7 @@ asmlinkage long sys_mkdir(const char __user * pathname, int mode)
 			error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
 			dput(dentry);
 		}
-		up(&nd.dentry->d_inode->i_sem);
+		mutex_unlock(&nd.dentry->d_inode->i_mutex);
 		path_release(&nd);
 out:
 		putname(tmp);
@@ -1885,7 +1885,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 
 	DQUOT_INIT(dir);
 
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	dentry_unhash(dentry);
 	if (d_mountpoint(dentry))
 		error = -EBUSY;
@@ -1897,7 +1897,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 				dentry->d_inode->i_flags |= S_DEAD;
 		}
 	}
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 	if (!error) {
 		d_delete(dentry);
 	}
@@ -1932,14 +1932,14 @@ asmlinkage long sys_rmdir(const char __user * pathname)
 			error = -EBUSY;
 			goto exit1;
 	}
-	down(&nd.dentry->d_inode->i_sem);
+	mutex_lock(&nd.dentry->d_inode->i_mutex);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		error = vfs_rmdir(nd.dentry->d_inode, dentry);
 		dput(dentry);
 	}
-	up(&nd.dentry->d_inode->i_sem);
+	mutex_unlock(&nd.dentry->d_inode->i_mutex);
 exit1:
 	path_release(&nd);
 exit:
@@ -1959,7 +1959,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 
 	DQUOT_INIT(dir);
 
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	if (d_mountpoint(dentry))
 		error = -EBUSY;
 	else {
@@ -1967,7 +1967,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 		if (!error)
 			error = dir->i_op->unlink(dir, dentry);
 	}
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
 	if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
@@ -1979,7 +1979,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 
 /*
  * Make sure that the actual truncation of the file will occur outside its
- * directory's i_sem.  Truncate can take a long time if there is a lot of
+ * directory's i_mutex.  Truncate can take a long time if there is a lot of
  * writeout happening, and we don't want to prevent access to the directory
  * while waiting on the I/O.
  */
@@ -2001,7 +2001,7 @@ asmlinkage long sys_unlink(const char __user * pathname)
 	error = -EISDIR;
 	if (nd.last_type != LAST_NORM)
 		goto exit1;
-	down(&nd.dentry->d_inode->i_sem);
+	mutex_lock(&nd.dentry->d_inode->i_mutex);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
@@ -2015,7 +2015,7 @@ asmlinkage long sys_unlink(const char __user * pathname)
 	exit2:
 		dput(dentry);
 	}
-	up(&nd.dentry->d_inode->i_sem);
+	mutex_unlock(&nd.dentry->d_inode->i_mutex);
 	if (inode)
 		iput(inode);	/* truncate the inode here */
 exit1:
@@ -2075,7 +2075,7 @@ asmlinkage long sys_symlink(const char __user * oldname, const char __user * new
 			error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
 			dput(dentry);
 		}
-		up(&nd.dentry->d_inode->i_sem);
+		mutex_unlock(&nd.dentry->d_inode->i_mutex);
 		path_release(&nd);
 out:
 		putname(to);
@@ -2113,10 +2113,10 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 	if (error)
 		return error;
 
-	down(&old_dentry->d_inode->i_sem);
+	mutex_lock(&old_dentry->d_inode->i_mutex);
 	DQUOT_INIT(dir);
 	error = dir->i_op->link(old_dentry, dir, new_dentry);
-	up(&old_dentry->d_inode->i_sem);
+	mutex_unlock(&old_dentry->d_inode->i_mutex);
 	if (!error)
 		fsnotify_create(dir, new_dentry->d_name.name);
 	return error;
@@ -2157,7 +2157,7 @@ asmlinkage long sys_link(const char __user * oldname, const char __user * newnam
 		error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
 		dput(new_dentry);
 	}
-	up(&nd.dentry->d_inode->i_sem);
+	mutex_unlock(&nd.dentry->d_inode->i_mutex);
 out_release:
 	path_release(&nd);
 out:
@@ -2178,7 +2178,7 @@ exit:
  *	   sb->s_vfs_rename_sem. We might be more accurate, but that's another
  *	   story.
  *	c) we have to lock _three_ objects - parents and victim (if it exists).
- *	   And that - after we got ->i_sem on parents (until then we don't know
+ *	   And that - after we got ->i_mutex on parents (until then we don't know
  *	   whether the target exists).  Solution: try to be smart with locking
  *	   order for inodes.  We rely on the fact that tree topology may change
  *	   only under ->s_vfs_rename_sem _and_ that parent of the object we
@@ -2195,9 +2195,9 @@ exit:
  *	   stuff into VFS), but the former is not going away. Solution: the same
  *	   trick as in rmdir().
  *	e) conversion from fhandle to dentry may come in the wrong moment - when
- *	   we are removing the target. Solution: we will have to grab ->i_sem
+ *	   we are removing the target. Solution: we will have to grab ->i_mutex
  *	   in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
- *	   ->i_sem on parents, which works but leads to some truely excessive
+ *	   ->i_mutex on parents, which works but leads to some truely excessive
  *	   locking].
  */
 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
@@ -2222,7 +2222,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 
 	target = new_dentry->d_inode;
 	if (target) {
-		down(&target->i_sem);
+		mutex_lock(&target->i_mutex);
 		dentry_unhash(new_dentry);
 	}
 	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
@@ -2232,7 +2232,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 	if (target) {
 		if (!error)
 			target->i_flags |= S_DEAD;
-		up(&target->i_sem);
+		mutex_unlock(&target->i_mutex);
 		if (d_unhashed(new_dentry))
 			d_rehash(new_dentry);
 		dput(new_dentry);
@@ -2255,7 +2255,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 	dget(new_dentry);
 	target = new_dentry->d_inode;
 	if (target)
-		down(&target->i_sem);
+		mutex_lock(&target->i_mutex);
 	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
 		error = -EBUSY;
 	else
@@ -2266,7 +2266,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 			d_move(old_dentry, new_dentry);
 	}
 	if (target)
-		up(&target->i_sem);
+		mutex_unlock(&target->i_mutex);
 	dput(new_dentry);
 	return error;
 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 2019899f2ab8..2ca6145f43d6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -355,14 +355,14 @@ static int show_vfsmnt(struct seq_file *m, void *v)
 		{ MS_SYNCHRONOUS, ",sync" },
 		{ MS_DIRSYNC, ",dirsync" },
 		{ MS_MANDLOCK, ",mand" },
-		{ MS_NOATIME, ",noatime" },
-		{ MS_NODIRATIME, ",nodiratime" },
 		{ 0, NULL }
 	};
 	static struct proc_fs_info mnt_info[] = {
 		{ MNT_NOSUID, ",nosuid" },
 		{ MNT_NODEV, ",nodev" },
 		{ MNT_NOEXEC, ",noexec" },
+		{ MNT_NOATIME, ",noatime" },
+		{ MNT_NODIRATIME, ",nodiratime" },
 		{ 0, NULL }
 	};
 	struct proc_fs_info *fs_infop;
@@ -451,7 +451,7 @@ EXPORT_SYMBOL(may_umount);
 void release_mounts(struct list_head *head)
 {
 	struct vfsmount *mnt;
-	while(!list_empty(head)) {
+	while (!list_empty(head)) {
 		mnt = list_entry(head->next, struct vfsmount, mnt_hash);
 		list_del_init(&mnt->mnt_hash);
 		if (mnt->mnt_parent != mnt) {
@@ -814,7 +814,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
 		return -ENOTDIR;
 
 	err = -ENOENT;
-	down(&nd->dentry->d_inode->i_sem);
+	mutex_lock(&nd->dentry->d_inode->i_mutex);
 	if (IS_DEADDIR(nd->dentry->d_inode))
 		goto out_unlock;
 
@@ -826,7 +826,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
 	if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
 		err = attach_recursive_mnt(mnt, nd, NULL);
 out_unlock:
-	up(&nd->dentry->d_inode->i_sem);
+	mutex_unlock(&nd->dentry->d_inode->i_mutex);
 	if (!err)
 		security_sb_post_addmount(mnt, nd);
 	return err;
@@ -962,7 +962,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
 		goto out;
 
 	err = -ENOENT;
-	down(&nd->dentry->d_inode->i_sem);
+	mutex_lock(&nd->dentry->d_inode->i_mutex);
 	if (IS_DEADDIR(nd->dentry->d_inode))
 		goto out1;
 
@@ -1004,7 +1004,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
 	list_del_init(&old_nd.mnt->mnt_expire);
 	spin_unlock(&vfsmount_lock);
 out1:
-	up(&nd->dentry->d_inode->i_sem);
+	mutex_unlock(&nd->dentry->d_inode->i_mutex);
 out:
 	up_write(&namespace_sem);
 	if (!err)
@@ -1286,7 +1286,13 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 		mnt_flags |= MNT_NODEV;
 	if (flags & MS_NOEXEC)
 		mnt_flags |= MNT_NOEXEC;
-	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE);
+	if (flags & MS_NOATIME)
+		mnt_flags |= MNT_NOATIME;
+	if (flags & MS_NODIRATIME)
+		mnt_flags |= MNT_NODIRATIME;
+
+	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
+		   MS_NOATIME | MS_NODIRATIME);
 
 	/* ... and get the mountpoint */
 	retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
@@ -1526,6 +1532,10 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
  * pointed to by put_old must yield the same directory as new_root. No other
  * file system may be mounted on put_old. After all, new_root is a mountpoint.
  *
+ * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
+ * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
+ * in this situation.
+ *
  * Notes:
  *  - we don't move root/cwd if they are not at the root (reason: if something
  *    cared enough to change them, it's probably wrong to force them elsewhere)
@@ -1569,7 +1579,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 	user_nd.dentry = dget(current->fs->root);
 	read_unlock(&current->fs->lock);
 	down_write(&namespace_sem);
-	down(&old_nd.dentry->d_inode->i_sem);
+	mutex_lock(&old_nd.dentry->d_inode->i_mutex);
 	error = -EINVAL;
 	if (IS_MNT_SHARED(old_nd.mnt) ||
 		IS_MNT_SHARED(new_nd.mnt->mnt_parent) ||
@@ -1622,7 +1632,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 	path_release(&root_parent);
 	path_release(&parent_nd);
 out2:
-	up(&old_nd.dentry->d_inode->i_sem);
+	mutex_unlock(&old_nd.dentry->d_inode->i_mutex);
 	up_write(&namespace_sem);
 	path_release(&user_nd);
 	path_release(&old_nd);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index a9f7a8ab1d59..cfd76f431dc0 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -365,7 +365,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 	spin_lock(&dcache_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
-		dent = list_entry(next, struct dentry, d_child);
+		dent = list_entry(next, struct dentry, d_u.d_child);
 		if ((unsigned long)dent->d_fsdata == fpos) {
 			if (dent->d_inode)
 				dget_locked(dent);
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 4947d9b11fc1..973b444d6914 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -262,7 +262,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 	}
 	vfree(bouncebuffer);
 
-	inode_update_time(inode, 1);
+	file_update_time(file);
 
 	*ppos = pos;
 
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 9e4dc30c2435..799e5c2bec55 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -196,7 +196,7 @@ ncp_renew_dentries(struct dentry *parent)
 	spin_lock(&dcache_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
-		dentry = list_entry(next, struct dentry, d_child);
+		dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		if (dentry->d_fsdata == NULL)
 			ncp_age_dentry(server, dentry);
@@ -218,7 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
 	spin_lock(&dcache_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
-		dentry = list_entry(next, struct dentry, d_child);
+		dentry = list_entry(next, struct dentry, d_u.d_child);
 		dentry->d_fsdata = NULL;
 		ncp_age_dentry(server, dentry);
 		next = next->next;
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 8b3bb715d177..ec61fd56a1a9 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -13,4 +13,5 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
 			   delegation.o idmap.o \
 			   callback.o callback_xdr.o callback_proc.o
 nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
+nfs-$(CONFIG_SYSCTL) += sysctl.o
 nfs-objs		:= $(nfs-y)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 30cae3602867..fcd97406a778 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -34,6 +34,7 @@ static struct nfs_callback_data nfs_callback_info;
 static DECLARE_MUTEX(nfs_callback_sema);
 static struct svc_program nfs4_callback_program;
 
+unsigned int nfs_callback_set_tcpport;
 unsigned short nfs_callback_tcpport;
 
 /*
@@ -98,7 +99,7 @@ int nfs_callback_up(void)
 	if (!serv)
 		goto out_err;
 	/* FIXME: We don't want to register this socket with the portmapper */
-	ret = svc_makesock(serv, IPPROTO_TCP, 0);
+	ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport);
 	if (ret < 0)
 		goto out_destroy;
 	if (!list_empty(&serv->sv_permsocks)) {
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index a0db2d4f9415..b252e7fe53a5 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -65,6 +65,7 @@ extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
 extern int nfs_callback_up(void);
 extern int nfs_callback_down(void);
 
+extern unsigned int nfs_callback_set_tcpport;
 extern unsigned short nfs_callback_tcpport;
 
 #endif /* __LINUX_FS_NFS_CALLBACK_H */
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 65f1e19e4d19..462cfceb50c5 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -35,7 +35,9 @@ unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres
 	if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
 		goto out_iput;
 	res->size = i_size_read(inode);
-	res->change_attr = NFS_CHANGE_ATTR(inode);
+	res->change_attr = delegation->change_attr;
+	if (nfsi->npages != 0)
+		res->change_attr++;
 	res->ctime = inode->i_ctime;
 	res->mtime = inode->i_mtime;
 	res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 618a327027b3..c6f07c1c71e6 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -8,6 +8,7 @@
  */
 #include <linux/config.h>
 #include <linux/completion.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
@@ -130,6 +131,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 			sizeof(delegation->stateid.data));
 	delegation->type = res->delegation_type;
 	delegation->maxsize = res->maxsize;
+	delegation->change_attr = nfsi->change_attr;
 	delegation->cred = get_rpccred(cred);
 	delegation->inode = inode;
 
@@ -157,8 +159,6 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
 {
 	int res = 0;
 
-	__nfs_revalidate_inode(NFS_SERVER(inode), inode);
-
 	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
 	nfs_free_delegation(delegation);
 	return res;
@@ -231,6 +231,49 @@ restart:
 	spin_unlock(&clp->cl_lock);
 }
 
+int nfs_do_expire_all_delegations(void *ptr)
+{
+	struct nfs4_client *clp = ptr;
+	struct nfs_delegation *delegation;
+	struct inode *inode;
+
+	allow_signal(SIGKILL);
+restart:
+	spin_lock(&clp->cl_lock);
+	if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
+		goto out;
+	if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
+		goto out;
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+		inode = igrab(delegation->inode);
+		if (inode == NULL)
+			continue;
+		spin_unlock(&clp->cl_lock);
+		nfs_inode_return_delegation(inode);
+		iput(inode);
+		goto restart;
+	}
+out:
+	spin_unlock(&clp->cl_lock);
+	nfs4_put_client(clp);
+	module_put_and_exit(0);
+}
+
+void nfs_expire_all_delegations(struct nfs4_client *clp)
+{
+	struct task_struct *task;
+
+	__module_get(THIS_MODULE);
+	atomic_inc(&clp->cl_count);
+	task = kthread_run(nfs_do_expire_all_delegations, clp,
+			"%u.%u.%u.%u-delegreturn",
+			NIPQUAD(clp->cl_addr));
+	if (!IS_ERR(task))
+		return;
+	nfs4_put_client(clp);
+	module_put(THIS_MODULE);
+}
+
 /*
  * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
  */
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2fcc30de924b..7a0b2bfce771 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -21,6 +21,7 @@ struct nfs_delegation {
 #define NFS_DELEGATION_NEED_RECLAIM 1
 	long flags;
 	loff_t maxsize;
+	__u64 change_attr;
 };
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
@@ -30,6 +31,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
 
 struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
 void nfs_return_all_delegations(struct super_block *sb);
+void nfs_expire_all_delegations(struct nfs4_client *clp);
 void nfs_handle_cb_pathdown(struct nfs4_client *clp);
 
 void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c0d1a214572c..a1554bead692 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -194,7 +194,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	spin_unlock(&inode->i_lock);
 	/* Ensure consistent page alignment of the data.
 	 * Note: assumes we have exclusive access to this mapping either
-	 *	 through inode->i_sem or some other mechanism.
+	 *	 through inode->i_mutex or some other mechanism.
 	 */
 	if (page->index == 0)
 		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
@@ -573,7 +573,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
 {
-	down(&filp->f_dentry->d_inode->i_sem);
+	mutex_lock(&filp->f_dentry->d_inode->i_mutex);
 	switch (origin) {
 		case 1:
 			offset += filp->f_pos;
@@ -589,7 +589,7 @@ loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
 		((struct nfs_open_context *)filp->private_data)->dir_cookie = 0;
 	}
 out:
-	up(&filp->f_dentry->d_inode->i_sem);
+	mutex_unlock(&filp->f_dentry->d_inode->i_mutex);
 	return offset;
 }
 
@@ -1001,7 +1001,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
 	openflags &= ~(O_CREAT|O_TRUNC);
 
 	/*
-	 * Note: we're not holding inode->i_sem and so may be racing with
+	 * Note: we're not holding inode->i_mutex and so may be racing with
 	 * operations that change the directory. We therefore save the
 	 * change attribute *before* we do the RPC call.
 	 */
@@ -1051,7 +1051,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 		return dentry;
 	if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
 		return NULL;
-	/* Note: caller is already holding the dir->i_sem! */
+	/* Note: caller is already holding the dir->i_mutex! */
 	dentry = d_alloc(parent, &name);
 	if (dentry == NULL)
 		return NULL;
@@ -1550,8 +1550,10 @@ go_ahead:
 	}
 	nfs_inode_return_delegation(old_inode);
 
-	if (new_inode)
+	if (new_inode != NULL) {
+		nfs_inode_return_delegation(new_inode);
 		d_delete(new_dentry);
+	}
 
 	nfs_begin_data_update(old_dir);
 	nfs_begin_data_update(new_dir);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 079228817603..10ae377e68ff 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -122,9 +122,10 @@ nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
 {
 	int i;
 	for (i = 0; i < npages; i++) {
-		if (do_dirty)
-			set_page_dirty_lock(pages[i]);
-		page_cache_release(pages[i]);
+		struct page *page = pages[i];
+		if (do_dirty && !PageCompound(page))
+			set_page_dirty_lock(page);
+		page_cache_release(page);
 	}
 	kfree(pages);
 }
@@ -154,6 +155,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
 	struct list_head *list;
 	struct nfs_direct_req *dreq;
 	unsigned int reads = 0;
+	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
 	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
 	if (!dreq)
@@ -167,7 +169,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
 
 	list = &dreq->list;
 	for(;;) {
-		struct nfs_read_data *data = nfs_readdata_alloc();
+		struct nfs_read_data *data = nfs_readdata_alloc(rpages);
 
 		if (unlikely(!data)) {
 			while (!list_empty(list)) {
@@ -268,8 +270,6 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
 		NFS_PROTO(inode)->read_setup(data);
 
 		data->task.tk_cookie = (unsigned long) inode;
-		data->task.tk_calldata = data;
-		data->task.tk_release = nfs_readdata_release;
 		data->complete = nfs_direct_read_result;
 
 		lock_kernel();
@@ -433,7 +433,7 @@ static ssize_t nfs_direct_write_seg(struct inode *inode,
 	struct nfs_writeverf first_verf;
 	struct nfs_write_data *wdata;
 
-	wdata = nfs_writedata_alloc();
+	wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
 	if (!wdata)
 		return -ENOMEM;
 
@@ -662,10 +662,10 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
 		.iov_len = count,
 	};
 
-	dprintk("nfs: direct read(%s/%s, %lu@%lu)\n",
+	dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name,
-		(unsigned long) count, (unsigned long) pos);
+		(unsigned long) count, (long long) pos);
 
 	if (!is_sync_kiocb(iocb))
 		goto out;
@@ -718,9 +718,7 @@ out:
 ssize_t
 nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
 {
-	ssize_t retval = -EINVAL;
-	loff_t *ppos = &iocb->ki_pos;
-	unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	ssize_t retval;
 	struct file *file = iocb->ki_filp;
 	struct nfs_open_context *ctx =
 			(struct nfs_open_context *) file->private_data;
@@ -728,35 +726,32 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
 	struct inode *inode = mapping->host;
 	struct iovec iov = {
 		.iov_base = (char __user *)buf,
-		.iov_len = count,
 	};
 
-	dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n",
+	dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
-		file->f_dentry->d_name.name, inode->i_ino,
-		(unsigned long) count, (unsigned long) pos);
+		file->f_dentry->d_name.name,
+		(unsigned long) count, (long long) pos);
 
+	retval = -EINVAL;
 	if (!is_sync_kiocb(iocb))
 		goto out;
-	if (count < 0)
-		goto out;
-        if (pos < 0)
+
+	retval = generic_write_checks(file, &pos, &count, 0);
+	if (retval)
 		goto out;
-	retval = -EFAULT;
-	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+
+	retval = -EINVAL;
+	if ((ssize_t) count < 0)
 		goto out;
-	retval = -EFBIG;
-	if (limit != RLIM_INFINITY) {
-		if (pos >= limit) {
-			send_sig(SIGXFSZ, current, 0);
-			goto out;
-		}
-		if (count > limit - (unsigned long) pos)
-			count = limit - (unsigned long) pos;
-	}
 	retval = 0;
 	if (!count)
 		goto out;
+	iov.iov_len = count,
+
+	retval = -EFAULT;
+	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+		goto out;
 
 	retval = nfs_sync_mapping(mapping);
 	if (retval)
@@ -766,7 +761,7 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
 	if (mapping->nrpages)
 		invalidate_inode_pages2(mapping);
 	if (retval > 0)
-		*ppos = pos + retval;
+		iocb->ki_pos = pos + retval;
 
 out:
 	return retval;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index ffb8df91dc34..821edd30333b 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -54,7 +54,11 @@
 
 #define IDMAP_HASH_SZ          128
 
+/* Default cache timeout is 10 minutes */
+unsigned int nfs_idmap_cache_timeout = 600 * HZ;
+
 struct idmap_hashent {
+	unsigned long ih_expires;
 	__u32 ih_id;
 	int ih_namelen;
 	char ih_name[IDMAP_NAMESZ];
@@ -149,6 +153,8 @@ idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
 
 	if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0)
 		return NULL;
+	if (time_after(jiffies, he->ih_expires))
+		return NULL;
 	return he;
 }
 
@@ -164,6 +170,8 @@ idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
 	struct idmap_hashent *he = idmap_id_hash(h, id);
 	if (he->ih_id != id || he->ih_namelen == 0)
 		return NULL;
+	if (time_after(jiffies, he->ih_expires))
+		return NULL;
 	return he;
 }
 
@@ -192,6 +200,7 @@ idmap_update_entry(struct idmap_hashent *he, const char *name,
 	memcpy(he->ih_name, name, namelen);
 	he->ih_name[namelen] = '\0';
 	he->ih_namelen = namelen;
+	he->ih_expires = jiffies + nfs_idmap_cache_timeout;
 }
 
 /*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 432f41cd75e6..a77ee95b7efb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -40,6 +40,7 @@
 #include <asm/uaccess.h>
 
 #include "nfs4_fs.h"
+#include "callback.h"
 #include "delegation.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
@@ -221,10 +222,10 @@ nfs_calc_block_size(u64 tsize)
 static inline unsigned long
 nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
 {
-	if (bsize < 1024)
-		bsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
-	else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
-		bsize = NFS_MAX_FILE_IO_BUFFER_SIZE;
+	if (bsize < NFS_MIN_FILE_IO_SIZE)
+		bsize = NFS_DEF_FILE_IO_SIZE;
+	else if (bsize >= NFS_MAX_FILE_IO_SIZE)
+		bsize = NFS_MAX_FILE_IO_SIZE;
 
 	return nfs_block_bits(bsize, nrbitsp);
 }
@@ -307,20 +308,15 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
 	if (server->rsize > max_rpc_payload)
 		server->rsize = max_rpc_payload;
-	if (server->wsize > max_rpc_payload)
-		server->wsize = max_rpc_payload;
-
+	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
+		server->rsize = NFS_MAX_FILE_IO_SIZE;
 	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (server->rpages > NFS_READ_MAXIOV) {
-		server->rpages = NFS_READ_MAXIOV;
-		server->rsize = server->rpages << PAGE_CACHE_SHIFT;
-	}
 
+	if (server->wsize > max_rpc_payload)
+		server->wsize = max_rpc_payload;
+	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+		server->wsize = NFS_MAX_FILE_IO_SIZE;
 	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-        if (server->wpages > NFS_WRITE_MAXIOV) {
-		server->wpages = NFS_WRITE_MAXIOV;
-                server->wsize = server->wpages << PAGE_CACHE_SHIFT;
-	}
 
 	if (sb->s_blocksize == 0)
 		sb->s_blocksize = nfs_block_bits(server->wsize,
@@ -417,7 +413,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 
 	clnt->cl_intr     = 1;
 	clnt->cl_softrtry = 1;
-	clnt->cl_chatty   = 1;
 
 	return clnt;
 
@@ -575,11 +570,10 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
 	buf->f_namelen = server->namelen;
  out:
 	unlock_kernel();
-
 	return 0;
 
  out_err:
-	printk(KERN_WARNING "nfs_statfs: statfs error = %d\n", -error);
+	dprintk("%s: statfs error = %d\n", __FUNCTION__, -error);
 	buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1;
 	goto out;
 
@@ -650,10 +644,7 @@ int nfs_sync_mapping(struct address_space *mapping)
 	if (mapping->nrpages == 0)
 		return 0;
 	unmap_mapping_range(mapping, 0, 0, 0);
-	ret = filemap_fdatawrite(mapping);
-	if (ret != 0)
-		goto out;
-	ret = filemap_fdatawait(mapping);
+	ret = filemap_write_and_wait(mapping);
 	if (ret != 0)
 		goto out;
 	ret = nfs_wb_all(mapping->host);
@@ -870,8 +861,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 	nfs_begin_data_update(inode);
 	/* Write all dirty data if we're changing file permissions or size */
 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
-		if (filemap_fdatawrite(inode->i_mapping) == 0)
-			filemap_fdatawait(inode->i_mapping);
+		filemap_write_and_wait(inode->i_mapping);
 		nfs_wb_all(inode);
 	}
 	/*
@@ -958,11 +948,22 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
 	int err;
 
-	if (__IS_FLG(inode, MS_NOATIME))
-		need_atime = 0;
-	else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode))
+	/* Flush out writes to the server in order to update c/mtime */
+	nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
+
+	/*
+	 * We may force a getattr if the user cares about atime.
+	 *
+	 * Note that we only have to check the vfsmount flags here:
+	 *  - NFS always sets S_NOATIME by so checking it would give a
+	 *    bogus result
+	 *  - NFS never sets MS_NOATIME or MS_NODIRATIME so there is
+	 *    no point in checking those.
+	 */
+ 	if ((mnt->mnt_flags & MNT_NOATIME) ||
+ 	    ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
 		need_atime = 0;
-	/* We may force a getattr if the user cares about atime */
+
 	if (need_atime)
 		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	else
@@ -1252,6 +1253,33 @@ void nfs_end_data_update(struct inode *inode)
 	atomic_dec(&nfsi->data_updates);
 }
 
+static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
+			&& nfsi->change_attr == fattr->pre_change_attr) {
+		nfsi->change_attr = fattr->change_attr;
+		nfsi->cache_change_attribute = jiffies;
+	}
+
+	/* If we have atomic WCC data, we may update some attributes */
+	if ((fattr->valid & NFS_ATTR_WCC) != 0) {
+		if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
+			memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+			nfsi->cache_change_attribute = jiffies;
+		}
+		if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
+			memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+			nfsi->cache_change_attribute = jiffies;
+		}
+		if (inode->i_size == fattr->pre_size && nfsi->npages == 0) {
+			inode->i_size = fattr->size;
+			nfsi->cache_change_attribute = jiffies;
+		}
+	}
+}
+
 /**
  * nfs_check_inode_attributes - verify consistency of the inode attribute cache
  * @inode - pointer to inode
@@ -1268,22 +1296,20 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	int data_unstable;
 
 
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+		return 0;
+
 	/* Are we in the process of updating data on the server? */
 	data_unstable = nfs_caches_unstable(inode);
 
-	if (fattr->valid & NFS_ATTR_FATTR_V4) {
-		if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
-				&& nfsi->change_attr == fattr->pre_change_attr)
-			nfsi->change_attr = fattr->change_attr;
-		if (nfsi->change_attr != fattr->change_attr) {
-			nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
-			if (!data_unstable)
-				nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
-		}
-	}
+	/* Do atomic weak cache consistency updates */
+	nfs_wcc_update_inode(inode, fattr);
 
-	if ((fattr->valid & NFS_ATTR_FATTR) == 0) {
-		return 0;
+	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
+			nfsi->change_attr != fattr->change_attr) {
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+		if (!data_unstable)
+			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
 	}
 
 	/* Has the inode gone and changed behind our back? */
@@ -1295,14 +1321,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	cur_size = i_size_read(inode);
  	new_isize = nfs_size_to_loff_t(fattr->size);
 
-	/* If we have atomic WCC data, we may update some attributes */
-	if ((fattr->valid & NFS_ATTR_WCC) != 0) {
-		if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
-			memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
-		if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime))
-			memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-	}
-
 	/* Verify a few of the more important attributes */
 	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
@@ -1410,14 +1428,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
 		return 0;
 
-	if (nfsi->fileid != fattr->fileid) {
-		printk(KERN_ERR "%s: inode number mismatch\n"
-		       "expected (%s/0x%Lx), got (%s/0x%Lx)\n",
-		       __FUNCTION__,
-		       inode->i_sb->s_id, (long long)nfsi->fileid,
-		       inode->i_sb->s_id, (long long)fattr->fileid);
-		goto out_err;
-	}
+	if (nfsi->fileid != fattr->fileid)
+		goto out_fileid;
 
 	/*
 	 * Make sure the inode's type hasn't changed.
@@ -1436,6 +1448,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 	if (data_stable)
 		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
 
+	/* Do atomic weak cache consistency updates */
+	nfs_wcc_update_inode(inode, fattr);
+
 	/* Check if our cached file size is stale */
  	new_isize = nfs_size_to_loff_t(fattr->size);
 	cur_isize = i_size_read(inode);
@@ -1539,6 +1554,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 	 */
 	nfs_invalidate_inode(inode);
 	return -ESTALE;
+
+ out_fileid:
+	printk(KERN_ERR "NFS: server %s error: fileid changed\n"
+		"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
+		NFS_SERVER(inode)->hostname, inode->i_sb->s_id,
+		(long long)nfsi->fileid, (long long)fattr->fileid);
+	goto out_err;
 }
 
 /*
@@ -1820,25 +1842,10 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 		}
 		clnt->cl_intr     = 1;
 		clnt->cl_softrtry = 1;
-		clnt->cl_chatty   = 1;
 		clp->cl_rpcclient = clnt;
-		clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0);
-		if (IS_ERR(clp->cl_cred)) {
-			up_write(&clp->cl_sem);
-			err = PTR_ERR(clp->cl_cred);
-			clp->cl_cred = NULL;
-			goto out_fail;
-		}
 		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
 		nfs_idmap_new(clp);
 	}
-	if (list_empty(&clp->cl_superblocks)) {
-		err = nfs4_init_client(clp);
-		if (err != 0) {
-			up_write(&clp->cl_sem);
-			goto out_fail;
-		}
-	}
 	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
 	clnt = rpc_clone_client(clp->cl_rpcclient);
 	if (!IS_ERR(clnt))
@@ -2033,6 +2040,35 @@ static struct file_system_type nfs4_fs_type = {
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
+static const int nfs_set_port_min = 0;
+static const int nfs_set_port_max = 65535;
+static int param_set_port(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
+		return -EINVAL;
+	*((int *)kp->arg) = num;
+	return 0;
+}
+
+module_param_call(callback_tcpport, param_set_port, param_get_int,
+		 &nfs_callback_set_tcpport, 0644);
+
+static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	int jif = num * HZ;
+	if (endp == val || *endp || num < 0 || jif < num)
+		return -EINVAL;
+	*((int *)kp->arg) = jif;
+	return 0;
+}
+
+module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
+		 &nfs_idmap_cache_timeout, 0644);
+
 #define nfs4_init_once(nfsi) \
 	do { \
 		INIT_LIST_HEAD(&(nfsi)->open_states); \
@@ -2040,8 +2076,25 @@ static struct file_system_type nfs4_fs_type = {
 		nfsi->delegation_state = 0; \
 		init_rwsem(&nfsi->rwsem); \
 	} while(0)
-#define register_nfs4fs() register_filesystem(&nfs4_fs_type)
-#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type)
+
+static inline int register_nfs4fs(void)
+{
+	int ret;
+
+	ret = nfs_register_sysctl();
+	if (ret != 0)
+		return ret;
+	ret = register_filesystem(&nfs4_fs_type);
+	if (ret != 0)
+		nfs_unregister_sysctl();
+	return ret;
+}
+
+static inline void unregister_nfs4fs(void)
+{
+	unregister_filesystem(&nfs4_fs_type);
+	nfs_unregister_sysctl();
+}
 #else
 #define nfs4_init_once(nfsi) \
 	do { } while (0)
@@ -2166,11 +2219,11 @@ out:
 #ifdef CONFIG_PROC_FS
 	rpc_proc_unregister("nfs");
 #endif
-	nfs_destroy_writepagecache();
 #ifdef CONFIG_NFS_DIRECTIO
-out0:
 	nfs_destroy_directcache();
+out0:
 #endif
+	nfs_destroy_writepagecache();
 out1:
 	nfs_destroy_readpagecache();
 out2:
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0e82617f2de0..db99b8f678f8 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -82,7 +82,6 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
 				RPC_AUTH_UNIX);
 	if (!IS_ERR(clnt)) {
 		clnt->cl_softrtry = 1;
-		clnt->cl_chatty   = 1;
 		clnt->cl_oneshot  = 1;
 		clnt->cl_intr = 1;
 	}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 59049e864ca7..7fc0560c89c9 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -146,23 +146,23 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
 	return p;
 }
 
-#define SATTR(p, attr, flag, field) \
-        *p++ = (attr->ia_valid & flag) ? htonl(attr->field) : ~(u32) 0
 static inline u32 *
 xdr_encode_sattr(u32 *p, struct iattr *attr)
 {
-	SATTR(p, attr, ATTR_MODE, ia_mode);
-	SATTR(p, attr, ATTR_UID, ia_uid);
-	SATTR(p, attr, ATTR_GID, ia_gid);
-	SATTR(p, attr, ATTR_SIZE, ia_size);
+	const u32 not_set = __constant_htonl(0xFFFFFFFF);
+
+	*p++ = (attr->ia_valid & ATTR_MODE) ? htonl(attr->ia_mode) : not_set;
+	*p++ = (attr->ia_valid & ATTR_UID) ? htonl(attr->ia_uid) : not_set;
+	*p++ = (attr->ia_valid & ATTR_GID) ? htonl(attr->ia_gid) : not_set;
+	*p++ = (attr->ia_valid & ATTR_SIZE) ? htonl(attr->ia_size) : not_set;
 
 	if (attr->ia_valid & ATTR_ATIME_SET) {
 		p = xdr_encode_time(p, &attr->ia_atime);
 	} else if (attr->ia_valid & ATTR_ATIME) {
 		p = xdr_encode_current_server_time(p, &attr->ia_atime);
 	} else {
-		*p++ = ~(u32) 0;
-		*p++ = ~(u32) 0;
+		*p++ = not_set;
+		*p++ = not_set;
 	}
 
 	if (attr->ia_valid & ATTR_MTIME_SET) {
@@ -170,12 +170,11 @@ xdr_encode_sattr(u32 *p, struct iattr *attr)
 	} else if (attr->ia_valid & ATTR_MTIME) {
 		p = xdr_encode_current_server_time(p, &attr->ia_mtime);
 	} else {
-		*p++ = ~(u32) 0;	
-		*p++ = ~(u32) 0;
+		*p++ = not_set;	
+		*p++ = not_set;
 	}
   	return p;
 }
-#undef SATTR
 
 /*
  * NFS encode functions
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 92c870d19ccd..ed67567f0556 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -68,27 +68,39 @@ nfs3_async_handle_jukebox(struct rpc_task *task)
 	return 1;
 }
 
-/*
- * Bare-bones access to getattr: this is for nfs_read_super.
- */
 static int
-nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
-		   struct nfs_fsinfo *info)
+do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
+		 struct nfs_fsinfo *info)
 {
 	int	status;
 
 	dprintk("%s: call  fsinfo\n", __FUNCTION__);
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+	status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0);
 	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
 	if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
-		status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+		status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
 		dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	}
 	return status;
 }
 
 /*
+ * Bare-bones access to getattr: this is for nfs_read_super.
+ */
+static int
+nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_fsinfo *info)
+{
+	int	status;
+
+	status = do_proc_get_root(server->client, fhandle, info);
+	if (status && server->client_sys != server->client)
+		status = do_proc_get_root(server->client_sys, fhandle, info);
+	return status;
+}
+
+/*
  * One function for each procedure in the NFS protocol.
  */
 static int
@@ -732,19 +744,23 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void
-nfs3_read_done(struct rpc_task *task)
+static void nfs3_read_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+	struct nfs_read_data *data = calldata;
 
 	if (nfs3_async_handle_jukebox(task))
 		return;
 	/* Call back common NFS readpage processing */
 	if (task->tk_status >= 0)
 		nfs_refresh_inode(data->inode, &data->fattr);
-	nfs_readpage_result(task);
+	nfs_readpage_result(task, calldata);
 }
 
+static const struct rpc_call_ops nfs3_read_ops = {
+	.rpc_call_done = nfs3_read_done,
+	.rpc_release = nfs_readdata_release,
+};
+
 static void
 nfs3_proc_read_setup(struct nfs_read_data *data)
 {
@@ -762,23 +778,26 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
 	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void
-nfs3_write_done(struct rpc_task *task)
+static void nfs3_write_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data;
+	struct nfs_write_data *data = calldata;
 
 	if (nfs3_async_handle_jukebox(task))
 		return;
-	data = (struct nfs_write_data *)task->tk_calldata;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_writeback_done(task);
+	nfs_writeback_done(task, calldata);
 }
 
+static const struct rpc_call_ops nfs3_write_ops = {
+	.rpc_call_done = nfs3_write_done,
+	.rpc_release = nfs_writedata_release,
+};
+
 static void
 nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 {
@@ -806,23 +825,26 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void
-nfs3_commit_done(struct rpc_task *task)
+static void nfs3_commit_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data;
+	struct nfs_write_data *data = calldata;
 
 	if (nfs3_async_handle_jukebox(task))
 		return;
-	data = (struct nfs_write_data *)task->tk_calldata;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_commit_done(task);
+	nfs_commit_done(task, calldata);
 }
 
+static const struct rpc_call_ops nfs3_commit_ops = {
+	.rpc_call_done = nfs3_commit_done,
+	.rpc_release = nfs_commit_release,
+};
+
 static void
 nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 {
@@ -840,7 +862,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 0498bd36602c..b6c0b5012bce 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -182,7 +182,7 @@ xdr_encode_sattr(u32 *p, struct iattr *attr)
 {
 	if (attr->ia_valid & ATTR_MODE) {
 		*p++ = xdr_one;
-		*p++ = htonl(attr->ia_mode);
+		*p++ = htonl(attr->ia_mode & S_IALLUGO);
 	} else {
 		*p++ = xdr_zero;
 	}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index b7f262dcb6e3..0f5e4e7cddec 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -38,7 +38,8 @@ struct idmap;
  ((err) != NFSERR_NOFILEHANDLE))
 
 enum nfs4_client_state {
-	NFS4CLNT_OK  = 0,
+	NFS4CLNT_STATE_RECOVER  = 0,
+	NFS4CLNT_LEASE_EXPIRED,
 };
 
 /*
@@ -67,7 +68,6 @@ struct nfs4_client {
 	atomic_t		cl_count;
 
 	struct rpc_clnt *	cl_rpcclient;
-	struct rpc_cred *	cl_cred;
 
 	struct list_head	cl_superblocks;	/* List of nfs_server structs */
 
@@ -76,7 +76,6 @@ struct nfs4_client {
 	struct work_struct	cl_renewd;
 	struct work_struct	cl_recoverd;
 
-	wait_queue_head_t	cl_waitq;
 	struct rpc_wait_queue	cl_rpcwaitq;
 
 	/* used for the setclientid verifier */
@@ -182,8 +181,9 @@ struct nfs4_state {
 
 	nfs4_stateid stateid;
 
-	unsigned int nreaders;
-	unsigned int nwriters;
+	unsigned int n_rdonly;
+	unsigned int n_wronly;
+	unsigned int n_rdwr;
 	int state;			/* State on the server (R,W, or RW) */
 	atomic_t count;
 };
@@ -210,10 +210,10 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
 
 /* nfs4proc.c */
 extern int nfs4_map_errors(int err);
-extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
-extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
-extern int nfs4_proc_async_renew(struct nfs4_client *);
-extern int nfs4_proc_renew(struct nfs4_client *);
+extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *);
+extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *);
+extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *);
+extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -237,8 +237,8 @@ extern void init_nfsv4_state(struct nfs_server *);
 extern void destroy_nfsv4_state(struct nfs_server *);
 extern struct nfs4_client *nfs4_get_client(struct in_addr *);
 extern void nfs4_put_client(struct nfs4_client *clp);
-extern int nfs4_init_client(struct nfs4_client *clp);
 extern struct nfs4_client *nfs4_find_client(struct in_addr *);
+struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp);
 extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f988a9417b13..984ca3454d04 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -57,11 +57,13 @@
 #define NFS4_POLL_RETRY_MIN	(1*HZ)
 #define NFS4_POLL_RETRY_MAX	(15*HZ)
 
-static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid);
+struct nfs4_opendata;
+static int _nfs4_proc_open(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
 static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp);
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 
@@ -173,8 +175,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 	kunmap_atomic(start, KM_USER0);
 }
 
-static void
-renew_lease(struct nfs_server *server, unsigned long timestamp)
+static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
 {
 	struct nfs4_client *clp = server->nfs4_state;
 	spin_lock(&clp->cl_lock);
@@ -194,21 +195,123 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf
 	spin_unlock(&inode->i_lock);
 }
 
+struct nfs4_opendata {
+	atomic_t count;
+	struct nfs_openargs o_arg;
+	struct nfs_openres o_res;
+	struct nfs_open_confirmargs c_arg;
+	struct nfs_open_confirmres c_res;
+	struct nfs_fattr f_attr;
+	struct nfs_fattr dir_attr;
+	struct dentry *dentry;
+	struct dentry *dir;
+	struct nfs4_state_owner *owner;
+	struct iattr attrs;
+	unsigned long timestamp;
+	int rpc_status;
+	int cancelled;
+};
+
+static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
+		struct nfs4_state_owner *sp, int flags,
+		const struct iattr *attrs)
+{
+	struct dentry *parent = dget_parent(dentry);
+	struct inode *dir = parent->d_inode;
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs4_opendata *p;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		goto err;
+	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	if (p->o_arg.seqid == NULL)
+		goto err_free;
+	atomic_set(&p->count, 1);
+	p->dentry = dget(dentry);
+	p->dir = parent;
+	p->owner = sp;
+	atomic_inc(&sp->so_count);
+	p->o_arg.fh = NFS_FH(dir);
+	p->o_arg.open_flags = flags,
+	p->o_arg.clientid = server->nfs4_state->cl_clientid;
+	p->o_arg.id = sp->so_id;
+	p->o_arg.name = &dentry->d_name;
+	p->o_arg.server = server;
+	p->o_arg.bitmask = server->attr_bitmask;
+	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+	p->o_res.f_attr = &p->f_attr;
+	p->o_res.dir_attr = &p->dir_attr;
+	p->o_res.server = server;
+	nfs_fattr_init(&p->f_attr);
+	nfs_fattr_init(&p->dir_attr);
+	if (flags & O_EXCL) {
+		u32 *s = (u32 *) p->o_arg.u.verifier.data;
+		s[0] = jiffies;
+		s[1] = current->pid;
+	} else if (flags & O_CREAT) {
+		p->o_arg.u.attrs = &p->attrs;
+		memcpy(&p->attrs, attrs, sizeof(p->attrs));
+	}
+	p->c_arg.fh = &p->o_res.fh;
+	p->c_arg.stateid = &p->o_res.stateid;
+	p->c_arg.seqid = p->o_arg.seqid;
+	return p;
+err_free:
+	kfree(p);
+err:
+	dput(parent);
+	return NULL;
+}
+
+static void nfs4_opendata_free(struct nfs4_opendata *p)
+{
+	if (p != NULL && atomic_dec_and_test(&p->count)) {
+		nfs_free_seqid(p->o_arg.seqid);
+		nfs4_put_state_owner(p->owner);
+		dput(p->dir);
+		dput(p->dentry);
+		kfree(p);
+	}
+}
+
 /* Helper for asynchronous RPC calls */
-static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin,
-		rpc_action tk_exit, void *calldata)
+static int nfs4_call_async(struct rpc_clnt *clnt,
+		const struct rpc_call_ops *tk_ops, void *calldata)
 {
 	struct rpc_task *task;
 
-	if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC)))
+	if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
 		return -ENOMEM;
-
-	task->tk_calldata = calldata;
-	task->tk_action = tk_begin;
 	rpc_execute(task);
 	return 0;
 }
 
+static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
+{
+	sigset_t oldset;
+	int ret;
+
+	rpc_clnt_sigmask(task->tk_client, &oldset);
+	ret = rpc_wait_for_completion_task(task);
+	rpc_clnt_sigunmask(task->tk_client, &oldset);
+	return ret;
+}
+
+static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
+{
+	switch (open_flags) {
+		case FMODE_WRITE:
+			state->n_wronly++;
+			break;
+		case FMODE_READ:
+			state->n_rdonly++;
+			break;
+		case FMODE_READ|FMODE_WRITE:
+			state->n_rdwr++;
+	}
+}
+
 static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
 {
 	struct inode *inode = state->inode;
@@ -218,41 +321,134 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid,
 	spin_lock(&state->owner->so_lock);
 	spin_lock(&inode->i_lock);
 	memcpy(&state->stateid, stateid, sizeof(state->stateid));
-	if ((open_flags & FMODE_WRITE))
-		state->nwriters++;
-	if (open_flags & FMODE_READ)
-		state->nreaders++;
+	update_open_stateflags(state, open_flags);
 	nfs4_state_set_mode_locked(state, state->state | open_flags);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&state->owner->so_lock);
 }
 
+static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
+{
+	struct inode *inode;
+	struct nfs4_state *state = NULL;
+
+	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+		goto out;
+	inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
+	if (inode == NULL)
+		goto out;
+	state = nfs4_get_open_state(inode, data->owner);
+	if (state == NULL)
+		goto put_inode;
+	update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags);
+put_inode:
+	iput(inode);
+out:
+	return state;
+}
+
+static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
+{
+	struct nfs_inode *nfsi = NFS_I(state->inode);
+	struct nfs_open_context *ctx;
+
+	spin_lock(&state->inode->i_lock);
+	list_for_each_entry(ctx, &nfsi->open_files, list) {
+		if (ctx->state != state)
+			continue;
+		get_nfs_open_context(ctx);
+		spin_unlock(&state->inode->i_lock);
+		return ctx;
+	}
+	spin_unlock(&state->inode->i_lock);
+	return ERR_PTR(-ENOENT);
+}
+
+static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid)
+{
+	int ret;
+
+	opendata->o_arg.open_flags = openflags;
+	ret = _nfs4_proc_open(opendata);
+	if (ret != 0)
+		return ret; 
+	memcpy(stateid->data, opendata->o_res.stateid.data,
+			sizeof(stateid->data));
+	return 0;
+}
+
+static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
+{
+	nfs4_stateid stateid;
+	struct nfs4_state *newstate;
+	int mode = 0;
+	int delegation = 0;
+	int ret;
+
+	/* memory barrier prior to reading state->n_* */
+	smp_rmb();
+	if (state->n_rdwr != 0) {
+		ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid);
+		if (ret != 0)
+			return ret;
+		mode |= FMODE_READ|FMODE_WRITE;
+		if (opendata->o_res.delegation_type != 0)
+			delegation = opendata->o_res.delegation_type;
+		smp_rmb();
+	}
+	if (state->n_wronly != 0) {
+		ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid);
+		if (ret != 0)
+			return ret;
+		mode |= FMODE_WRITE;
+		if (opendata->o_res.delegation_type != 0)
+			delegation = opendata->o_res.delegation_type;
+		smp_rmb();
+	}
+	if (state->n_rdonly != 0) {
+		ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid);
+		if (ret != 0)
+			return ret;
+		mode |= FMODE_READ;
+	}
+	clear_bit(NFS_DELEGATED_STATE, &state->flags);
+	if (mode == 0)
+		return 0;
+	if (opendata->o_res.delegation_type == 0)
+		opendata->o_res.delegation_type = delegation;
+	opendata->o_arg.open_flags |= mode;
+	newstate = nfs4_opendata_to_nfs4_state(opendata);
+	if (newstate != NULL) {
+		if (opendata->o_res.delegation_type != 0) {
+			struct nfs_inode *nfsi = NFS_I(newstate->inode);
+			int delegation_flags = 0;
+			if (nfsi->delegation)
+				delegation_flags = nfsi->delegation->flags;
+			if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
+				nfs_inode_set_delegation(newstate->inode,
+						opendata->owner->so_cred,
+						&opendata->o_res);
+			else
+				nfs_inode_reclaim_delegation(newstate->inode,
+						opendata->owner->so_cred,
+						&opendata->o_res);
+		}
+		nfs4_close_state(newstate, opendata->o_arg.open_flags);
+	}
+	if (newstate != state)
+		return -ESTALE;
+	return 0;
+}
+
 /*
  * OPEN_RECLAIM:
  * 	reclaim state on the server after a reboot.
  */
-static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
+static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
 {
-	struct inode *inode = state->inode;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
-	struct nfs_openargs o_arg = {
-		.fh = NFS_FH(inode),
-		.id = sp->so_id,
-		.open_flags = state->state,
-		.clientid = server->nfs4_state->cl_clientid,
-		.claim = NFS4_OPEN_CLAIM_PREVIOUS,
-		.bitmask = server->attr_bitmask,
-	};
-	struct nfs_openres o_res = {
-		.server = server,	/* Grrr */
-	};
-	struct rpc_message msg = {
-		.rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR],
-		.rpc_argp       = &o_arg,
-		.rpc_resp	= &o_res,
-		.rpc_cred	= sp->so_cred,
-	};
+	struct nfs_delegation *delegation = NFS_I(state->inode)->delegation;
+	struct nfs4_opendata *opendata;
+	int delegation_type = 0;
 	int status;
 
 	if (delegation != NULL) {
@@ -262,38 +458,27 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
 			set_bit(NFS_DELEGATED_STATE, &state->flags);
 			return 0;
 		}
-		o_arg.u.delegation_type = delegation->type;
+		delegation_type = delegation->type;
 	}
-	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
-	if (o_arg.seqid == NULL)
+	opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
+	if (opendata == NULL)
 		return -ENOMEM;
-	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	/* Confirm the sequence as being established */
-	nfs_confirm_seqid(&sp->so_seqid, status);
-	nfs_increment_open_seqid(status, o_arg.seqid);
-	if (status == 0) {
-		memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
-		if (o_res.delegation_type != 0) {
-			nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
-			/* Did the server issue an immediate delegation recall? */
-			if (o_res.do_recall)
-				nfs_async_inode_return_delegation(inode, &o_res.stateid);
-		}
-	}
-	nfs_free_seqid(o_arg.seqid);
-	clear_bit(NFS_DELEGATED_STATE, &state->flags);
-	/* Ensure we update the inode attributes */
-	NFS_CACHEINV(inode);
+	opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
+	opendata->o_arg.fh = NFS_FH(state->inode);
+	nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
+	opendata->o_arg.u.delegation_type = delegation_type;
+	status = nfs4_open_recover(opendata, state);
+	nfs4_opendata_free(opendata);
 	return status;
 }
 
-static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
+static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
 {
 	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = _nfs4_open_reclaim(sp, state);
+		err = _nfs4_do_open_reclaim(sp, state, dentry);
 		if (err != -NFS4ERR_DELAY)
 			break;
 		nfs4_handle_exception(server, err, &exception);
@@ -301,63 +486,36 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	return err;
 }
 
+static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+	struct nfs_open_context *ctx;
+	int ret;
+
+	ctx = nfs4_state_find_open_context(state);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+	ret = nfs4_do_open_reclaim(sp, state, ctx->dentry);
+	put_nfs_open_context(ctx);
+	return ret;
+}
+
 static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 {
 	struct nfs4_state_owner  *sp  = state->owner;
-	struct inode *inode = dentry->d_inode;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct dentry *parent = dget_parent(dentry);
-	struct nfs_openargs arg = {
-		.fh = NFS_FH(parent->d_inode),
-		.clientid = server->nfs4_state->cl_clientid,
-		.name = &dentry->d_name,
-		.id = sp->so_id,
-		.server = server,
-		.bitmask = server->attr_bitmask,
-		.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR,
-	};
-	struct nfs_openres res = {
-		.server = server,
-	};
-	struct 	rpc_message msg = {
-		.rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR],
-		.rpc_argp       = &arg,
-		.rpc_resp       = &res,
-		.rpc_cred	= sp->so_cred,
-	};
-	int status = 0;
+	struct nfs4_opendata *opendata;
+	int ret;
 
 	if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
-		goto out;
-	if (state->state == 0)
-		goto out;
-	arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
-	status = -ENOMEM;
-	if (arg.seqid == NULL)
-		goto out;
-	arg.open_flags = state->state;
-	memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data));
-	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs_increment_open_seqid(status, arg.seqid);
-	if (status != 0)
-		goto out_free;
-	if(res.rflags & NFS4_OPEN_RESULT_CONFIRM) {
-		status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode),
-				sp, &res.stateid, arg.seqid);
-		if (status != 0)
-			goto out_free;
-	}
-	nfs_confirm_seqid(&sp->so_seqid, 0);
-	if (status >= 0) {
-		memcpy(state->stateid.data, res.stateid.data,
-				sizeof(state->stateid.data));
-		clear_bit(NFS_DELEGATED_STATE, &state->flags);
-	}
-out_free:
-	nfs_free_seqid(arg.seqid);
-out:
-	dput(parent);
-	return status;
+		return 0;
+	opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
+	if (opendata == NULL)
+		return -ENOMEM;
+	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
+	memcpy(opendata->o_arg.u.delegation.data, state->stateid.data,
+			sizeof(opendata->o_arg.u.delegation.data));
+	ret = nfs4_open_recover(opendata, state);
+	nfs4_opendata_free(opendata);
+	return ret;
 }
 
 int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
@@ -382,82 +540,202 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 	return err;
 }
 
-static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid)
+static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
 {
-	struct nfs_open_confirmargs arg = {
-		.fh             = fh,
-		.seqid          = seqid,
-		.stateid	= *stateid,
-	};
-	struct nfs_open_confirmres res;
-	struct 	rpc_message msg = {
-		.rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
-		.rpc_argp       = &arg,
-		.rpc_resp       = &res,
-		.rpc_cred	= sp->so_cred,
+	struct nfs4_opendata *data = calldata;
+	struct  rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
+		.rpc_argp = &data->c_arg,
+		.rpc_resp = &data->c_res,
+		.rpc_cred = data->owner->so_cred,
 	};
+	data->timestamp = jiffies;
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_opendata *data = calldata;
+
+	data->rpc_status = task->tk_status;
+	if (RPC_ASSASSINATED(task))
+		return;
+	if (data->rpc_status == 0) {
+		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
+				sizeof(data->o_res.stateid.data));
+		renew_lease(data->o_res.server, data->timestamp);
+	}
+	nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
+	nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
+}
+
+static void nfs4_open_confirm_release(void *calldata)
+{
+	struct nfs4_opendata *data = calldata;
+	struct nfs4_state *state = NULL;
+
+	/* If this request hasn't been cancelled, do nothing */
+	if (data->cancelled == 0)
+		goto out_free;
+	/* In case of error, no cleanup! */
+	if (data->rpc_status != 0)
+		goto out_free;
+	nfs_confirm_seqid(&data->owner->so_seqid, 0);
+	state = nfs4_opendata_to_nfs4_state(data);
+	if (state != NULL)
+		nfs4_close_state(state, data->o_arg.open_flags);
+out_free:
+	nfs4_opendata_free(data);
+}
+
+static const struct rpc_call_ops nfs4_open_confirm_ops = {
+	.rpc_call_prepare = nfs4_open_confirm_prepare,
+	.rpc_call_done = nfs4_open_confirm_done,
+	.rpc_release = nfs4_open_confirm_release,
+};
+
+/*
+ * Note: On error, nfs4_proc_open_confirm will free the struct nfs4_opendata
+ */
+static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
+{
+	struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
+	struct rpc_task *task;
 	int status;
 
-	status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR);
-	/* Confirm the sequence as being established */
-	nfs_confirm_seqid(&sp->so_seqid, status);
-	nfs_increment_open_seqid(status, seqid);
-	if (status >= 0)
-		memcpy(stateid, &res.stateid, sizeof(*stateid));
+	atomic_inc(&data->count);
+	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
+	if (IS_ERR(task)) {
+		nfs4_opendata_free(data);
+		return PTR_ERR(task);
+	}
+	status = nfs4_wait_for_completion_rpc_task(task);
+	if (status != 0) {
+		data->cancelled = 1;
+		smp_wmb();
+	} else
+		status = data->rpc_status;
+	rpc_release_task(task);
 	return status;
 }
 
-static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner  *sp, struct nfs_openargs *o_arg, struct nfs_openres *o_res)
+static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 {
-	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs4_opendata *data = calldata;
+	struct nfs4_state_owner *sp = data->owner;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
-		.rpc_argp = o_arg,
-		.rpc_resp = o_res,
+		.rpc_argp = &data->o_arg,
+		.rpc_resp = &data->o_res,
 		.rpc_cred = sp->so_cred,
 	};
-	int status;
+	
+	if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
+		return;
+	/* Update sequence id. */
+	data->o_arg.id = sp->so_id;
+	data->o_arg.clientid = sp->so_client->cl_clientid;
+	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
+		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+	data->timestamp = jiffies;
+	rpc_call_setup(task, &msg, 0);
+}
 
-	/* Update sequence id. The caller must serialize! */
-	o_arg->id = sp->so_id;
-	o_arg->clientid = sp->so_client->cl_clientid;
+static void nfs4_open_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_opendata *data = calldata;
 
-	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	if (status == 0) {
-		/* OPEN on anything except a regular file is disallowed in NFSv4 */
-		switch (o_res->f_attr->mode & S_IFMT) {
+	data->rpc_status = task->tk_status;
+	if (RPC_ASSASSINATED(task))
+		return;
+	if (task->tk_status == 0) {
+		switch (data->o_res.f_attr->mode & S_IFMT) {
 			case S_IFREG:
 				break;
 			case S_IFLNK:
-				status = -ELOOP;
+				data->rpc_status = -ELOOP;
 				break;
 			case S_IFDIR:
-				status = -EISDIR;
+				data->rpc_status = -EISDIR;
 				break;
 			default:
-				status = -ENOTDIR;
+				data->rpc_status = -ENOTDIR;
 		}
+		renew_lease(data->o_res.server, data->timestamp);
 	}
+	nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
+}
+
+static void nfs4_open_release(void *calldata)
+{
+	struct nfs4_opendata *data = calldata;
+	struct nfs4_state *state = NULL;
 
-	nfs_increment_open_seqid(status, o_arg->seqid);
+	/* If this request hasn't been cancelled, do nothing */
+	if (data->cancelled == 0)
+		goto out_free;
+	/* In case of error, no cleanup! */
+	if (data->rpc_status != 0)
+		goto out_free;
+	/* In case we need an open_confirm, no cleanup! */
+	if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
+		goto out_free;
+	nfs_confirm_seqid(&data->owner->so_seqid, 0);
+	state = nfs4_opendata_to_nfs4_state(data);
+	if (state != NULL)
+		nfs4_close_state(state, data->o_arg.open_flags);
+out_free:
+	nfs4_opendata_free(data);
+}
+
+static const struct rpc_call_ops nfs4_open_ops = {
+	.rpc_call_prepare = nfs4_open_prepare,
+	.rpc_call_done = nfs4_open_done,
+	.rpc_release = nfs4_open_release,
+};
+
+/*
+ * Note: On error, nfs4_proc_open will free the struct nfs4_opendata
+ */
+static int _nfs4_proc_open(struct nfs4_opendata *data)
+{
+	struct inode *dir = data->dir->d_inode;
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs_openargs *o_arg = &data->o_arg;
+	struct nfs_openres *o_res = &data->o_res;
+	struct rpc_task *task;
+	int status;
+
+	atomic_inc(&data->count);
+	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
+	if (IS_ERR(task)) {
+		nfs4_opendata_free(data);
+		return PTR_ERR(task);
+	}
+	status = nfs4_wait_for_completion_rpc_task(task);
+	if (status != 0) {
+		data->cancelled = 1;
+		smp_wmb();
+	} else
+		status = data->rpc_status;
+	rpc_release_task(task);
 	if (status != 0)
-		goto out;
+		return status;
+
 	if (o_arg->open_flags & O_CREAT) {
 		update_changeattr(dir, &o_res->cinfo);
 		nfs_post_op_update_inode(dir, o_res->dir_attr);
 	} else
 		nfs_refresh_inode(dir, o_res->dir_attr);
 	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
-		status = _nfs4_proc_open_confirm(server->client, &o_res->fh,
-				sp, &o_res->stateid, o_arg->seqid);
+		status = _nfs4_proc_open_confirm(data);
 		if (status != 0)
-			goto out;
+			return status;
 	}
-	nfs_confirm_seqid(&sp->so_seqid, 0);
+	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
-		status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
-out:
-	return status;
+		return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
+	return 0;
 }
 
 static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags)
@@ -488,6 +766,15 @@ out:
 	return -EACCES;
 }
 
+int nfs4_recover_expired_lease(struct nfs_server *server)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+
+	if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+		nfs4_schedule_state_recovery(clp);
+	return nfs4_wait_clnt_recover(server->client, clp);
+}
+
 /*
  * OPEN_EXPIRED:
  * 	reclaim state on the server after a network partition.
@@ -495,77 +782,31 @@ out:
  */
 static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
 {
-	struct dentry *parent = dget_parent(dentry);
-	struct inode *dir = parent->d_inode;
 	struct inode *inode = state->inode;
-	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
-	struct nfs_fattr f_attr, dir_attr;
-	struct nfs_openargs o_arg = {
-		.fh = NFS_FH(dir),
-		.open_flags = state->state,
-		.name = &dentry->d_name,
-		.bitmask = server->attr_bitmask,
-		.claim = NFS4_OPEN_CLAIM_NULL,
-	};
-	struct nfs_openres o_res = {
-		.f_attr = &f_attr,
-		.dir_attr = &dir_attr,
-		.server = server,
-	};
-	int status = 0;
+	struct nfs4_opendata *opendata;
+	int openflags = state->state & (FMODE_READ|FMODE_WRITE);
+	int ret;
 
 	if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
-		status = _nfs4_do_access(inode, sp->so_cred, state->state);
-		if (status < 0)
-			goto out;
+		ret = _nfs4_do_access(inode, sp->so_cred, openflags);
+		if (ret < 0)
+			return ret;
 		memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
 		set_bit(NFS_DELEGATED_STATE, &state->flags);
-		goto out;
+		return 0;
 	}
-	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
-	status = -ENOMEM;
-	if (o_arg.seqid == NULL)
-		goto out;
-	nfs_fattr_init(&f_attr);
-	nfs_fattr_init(&dir_attr);
-	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
-	if (status != 0)
-		goto out_nodeleg;
-	/* Check if files differ */
-	if ((f_attr.mode & S_IFMT) != (inode->i_mode & S_IFMT))
-		goto out_stale;
-	/* Has the file handle changed? */
-	if (nfs_compare_fh(&o_res.fh, NFS_FH(inode)) != 0) {
-		/* Verify if the change attributes are the same */
-		if (f_attr.change_attr != NFS_I(inode)->change_attr)
-			goto out_stale;
-		if (nfs_size_to_loff_t(f_attr.size) != inode->i_size)
-			goto out_stale;
-		/* Lets just pretend that this is the same file */
-		nfs_copy_fh(NFS_FH(inode), &o_res.fh);
-		NFS_I(inode)->fileid = f_attr.fileid;
-	}
-	memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
-	if (o_res.delegation_type != 0) {
-		if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM))
-			nfs_inode_set_delegation(inode, sp->so_cred, &o_res);
-		else
-			nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
+	opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
+	if (opendata == NULL)
+		return -ENOMEM;
+	ret = nfs4_open_recover(opendata, state);
+	if (ret == -ESTALE) {
+		/* Invalidate the state owner so we don't ever use it again */
+		nfs4_drop_state_owner(sp);
+		d_drop(dentry);
 	}
-out_nodeleg:
-	nfs_free_seqid(o_arg.seqid);
-	clear_bit(NFS_DELEGATED_STATE, &state->flags);
-out:
-	dput(parent);
-	return status;
-out_stale:
-	status = -ESTALE;
-	/* Invalidate the state owner so we don't ever use it again */
-	nfs4_drop_state_owner(sp);
-	d_drop(dentry);
-	/* Should we be trying to close that stateid? */
-	goto out_nodeleg;
+	nfs4_opendata_free(opendata);
+	return ret;
 }
 
 static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
@@ -584,26 +825,19 @@ static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_
 
 static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
 {
-	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_open_context *ctx;
-	int status;
+	int ret;
 
-	spin_lock(&state->inode->i_lock);
-	list_for_each_entry(ctx, &nfsi->open_files, list) {
-		if (ctx->state != state)
-			continue;
-		get_nfs_open_context(ctx);
-		spin_unlock(&state->inode->i_lock);
-		status = nfs4_do_open_expired(sp, state, ctx->dentry);
-		put_nfs_open_context(ctx);
-		return status;
-	}
-	spin_unlock(&state->inode->i_lock);
-	return -ENOENT;
+	ctx = nfs4_state_find_open_context(state);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+	ret = nfs4_do_open_expired(sp, state, ctx->dentry);
+	put_nfs_open_context(ctx);
+	return ret;
 }
 
 /*
- * Returns an nfs4_state + an extra reference to the inode
+ * Returns a referenced nfs4_state if there is an open delegation on the file
  */
 static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res)
 {
@@ -616,6 +850,14 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 	int open_flags = flags & (FMODE_READ|FMODE_WRITE);
 	int err;
 
+	err = -ENOMEM;
+	if (!(sp = nfs4_get_state_owner(server, cred))) {
+		dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
+		return err;
+	}
+	err = nfs4_recover_expired_lease(server);
+	if (err != 0)
+		goto out_put_state_owner;
 	/* Protect against reboot recovery - NOTE ORDER! */
 	down_read(&clp->cl_sem);
 	/* Protect against delegation recall */
@@ -625,10 +867,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 	if (delegation == NULL || (delegation->type & open_flags) != open_flags)
 		goto out_err;
 	err = -ENOMEM;
-	if (!(sp = nfs4_get_state_owner(server, cred))) {
-		dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
-		goto out_err;
-	}
 	state = nfs4_get_open_state(inode, sp);
 	if (state == NULL)
 		goto out_err;
@@ -636,39 +874,34 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 	err = -ENOENT;
 	if ((state->state & open_flags) == open_flags) {
 		spin_lock(&inode->i_lock);
-		if (open_flags & FMODE_READ)
-			state->nreaders++;
-		if (open_flags & FMODE_WRITE)
-			state->nwriters++;
+		update_open_stateflags(state, open_flags);
 		spin_unlock(&inode->i_lock);
 		goto out_ok;
 	} else if (state->state != 0)
-		goto out_err;
+		goto out_put_open_state;
 
 	lock_kernel();
 	err = _nfs4_do_access(inode, cred, open_flags);
 	unlock_kernel();
 	if (err != 0)
-		goto out_err;
+		goto out_put_open_state;
 	set_bit(NFS_DELEGATED_STATE, &state->flags);
 	update_open_stateid(state, &delegation->stateid, open_flags);
 out_ok:
 	nfs4_put_state_owner(sp);
 	up_read(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
-	igrab(inode);
 	*res = state;
-	return 0; 
+	return 0;
+out_put_open_state:
+	nfs4_put_open_state(state);
 out_err:
-	if (sp != NULL) {
-		if (state != NULL)
-			nfs4_put_open_state(state);
-		nfs4_put_state_owner(sp);
-	}
 	up_read(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
 	if (err != -EACCES)
 		nfs_inode_return_delegation(inode);
+out_put_state_owner:
+	nfs4_put_state_owner(sp);
 	return err;
 }
 
@@ -689,7 +922,7 @@ static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, st
 }
 
 /*
- * Returns an nfs4_state + an referenced inode
+ * Returns a referenced nfs4_state
  */
 static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
 {
@@ -697,73 +930,46 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
 	struct nfs4_client *clp = server->nfs4_state;
-	struct inode *inode = NULL;
+	struct nfs4_opendata *opendata;
 	int                     status;
-	struct nfs_fattr f_attr, dir_attr;
-	struct nfs_openargs o_arg = {
-		.fh             = NFS_FH(dir),
-		.open_flags	= flags,
-		.name           = &dentry->d_name,
-		.server         = server,
-		.bitmask = server->attr_bitmask,
-		.claim = NFS4_OPEN_CLAIM_NULL,
-	};
-	struct nfs_openres o_res = {
-		.f_attr         = &f_attr,
-		.dir_attr	= &dir_attr,
-		.server         = server,
-	};
 
 	/* Protect against reboot recovery conflicts */
-	down_read(&clp->cl_sem);
 	status = -ENOMEM;
 	if (!(sp = nfs4_get_state_owner(server, cred))) {
 		dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
 		goto out_err;
 	}
-	if (flags & O_EXCL) {
-		u32 *p = (u32 *) o_arg.u.verifier.data;
-		p[0] = jiffies;
-		p[1] = current->pid;
-	} else
-		o_arg.u.attrs = sattr;
-	/* Serialization for the sequence id */
+	status = nfs4_recover_expired_lease(server);
+	if (status != 0)
+		goto err_put_state_owner;
+	down_read(&clp->cl_sem);
+	status = -ENOMEM;
+	opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
+	if (opendata == NULL)
+		goto err_put_state_owner;
 
-	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
-	if (o_arg.seqid == NULL)
-		return -ENOMEM;
-	nfs_fattr_init(&f_attr);
-	nfs_fattr_init(&dir_attr);
-	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
+	status = _nfs4_proc_open(opendata);
 	if (status != 0)
-		goto out_err;
+		goto err_opendata_free;
 
 	status = -ENOMEM;
-	inode = nfs_fhget(dir->i_sb, &o_res.fh, &f_attr);
-	if (!inode)
-		goto out_err;
-	state = nfs4_get_open_state(inode, sp);
-	if (!state)
-		goto out_err;
-	update_open_stateid(state, &o_res.stateid, flags);
-	if (o_res.delegation_type != 0)
-		nfs_inode_set_delegation(inode, cred, &o_res);
-	nfs_free_seqid(o_arg.seqid);
+	state = nfs4_opendata_to_nfs4_state(opendata);
+	if (state == NULL)
+		goto err_opendata_free;
+	if (opendata->o_res.delegation_type != 0)
+		nfs_inode_set_delegation(state->inode, cred, &opendata->o_res);
+	nfs4_opendata_free(opendata);
 	nfs4_put_state_owner(sp);
 	up_read(&clp->cl_sem);
 	*res = state;
 	return 0;
+err_opendata_free:
+	nfs4_opendata_free(opendata);
+err_put_state_owner:
+	nfs4_put_state_owner(sp);
 out_err:
-	if (sp != NULL) {
-		if (state != NULL)
-			nfs4_put_open_state(state);
-		nfs_free_seqid(o_arg.seqid);
-		nfs4_put_state_owner(sp);
-	}
 	/* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
 	up_read(&clp->cl_sem);
-	if (inode != NULL)
-		iput(inode);
 	*res = NULL;
 	return status;
 }
@@ -830,6 +1036,7 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
                 .rpc_argp       = &arg,
                 .rpc_resp       = &res,
         };
+	unsigned long timestamp = jiffies;
 	int status;
 
 	nfs_fattr_init(fattr);
@@ -841,6 +1048,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
 
 	status = rpc_call_sync(server->client, &msg, 0);
+	if (status == 0 && state != NULL)
+		renew_lease(server, timestamp);
 	return status;
 }
 
@@ -865,12 +1074,13 @@ struct nfs4_closedata {
 	struct nfs_closeargs arg;
 	struct nfs_closeres res;
 	struct nfs_fattr fattr;
+	unsigned long timestamp;
 };
 
-static void nfs4_free_closedata(struct nfs4_closedata *calldata)
+static void nfs4_free_closedata(void *data)
 {
-	struct nfs4_state *state = calldata->state;
-	struct nfs4_state_owner *sp = state->owner;
+	struct nfs4_closedata *calldata = data;
+	struct nfs4_state_owner *sp = calldata->state->owner;
 
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
@@ -878,12 +1088,14 @@ static void nfs4_free_closedata(struct nfs4_closedata *calldata)
 	kfree(calldata);
 }
 
-static void nfs4_close_done(struct rpc_task *task)
+static void nfs4_close_done(struct rpc_task *task, void *data)
 {
-	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
+	struct nfs4_closedata *calldata = data;
 	struct nfs4_state *state = calldata->state;
 	struct nfs_server *server = NFS_SERVER(calldata->inode);
 
+	if (RPC_ASSASSINATED(task))
+		return;
         /* hmm. we are done with the inode, and in the process of freeing
 	 * the state_owner. we keep this around to process errors
 	 */
@@ -892,6 +1104,7 @@ static void nfs4_close_done(struct rpc_task *task)
 		case 0:
 			memcpy(&state->stateid, &calldata->res.stateid,
 					sizeof(state->stateid));
+			renew_lease(server, calldata->timestamp);
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
@@ -904,12 +1117,11 @@ static void nfs4_close_done(struct rpc_task *task)
 			}
 	}
 	nfs_refresh_inode(calldata->inode, calldata->res.fattr);
-	nfs4_free_closedata(calldata);
 }
 
-static void nfs4_close_begin(struct rpc_task *task)
+static void nfs4_close_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
+	struct nfs4_closedata *calldata = data;
 	struct nfs4_state *state = calldata->state;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
@@ -918,10 +1130,8 @@ static void nfs4_close_begin(struct rpc_task *task)
 		.rpc_cred = state->owner->so_cred,
 	};
 	int mode = 0, old_mode;
-	int status;
 
-	status = nfs_wait_on_sequence(calldata->arg.seqid, task);
-	if (status != 0)
+	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		return;
 	/* Recalculate the new open mode in case someone reopened the file
 	 * while we were waiting in line to be scheduled.
@@ -929,26 +1139,34 @@ static void nfs4_close_begin(struct rpc_task *task)
 	spin_lock(&state->owner->so_lock);
 	spin_lock(&calldata->inode->i_lock);
 	mode = old_mode = state->state;
-	if (state->nreaders == 0)
-		mode &= ~FMODE_READ;
-	if (state->nwriters == 0)
-		mode &= ~FMODE_WRITE;
+	if (state->n_rdwr == 0) {
+		if (state->n_rdonly == 0)
+			mode &= ~FMODE_READ;
+		if (state->n_wronly == 0)
+			mode &= ~FMODE_WRITE;
+	}
 	nfs4_state_set_mode_locked(state, mode);
 	spin_unlock(&calldata->inode->i_lock);
 	spin_unlock(&state->owner->so_lock);
 	if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-		nfs4_free_closedata(calldata);
-		task->tk_exit = NULL;
-		rpc_exit(task, 0);
+		/* Note: exit _without_ calling nfs4_close_done */
+		task->tk_action = NULL;
 		return;
 	}
 	nfs_fattr_init(calldata->res.fattr);
 	if (mode != 0)
 		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
 	calldata->arg.open_flags = mode;
+	calldata->timestamp = jiffies;
 	rpc_call_setup(task, &msg, 0);
 }
 
+static const struct rpc_call_ops nfs4_close_ops = {
+	.rpc_call_prepare = nfs4_close_prepare,
+	.rpc_call_done = nfs4_close_done,
+	.rpc_release = nfs4_free_closedata,
+};
+
 /* 
  * It is possible for data to be read/written from a mem-mapped file 
  * after the sys_close call (which hits the vfs layer as a flush).
@@ -981,8 +1199,7 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.server = server;
 
-	status = nfs4_call_async(server->client, nfs4_close_begin,
-			nfs4_close_done, calldata);
+	status = nfs4_call_async(server->client, &nfs4_close_ops, calldata);
 	if (status == 0)
 		goto out;
 
@@ -1034,7 +1251,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 			d_add(dentry, NULL);
 		return (struct dentry *)state;
 	}
-	res = d_add_unique(dentry, state->inode);
+	res = d_add_unique(dentry, igrab(state->inode));
 	if (res != NULL)
 		dentry = res;
 	nfs4_intent_set_file(nd, dentry, state);
@@ -1046,7 +1263,6 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 {
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
-	struct inode *inode;
 
 	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
 	if (IS_ERR(cred))
@@ -1070,9 +1286,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 		}
 		goto out_drop;
 	}
-	inode = state->inode;
-	iput(inode);
-	if (inode == dentry->d_inode) {
+	if (state->inode == dentry->d_inode) {
 		nfs4_intent_set_file(nd, dentry, state);
 		return 1;
 	}
@@ -1508,11 +1722,13 @@ static int _nfs4_proc_write(struct nfs_write_data *wdata)
 
 	wdata->args.bitmask = server->attr_bitmask;
 	wdata->res.server = server;
+	wdata->timestamp = jiffies;
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(server->client, &msg, rpcflags);
 	dprintk("NFS reply write: %d\n", status);
 	if (status < 0)
 		return status;
+	renew_lease(server, wdata->timestamp);
 	nfs_post_op_update_inode(inode, fattr);
 	return wdata->res.count;
 }
@@ -1547,8 +1763,11 @@ static int _nfs4_proc_commit(struct nfs_write_data *cdata)
 
 	cdata->args.bitmask = server->attr_bitmask;
 	cdata->res.server = server;
+	cdata->timestamp = jiffies;
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(server->client, &msg, 0);
+	if (status >= 0)
+		renew_lease(server, cdata->timestamp);
 	dprintk("NFS reply commit: %d\n", status);
 	if (status >= 0)
 		nfs_post_op_update_inode(inode, fattr);
@@ -1601,7 +1820,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = PTR_ERR(state);
 		goto out;
 	}
-	d_instantiate(dentry, state->inode);
+	d_instantiate(dentry, igrab(state->inode));
 	if (flags & O_EXCL) {
 		struct nfs_fattr fattr;
 		status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
@@ -2125,10 +2344,9 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return err;
 }
 
-static void
-nfs4_read_done(struct rpc_task *task)
+static void nfs4_read_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+	struct nfs_read_data *data = calldata;
 	struct inode *inode = data->inode;
 
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
@@ -2138,9 +2356,14 @@ nfs4_read_done(struct rpc_task *task)
 	if (task->tk_status > 0)
 		renew_lease(NFS_SERVER(inode), data->timestamp);
 	/* Call back common NFS readpage processing */
-	nfs_readpage_result(task);
+	nfs_readpage_result(task, calldata);
 }
 
+static const struct rpc_call_ops nfs4_read_ops = {
+	.rpc_call_done = nfs4_read_done,
+	.rpc_release = nfs_readdata_release,
+};
+
 static void
 nfs4_proc_read_setup(struct nfs_read_data *data)
 {
@@ -2160,14 +2383,13 @@ nfs4_proc_read_setup(struct nfs_read_data *data)
 	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void
-nfs4_write_done(struct rpc_task *task)
+static void nfs4_write_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_write_data *data = calldata;
 	struct inode *inode = data->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
@@ -2179,9 +2401,14 @@ nfs4_write_done(struct rpc_task *task)
 		nfs_post_op_update_inode(inode, data->res.fattr);
 	}
 	/* Call back common NFS writeback processing */
-	nfs_writeback_done(task);
+	nfs_writeback_done(task, calldata);
 }
 
+static const struct rpc_call_ops nfs4_write_ops = {
+	.rpc_call_done = nfs4_write_done,
+	.rpc_release = nfs_writedata_release,
+};
+
 static void
 nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 {
@@ -2214,14 +2441,13 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void
-nfs4_commit_done(struct rpc_task *task)
+static void nfs4_commit_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_write_data *data = calldata;
 	struct inode *inode = data->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
@@ -2231,9 +2457,14 @@ nfs4_commit_done(struct rpc_task *task)
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(inode, data->res.fattr);
 	/* Call back common NFS writeback processing */
-	nfs_commit_done(task);
+	nfs_commit_done(task, calldata);
 }
 
+static const struct rpc_call_ops nfs4_commit_ops = {
+	.rpc_call_done = nfs4_commit_done,
+	.rpc_release = nfs_commit_release,
+};
+
 static void
 nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 {
@@ -2255,7 +2486,7 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
 	rpc_call_setup(task, &msg, 0);	
 }
 
@@ -2263,11 +2494,10 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
  * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
  * standalone procedure for queueing an asynchronous RENEW.
  */
-static void
-renew_done(struct rpc_task *task)
+static void nfs4_renew_done(struct rpc_task *task, void *data)
 {
 	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
-	unsigned long timestamp = (unsigned long)task->tk_calldata;
+	unsigned long timestamp = (unsigned long)data;
 
 	if (task->tk_status < 0) {
 		switch (task->tk_status) {
@@ -2284,26 +2514,28 @@ renew_done(struct rpc_task *task)
 	spin_unlock(&clp->cl_lock);
 }
 
-int
-nfs4_proc_async_renew(struct nfs4_client *clp)
+static const struct rpc_call_ops nfs4_renew_ops = {
+	.rpc_call_done = nfs4_renew_done,
+};
+
+int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
 		.rpc_argp	= clp,
-		.rpc_cred	= clp->cl_cred,
+		.rpc_cred	= cred,
 	};
 
 	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
-			renew_done, (void *)jiffies);
+			&nfs4_renew_ops, (void *)jiffies);
 }
 
-int
-nfs4_proc_renew(struct nfs4_client *clp)
+int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
 		.rpc_argp	= clp,
-		.rpc_cred	= clp->cl_cred,
+		.rpc_cred	= cred,
 	};
 	unsigned long now = jiffies;
 	int status;
@@ -2519,7 +2751,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 		case -NFS4ERR_EXPIRED:
 			rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL);
 			nfs4_schedule_state_recovery(clp);
-			if (test_bit(NFS4CLNT_OK, &clp->cl_state))
+			if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0)
 				rpc_wake_up_task(task);
 			task->tk_status = 0;
 			return -EAGAIN;
@@ -2536,25 +2768,25 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 	return 0;
 }
 
+static int nfs4_wait_bit_interruptible(void *word)
+{
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+	schedule();
+	return 0;
+}
+
 static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
 {
-	DEFINE_WAIT(wait);
 	sigset_t oldset;
-	int interruptible, res = 0;
+	int res;
 
 	might_sleep();
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	interruptible = TASK_UNINTERRUPTIBLE;
-	if (clnt->cl_intr)
-		interruptible = TASK_INTERRUPTIBLE;
-	prepare_to_wait(&clp->cl_waitq, &wait, interruptible);
-	nfs4_schedule_state_recovery(clp);
-	if (clnt->cl_intr && signalled())
-		res = -ERESTARTSYS;
-	else if (!test_bit(NFS4CLNT_OK, &clp->cl_state))
-		schedule();
-	finish_wait(&clp->cl_waitq, &wait);
+	res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER,
+			nfs4_wait_bit_interruptible,
+			TASK_INTERRUPTIBLE);
 	rpc_clnt_sigunmask(clnt, &oldset);
 	return res;
 }
@@ -2597,6 +2829,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
 		case -NFS4ERR_STALE_CLIENTID:
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
+			nfs4_schedule_state_recovery(clp);
 			ret = nfs4_wait_clnt_recover(server->client, clp);
 			if (ret == 0)
 				exception->retry = 1;
@@ -2613,7 +2846,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
 	return nfs4_map_errors(ret);
 }
 
-int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port)
+int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
 {
 	nfs4_verifier sc_verifier;
 	struct nfs4_setclientid setclientid = {
@@ -2624,7 +2857,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
 		.rpc_argp = &setclientid,
 		.rpc_resp = clp,
-		.rpc_cred = clp->cl_cred,
+		.rpc_cred = cred,
 	};
 	u32 *p;
 	int loop = 0;
@@ -2638,7 +2871,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
 		setclientid.sc_name_len = scnprintf(setclientid.sc_name,
 				sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
 				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr),
-				clp->cl_cred->cr_ops->cr_name,
+				cred->cr_ops->cr_name,
 				clp->cl_id_uniquifier);
 		setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
 				sizeof(setclientid.sc_netid), "tcp");
@@ -2661,14 +2894,14 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
 }
 
 int
-nfs4_proc_setclientid_confirm(struct nfs4_client *clp)
+nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
 {
 	struct nfs_fsinfo fsinfo;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
 		.rpc_argp = clp,
 		.rpc_resp = &fsinfo,
-		.rpc_cred = clp->cl_cred,
+		.rpc_cred = cred,
 	};
 	unsigned long now;
 	int status;
@@ -2679,24 +2912,92 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp)
 		spin_lock(&clp->cl_lock);
 		clp->cl_lease_time = fsinfo.lease_time * HZ;
 		clp->cl_last_renewal = now;
+		clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 		spin_unlock(&clp->cl_lock);
 	}
 	return status;
 }
 
-static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+struct nfs4_delegreturndata {
+	struct nfs4_delegreturnargs args;
+	struct nfs4_delegreturnres res;
+	struct nfs_fh fh;
+	nfs4_stateid stateid;
+	struct rpc_cred *cred;
+	unsigned long timestamp;
+	struct nfs_fattr fattr;
+	int rpc_status;
+};
+
+static void nfs4_delegreturn_prepare(struct rpc_task *task, void *calldata)
 {
-	struct nfs4_delegreturnargs args = {
-		.fhandle = NFS_FH(inode),
-		.stateid = stateid,
-	};
+	struct nfs4_delegreturndata *data = calldata;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
-		.rpc_argp = &args,
-		.rpc_cred = cred,
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->cred,
 	};
+	nfs_fattr_init(data->res.fattr);
+	rpc_call_setup(task, &msg, 0);
+}
 
-	return rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_delegreturndata *data = calldata;
+	data->rpc_status = task->tk_status;
+	if (data->rpc_status == 0)
+		renew_lease(data->res.server, data->timestamp);
+}
+
+static void nfs4_delegreturn_release(void *calldata)
+{
+	struct nfs4_delegreturndata *data = calldata;
+
+	put_rpccred(data->cred);
+	kfree(calldata);
+}
+
+const static struct rpc_call_ops nfs4_delegreturn_ops = {
+	.rpc_call_prepare = nfs4_delegreturn_prepare,
+	.rpc_call_done = nfs4_delegreturn_done,
+	.rpc_release = nfs4_delegreturn_release,
+};
+
+static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+{
+	struct nfs4_delegreturndata *data;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct rpc_task *task;
+	int status;
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+	data->args.fhandle = &data->fh;
+	data->args.stateid = &data->stateid;
+	data->args.bitmask = server->attr_bitmask;
+	nfs_copy_fh(&data->fh, NFS_FH(inode));
+	memcpy(&data->stateid, stateid, sizeof(data->stateid));
+	data->res.fattr = &data->fattr;
+	data->res.server = server;
+	data->cred = get_rpccred(cred);
+	data->timestamp = jiffies;
+	data->rpc_status = 0;
+
+	task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
+	if (IS_ERR(task)) {
+		nfs4_delegreturn_release(data);
+		return PTR_ERR(task);
+	}
+	status = nfs4_wait_for_completion_rpc_task(task);
+	if (status == 0) {
+		status = data->rpc_status;
+		if (status == 0)
+			nfs_post_op_update_inode(inode, &data->fattr);
+	}
+	rpc_release_task(task);
+	return status;
 }
 
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
@@ -2734,43 +3035,17 @@ nfs4_set_lock_task_retry(unsigned long timeout)
 	return timeout;
 }
 
-static inline int
-nfs4_lck_type(int cmd, struct file_lock *request)
-{
-	/* set lock type */
-	switch (request->fl_type) {
-		case F_RDLCK:
-			return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT;
-		case F_WRLCK:
-			return IS_SETLKW(cmd) ? NFS4_WRITEW_LT : NFS4_WRITE_LT;
-		case F_UNLCK:
-			return NFS4_WRITE_LT; 
-	}
-	BUG();
-	return 0;
-}
-
-static inline uint64_t
-nfs4_lck_length(struct file_lock *request)
-{
-	if (request->fl_end == OFFSET_MAX)
-		return ~(uint64_t)0;
-	return request->fl_end - request->fl_start + 1;
-}
-
 static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_client *clp = server->nfs4_state;
-	struct nfs_lockargs arg = {
+	struct nfs_lockt_args arg = {
 		.fh = NFS_FH(inode),
-		.type = nfs4_lck_type(cmd, request),
-		.offset = request->fl_start,
-		.length = nfs4_lck_length(request),
+		.fl = request,
 	};
-	struct nfs_lockres res = {
-		.server = server,
+	struct nfs_lockt_res res = {
+		.denied = request,
 	};
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCKT],
@@ -2778,36 +3053,23 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		.rpc_resp       = &res,
 		.rpc_cred	= state->owner->so_cred,
 	};
-	struct nfs_lowner nlo;
 	struct nfs4_lock_state *lsp;
 	int status;
 
 	down_read(&clp->cl_sem);
-	nlo.clientid = clp->cl_clientid;
+	arg.lock_owner.clientid = clp->cl_clientid;
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
 		goto out;
 	lsp = request->fl_u.nfs4_fl.owner;
-	nlo.id = lsp->ls_id; 
-	arg.u.lockt = &nlo;
+	arg.lock_owner.id = lsp->ls_id; 
 	status = rpc_call_sync(server->client, &msg, 0);
-	if (!status) {
-		request->fl_type = F_UNLCK;
-	} else if (status == -NFS4ERR_DENIED) {
-		int64_t len, start, end;
-		start = res.u.denied.offset;
-		len = res.u.denied.length;
-		end = start + len - 1;
-		if (end < 0 || len == 0)
-			request->fl_end = OFFSET_MAX;
-		else
-			request->fl_end = (loff_t)end;
-		request->fl_start = (loff_t)start;
-		request->fl_type = F_WRLCK;
-		if (res.u.denied.type & 1)
-			request->fl_type = F_RDLCK;
-		request->fl_pid = 0;
-		status = 0;
+	switch (status) {
+		case 0:
+			request->fl_type = F_UNLCK;
+			break;
+		case -NFS4ERR_DENIED:
+			status = 0;
 	}
 out:
 	up_read(&clp->cl_sem);
@@ -2847,196 +3109,314 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 }
 
 struct nfs4_unlockdata {
-	struct nfs_lockargs arg;
-	struct nfs_locku_opargs luargs;
-	struct nfs_lockres res;
+	struct nfs_locku_args arg;
+	struct nfs_locku_res res;
 	struct nfs4_lock_state *lsp;
 	struct nfs_open_context *ctx;
-	atomic_t refcount;
-	struct completion completion;
+	struct file_lock fl;
+	const struct nfs_server *server;
+	unsigned long timestamp;
 };
 
-static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata)
-{
-	if (atomic_dec_and_test(&calldata->refcount)) {
-		nfs_free_seqid(calldata->luargs.seqid);
-		nfs4_put_lock_state(calldata->lsp);
-		put_nfs_open_context(calldata->ctx);
-		kfree(calldata);
-	}
+static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
+		struct nfs_open_context *ctx,
+		struct nfs4_lock_state *lsp,
+		struct nfs_seqid *seqid)
+{
+	struct nfs4_unlockdata *p;
+	struct inode *inode = lsp->ls_state->inode;
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		return NULL;
+	p->arg.fh = NFS_FH(inode);
+	p->arg.fl = &p->fl;
+	p->arg.seqid = seqid;
+	p->arg.stateid = &lsp->ls_stateid;
+	p->lsp = lsp;
+	atomic_inc(&lsp->ls_count);
+	/* Ensure we don't close file until we're done freeing locks! */
+	p->ctx = get_nfs_open_context(ctx);
+	memcpy(&p->fl, fl, sizeof(p->fl));
+	p->server = NFS_SERVER(inode);
+	return p;
 }
 
-static void nfs4_locku_complete(struct nfs4_unlockdata *calldata)
+static void nfs4_locku_release_calldata(void *data)
 {
-	complete(&calldata->completion);
-	nfs4_locku_release_calldata(calldata);
+	struct nfs4_unlockdata *calldata = data;
+	nfs_free_seqid(calldata->arg.seqid);
+	nfs4_put_lock_state(calldata->lsp);
+	put_nfs_open_context(calldata->ctx);
+	kfree(calldata);
 }
 
-static void nfs4_locku_done(struct rpc_task *task)
+static void nfs4_locku_done(struct rpc_task *task, void *data)
 {
-	struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
+	struct nfs4_unlockdata *calldata = data;
 
-	nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid);
+	if (RPC_ASSASSINATED(task))
+		return;
+	nfs_increment_lock_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
 			memcpy(calldata->lsp->ls_stateid.data,
-					calldata->res.u.stateid.data,
+					calldata->res.stateid.data,
 					sizeof(calldata->lsp->ls_stateid.data));
+			renew_lease(calldata->server, calldata->timestamp);
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(calldata->res.server->nfs4_state);
+			nfs4_schedule_state_recovery(calldata->server->nfs4_state);
 			break;
 		default:
-			if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) {
+			if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) {
 				rpc_restart_call(task);
-				return;
 			}
 	}
-	nfs4_locku_complete(calldata);
 }
 
-static void nfs4_locku_begin(struct rpc_task *task)
+static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
+	struct nfs4_unlockdata *calldata = data;
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
 		.rpc_argp       = &calldata->arg,
 		.rpc_resp       = &calldata->res,
 		.rpc_cred	= calldata->lsp->ls_state->owner->so_cred,
 	};
-	int status;
 
-	status = nfs_wait_on_sequence(calldata->luargs.seqid, task);
-	if (status != 0)
+	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		return;
 	if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) {
-		nfs4_locku_complete(calldata);
-		task->tk_exit = NULL;
-		rpc_exit(task, 0);
+		/* Note: exit _without_ running nfs4_locku_done */
+		task->tk_action = NULL;
 		return;
 	}
+	calldata->timestamp = jiffies;
 	rpc_call_setup(task, &msg, 0);
 }
 
+static const struct rpc_call_ops nfs4_locku_ops = {
+	.rpc_call_prepare = nfs4_locku_prepare,
+	.rpc_call_done = nfs4_locku_done,
+	.rpc_release = nfs4_locku_release_calldata,
+};
+
+static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
+		struct nfs_open_context *ctx,
+		struct nfs4_lock_state *lsp,
+		struct nfs_seqid *seqid)
+{
+	struct nfs4_unlockdata *data;
+	struct rpc_task *task;
+
+	data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
+	if (data == NULL) {
+		nfs_free_seqid(seqid);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Unlock _before_ we do the RPC call */
+	do_vfs_lock(fl->fl_file, fl);
+	task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
+	if (IS_ERR(task))
+		nfs4_locku_release_calldata(data);
+	return task;
+}
+
 static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs4_unlockdata *calldata;
-	struct inode *inode = state->inode;
-	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_seqid *seqid;
 	struct nfs4_lock_state *lsp;
-	int status;
+	struct rpc_task *task;
+	int status = 0;
 
 	/* Is this a delegated lock? */
 	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
-		return do_vfs_lock(request->fl_file, request);
+		goto out_unlock;
+	/* Is this open_owner holding any locks on the server? */
+	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
+		goto out_unlock;
 
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
-		return status;
+		goto out_unlock;
 	lsp = request->fl_u.nfs4_fl.owner;
-	/* We might have lost the locks! */
-	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0)
-		return 0;
-	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
-	if (calldata == NULL)
-		return -ENOMEM;
-	calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid);
-	if (calldata->luargs.seqid == NULL) {
-		kfree(calldata);
-		return -ENOMEM;
-	}
-	calldata->luargs.stateid = &lsp->ls_stateid;
-	calldata->arg.fh = NFS_FH(inode);
-	calldata->arg.type = nfs4_lck_type(cmd, request);
-	calldata->arg.offset = request->fl_start;
-	calldata->arg.length = nfs4_lck_length(request);
-	calldata->arg.u.locku = &calldata->luargs;
-	calldata->res.server = server;
-	calldata->lsp = lsp;
-	atomic_inc(&lsp->ls_count);
-
-	/* Ensure we don't close file until we're done freeing locks! */
-	calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data);
-
-	atomic_set(&calldata->refcount, 2);
-	init_completion(&calldata->completion);
-
-	status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin,
-			nfs4_locku_done, calldata);
-	if (status == 0)
-		wait_for_completion_interruptible(&calldata->completion);
+	status = -ENOMEM;
+	seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (seqid == NULL)
+		goto out_unlock;
+	task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid);
+	status = PTR_ERR(task);
+	if (IS_ERR(task))
+		goto out_unlock;
+	status = nfs4_wait_for_completion_rpc_task(task);
+	rpc_release_task(task);
+	return status;
+out_unlock:
 	do_vfs_lock(request->fl_file, request);
-	nfs4_locku_release_calldata(calldata);
 	return status;
 }
 
-static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim)
+struct nfs4_lockdata {
+	struct nfs_lock_args arg;
+	struct nfs_lock_res res;
+	struct nfs4_lock_state *lsp;
+	struct nfs_open_context *ctx;
+	struct file_lock fl;
+	unsigned long timestamp;
+	int rpc_status;
+	int cancelled;
+};
+
+static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
+		struct nfs_open_context *ctx, struct nfs4_lock_state *lsp)
 {
-	struct inode *inode = state->inode;
+	struct nfs4_lockdata *p;
+	struct inode *inode = lsp->ls_state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
-	struct nfs_lock_opargs largs = {
-		.lock_stateid = &lsp->ls_stateid,
-		.open_stateid = &state->stateid,
-		.lock_owner = {
-			.clientid = server->nfs4_state->cl_clientid,
-			.id = lsp->ls_id,
-		},
-		.reclaim = reclaim,
-	};
-	struct nfs_lockargs arg = {
-		.fh = NFS_FH(inode),
-		.type = nfs4_lck_type(cmd, request),
-		.offset = request->fl_start,
-		.length = nfs4_lck_length(request),
-		.u = {
-			.lock = &largs,
-		},
-	};
-	struct nfs_lockres res = {
-		.server = server,
-	};
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		return NULL;
+
+	p->arg.fh = NFS_FH(inode);
+	p->arg.fl = &p->fl;
+	p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (p->arg.lock_seqid == NULL)
+		goto out_free;
+	p->arg.lock_stateid = &lsp->ls_stateid;
+	p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid;
+	p->arg.lock_owner.id = lsp->ls_id;
+	p->lsp = lsp;
+	atomic_inc(&lsp->ls_count);
+	p->ctx = get_nfs_open_context(ctx);
+	memcpy(&p->fl, fl, sizeof(p->fl));
+	return p;
+out_free:
+	kfree(p);
+	return NULL;
+}
+
+static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_lockdata *data = calldata;
+	struct nfs4_state *state = data->lsp->ls_state;
+	struct nfs4_state_owner *sp = state->owner;
 	struct rpc_message msg = {
-		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCK],
-		.rpc_argp       = &arg,
-		.rpc_resp       = &res,
-		.rpc_cred	= state->owner->so_cred,
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
+		.rpc_argp = &data->arg,
+		.rpc_resp = &data->res,
+		.rpc_cred = sp->so_cred,
 	};
-	int status = -ENOMEM;
-
-	largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
-	if (largs.lock_seqid == NULL)
-		return -ENOMEM;
-	if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) {
-		struct nfs4_state_owner *owner = state->owner;
 
-		largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid);
-		if (largs.open_seqid == NULL)
+	if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
+		return;
+	dprintk("%s: begin!\n", __FUNCTION__);
+	/* Do we need to do an open_to_lock_owner? */
+	if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
+		data->arg.open_seqid = nfs_alloc_seqid(&sp->so_seqid);
+		if (data->arg.open_seqid == NULL) {
+			data->rpc_status = -ENOMEM;
+			task->tk_action = NULL;
 			goto out;
-		largs.new_lock_owner = 1;
-		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-		/* increment open seqid on success, and seqid mutating errors */
-		if (largs.new_lock_owner != 0) {
-			nfs_increment_open_seqid(status, largs.open_seqid);
-			if (status == 0)
-				nfs_confirm_seqid(&lsp->ls_seqid, 0);
 		}
-		nfs_free_seqid(largs.open_seqid);
-	} else
-		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	/* increment lock seqid on success, and seqid mutating errors*/
-	nfs_increment_lock_seqid(status, largs.lock_seqid);
-	/* save the returned stateid. */
-	if (status == 0) {
-		memcpy(lsp->ls_stateid.data, res.u.stateid.data,
-				sizeof(lsp->ls_stateid.data));
-		lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-	} else if (status == -NFS4ERR_DENIED)
-		status = -EAGAIN;
+		data->arg.open_stateid = &state->stateid;
+		data->arg.new_lock_owner = 1;
+	}
+	data->timestamp = jiffies;
+	rpc_call_setup(task, &msg, 0);
 out:
-	nfs_free_seqid(largs.lock_seqid);
-	return status;
+	dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status);
+}
+
+static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_lockdata *data = calldata;
+
+	dprintk("%s: begin!\n", __FUNCTION__);
+
+	data->rpc_status = task->tk_status;
+	if (RPC_ASSASSINATED(task))
+		goto out;
+	if (data->arg.new_lock_owner != 0) {
+		nfs_increment_open_seqid(data->rpc_status, data->arg.open_seqid);
+		if (data->rpc_status == 0)
+			nfs_confirm_seqid(&data->lsp->ls_seqid, 0);
+		else
+			goto out;
+	}
+	if (data->rpc_status == 0) {
+		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
+					sizeof(data->lsp->ls_stateid.data));
+		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+		renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
+	}
+	nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
+out:
+	dprintk("%s: done, ret = %d!\n", __FUNCTION__, data->rpc_status);
+}
+
+static void nfs4_lock_release(void *calldata)
+{
+	struct nfs4_lockdata *data = calldata;
+
+	dprintk("%s: begin!\n", __FUNCTION__);
+	if (data->arg.open_seqid != NULL)
+		nfs_free_seqid(data->arg.open_seqid);
+	if (data->cancelled != 0) {
+		struct rpc_task *task;
+		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
+				data->arg.lock_seqid);
+		if (!IS_ERR(task))
+			rpc_release_task(task);
+		dprintk("%s: cancelling lock!\n", __FUNCTION__);
+	} else
+		nfs_free_seqid(data->arg.lock_seqid);
+	nfs4_put_lock_state(data->lsp);
+	put_nfs_open_context(data->ctx);
+	kfree(data);
+	dprintk("%s: done!\n", __FUNCTION__);
+}
+
+static const struct rpc_call_ops nfs4_lock_ops = {
+	.rpc_call_prepare = nfs4_lock_prepare,
+	.rpc_call_done = nfs4_lock_done,
+	.rpc_release = nfs4_lock_release,
+};
+
+static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int reclaim)
+{
+	struct nfs4_lockdata *data;
+	struct rpc_task *task;
+	int ret;
+
+	dprintk("%s: begin!\n", __FUNCTION__);
+	data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data,
+			fl->fl_u.nfs4_fl.owner);
+	if (data == NULL)
+		return -ENOMEM;
+	if (IS_SETLKW(cmd))
+		data->arg.block = 1;
+	if (reclaim != 0)
+		data->arg.reclaim = 1;
+	task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
+			&nfs4_lock_ops, data);
+	if (IS_ERR(task)) {
+		nfs4_lock_release(data);
+		return PTR_ERR(task);
+	}
+	ret = nfs4_wait_for_completion_rpc_task(task);
+	if (ret == 0) {
+		ret = data->rpc_status;
+		if (ret == -NFS4ERR_DENIED)
+			ret = -EAGAIN;
+	} else
+		data->cancelled = 1;
+	rpc_release_task(task);
+	dprintk("%s: done, ret = %d!\n", __FUNCTION__, ret);
+	return ret;
 }
 
 static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index a3001628ad32..5d764d8e6d8a 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -54,6 +54,7 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include "nfs4_fs.h"
+#include "delegation.h"
 
 #define NFSDBG_FACILITY	NFSDBG_PROC
 
@@ -61,6 +62,7 @@ void
 nfs4_renew_state(void *data)
 {
 	struct nfs4_client *clp = (struct nfs4_client *)data;
+	struct rpc_cred *cred;
 	long lease, timeout;
 	unsigned long last, now;
 
@@ -68,7 +70,7 @@ nfs4_renew_state(void *data)
 	dprintk("%s: start\n", __FUNCTION__);
 	/* Are there any active superblocks? */
 	if (list_empty(&clp->cl_superblocks))
-		goto out; 
+		goto out;
 	spin_lock(&clp->cl_lock);
 	lease = clp->cl_lease_time;
 	last = clp->cl_last_renewal;
@@ -76,9 +78,17 @@ nfs4_renew_state(void *data)
 	timeout = (2 * lease) / 3 + (long)last - (long)now;
 	/* Are we close to a lease timeout? */
 	if (time_after(now, last + lease/3)) {
+		cred = nfs4_get_renew_cred(clp);
+		if (cred == NULL) {
+			set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+			spin_unlock(&clp->cl_lock);
+			nfs_expire_all_delegations(clp);
+			goto out;
+		}
 		spin_unlock(&clp->cl_lock);
 		/* Queue an asynchronous RENEW. */
-		nfs4_proc_async_renew(clp);
+		nfs4_proc_async_renew(clp, cred);
+		put_rpccred(cred);
 		timeout = (2 * lease) / 3;
 		spin_lock(&clp->cl_lock);
 	} else
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5ef4c57618fe..afad0255e7db 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -43,6 +43,8 @@
 #include <linux/smp_lock.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 
@@ -57,8 +59,6 @@ const nfs4_stateid zero_stateid;
 static DEFINE_SPINLOCK(state_spinlock);
 static LIST_HEAD(nfs4_clientid_list);
 
-static void nfs4_recover_state(void *);
-
 void
 init_nfsv4_state(struct nfs_server *server)
 {
@@ -91,11 +91,10 @@ nfs4_alloc_client(struct in_addr *addr)
 
 	if (nfs_callback_up() < 0)
 		return NULL;
-	if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
+	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
 		nfs_callback_down();
 		return NULL;
 	}
-	memset(clp, 0, sizeof(*clp));
 	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
 	init_rwsem(&clp->cl_sem);
 	INIT_LIST_HEAD(&clp->cl_delegations);
@@ -103,14 +102,12 @@ nfs4_alloc_client(struct in_addr *addr)
 	INIT_LIST_HEAD(&clp->cl_unused);
 	spin_lock_init(&clp->cl_lock);
 	atomic_set(&clp->cl_count, 1);
-	INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp);
 	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
 	INIT_LIST_HEAD(&clp->cl_superblocks);
-	init_waitqueue_head(&clp->cl_waitq);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
 	clp->cl_rpcclient = ERR_PTR(-EINVAL);
 	clp->cl_boot_time = CURRENT_TIME;
-	clp->cl_state = 1 << NFS4CLNT_OK;
+	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
 	return clp;
 }
 
@@ -127,8 +124,6 @@ nfs4_free_client(struct nfs4_client *clp)
 		kfree(sp);
 	}
 	BUG_ON(!list_empty(&clp->cl_state_owners));
-	if (clp->cl_cred)
-		put_rpccred(clp->cl_cred);
 	nfs_idmap_delete(clp);
 	if (!IS_ERR(clp->cl_rpcclient))
 		rpc_shutdown_client(clp->cl_rpcclient);
@@ -193,27 +188,22 @@ nfs4_put_client(struct nfs4_client *clp)
 	list_del(&clp->cl_servers);
 	spin_unlock(&state_spinlock);
 	BUG_ON(!list_empty(&clp->cl_superblocks));
-	wake_up_all(&clp->cl_waitq);
 	rpc_wake_up(&clp->cl_rpcwaitq);
 	nfs4_kill_renewd(clp);
 	nfs4_free_client(clp);
 }
 
-static int __nfs4_init_client(struct nfs4_client *clp)
+static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
 {
-	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport);
+	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
+			nfs_callback_tcpport, cred);
 	if (status == 0)
-		status = nfs4_proc_setclientid_confirm(clp);
+		status = nfs4_proc_setclientid_confirm(clp, cred);
 	if (status == 0)
 		nfs4_schedule_state_renewal(clp);
 	return status;
 }
 
-int nfs4_init_client(struct nfs4_client *clp)
-{
-	return nfs4_map_errors(__nfs4_init_client(clp));
-}
-
 u32
 nfs4_alloc_lockowner_id(struct nfs4_client *clp)
 {
@@ -235,6 +225,32 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
 	return sp;
 }
 
+struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
+{
+	struct nfs4_state_owner *sp;
+	struct rpc_cred *cred = NULL;
+
+	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+		if (list_empty(&sp->so_states))
+			continue;
+		cred = get_rpccred(sp->so_cred);
+		break;
+	}
+	return cred;
+}
+
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
+{
+	struct nfs4_state_owner *sp;
+
+	if (!list_empty(&clp->cl_state_owners)) {
+		sp = list_entry(clp->cl_state_owners.next,
+				struct nfs4_state_owner, so_list);
+		return get_rpccred(sp->so_cred);
+	}
+	return NULL;
+}
+
 static struct nfs4_state_owner *
 nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
 {
@@ -349,14 +365,9 @@ nfs4_alloc_open_state(void)
 {
 	struct nfs4_state *state;
 
-	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
 	if (!state)
 		return NULL;
-	state->state = 0;
-	state->nreaders = 0;
-	state->nwriters = 0;
-	state->flags = 0;
-	memset(state->stateid.data, 0, sizeof(state->stateid.data));
 	atomic_set(&state->count, 1);
 	INIT_LIST_HEAD(&state->lock_states);
 	spin_lock_init(&state->state_lock);
@@ -475,15 +486,23 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 	/* Protect against nfs4_find_state() */
 	spin_lock(&owner->so_lock);
 	spin_lock(&inode->i_lock);
-	if (mode & FMODE_READ)
-		state->nreaders--;
-	if (mode & FMODE_WRITE)
-		state->nwriters--;
+	switch (mode & (FMODE_READ | FMODE_WRITE)) {
+		case FMODE_READ:
+			state->n_rdonly--;
+			break;
+		case FMODE_WRITE:
+			state->n_wronly--;
+			break;
+		case FMODE_READ|FMODE_WRITE:
+			state->n_rdwr--;
+	}
 	oldstate = newstate = state->state;
-	if (state->nreaders == 0)
-		newstate &= ~FMODE_READ;
-	if (state->nwriters == 0)
-		newstate &= ~FMODE_WRITE;
+	if (state->n_rdwr == 0) {
+		if (state->n_rdonly == 0)
+			newstate &= ~FMODE_READ;
+		if (state->n_wronly == 0)
+			newstate &= ~FMODE_WRITE;
+	}
 	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
 		nfs4_state_set_mode_locked(state, newstate);
 		oldstate = newstate;
@@ -733,45 +752,43 @@ out:
 }
 
 static int reclaimer(void *);
-struct reclaimer_args {
-	struct nfs4_client *clp;
-	struct completion complete;
-};
+
+static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
+{
+	smp_mb__before_clear_bit();
+	clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER);
+	rpc_wake_up(&clp->cl_rpcwaitq);
+}
 
 /*
  * State recovery routine
  */
-void
-nfs4_recover_state(void *data)
+static void nfs4_recover_state(struct nfs4_client *clp)
 {
-	struct nfs4_client *clp = (struct nfs4_client *)data;
-	struct reclaimer_args args = {
-		.clp = clp,
-	};
-	might_sleep();
-
-	init_completion(&args.complete);
+	struct task_struct *task;
 
-	if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0)
-		goto out_failed_clear;
-	wait_for_completion(&args.complete);
-	return;
-out_failed_clear:
-	set_bit(NFS4CLNT_OK, &clp->cl_state);
-	wake_up_all(&clp->cl_waitq);
-	rpc_wake_up(&clp->cl_rpcwaitq);
+	__module_get(THIS_MODULE);
+	atomic_inc(&clp->cl_count);
+	task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim",
+			NIPQUAD(clp->cl_addr));
+	if (!IS_ERR(task))
+		return;
+	nfs4_clear_recover_bit(clp);
+	nfs4_put_client(clp);
+	module_put(THIS_MODULE);
 }
 
 /*
  * Schedule a state recovery attempt
  */
-void
-nfs4_schedule_state_recovery(struct nfs4_client *clp)
+void nfs4_schedule_state_recovery(struct nfs4_client *clp)
 {
 	if (!clp)
 		return;
-	if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state))
-		schedule_work(&clp->cl_recoverd);
+	if (test_and_set_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0)
+		nfs4_recover_state(clp);
 }
 
 static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state)
@@ -887,18 +904,14 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
 
 static int reclaimer(void *ptr)
 {
-	struct reclaimer_args *args = (struct reclaimer_args *)ptr;
-	struct nfs4_client *clp = args->clp;
+	struct nfs4_client *clp = ptr;
 	struct nfs4_state_owner *sp;
 	struct nfs4_state_recovery_ops *ops;
+	struct rpc_cred *cred;
 	int status = 0;
 
-	daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr));
 	allow_signal(SIGKILL);
 
-	atomic_inc(&clp->cl_count);
-	complete(&args->complete);
-
 	/* Ensure exclusive access to NFSv4 state */
 	lock_kernel();
 	down_write(&clp->cl_sem);
@@ -906,20 +919,33 @@ static int reclaimer(void *ptr)
 	if (list_empty(&clp->cl_superblocks))
 		goto out;
 restart_loop:
-	status = nfs4_proc_renew(clp);
-	switch (status) {
-		case 0:
-		case -NFS4ERR_CB_PATH_DOWN:
-			goto out;
-		case -NFS4ERR_STALE_CLIENTID:
-		case -NFS4ERR_LEASE_MOVED:
-			ops = &nfs4_reboot_recovery_ops;
-			break;
-		default:
-			ops = &nfs4_network_partition_recovery_ops;
-	};
+	ops = &nfs4_network_partition_recovery_ops;
+	/* Are there any open files on this volume? */
+	cred = nfs4_get_renew_cred(clp);
+	if (cred != NULL) {
+		/* Yes there are: try to renew the old lease */
+		status = nfs4_proc_renew(clp, cred);
+		switch (status) {
+			case 0:
+			case -NFS4ERR_CB_PATH_DOWN:
+				put_rpccred(cred);
+				goto out;
+			case -NFS4ERR_STALE_CLIENTID:
+			case -NFS4ERR_LEASE_MOVED:
+				ops = &nfs4_reboot_recovery_ops;
+		}
+	} else {
+		/* "reboot" to ensure we clear all state on the server */
+		clp->cl_boot_time = CURRENT_TIME;
+		cred = nfs4_get_setclientid_cred(clp);
+	}
+	/* We're going to have to re-establish a clientid */
 	nfs4_state_mark_reclaim(clp);
-	status = __nfs4_init_client(clp);
+	status = -ENOENT;
+	if (cred != NULL) {
+		status = nfs4_init_client(clp, cred);
+		put_rpccred(cred);
+	}
 	if (status)
 		goto out_error;
 	/* Mark all delegations for reclaim */
@@ -940,14 +966,13 @@ restart_loop:
 	}
 	nfs_delegation_reap_unclaimed(clp);
 out:
-	set_bit(NFS4CLNT_OK, &clp->cl_state);
 	up_write(&clp->cl_sem);
 	unlock_kernel();
-	wake_up_all(&clp->cl_waitq);
-	rpc_wake_up(&clp->cl_rpcwaitq);
 	if (status == -NFS4ERR_CB_PATH_DOWN)
 		nfs_handle_cb_pathdown(clp);
+	nfs4_clear_recover_bit(clp);
 	nfs4_put_client(clp);
+	module_put_and_exit(0);
 	return 0;
 out_error:
 	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index fbbace8a30c4..4bbf5ef57785 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -392,9 +392,11 @@ static int nfs_stat_to_errno(int);
 				decode_getattr_maxsz)
 #define NFS4_enc_delegreturn_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
-				encode_delegreturn_maxsz)
+				encode_delegreturn_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
-				decode_delegreturn_maxsz)
+				decode_delegreturn_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_getacl_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz)
@@ -564,7 +566,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 	}
 	if (iap->ia_valid & ATTR_MODE) {
 		bmval1 |= FATTR4_WORD1_MODE;
-		WRITE32(iap->ia_mode);
+		WRITE32(iap->ia_mode & S_IALLUGO);
 	}
 	if (iap->ia_valid & ATTR_UID) {
 		bmval1 |= FATTR4_WORD1_OWNER;
@@ -742,69 +744,80 @@ static int encode_link(struct xdr_stream *xdr, const struct qstr *name)
 	return 0;
 }
 
+static inline int nfs4_lock_type(struct file_lock *fl, int block)
+{
+	if ((fl->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) == F_RDLCK)
+		return block ? NFS4_READW_LT : NFS4_READ_LT;
+	return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT;
+}
+
+static inline uint64_t nfs4_lock_length(struct file_lock *fl)
+{
+	if (fl->fl_end == OFFSET_MAX)
+		return ~(uint64_t)0;
+	return fl->fl_end - fl->fl_start + 1;
+}
+
 /*
  * opcode,type,reclaim,offset,length,new_lock_owner = 32
  * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
  */
-static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
 {
 	uint32_t *p;
-	struct nfs_lock_opargs *opargs = arg->u.lock;
 
 	RESERVE_SPACE(32);
 	WRITE32(OP_LOCK);
-	WRITE32(arg->type); 
-	WRITE32(opargs->reclaim);
-	WRITE64(arg->offset);
-	WRITE64(arg->length);
-	WRITE32(opargs->new_lock_owner);
-	if (opargs->new_lock_owner){
+	WRITE32(nfs4_lock_type(args->fl, args->block));
+	WRITE32(args->reclaim);
+	WRITE64(args->fl->fl_start);
+	WRITE64(nfs4_lock_length(args->fl));
+	WRITE32(args->new_lock_owner);
+	if (args->new_lock_owner){
 		RESERVE_SPACE(40);
-		WRITE32(opargs->open_seqid->sequence->counter);
-		WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data));
-		WRITE32(opargs->lock_seqid->sequence->counter);
-		WRITE64(opargs->lock_owner.clientid);
+		WRITE32(args->open_seqid->sequence->counter);
+		WRITEMEM(args->open_stateid->data, sizeof(args->open_stateid->data));
+		WRITE32(args->lock_seqid->sequence->counter);
+		WRITE64(args->lock_owner.clientid);
 		WRITE32(4);
-		WRITE32(opargs->lock_owner.id);
+		WRITE32(args->lock_owner.id);
 	}
 	else {
 		RESERVE_SPACE(20);
-		WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data));
-		WRITE32(opargs->lock_seqid->sequence->counter);
+		WRITEMEM(args->lock_stateid->data, sizeof(args->lock_stateid->data));
+		WRITE32(args->lock_seqid->sequence->counter);
 	}
 
 	return 0;
 }
 
-static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args)
 {
 	uint32_t *p;
-	struct nfs_lowner *opargs = arg->u.lockt;
 
 	RESERVE_SPACE(40);
 	WRITE32(OP_LOCKT);
-	WRITE32(arg->type);
-	WRITE64(arg->offset);
-	WRITE64(arg->length);
-	WRITE64(opargs->clientid);
+	WRITE32(nfs4_lock_type(args->fl, 0));
+	WRITE64(args->fl->fl_start);
+	WRITE64(nfs4_lock_length(args->fl));
+	WRITE64(args->lock_owner.clientid);
 	WRITE32(4);
-	WRITE32(opargs->id);
+	WRITE32(args->lock_owner.id);
 
 	return 0;
 }
 
-static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args)
 {
 	uint32_t *p;
-	struct nfs_locku_opargs *opargs = arg->u.locku;
 
 	RESERVE_SPACE(44);
 	WRITE32(OP_LOCKU);
-	WRITE32(arg->type);
-	WRITE32(opargs->seqid->sequence->counter);
-	WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data));
-	WRITE64(arg->offset);
-	WRITE64(arg->length);
+	WRITE32(nfs4_lock_type(args->fl, 0));
+	WRITE32(args->seqid->sequence->counter);
+	WRITEMEM(args->stateid->data, sizeof(args->stateid->data));
+	WRITE64(args->fl->fl_start);
+	WRITE64(nfs4_lock_length(args->fl));
 
 	return 0;
 }
@@ -964,9 +977,9 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con
 {
 	uint32_t *p;
 
-	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_OPEN_CONFIRM);
-	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
 	WRITE32(arg->seqid->sequence->counter);
 
 	return 0;
@@ -1499,9 +1512,6 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena
 	};
 	int status;
 
-	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
-	if (status != 0)
-		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1538,9 +1548,6 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n
 	};
 	int status;
 
-	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
-	if (status != 0)
-		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1558,19 +1565,19 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nf
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops   = 2,
+		.nops   = 3,
 	};
 	int status;
 
-	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
-	if (status != 0)
-		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
 	if (status)
 		goto out;
 	status = encode_open(&xdr, args);
+	if (status)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -1602,21 +1609,14 @@ out:
 /*
  * Encode a LOCK request
  */
-static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lock_args *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops   = 2,
 	};
-	struct nfs_lock_opargs *opargs = args->u.lock;
 	int status;
 
-	status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task);
-	if (status != 0)
-		goto out;
-	/* Do we need to do an open_to_lock_owner? */
-	if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)
-		opargs->new_lock_owner = 0;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1630,7 +1630,7 @@ out:
 /*
  * Encode a LOCKT request
  */
-static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockt_args *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1651,7 +1651,7 @@ out:
 /*
  * Encode a LOCKU request
  */
-static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_locku_args *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1985,14 +1985,20 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const str
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 2,
+		.nops = 3,
 	};
 	int status;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
-	if ((status = encode_putfh(&xdr, args->fhandle)) == 0)
-		status = encode_delegreturn(&xdr, args->stateid);
+	status = encode_putfh(&xdr, args->fhandle);
+	if (status != 0)
+		goto out;
+	status = encode_delegreturn(&xdr, args->stateid);
+	if (status != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
+out:
 	return status;
 }
 
@@ -2955,55 +2961,64 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
 /*
  * We create the owner, so we know a proper owner.id length is 4.
  */
-static int decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied)
+static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
 {
+	uint64_t offset, length, clientid;
 	uint32_t *p;
-	uint32_t namelen;
+	uint32_t namelen, type;
 
 	READ_BUF(32);
-	READ64(denied->offset);
-	READ64(denied->length);
-	READ32(denied->type);
-	READ64(denied->owner.clientid);
+	READ64(offset);
+	READ64(length);
+	READ32(type);
+	if (fl != NULL) {
+		fl->fl_start = (loff_t)offset;
+		fl->fl_end = fl->fl_start + (loff_t)length - 1;
+		if (length == ~(uint64_t)0)
+			fl->fl_end = OFFSET_MAX;
+		fl->fl_type = F_WRLCK;
+		if (type & 1)
+			fl->fl_type = F_RDLCK;
+		fl->fl_pid = 0;
+	}
+	READ64(clientid);
 	READ32(namelen);
 	READ_BUF(namelen);
-	if (namelen == 4)
-		READ32(denied->owner.id);
 	return -NFS4ERR_DENIED;
 }
 
-static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res)
+static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
 {
 	uint32_t *p;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_LOCK);
 	if (status == 0) {
-		READ_BUF(sizeof(res->u.stateid.data));
-		COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
+		READ_BUF(sizeof(res->stateid.data));
+		COPYMEM(res->stateid.data, sizeof(res->stateid.data));
 	} else if (status == -NFS4ERR_DENIED)
-		return decode_lock_denied(xdr, &res->u.denied);
+		return decode_lock_denied(xdr, NULL);
 	return status;
 }
 
-static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res)
+static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res)
 {
 	int status;
 	status = decode_op_hdr(xdr, OP_LOCKT);
 	if (status == -NFS4ERR_DENIED)
-		return decode_lock_denied(xdr, &res->u.denied);
+		return decode_lock_denied(xdr, res->denied);
 	return status;
 }
 
-static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res)
+static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
 {
 	uint32_t *p;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_LOCKU);
 	if (status == 0) {
-		READ_BUF(sizeof(res->u.stateid.data));
-		COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
+		READ_BUF(sizeof(res->stateid.data));
+		COPYMEM(res->stateid.data, sizeof(res->stateid.data));
 	}
 	return status;
 }
@@ -3831,6 +3846,9 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, uint32_t *p, struct
         if (status)
                 goto out;
         status = decode_open(&xdr, res);
+        if (status)
+                goto out;
+	decode_getfattr(&xdr, res->f_attr, res->server);
 out:
         return status;
 }
@@ -3864,7 +3882,7 @@ out:
 /*
  * Decode LOCK response
  */
-static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lock_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3885,7 +3903,7 @@ out:
 /*
  * Decode LOCKT response
  */
-static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockt_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3906,7 +3924,7 @@ out:
 /*
  * Decode LOCKU response
  */
-static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_locku_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4174,7 +4192,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s
 /*
  * DELEGRETURN request
  */
-static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
+static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_delegreturnres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4182,11 +4200,14 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *d
 
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
 	status = decode_compound_hdr(&xdr, &hdr);
-	if (status == 0) {
-		status = decode_putfh(&xdr);
-		if (status == 0)
-			status = decode_delegreturn(&xdr);
-	}
+	if (status != 0)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status != 0)
+		goto out;
+	status = decode_delegreturn(&xdr);
+	decode_getfattr(&xdr, res->fattr, res->server);
+out:
 	return status;
 }
 
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 1b272a135a31..e897e00c2c9d 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -275,7 +275,9 @@ static int __init root_nfs_parse(char *name, char *buf)
 			case Opt_noacl:
 				nfs_data.flags |= NFS_MOUNT_NOACL;
 				break;
-			default : 
+			default:
+				printk(KERN_WARNING "Root-NFS: unknown "
+					"option: %s\n", p);
 				return 0;
 		}
 	}
@@ -296,8 +298,8 @@ static int __init root_nfs_name(char *name)
 	nfs_port          = -1;
 	nfs_data.version  = NFS_MOUNT_VERSION;
 	nfs_data.flags    = NFS_MOUNT_NONLM;	/* No lockd in nfs root yet */
-	nfs_data.rsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
-	nfs_data.wsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
+	nfs_data.rsize    = NFS_DEF_FILE_IO_SIZE;
+	nfs_data.wsize    = NFS_DEF_FILE_IO_SIZE;
 	nfs_data.acregmin = 3;
 	nfs_data.acregmax = 60;
 	nfs_data.acdirmin = 30;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e1e3ca5d746b..f5150d71c03d 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -111,6 +111,9 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	};
 	int	status;
 
+	/* Mask out the non-modebit related stuff from attr->ia_mode */
+	sattr->ia_mode &= S_IALLUGO;
+
 	dprintk("NFS call  setattr\n");
 	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
@@ -547,10 +550,9 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void
-nfs_read_done(struct rpc_task *task)
+static void nfs_read_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+	struct nfs_read_data *data = calldata;
 
 	if (task->tk_status >= 0) {
 		nfs_refresh_inode(data->inode, data->res.fattr);
@@ -560,9 +562,14 @@ nfs_read_done(struct rpc_task *task)
 		if (data->args.offset + data->args.count >= data->res.fattr->size)
 			data->res.eof = 1;
 	}
-	nfs_readpage_result(task);
+	nfs_readpage_result(task, calldata);
 }
 
+static const struct rpc_call_ops nfs_read_ops = {
+	.rpc_call_done = nfs_read_done,
+	.rpc_release = nfs_readdata_release,
+};
+
 static void
 nfs_proc_read_setup(struct nfs_read_data *data)
 {
@@ -580,20 +587,24 @@ nfs_proc_read_setup(struct nfs_read_data *data)
 	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void
-nfs_write_done(struct rpc_task *task)
+static void nfs_write_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_write_data *data = calldata;
 
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_writeback_done(task);
+	nfs_writeback_done(task, calldata);
 }
 
+static const struct rpc_call_ops nfs_write_ops = {
+	.rpc_call_done = nfs_write_done,
+	.rpc_release = nfs_writedata_release,
+};
+
 static void
 nfs_proc_write_setup(struct nfs_write_data *data, int how)
 {
@@ -614,7 +625,7 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how)
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 5f20eafba8ec..05eb43fadf8e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -42,9 +42,8 @@ mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
 
-void nfs_readdata_release(struct rpc_task *task)
+void nfs_readdata_release(void *data)
 {
-        struct nfs_read_data   *data = (struct nfs_read_data *)task->tk_calldata;
         nfs_readdata_free(data);
 }
 
@@ -84,7 +83,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	int		result;
 	struct nfs_read_data *rdata;
 
-	rdata = nfs_readdata_alloc();
+	rdata = nfs_readdata_alloc(1);
 	if (!rdata)
 		return -ENOMEM;
 
@@ -220,9 +219,6 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	NFS_PROTO(inode)->read_setup(data);
 
 	data->task.tk_cookie = (unsigned long)inode;
-	data->task.tk_calldata = data;
-	/* Release requests */
-	data->task.tk_release = nfs_readdata_release;
 
 	dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
 			data->task.tk_pid,
@@ -287,7 +283,7 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
 
 	nbytes = req->wb_bytes;
 	for(;;) {
-		data = nfs_readdata_alloc();
+		data = nfs_readdata_alloc(1);
 		if (!data)
 			goto out_bad;
 		INIT_LIST_HEAD(&data->pages);
@@ -343,7 +339,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
 	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
 		return nfs_pagein_multi(head, inode);
 
-	data = nfs_readdata_alloc();
+	data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
 	if (!data)
 		goto out_bad;
 
@@ -452,9 +448,9 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-void nfs_readpage_result(struct rpc_task *task)
+void nfs_readpage_result(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
+	struct nfs_read_data *data = calldata;
 	struct nfs_readargs *argp = &data->args;
 	struct nfs_readres *resp = &data->res;
 	int status = task->tk_status;
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
new file mode 100644
index 000000000000..4c486eb867ca
--- /dev/null
+++ b/fs/nfs/sysctl.c
@@ -0,0 +1,84 @@
+/*
+ * linux/fs/nfs/sysctl.c
+ *
+ * Sysctl interface to NFS parameters
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/linkage.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <linux/module.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_idmap.h>
+
+#include "callback.h"
+
+static const int nfs_set_port_min = 0;
+static const int nfs_set_port_max = 65535;
+static struct ctl_table_header *nfs_callback_sysctl_table;
+/*
+ * Something that isn't CTL_ANY, CTL_NONE or a value that may clash.
+ * Use the same values as fs/lockd/svc.c
+ */
+#define CTL_UNNUMBERED -2
+
+static ctl_table nfs_cb_sysctls[] = {
+#ifdef CONFIG_NFS_V4
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "nfs_callback_tcpport",
+		.data = &nfs_callback_set_tcpport,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.extra1 = (int *)&nfs_set_port_min,
+		.extra2 = (int *)&nfs_set_port_max,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "idmap_cache_timeout",
+		.data = &nfs_idmap_cache_timeout,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_jiffies,
+		.strategy = &sysctl_jiffies,
+	},
+#endif
+	{ .ctl_name = 0 }
+};
+
+static ctl_table nfs_cb_sysctl_dir[] = {
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "nfs",
+		.mode = 0555,
+		.child = nfs_cb_sysctls,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table nfs_cb_sysctl_root[] = {
+	{
+		.ctl_name = CTL_FS,
+		.procname = "fs",
+		.mode = 0555,
+		.child = nfs_cb_sysctl_dir,
+	},
+	{ .ctl_name = 0 }
+};
+
+int nfs_register_sysctl(void)
+{
+	nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root, 0);
+	if (nfs_callback_sysctl_table == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+void nfs_unregister_sysctl(void)
+{
+	unregister_sysctl_table(nfs_callback_sysctl_table);
+	nfs_callback_sysctl_table = NULL;
+}
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index d639d172d568..a65c7b53d558 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -87,10 +87,9 @@ nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
  * We delay initializing RPC info until after the call to dentry_iput()
  * in order to minimize races against rename().
  */
-static void
-nfs_async_unlink_init(struct rpc_task *task)
+static void nfs_async_unlink_init(struct rpc_task *task, void *calldata)
 {
-	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	struct nfs_unlinkdata	*data = calldata;
 	struct dentry		*dir = data->dir;
 	struct rpc_message	msg = {
 		.rpc_cred	= data->cred,
@@ -116,10 +115,9 @@ nfs_async_unlink_init(struct rpc_task *task)
  *
  * Do the directory attribute update.
  */
-static void
-nfs_async_unlink_done(struct rpc_task *task)
+static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	struct nfs_unlinkdata	*data = calldata;
 	struct dentry		*dir = data->dir;
 	struct inode		*dir_i;
 
@@ -141,13 +139,18 @@ nfs_async_unlink_done(struct rpc_task *task)
  * We need to call nfs_put_unlinkdata as a 'tk_release' task since the
  * rpc_task would be freed too.
  */
-static void
-nfs_async_unlink_release(struct rpc_task *task)
+static void nfs_async_unlink_release(void *calldata)
 {
-	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	struct nfs_unlinkdata	*data = calldata;
 	nfs_put_unlinkdata(data);
 }
 
+static const struct rpc_call_ops nfs_unlink_ops = {
+	.rpc_call_prepare = nfs_async_unlink_init,
+	.rpc_call_done = nfs_async_unlink_done,
+	.rpc_release = nfs_async_unlink_release,
+};
+
 /**
  * nfs_async_unlink - asynchronous unlinking of a file
  * @dentry: dentry to unlink
@@ -157,7 +160,6 @@ nfs_async_unlink(struct dentry *dentry)
 {
 	struct dentry	*dir = dentry->d_parent;
 	struct nfs_unlinkdata	*data;
-	struct rpc_task	*task;
 	struct rpc_clnt	*clnt = NFS_CLIENT(dir->d_inode);
 	int		status = -ENOMEM;
 
@@ -178,17 +180,13 @@ nfs_async_unlink(struct dentry *dentry)
 	nfs_deletes = data;
 	data->count = 1;
 
-	task = &data->task;
-	rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC);
-	task->tk_calldata = data;
-	task->tk_action = nfs_async_unlink_init;
-	task->tk_release = nfs_async_unlink_release;
+	rpc_init_task(&data->task, clnt, RPC_TASK_ASYNC, &nfs_unlink_ops, data);
 
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags |= DCACHE_NFSFS_RENAMED;
 	spin_unlock(&dentry->d_lock);
 
-	rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL);
+	rpc_sleep_on(&nfs_delete_queue, &data->task, NULL, NULL);
 	status = 0;
  out:
 	return status;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3107908e5f3f..9449b6835509 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -89,24 +89,38 @@ static mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-static inline struct nfs_write_data *nfs_commit_alloc(void)
+static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
+
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
+		if (pagecount < NFS_PAGEVEC_SIZE)
+			p->pagevec = &p->page_array[0];
+		else {
+			size_t size = ++pagecount * sizeof(struct page *);
+			p->pagevec = kmalloc(size, GFP_NOFS);
+			if (p->pagevec) {
+				memset(p->pagevec, 0, size);
+			} else {
+				mempool_free(p, nfs_commit_mempool);
+				p = NULL;
+			}
+		}
 	}
 	return p;
 }
 
 static inline void nfs_commit_free(struct nfs_write_data *p)
 {
+	if (p && (p->pagevec != &p->page_array[0]))
+		kfree(p->pagevec);
 	mempool_free(p, nfs_commit_mempool);
 }
 
-static void nfs_writedata_release(struct rpc_task *task)
+void nfs_writedata_release(void *wdata)
 {
-	struct nfs_write_data	*wdata = (struct nfs_write_data *)task->tk_calldata;
 	nfs_writedata_free(wdata);
 }
 
@@ -168,7 +182,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	int		result, written = 0;
 	struct nfs_write_data *wdata;
 
-	wdata = nfs_writedata_alloc();
+	wdata = nfs_writedata_alloc(1);
 	if (!wdata)
 		return -ENOMEM;
 
@@ -232,19 +246,16 @@ static int nfs_writepage_async(struct nfs_open_context *ctx,
 		unsigned int offset, unsigned int count)
 {
 	struct nfs_page	*req;
-	int		status;
 
 	req = nfs_update_request(ctx, inode, page, offset, count);
-	status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
-	if (status < 0)
-		goto out;
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
 	/* Set the PG_uptodate flag? */
 	nfs_mark_uptodate(page, offset, count);
 	nfs_unlock_request(req);
- out:
-	return status;
+	return 0;
 }
 
 static int wb_priority(struct writeback_control *wbc)
@@ -304,11 +315,8 @@ do_it:
 	lock_kernel();
 	if (!IS_SYNC(inode) && inode_referenced) {
 		err = nfs_writepage_async(ctx, inode, page, 0, offset);
-		if (err >= 0) {
-			err = 0;
-			if (wbc->for_reclaim)
-				nfs_flush_inode(inode, 0, 0, FLUSH_STABLE);
-		}
+		if (!wbc->for_writepages)
+			nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
 	} else {
 		err = nfs_writepage_sync(ctx, inode, page, 0,
 						offset, priority);
@@ -877,9 +885,6 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 
 	data->task.tk_priority = flush_task_priority(how);
 	data->task.tk_cookie = (unsigned long)inode;
-	data->task.tk_calldata = data;
-	/* Release requests */
-	data->task.tk_release = nfs_writedata_release;
 
 	dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
 		data->task.tk_pid,
@@ -919,7 +924,7 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
 
 	nbytes = req->wb_bytes;
 	for (;;) {
-		data = nfs_writedata_alloc();
+		data = nfs_writedata_alloc(1);
 		if (!data)
 			goto out_bad;
 		list_add(&data->pages, &list);
@@ -983,7 +988,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
 	if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
 		return nfs_flush_multi(head, inode, how);
 
-	data = nfs_writedata_alloc();
+	data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
 	if (!data)
 		goto out_bad;
 
@@ -1137,9 +1142,9 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task)
+void nfs_writeback_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data	*data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_write_data	*data = calldata;
 	struct nfs_writeargs	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
 
@@ -1206,9 +1211,8 @@ void nfs_writeback_done(struct rpc_task *task)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-static void nfs_commit_release(struct rpc_task *task)
+void nfs_commit_release(void *wdata)
 {
-	struct nfs_write_data	*wdata = (struct nfs_write_data *)task->tk_calldata;
 	nfs_commit_free(wdata);
 }
 
@@ -1244,9 +1248,6 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 
 	data->task.tk_priority = flush_task_priority(how);
 	data->task.tk_cookie = (unsigned long)inode;
-	data->task.tk_calldata = data;
-	/* Release requests */
-	data->task.tk_release = nfs_commit_release;
 	
 	dprintk("NFS: %4d initiated commit call\n", data->task.tk_pid);
 }
@@ -1255,12 +1256,12 @@ static void nfs_commit_rpcsetup(struct list_head *head,
  * Commit dirty pages
  */
 static int
-nfs_commit_list(struct list_head *head, int how)
+nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 {
 	struct nfs_write_data	*data;
 	struct nfs_page         *req;
 
-	data = nfs_commit_alloc();
+	data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
 
 	if (!data)
 		goto out_bad;
@@ -1283,10 +1284,9 @@ nfs_commit_list(struct list_head *head, int how)
 /*
  * COMMIT call returned
  */
-void
-nfs_commit_done(struct rpc_task *task)
+void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data	*data = (struct nfs_write_data *)task->tk_calldata;
+	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req;
 	int res = 0;
 
@@ -1366,7 +1366,7 @@ int nfs_commit_inode(struct inode *inode, int how)
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&nfsi->req_lock);
 	if (res) {
-		error = nfs_commit_list(&head, how);
+		error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
 	}
@@ -1377,22 +1377,23 @@ int nfs_commit_inode(struct inode *inode, int how)
 int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
 		  unsigned int npages, int how)
 {
-	int	error,
-		wait;
+	int nocommit = how & FLUSH_NOCOMMIT;
+	int wait = how & FLUSH_WAIT;
+	int error;
 
-	wait = how & FLUSH_WAIT;
-	how &= ~FLUSH_WAIT;
+	how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
 
 	do {
-		error = 0;
-		if (wait)
+		if (wait) {
 			error = nfs_wait_on_requests(inode, idx_start, npages);
-		if (error == 0)
-			error = nfs_flush_inode(inode, idx_start, npages, how);
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-		if (error == 0)
+			if (error != 0)
+				continue;
+		}
+		error = nfs_flush_inode(inode, idx_start, npages, how);
+		if (error != 0)
+			continue;
+		if (!nocommit)
 			error = nfs_commit_inode(inode, how);
-#endif
 	} while (error > 0);
 	return error;
 }
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 041380fe667b..6d2dfed1de08 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -56,13 +56,20 @@ static int
 nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
 					   struct nfsd3_attrstat *resp)
 {
-	int	nfserr;
+	int	err, nfserr;
 
 	dprintk("nfsd: GETATTR(3)  %s\n",
-				SVCFH_fmt(&argp->fh));
+		SVCFH_fmt(&argp->fh));
 
 	fh_copy(&resp->fh, &argp->fh);
 	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+	if (nfserr)
+		RETURN_STATUS(nfserr);
+
+	err = vfs_getattr(resp->fh.fh_export->ex_mnt,
+			  resp->fh.fh_dentry, &resp->stat);
+	nfserr = nfserrno(err);
+
 	RETURN_STATUS(nfserr);
 }
 
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 9147b8524d05..243d94b9653a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -154,37 +154,34 @@ decode_sattr3(u32 *p, struct iattr *iap)
 }
 
 static inline u32 *
-encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp,
+	      struct kstat *stat)
 {
-	struct vfsmount *mnt = fhp->fh_export->ex_mnt;
 	struct dentry	*dentry = fhp->fh_dentry;
-	struct kstat stat;
 	struct timespec time;
 
-	vfs_getattr(mnt, dentry, &stat);
-
-	*p++ = htonl(nfs3_ftypes[(stat.mode & S_IFMT) >> 12]);
-	*p++ = htonl((u32) stat.mode);
-	*p++ = htonl((u32) stat.nlink);
-	*p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid));
-	*p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid));
-	if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) {
+	*p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
+	*p++ = htonl((u32) stat->mode);
+	*p++ = htonl((u32) stat->nlink);
+	*p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
+	*p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
+	if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
 		p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
 	} else {
-		p = xdr_encode_hyper(p, (u64) stat.size);
+		p = xdr_encode_hyper(p, (u64) stat->size);
 	}
-	p = xdr_encode_hyper(p, ((u64)stat.blocks) << 9);
-	*p++ = htonl((u32) MAJOR(stat.rdev));
-	*p++ = htonl((u32) MINOR(stat.rdev));
+	p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9);
+	*p++ = htonl((u32) MAJOR(stat->rdev));
+	*p++ = htonl((u32) MINOR(stat->rdev));
 	if (is_fsid(fhp, rqstp->rq_reffh))
 		p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
 	else
-		p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat.dev));
-	p = xdr_encode_hyper(p, (u64) stat.ino);
-	p = encode_time3(p, &stat.atime);
+		p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat->dev));
+	p = xdr_encode_hyper(p, (u64) stat->ino);
+	p = encode_time3(p, &stat->atime);
 	lease_get_mtime(dentry->d_inode, &time); 
 	p = encode_time3(p, &time);
-	p = encode_time3(p, &stat.ctime);
+	p = encode_time3(p, &stat->ctime);
 
 	return p;
 }
@@ -232,8 +229,14 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 {
 	struct dentry *dentry = fhp->fh_dentry;
 	if (dentry && dentry->d_inode != NULL) {
-		*p++ = xdr_one;		/* attributes follow */
-		return encode_fattr3(rqstp, p, fhp);
+	        int err;
+		struct kstat stat;
+
+		err = vfs_getattr(fhp->fh_export->ex_mnt, dentry, &stat);
+		if (!err) {
+			*p++ = xdr_one;		/* attributes follow */
+			return encode_fattr3(rqstp, p, fhp, &stat);
+		}
 	}
 	*p++ = xdr_zero;
 	return p;
@@ -616,7 +619,7 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
 					struct nfsd3_attrstat *resp)
 {
 	if (resp->status == 0)
-		p = encode_fattr3(rqstp, p, &resp->fh);
+		p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 583c0710e45e..d828662d737d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -53,7 +53,7 @@
 #define NFSPROC4_CB_COMPOUND 1
 
 /* declarations */
-static void nfs4_cb_null(struct rpc_task *task);
+static const struct rpc_call_ops nfs4_cb_null_ops;
 
 /* Index of predefined Linux callback client operations */
 
@@ -431,7 +431,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	}
 	clnt->cl_intr = 0;
 	clnt->cl_softrtry = 1;
-	clnt->cl_chatty = 1;
 
 	/* Kick rpciod, put the call on the wire. */
 
@@ -447,7 +446,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	msg.rpc_cred = nfsd4_lookupcred(clp,0);
 	if (IS_ERR(msg.rpc_cred))
 		goto out_rpciod;
-	status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL);
+	status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
 	put_rpccred(msg.rpc_cred);
 
 	if (status != 0) {
@@ -469,7 +468,7 @@ out_err:
 }
 
 static void
-nfs4_cb_null(struct rpc_task *task)
+nfs4_cb_null(struct rpc_task *task, void *dummy)
 {
 	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
 	struct nfs4_callback *cb = &clp->cl_callback;
@@ -488,6 +487,10 @@ out:
 	put_nfs4_client(clp);
 }
 
+static const struct rpc_call_ops nfs4_cb_null_ops = {
+	.rpc_call_done = nfs4_cb_null,
+};
+
 /*
  * called with dp->dl_count inc'ed.
  * nfs4_lock_state() may or may not have been called.
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 954cf893d50c..be963a133aaa 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -121,9 +121,9 @@ out:
 static void
 nfsd4_sync_rec_dir(void)
 {
-	down(&rec_dir.dentry->d_inode->i_sem);
+	mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
 	nfsd_sync_dir(rec_dir.dentry);
-	up(&rec_dir.dentry->d_inode->i_sem);
+	mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
 }
 
 int
@@ -143,7 +143,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	nfs4_save_user(&uid, &gid);
 
 	/* lock the parent */
-	down(&rec_dir.dentry->d_inode->i_sem);
+	mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
 
 	dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
 	if (IS_ERR(dentry)) {
@@ -159,7 +159,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 out_put:
 	dput(dentry);
 out_unlock:
-	up(&rec_dir.dentry->d_inode->i_sem);
+	mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
 	if (status == 0) {
 		clp->cl_firststate = 1;
 		nfsd4_sync_rec_dir();
@@ -259,9 +259,9 @@ nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
 		printk("nfsd4: non-file found in client recovery directory\n");
 		return -EINVAL;
 	}
-	down(&dir->d_inode->i_sem);
+	mutex_lock(&dir->d_inode->i_mutex);
 	status = vfs_unlink(dir->d_inode, dentry);
-	up(&dir->d_inode->i_sem);
+	mutex_unlock(&dir->d_inode->i_mutex);
 	return status;
 }
 
@@ -274,9 +274,9 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
 	 * any regular files anyway, just in case the directory was created by
 	 * a kernel from the future.... */
 	nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
-	down(&dir->d_inode->i_sem);
+	mutex_lock(&dir->d_inode->i_mutex);
 	status = vfs_rmdir(dir->d_inode, dentry);
-	up(&dir->d_inode->i_sem);
+	mutex_unlock(&dir->d_inode->i_mutex);
 	return status;
 }
 
@@ -288,9 +288,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
 
 	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 
-	down(&rec_dir.dentry->d_inode->i_sem);
+	mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
 	dentry = lookup_one_len(name, rec_dir.dentry, namlen);
-	up(&rec_dir.dentry->d_inode->i_sem);
+	mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
 	if (IS_ERR(dentry)) {
 		status = PTR_ERR(dentry);
 		return status;
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index b45999ff33e6..aa7bb41b293d 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -152,46 +152,44 @@ decode_sattr(u32 *p, struct iattr *iap)
 }
 
 static inline u32 *
-encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp,
+	     struct kstat *stat)
 {
-	struct vfsmount *mnt = fhp->fh_export->ex_mnt;
 	struct dentry	*dentry = fhp->fh_dentry;
-	struct kstat stat;
 	int type;
 	struct timespec time;
 
-	vfs_getattr(mnt, dentry, &stat);
-	type = (stat.mode & S_IFMT);
+	type = (stat->mode & S_IFMT);
 
 	*p++ = htonl(nfs_ftypes[type >> 12]);
-	*p++ = htonl((u32) stat.mode);
-	*p++ = htonl((u32) stat.nlink);
-	*p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid));
-	*p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid));
+	*p++ = htonl((u32) stat->mode);
+	*p++ = htonl((u32) stat->nlink);
+	*p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
+	*p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
 
-	if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) {
+	if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
 		*p++ = htonl(NFS_MAXPATHLEN);
 	} else {
-		*p++ = htonl((u32) stat.size);
+		*p++ = htonl((u32) stat->size);
 	}
-	*p++ = htonl((u32) stat.blksize);
+	*p++ = htonl((u32) stat->blksize);
 	if (S_ISCHR(type) || S_ISBLK(type))
-		*p++ = htonl(new_encode_dev(stat.rdev));
+		*p++ = htonl(new_encode_dev(stat->rdev));
 	else
 		*p++ = htonl(0xffffffff);
-	*p++ = htonl((u32) stat.blocks);
+	*p++ = htonl((u32) stat->blocks);
 	if (is_fsid(fhp, rqstp->rq_reffh))
 		*p++ = htonl((u32) fhp->fh_export->ex_fsid);
 	else
-		*p++ = htonl(new_encode_dev(stat.dev));
-	*p++ = htonl((u32) stat.ino);
-	*p++ = htonl((u32) stat.atime.tv_sec);
-	*p++ = htonl(stat.atime.tv_nsec ? stat.atime.tv_nsec / 1000 : 0);
+		*p++ = htonl(new_encode_dev(stat->dev));
+	*p++ = htonl((u32) stat->ino);
+	*p++ = htonl((u32) stat->atime.tv_sec);
+	*p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0);
 	lease_get_mtime(dentry->d_inode, &time); 
 	*p++ = htonl((u32) time.tv_sec);
 	*p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); 
-	*p++ = htonl((u32) stat.ctime.tv_sec);
-	*p++ = htonl(stat.ctime.tv_nsec ? stat.ctime.tv_nsec / 1000 : 0);
+	*p++ = htonl((u32) stat->ctime.tv_sec);
+	*p++ = htonl(stat->ctime.tv_nsec ? stat->ctime.tv_nsec / 1000 : 0);
 
 	return p;
 }
@@ -199,7 +197,9 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 /* Helper function for NFSv2 ACL code */
 u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 {
-	return encode_fattr(rqstp, p, fhp);
+	struct kstat stat;
+	vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry, &stat);
+	return encode_fattr(rqstp, p, fhp, &stat);
 }
 
 /*
@@ -394,7 +394,7 @@ int
 nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
 					struct nfsd_attrstat *resp)
 {
-	p = encode_fattr(rqstp, p, &resp->fh);
+	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
@@ -403,7 +403,7 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p,
 					struct nfsd_diropres *resp)
 {
 	p = encode_fh(p, &resp->fh);
-	p = encode_fattr(rqstp, p, &resp->fh);
+	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
@@ -428,7 +428,7 @@ int
 nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
 					struct nfsd_readres *resp)
 {
-	p = encode_fattr(rqstp, p, &resp->fh);
+	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->count);
 	xdr_ressize_check(rqstp, p);
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index af7c3c3074b0..eef0576a7785 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -48,8 +48,8 @@
 #include <linux/fsnotify.h>
 #include <linux/posix_acl.h>
 #include <linux/posix_acl_xattr.h>
-#ifdef CONFIG_NFSD_V4
 #include <linux/xattr.h>
+#ifdef CONFIG_NFSD_V4
 #include <linux/nfs4.h>
 #include <linux/nfs4_acl.h>
 #include <linux/nfsd_idmap.h>
@@ -365,8 +365,30 @@ out_nfserr:
 	goto out;
 }
 
-#if defined(CONFIG_NFSD_V4)
+#if defined(CONFIG_NFSD_V2_ACL) || \
+    defined(CONFIG_NFSD_V3_ACL) || \
+    defined(CONFIG_NFSD_V4)
+static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
+{
+	ssize_t buflen;
+	int error;
+
+	buflen = vfs_getxattr(dentry, key, NULL, 0);
+	if (buflen <= 0)
+		return buflen;
+
+	*buf = kmalloc(buflen, GFP_KERNEL);
+	if (!*buf)
+		return -ENOMEM;
 
+	error = vfs_getxattr(dentry, key, *buf, buflen);
+	if (error < 0)
+		return error;
+	return buflen;
+}
+#endif
+
+#if defined(CONFIG_NFSD_V4)
 static int
 set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
 {
@@ -374,7 +396,6 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
 	size_t buflen;
 	char *buf = NULL;
 	int error = 0;
-	struct inode *inode = dentry->d_inode;
 
 	buflen = posix_acl_xattr_size(pacl->a_count);
 	buf = kmalloc(buflen, GFP_KERNEL);
@@ -388,15 +409,7 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
 		goto out;
 	}
 
-	error = -EOPNOTSUPP;
-	if (inode->i_op && inode->i_op->setxattr) {
-		down(&inode->i_sem);
-		security_inode_setxattr(dentry, key, buf, len, 0);
-		error = inode->i_op->setxattr(dentry, key, buf, len, 0);
-		if (!error)
-			security_inode_post_setxattr(dentry, key, buf, len, 0);
-		up(&inode->i_sem);
-	}
+	error = vfs_setxattr(dentry, key, buf, len, 0);
 out:
 	kfree(buf);
 	return error;
@@ -455,44 +468,19 @@ out_nfserr:
 static struct posix_acl *
 _get_posix_acl(struct dentry *dentry, char *key)
 {
-	struct inode *inode = dentry->d_inode;
-	char *buf = NULL;
-	int buflen, error = 0;
+	void *buf = NULL;
 	struct posix_acl *pacl = NULL;
+	int buflen;
 
-	error = -EOPNOTSUPP;
-	if (inode->i_op == NULL)
-		goto out_err;
-	if (inode->i_op->getxattr == NULL)
-		goto out_err;
-
-	error = security_inode_getxattr(dentry, key);
-	if (error)
-		goto out_err;
-
-	buflen = inode->i_op->getxattr(dentry, key, NULL, 0);
-	if (buflen <= 0) {
-		error = buflen < 0 ? buflen : -ENODATA;
-		goto out_err;
-	}
-
-	buf = kmalloc(buflen, GFP_KERNEL);
-	if (buf == NULL) {
-		error = -ENOMEM;
-		goto out_err;
-	}
-
-	error = inode->i_op->getxattr(dentry, key, buf, buflen);
-	if (error < 0)
-		goto out_err;
+	buflen = nfsd_getxattr(dentry, key, &buf);
+	if (!buflen)
+		buflen = -ENODATA;
+	if (buflen <= 0)
+		return ERR_PTR(buflen);
 
 	pacl = posix_acl_from_xattr(buf, buflen);
- out:
 	kfree(buf);
 	return pacl;
- out_err:
-	pacl = ERR_PTR(error);
-	goto out;
 }
 
 int
@@ -717,27 +705,33 @@ nfsd_close(struct file *filp)
  * As this calls fsync (not fdatasync) there is no need for a write_inode
  * after it.
  */
-static inline void nfsd_dosync(struct file *filp, struct dentry *dp,
-			       struct file_operations *fop)
+static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
+			      struct file_operations *fop)
 {
 	struct inode *inode = dp->d_inode;
 	int (*fsync) (struct file *, struct dentry *, int);
+	int err = nfs_ok;
 
 	filemap_fdatawrite(inode->i_mapping);
 	if (fop && (fsync = fop->fsync))
-		fsync(filp, dp, 0);
+		err=fsync(filp, dp, 0);
 	filemap_fdatawait(inode->i_mapping);
+
+	return nfserrno(err);
 }
 	
 
-static void
+static int
 nfsd_sync(struct file *filp)
 {
+        int err;
 	struct inode *inode = filp->f_dentry->d_inode;
 	dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name);
-	down(&inode->i_sem);
-	nfsd_dosync(filp, filp->f_dentry, filp->f_op);
-	up(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
+	err=nfsd_dosync(filp, filp->f_dentry, filp->f_op);
+	mutex_unlock(&inode->i_mutex);
+
+	return err;
 }
 
 void
@@ -874,6 +868,16 @@ out:
 	return err;
 }
 
+static void kill_suid(struct dentry *dentry)
+{
+	struct iattr	ia;
+	ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID;
+
+	mutex_lock(&dentry->d_inode->i_mutex);
+	notify_change(dentry, &ia);
+	mutex_unlock(&dentry->d_inode->i_mutex);
+}
+
 static inline int
 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 				loff_t offset, struct kvec *vec, int vlen,
@@ -927,14 +931,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	}
 
 	/* clear setuid/setgid flag after write */
-	if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) {
-		struct iattr	ia;
-		ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID;
-
-		down(&inode->i_sem);
-		notify_change(dentry, &ia);
-		up(&inode->i_sem);
-	}
+	if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
+		kill_suid(dentry);
 
 	if (err >= 0 && stable) {
 		static ino_t	last_ino;
@@ -962,7 +960,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 
 			if (inode->i_state & I_DIRTY) {
 				dprintk("nfsd: write sync %d\n", current->pid);
-				nfsd_sync(file);
+				err=nfsd_sync(file);
 			}
 #if 0
 			wake_up(&inode->i_wait);
@@ -1066,7 +1064,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		return err;
 	if (EX_ISSYNC(fhp->fh_export)) {
 		if (file->f_op && file->f_op->fsync) {
-			nfsd_sync(file);
+			err = nfsd_sync(file);
 		} else {
 			err = nfserr_notsupp;
 		}
@@ -1874,39 +1872,25 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type)
 	ssize_t size;
 	struct posix_acl *acl;
 
-	if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr)
+	if (!IS_POSIXACL(inode))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = POSIX_ACL_XATTR_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = POSIX_ACL_XATTR_DEFAULT;
+		break;
+	default:
 		return ERR_PTR(-EOPNOTSUPP);
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			name = POSIX_ACL_XATTR_ACCESS;
-			break;
-		case ACL_TYPE_DEFAULT:
-			name = POSIX_ACL_XATTR_DEFAULT;
-			break;
-		default:
-			return ERR_PTR(-EOPNOTSUPP);
 	}
 
-	size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0);
+	size = nfsd_getxattr(fhp->fh_dentry, name, &value);
+	if (size < 0)
+		return ERR_PTR(size);
 
-	if (size < 0) {
-		acl = ERR_PTR(size);
-		goto getout;
-	} else if (size > 0) {
-		value = kmalloc(size, GFP_KERNEL);
-		if (!value) {
-			acl = ERR_PTR(-ENOMEM);
-			goto getout;
-		}
-		size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size);
-		if (size < 0) {
-			acl = ERR_PTR(size);
-			goto getout;
-		}
-	}
 	acl = posix_acl_from_xattr(value, size);
-
-getout:
 	kfree(value);
 	return acl;
 }
@@ -1947,16 +1931,13 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
 	} else
 		size = 0;
 
-	if (!fhp->fh_locked)
-		fh_lock(fhp);  /* unlocking is done automatically */
 	if (size)
-		error = inode->i_op->setxattr(fhp->fh_dentry, name,
-					      value, size, 0);
+		error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
 	else {
 		if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
 			error = 0;
 		else {
-			error = inode->i_op->removexattr(fhp->fh_dentry, name);
+			error = vfs_removexattr(fhp->fh_dentry, name);
 			if (error == -ENODATA)
 				error = 0;
 		}
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index eda056bac256..9480a0526cd3 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1532,7 +1532,7 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
  * NOTE to self: No changes in the attribute list are required to move from
  *		 a resident to a non-resident attribute.
  *
- * Locking: - The caller must hold i_sem on the inode.
+ * Locking: - The caller must hold i_mutex on the inode.
  */
 int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
 {
@@ -1728,7 +1728,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
 	/*
 	 * This needs to be last since the address space operations ->readpage
 	 * and ->writepage can run concurrently with us as they are not
-	 * serialized on i_sem.  Note, we are not allowed to fail once we flip
+	 * serialized on i_mutex.  Note, we are not allowed to fail once we flip
 	 * this switch, which is another reason to do this last.
 	 */
 	NInoSetNonResident(ni);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 795c3d1930f5..b0690d4c8906 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -69,7 +69,7 @@ ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'),
  * work but we don't care for how quickly one can access them. This also fixes
  * the dcache aliasing issues.
  *
- * Locking:  - Caller must hold i_sem on the directory.
+ * Locking:  - Caller must hold i_mutex on the directory.
  *	     - Each page cache page in the index allocation mapping must be
  *	       locked whilst being accessed otherwise we may find a corrupt
  *	       page due to it being under ->writepage at the moment which
@@ -1085,11 +1085,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
  * While this will return the names in random order this doesn't matter for
  * ->readdir but OTOH results in a faster ->readdir.
  *
- * VFS calls ->readdir without BKL but with i_sem held. This protects the VFS
+ * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS
  * parts (e.g. ->f_pos and ->i_size, and it also protects against directory
  * modifications).
  *
- * Locking:  - Caller must hold i_sem on the directory.
+ * Locking:  - Caller must hold i_mutex on the directory.
  *	     - Each page cache page in the index allocation mapping must be
  *	       locked whilst being accessed otherwise we may find a corrupt
  *	       page due to it being under ->writepage at the moment which
@@ -1520,7 +1520,7 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
  * Note: In the past @filp could be NULL so we ignore it as we don't need it
  * anyway.
  *
- * Locking: Caller must hold i_sem on the inode.
+ * Locking: Caller must hold i_mutex on the inode.
  *
  * TODO: We should probably also write all attribute/index inodes associated
  * with this inode but since we have no simple way of getting to them we ignore
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 727533891813..fb413d3d8618 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -106,7 +106,7 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
  * this is the case, the necessary zeroing will also have happened and that all
  * metadata is self-consistent.
  *
- * Locking: i_sem on the vfs inode corrseponsind to the ntfs inode @ni must be
+ * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be
  *	    held by the caller.
  */
 static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size,
@@ -473,7 +473,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh)
  * @bytes:	number of bytes to be written
  *
  * This is called for non-resident attributes from ntfs_file_buffered_write()
- * with i_sem held on the inode (@pages[0]->mapping->host).  There are
+ * with i_mutex held on the inode (@pages[0]->mapping->host).  There are
  * @nr_pages pages in @pages which are locked but not kmap()ped.  The source
  * data has not yet been copied into the @pages.
  * 
@@ -1637,7 +1637,7 @@ err_out:
  * @pos:	byte position in file at which the write begins
  * @bytes:	number of bytes to be written
  *
- * This is called from ntfs_file_buffered_write() with i_sem held on the inode
+ * This is called from ntfs_file_buffered_write() with i_mutex held on the inode
  * (@pages[0]->mapping->host).  There are @nr_pages pages in @pages which are
  * locked but not kmap()ped.  The source data has already been copied into the
  * @page.  ntfs_prepare_pages_for_non_resident_write() has been called before
@@ -1814,7 +1814,7 @@ err_out:
 /**
  * ntfs_file_buffered_write -
  *
- * Locking: The vfs is holding ->i_sem on the inode.
+ * Locking: The vfs is holding ->i_mutex on the inode.
  */
 static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 		const struct iovec *iov, unsigned long nr_segs,
@@ -2173,7 +2173,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
 	err = remove_suid(file->f_dentry);
 	if (err)
 		goto out;
-	inode_update_time(inode, 1);
+	file_update_time(file);
 	written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
 			count);
 out:
@@ -2196,9 +2196,9 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf,
 
 	BUG_ON(iocb->ki_pos != pos);
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 		int err = sync_page_range(inode, mapping, pos, ret);
 		if (err < 0)
@@ -2221,12 +2221,12 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
 	struct kiocb kiocb;
 	ssize_t ret;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	init_sync_kiocb(&kiocb, file);
 	ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
 	if (ret == -EIOCBQUEUED)
 		ret = wait_on_sync_kiocb(&kiocb);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 		int err = sync_page_range(inode, mapping, *ppos - ret, ret);
 		if (err < 0)
@@ -2269,7 +2269,7 @@ static ssize_t ntfs_file_write(struct file *file, const char __user *buf,
  * Note: In the past @filp could be NULL so we ignore it as we don't need it
  * anyway.
  *
- * Locking: Caller must hold i_sem on the inode.
+ * Locking: Caller must hold i_mutex on the inode.
  *
  * TODO: We should probably also write all attribute/index inodes associated
  * with this inode but since we have no simple way of getting to them we ignore
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 8f2d5727546f..9f5427c2d105 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -32,7 +32,7 @@
  * Allocate a new index context, initialize it with @idx_ni and return it.
  * Return NULL if allocation failed.
  *
- * Locking:  Caller must hold i_sem on the index inode.
+ * Locking:  Caller must hold i_mutex on the index inode.
  */
 ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
 {
@@ -50,7 +50,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
  *
  * Release the index context @ictx, releasing all associated resources.
  *
- * Locking:  Caller must hold i_sem on the index inode.
+ * Locking:  Caller must hold i_mutex on the index inode.
  */
 void ntfs_index_ctx_put(ntfs_index_context *ictx)
 {
@@ -106,7 +106,7 @@ void ntfs_index_ctx_put(ntfs_index_context *ictx)
  * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to
  * ensure that the changes are written to disk.
  *
- * Locking:  - Caller must hold i_sem on the index inode.
+ * Locking:  - Caller must hold i_mutex on the index inode.
  *	     - Each page cache page in the index allocation mapping must be
  *	       locked whilst being accessed otherwise we may find a corrupt
  *	       page due to it being under ->writepage at the moment which
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index b24f4c4b2c5c..ea1bd3feea1b 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2125,13 +2125,13 @@ void ntfs_put_inode(struct inode *vi)
 		ntfs_inode *ni = NTFS_I(vi);
 		if (NInoIndexAllocPresent(ni)) {
 			struct inode *bvi = NULL;
-			down(&vi->i_sem);
+			mutex_lock(&vi->i_mutex);
 			if (atomic_read(&vi->i_count) == 2) {
 				bvi = ni->itype.index.bmp_ino;
 				if (bvi)
 					ni->itype.index.bmp_ino = NULL;
 			}
-			up(&vi->i_sem);
+			mutex_unlock(&vi->i_mutex);
 			if (bvi)
 				iput(bvi);
 		}
@@ -2311,7 +2311,7 @@ static const char *es = "  Leaving inconsistent metadata.  Unmount and run "
  *
  * Returns 0 on success or -errno on error.
  *
- * Called with ->i_sem held.  In all but one case ->i_alloc_sem is held for
+ * Called with ->i_mutex held.  In all but one case ->i_alloc_sem is held for
  * writing.  The only case in the kernel where ->i_alloc_sem is not held is
  * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
  * with the current i_size as the offset.  The analogous place in NTFS is in
@@ -2767,7 +2767,25 @@ unm_done:
 	up_write(&ni->runlist.lock);
 done:
 	/* Update the mtime and ctime on the base inode. */
-	inode_update_time(VFS_I(base_ni), 1);
+	/* normally ->truncate shouldn't update ctime or mtime,
+	 * but ntfs did before so it got a copy & paste version
+	 * of file_update_time.  one day someone should fix this
+	 * for real.
+	 */
+	if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
+		struct timespec now = current_fs_time(VFS_I(base_ni)->i_sb);
+		int sync_it = 0;
+
+		if (!timespec_equal(&VFS_I(base_ni)->i_mtime, &now) ||
+		    !timespec_equal(&VFS_I(base_ni)->i_ctime, &now))
+			sync_it = 1;
+		VFS_I(base_ni)->i_mtime = now;
+		VFS_I(base_ni)->i_ctime = now;
+
+		if (sync_it)
+			mark_inode_dirty_sync(VFS_I(base_ni));
+	}
+
 	if (likely(!err)) {
 		NInoClearTruncateFailed(ni);
 		ntfs_debug("Done.");
@@ -2831,7 +2849,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
  * We also abort all changes of user, group, and mode as we do not implement
  * the NTFS ACLs yet.
  *
- * Called with ->i_sem held.  For the ATTR_SIZE (i.e. ->truncate) case, also
+ * Called with ->i_mutex held.  For the ATTR_SIZE (i.e. ->truncate) case, also
  * called with ->i_alloc_sem held for writing.
  *
  * Basically this is a copy of generic notify_change() and inode_setattr()
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 351dbc3b6e40..5ea9eb93af62 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -96,7 +96,7 @@
  *    name. We then convert the name to the current NLS code page, and proceed
  *    searching for a dentry with this name, etc, as in case 2), above.
  *
- * Locking: Caller must hold i_sem on the directory.
+ * Locking: Caller must hold i_mutex on the directory.
  */
 static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
 		struct nameidata *nd)
@@ -254,7 +254,7 @@ handle_name:
 	nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
 
 	/*
-	 * Note: No need for dent->d_lock lock as i_sem is held on the
+	 * Note: No need for dent->d_lock lock as i_mutex is held on the
 	 * parent inode.
 	 */
 
@@ -374,7 +374,7 @@ struct inode_operations ntfs_dir_inode_ops = {
  * The code is based on the ext3 ->get_parent() implementation found in
  * fs/ext3/namei.c::ext3_get_parent().
  *
- * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_sem down.
+ * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_mutex down.
  *
  * Return the dentry of the parent directory on success or the error code on
  * error (IS_ERR() is true).
diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c
index 833df2a4e9fb..d0ef4182147b 100644
--- a/fs/ntfs/quota.c
+++ b/fs/ntfs/quota.c
@@ -48,7 +48,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
 		ntfs_error(vol->sb, "Quota inodes are not open.");
 		return FALSE;
 	}
-	down(&vol->quota_q_ino->i_sem);
+	mutex_lock(&vol->quota_q_ino->i_mutex);
 	ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
 	if (!ictx) {
 		ntfs_error(vol->sb, "Failed to get index context.");
@@ -98,7 +98,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
 	ntfs_index_entry_mark_dirty(ictx);
 set_done:
 	ntfs_index_ctx_put(ictx);
-	up(&vol->quota_q_ino->i_sem);
+	mutex_unlock(&vol->quota_q_ino->i_mutex);
 	/*
 	 * We set the flag so we do not try to mark the quotas out of date
 	 * again on remount.
@@ -110,7 +110,7 @@ done:
 err_out:
 	if (ictx)
 		ntfs_index_ctx_put(ictx);
-	up(&vol->quota_q_ino->i_sem);
+	mutex_unlock(&vol->quota_q_ino->i_mutex);
 	return FALSE;
 }
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 6c16db9e1a8a..c3a3f1a8310b 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -443,8 +443,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 
 	ntfs_debug("Entering with remount options string: %s", opt);
 #ifndef NTFS_RW
-	/* For read-only compiled driver, enforce all read-only flags. */
-	*flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+	/* For read-only compiled driver, enforce read-only flag. */
+	*flags |= MS_RDONLY;
 #else /* NTFS_RW */
 	/*
 	 * For the read-write compiled driver, if we are remounting read-write,
@@ -1213,10 +1213,10 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
 	 * Find the inode number for the hibernation file by looking up the
 	 * filename hiberfil.sys in the root directory.
 	 */
-	down(&vol->root_ino->i_sem);
+	mutex_lock(&vol->root_ino->i_mutex);
 	mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
 			&name);
-	up(&vol->root_ino->i_sem);
+	mutex_unlock(&vol->root_ino->i_mutex);
 	if (IS_ERR_MREF(mref)) {
 		ret = MREF_ERR(mref);
 		/* If the file does not exist, Windows is not hibernated. */
@@ -1307,10 +1307,10 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
 	 * Find the inode number for the quota file by looking up the filename
 	 * $Quota in the extended system files directory $Extend.
 	 */
-	down(&vol->extend_ino->i_sem);
+	mutex_lock(&vol->extend_ino->i_mutex);
 	mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
 			&name);
-	up(&vol->extend_ino->i_sem);
+	mutex_unlock(&vol->extend_ino->i_mutex);
 	if (IS_ERR_MREF(mref)) {
 		/*
 		 * If the file does not exist, quotas are disabled and have
@@ -1390,10 +1390,10 @@ static BOOL load_and_init_usnjrnl(ntfs_volume *vol)
 	 * Find the inode number for the transaction log file by looking up the
 	 * filename $UsnJrnl in the extended system files directory $Extend.
 	 */
-	down(&vol->extend_ino->i_sem);
+	mutex_lock(&vol->extend_ino->i_mutex);
 	mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
 			&name);
-	up(&vol->extend_ino->i_sem);
+	mutex_unlock(&vol->extend_ino->i_mutex);
 	if (IS_ERR_MREF(mref)) {
 		/*
 		 * If the file does not exist, transaction logging is disabled,
@@ -1721,7 +1721,7 @@ static BOOL load_system_files(ntfs_volume *vol)
 						es3);
 				goto iput_mirr_err_out;
 			}
-			sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+			sb->s_flags |= MS_RDONLY;
 			ntfs_error(sb, "%s.  Mounting read-only%s",
 					!vol->mftmirr_ino ? es1 : es2, es3);
 		} else
@@ -1837,7 +1837,7 @@ get_ctx_vol_failed:
 						es1, es2);
 				goto iput_vol_err_out;
 			}
-			sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+			sb->s_flags |= MS_RDONLY;
 			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
 		} else
 			ntfs_warning(sb, "%s.  Will not be able to remount "
@@ -1874,7 +1874,7 @@ get_ctx_vol_failed:
 				}
 				goto iput_logfile_err_out;
 			}
-			sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+			sb->s_flags |= MS_RDONLY;
 			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
 		} else
 			ntfs_warning(sb, "%s.  Will not be able to remount "
@@ -1919,7 +1919,7 @@ get_ctx_vol_failed:
 						es1, es2);
 				goto iput_root_err_out;
 			}
-			sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+			sb->s_flags |= MS_RDONLY;
 			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
 		} else
 			ntfs_warning(sb, "%s.  Will not be able to remount "
@@ -1943,7 +1943,7 @@ get_ctx_vol_failed:
 			goto iput_root_err_out;
 		}
 		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+		sb->s_flags |= MS_RDONLY;
 		/*
 		 * Do not set NVolErrors() because ntfs_remount() might manage
 		 * to set the dirty flag in which case all would be well.
@@ -1970,7 +1970,7 @@ get_ctx_vol_failed:
 			goto iput_root_err_out;
 		}
 		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+		sb->s_flags |= MS_RDONLY;
 		NVolSetErrors(vol);
 	}
 #endif
@@ -1989,7 +1989,7 @@ get_ctx_vol_failed:
 			goto iput_root_err_out;
 		}
 		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+		sb->s_flags |= MS_RDONLY;
 		NVolSetErrors(vol);
 	}
 #endif /* NTFS_RW */
@@ -2030,7 +2030,7 @@ get_ctx_vol_failed:
 						es1, es2);
 				goto iput_quota_err_out;
 			}
-			sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+			sb->s_flags |= MS_RDONLY;
 			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
 		} else
 			ntfs_warning(sb, "%s.  Will not be able to remount "
@@ -2053,7 +2053,7 @@ get_ctx_vol_failed:
 			goto iput_quota_err_out;
 		}
 		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+		sb->s_flags |= MS_RDONLY;
 		NVolSetErrors(vol);
 	}
 	/*
@@ -2074,7 +2074,7 @@ get_ctx_vol_failed:
 						es1, es2);
 				goto iput_usnjrnl_err_out;
 			}
-			sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+			sb->s_flags |= MS_RDONLY;
 			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
 		} else
 			ntfs_warning(sb, "%s.  Will not be able to remount "
@@ -2097,7 +2097,7 @@ get_ctx_vol_failed:
 			goto iput_usnjrnl_err_out;
 		}
 		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+		sb->s_flags |= MS_RDONLY;
 		NVolSetErrors(vol);
 	}
 #endif /* NTFS_RW */
@@ -2312,9 +2312,9 @@ static void ntfs_put_super(struct super_block *sb)
 	if (!list_empty(&sb->s_dirty)) {
 		const char *s1, *s2;
 
-		down(&vol->mft_ino->i_sem);
+		mutex_lock(&vol->mft_ino->i_mutex);
 		truncate_inode_pages(vol->mft_ino->i_mapping, 0);
-		up(&vol->mft_ino->i_sem);
+		mutex_unlock(&vol->mft_ino->i_mutex);
 		write_inode_now(vol->mft_ino, 1);
 		if (!list_empty(&sb->s_dirty)) {
 			static const char *_s1 = "inodes";
@@ -2689,7 +2689,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 
 	ntfs_debug("Entering.");
 #ifndef NTFS_RW
-	sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+	sb->s_flags |= MS_RDONLY;
 #endif /* ! NTFS_RW */
 	/* Allocate a new ntfs_volume and place it in sb->s_fs_info. */
 	sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 465f797451ee..6b9812db3779 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -966,7 +966,7 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 	mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk,
 		   num_clusters);
 
-	BUG_ON(!down_trylock(&tl_inode->i_sem));
+	BUG_ON(mutex_trylock(&tl_inode->i_mutex));
 
 	start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
 
@@ -1108,7 +1108,7 @@ bail:
 	return status;
 }
 
-/* Expects you to already be holding tl_inode->i_sem */
+/* Expects you to already be holding tl_inode->i_mutex */
 static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 {
 	int status;
@@ -1123,7 +1123,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 
 	mlog_entry_void();
 
-	BUG_ON(!down_trylock(&tl_inode->i_sem));
+	BUG_ON(mutex_trylock(&tl_inode->i_mutex));
 
 	di = (struct ocfs2_dinode *) tl_bh->b_data;
 	tl = &di->id2.i_dealloc;
@@ -1198,9 +1198,9 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	int status;
 	struct inode *tl_inode = osb->osb_tl_inode;
 
-	down(&tl_inode->i_sem);
+	mutex_lock(&tl_inode->i_mutex);
 	status = __ocfs2_flush_truncate_log(osb);
-	up(&tl_inode->i_sem);
+	mutex_unlock(&tl_inode->i_mutex);
 
 	return status;
 }
@@ -1363,7 +1363,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
 	mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs,
 	     tl_copy->i_blkno);
 
-	down(&tl_inode->i_sem);
+	mutex_lock(&tl_inode->i_mutex);
 	for(i = 0; i < num_recs; i++) {
 		if (ocfs2_truncate_log_needs_flush(osb)) {
 			status = __ocfs2_flush_truncate_log(osb);
@@ -1395,7 +1395,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
 	}
 
 bail_up:
-	up(&tl_inode->i_sem);
+	mutex_unlock(&tl_inode->i_mutex);
 
 	mlog_exit(status);
 	return status;
@@ -1840,7 +1840,7 @@ start:
 
 	mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);
 
-	down(&tl_inode->i_sem);
+	mutex_lock(&tl_inode->i_mutex);
 	tl_sem = 1;
 	/* ocfs2_truncate_log_needs_flush guarantees us at least one
 	 * record is free for use. If there isn't any, we flush to get
@@ -1875,7 +1875,7 @@ start:
 		goto bail;
 	}
 
-	up(&tl_inode->i_sem);
+	mutex_unlock(&tl_inode->i_mutex);
 	tl_sem = 0;
 
 	ocfs2_commit_trans(handle);
@@ -1890,7 +1890,7 @@ bail:
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 
 	if (tl_sem)
-		up(&tl_inode->i_sem);
+		mutex_unlock(&tl_inode->i_mutex);
 
 	if (handle)
 		ocfs2_commit_trans(handle);
@@ -1994,7 +1994,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
 			goto bail;
 		}
 
-		down(&ext_alloc_inode->i_sem);
+		mutex_lock(&ext_alloc_inode->i_mutex);
 		(*tc)->tc_ext_alloc_inode = ext_alloc_inode;
 
 		status = ocfs2_meta_lock(ext_alloc_inode,
@@ -2026,7 +2026,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
 		if (tc->tc_ext_alloc_locked)
 			ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
 
-		up(&tc->tc_ext_alloc_inode->i_sem);
+		mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex);
 		iput(tc->tc_ext_alloc_inode);
 	}
 
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index f5ef5ea61a05..e8c56a3d9c64 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -212,11 +212,10 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
 	mlog(ML_ENTRY, "ENTRY:\n");					\
 } while (0)
 
-/* We disable this for old compilers since they don't have support for
- * __builtin_types_compatible_p.
+/*
+ * We disable this for sparse.
  */
-#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) && \
-    !defined(__CHECKER__)
+#if !defined(__CHECKER__)
 #define mlog_exit(st) do {						     \
 	if (__builtin_types_compatible_p(typeof(st), unsigned long))	     \
 		mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st));	     \
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 5fd60c105913..cf7828f23361 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -653,7 +653,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
 	struct config_group *o2hb_group = NULL, *ret = NULL;
 	void *defs = NULL;
 
-	/* this runs under the parent dir's i_sem; there can be only
+	/* this runs under the parent dir's i_mutex; there can be only
 	 * one caller in here at a time */
 	if (o2nm_single_cluster)
 		goto out; /* ENOSPC */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 856e20ae8263..57158fa75d91 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -202,7 +202,7 @@ bail:
 }
 
 /*
- * NOTE: this should always be called with parent dir i_sem taken.
+ * NOTE: this should always be called with parent dir i_mutex taken.
  */
 int ocfs2_find_files_on_disk(const char *name,
 			     int namelen,
@@ -245,7 +245,7 @@ leave:
  * Return 0 if the name does not exist
  * Return -EEXIST if the directory contains the name
  *
- * Callers should have i_sem + a cluster lock on dir
+ * Callers should have i_mutex + a cluster lock on dir
  */
 int ocfs2_check_dir_for_entry(struct inode *dir,
 			      const char *name,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 72ae9e3306f4..ca5f9f90d794 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -492,7 +492,7 @@ restart_all:
 	}
 
 	/* blocks peope in read/write from reading our allocation
-	 * until we're done changing it. We depend on i_sem to block
+	 * until we're done changing it. We depend on i_mutex to block
 	 * other extend/truncate calls while we're here. Ordering wrt
 	 * start_trans is important here -- always do it before! */
 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
@@ -958,8 +958,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 		filp->f_flags &= ~O_DIRECT;
 #endif
 
-	down(&inode->i_sem);
-	/* to match setattr's i_sem -> i_alloc_sem -> rw_lock ordering */
+	mutex_lock(&inode->i_mutex);
+	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
 	if (filp->f_flags & O_DIRECT) {
 		have_alloc_sem = 1;
 		down_read(&inode->i_alloc_sem);
@@ -1123,7 +1123,7 @@ out:
 		up_read(&inode->i_alloc_sem);
 	if (rw_level != -1) 
 		ocfs2_rw_unlock(inode, rw_level);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 
 	mlog_exit(ret);
 	return ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index a91ba4dec936..d4ecc0627716 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -485,10 +485,10 @@ static int ocfs2_remove_inode(struct inode *inode,
 		goto bail;
 	}
 
-	down(&inode_alloc_inode->i_sem);
+	mutex_lock(&inode_alloc_inode->i_mutex);
 	status = ocfs2_meta_lock(inode_alloc_inode, NULL, &inode_alloc_bh, 1);
 	if (status < 0) {
-		up(&inode_alloc_inode->i_sem);
+		mutex_unlock(&inode_alloc_inode->i_mutex);
 
 		mlog_errno(status);
 		goto bail;
@@ -536,7 +536,7 @@ bail_commit:
 	ocfs2_commit_trans(handle);
 bail_unlock:
 	ocfs2_meta_unlock(inode_alloc_inode, 1);
-	up(&inode_alloc_inode->i_sem);
+	mutex_unlock(&inode_alloc_inode->i_mutex);
 	brelse(inode_alloc_bh);
 bail:
 	iput(inode_alloc_inode);
@@ -567,10 +567,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
 	/* Lock the orphan dir. The lock will be held for the entire
 	 * delete_inode operation. We do this now to avoid races with
 	 * recovery completion on other nodes. */
-	down(&orphan_dir_inode->i_sem);
+	mutex_lock(&orphan_dir_inode->i_mutex);
 	status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1);
 	if (status < 0) {
-		up(&orphan_dir_inode->i_sem);
+		mutex_unlock(&orphan_dir_inode->i_mutex);
 
 		mlog_errno(status);
 		goto bail;
@@ -593,7 +593,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
 
 bail_unlock_dir:
 	ocfs2_meta_unlock(orphan_dir_inode, 1);
-	up(&orphan_dir_inode->i_sem);
+	mutex_unlock(&orphan_dir_inode->i_mutex);
 	brelse(orphan_dir_bh);
 bail:
 	iput(orphan_dir_inode);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 04428042e5e5..303c8d96457f 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -216,7 +216,7 @@ void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle,
 	atomic_inc(&inode->i_count);
 
 	/* we're obviously changing it... */
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 
 	/* sanity check */
 	BUG_ON(OCFS2_I(inode)->ip_handle);
@@ -241,7 +241,7 @@ static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle)
 		OCFS2_I(inode)->ip_handle = NULL;
 		list_del_init(&OCFS2_I(inode)->ip_handle_list);
 
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 		iput(inode);
 	}
 }
@@ -1433,10 +1433,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 		goto out;
 	}
 
-	down(&orphan_dir_inode->i_sem);
+	mutex_lock(&orphan_dir_inode->i_mutex);
 	status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0);
 	if (status < 0) {
-		up(&orphan_dir_inode->i_sem);
+		mutex_unlock(&orphan_dir_inode->i_mutex);
 		mlog_errno(status);
 		goto out;
 	}
@@ -1451,7 +1451,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 		if (!bh)
 			status = -EINVAL;
 		if (status < 0) {
-			up(&orphan_dir_inode->i_sem);
+			mutex_unlock(&orphan_dir_inode->i_mutex);
 			if (bh)
 				brelse(bh);
 			mlog_errno(status);
@@ -1465,7 +1465,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 
 			if (!ocfs2_check_dir_entry(orphan_dir_inode,
 						  de, bh, local)) {
-				up(&orphan_dir_inode->i_sem);
+				mutex_unlock(&orphan_dir_inode->i_mutex);
 				status = -EINVAL;
 				mlog_errno(status);
 				brelse(bh);
@@ -1509,7 +1509,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 		}
 		brelse(bh);
 	}
-	up(&orphan_dir_inode->i_sem);
+	mutex_unlock(&orphan_dir_inode->i_mutex);
 
 	ocfs2_meta_unlock(orphan_dir_inode, 0);
 	have_disk_lock = 0;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index fe373a2101d9..149b35181666 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -334,7 +334,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 
 	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
 				  &alloc_bh, 0, inode);
@@ -367,7 +367,7 @@ bail:
 		brelse(alloc_bh);
 
 	if (inode) {
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 		iput(inode);
 	}
 
@@ -446,7 +446,7 @@ bail:
 
 /*
  * make sure we've got at least bitswanted contiguous bits in the
- * local alloc. You lose them when you drop i_sem.
+ * local alloc. You lose them when you drop i_mutex.
  *
  * We will add ourselves to the transaction passed in, but may start
  * our own in order to shift windows.
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index afdeec4b0eef..843cf9ddefe8 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -80,12 +80,8 @@ static struct vm_operations_struct ocfs2_file_vm_ops = {
 	.nopage = ocfs2_nopage,
 };
 
-int ocfs2_mmap(struct file *file,
-	       struct vm_area_struct *vma)
+int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
-	struct inode *inode = mapping->host;
-
 	/* We don't want to support shared writable mappings yet. */
 	if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE))
 	    && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
@@ -95,7 +91,7 @@ int ocfs2_mmap(struct file *file,
 		return -EINVAL;
 	}
 
-	update_atime(inode);
+	file_accessed(file);
 	vma->vm_ops = &ocfs2_file_vm_ops;
 	return 0;
 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 48bf7f0ce544..364d64bd5f10 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -169,7 +169,7 @@ static match_table_t tokens = {
  */
 static void ocfs2_write_super(struct super_block *sb)
 {
-	if (down_trylock(&sb->s_lock) == 0)
+	if (mutex_trylock(&sb->s_lock) != 0)
 		BUG();
 	sb->s_dirt = 0;
 }
diff --git a/fs/open.c b/fs/open.c
index f53a5b9ffb7d..a3b3a9b5c2ff 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -194,7 +194,8 @@ out:
 	return error;
 }
 
-int do_truncate(struct dentry *dentry, loff_t length, struct file *filp)
+int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
+	struct file *filp)
 {
 	int err;
 	struct iattr newattrs;
@@ -204,19 +205,19 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp)
 		return -EINVAL;
 
 	newattrs.ia_size = length;
-	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+	newattrs.ia_valid = ATTR_SIZE | time_attrs;
 	if (filp) {
 		newattrs.ia_file = filp;
 		newattrs.ia_valid |= ATTR_FILE;
 	}
 
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	err = notify_change(dentry, &newattrs);
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 	return err;
 }
 
-static inline long do_sys_truncate(const char __user * path, loff_t length)
+static long do_sys_truncate(const char __user * path, loff_t length)
 {
 	struct nameidata nd;
 	struct inode * inode;
@@ -266,7 +267,7 @@ static inline long do_sys_truncate(const char __user * path, loff_t length)
 	error = locks_verify_truncate(inode, NULL, length);
 	if (!error) {
 		DQUOT_INIT(inode);
-		error = do_truncate(nd.dentry, length, NULL);
+		error = do_truncate(nd.dentry, length, 0, NULL);
 	}
 	put_write_access(inode);
 
@@ -282,7 +283,7 @@ asmlinkage long sys_truncate(const char __user * path, unsigned long length)
 	return do_sys_truncate(path, (long)length);
 }
 
-static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 {
 	struct inode * inode;
 	struct dentry *dentry;
@@ -318,7 +319,7 @@ static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 
 	error = locks_verify_truncate(inode, file, length);
 	if (!error)
-		error = do_truncate(dentry, length, file);
+		error = do_truncate(dentry, length, 0, file);
 out_putf:
 	fput(file);
 out:
@@ -397,9 +398,9 @@ asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
 		    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
 			goto dput_and_out;
 	}
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	error = notify_change(nd.dentry, &newattrs);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 dput_and_out:
 	path_release(&nd);
 out:
@@ -450,9 +451,9 @@ long do_utimes(char __user * filename, struct timeval * times)
 		    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
 			goto dput_and_out;
 	}
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	error = notify_change(nd.dentry, &newattrs);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 dput_and_out:
 	path_release(&nd);
 out:
@@ -619,13 +620,13 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
 	err = -EPERM;
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 		goto out_putf;
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	if (mode == (mode_t) -1)
 		mode = inode->i_mode;
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 	err = notify_change(dentry, &newattrs);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 
 out_putf:
 	fput(file);
@@ -653,13 +654,13 @@ asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 		goto dput_and_out;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	if (mode == (mode_t) -1)
 		mode = inode->i_mode;
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 	error = notify_change(nd.dentry, &newattrs);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 
 dput_and_out:
 	path_release(&nd);
@@ -695,9 +696,9 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
 	}
 	if (!S_ISDIR(inode->i_mode))
 		newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	error = notify_change(dentry, &newattrs);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 out:
 	return error;
 }
@@ -970,7 +971,7 @@ out:
 
 EXPORT_SYMBOL(get_unused_fd);
 
-static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
+static void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
 	struct fdtable *fdt = files_fdtable(files);
 	__FD_CLR(fd, fdt->open_fds);
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
index 656bc43431b9..7490cc9208b3 100644
--- a/fs/partitions/Kconfig
+++ b/fs/partitions/Kconfig
@@ -21,26 +21,30 @@ config ACORN_PARTITION
 	  Support hard disks partitioned under Acorn operating systems.
 
 config ACORN_PARTITION_CUMANA
-	bool "Cumana partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+	bool "Cumana partition support" if PARTITION_ADVANCED
 	default y if ARCH_ACORN
+	depends on ACORN_PARTITION
 	help
 	  Say Y here if you would like to use hard disks under Linux which
 	  were partitioned using the Cumana interface on Acorn machines.
 
 config ACORN_PARTITION_EESOX
-	bool "EESOX partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+	bool "EESOX partition support" if PARTITION_ADVANCED
 	default y if ARCH_ACORN
+	depends on ACORN_PARTITION
 
 config ACORN_PARTITION_ICS
-	bool "ICS partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+	bool "ICS partition support" if PARTITION_ADVANCED
 	default y if ARCH_ACORN
+	depends on ACORN_PARTITION
 	help
 	  Say Y here if you would like to use hard disks under Linux which
 	  were partitioned using the ICS interface on Acorn machines.
 
 config ACORN_PARTITION_ADFS
-	bool "Native filecore partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+	bool "Native filecore partition support" if PARTITION_ADVANCED
 	default y if ARCH_ACORN
+	depends on ACORN_PARTITION
 	help
 	  The Acorn Disc Filing System is the standard file system of the
 	  RiscOS operating system which runs on Acorn's ARM-based Risc PC
@@ -48,15 +52,17 @@ config ACORN_PARTITION_ADFS
 	  `Y' here, Linux will support disk partitions created under ADFS.
 
 config ACORN_PARTITION_POWERTEC
-	bool "PowerTec partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+	bool "PowerTec partition support" if PARTITION_ADVANCED
 	default y if ARCH_ACORN
+	depends on ACORN_PARTITION
 	help
 	  Support reading partition tables created on Acorn machines using
 	  the PowerTec SCSI drive.
 
 config ACORN_PARTITION_RISCIX
-	bool "RISCiX partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+	bool "RISCiX partition support" if PARTITION_ADVANCED
 	default y if ARCH_ACORN
+	depends on ACORN_PARTITION
 	help
 	  Once upon a time, there was a native Unix port for the Acorn series
 	  of machines called RISCiX.  If you say 'Y' here, Linux will be able
@@ -85,7 +91,7 @@ config ATARI_PARTITION
 
 config IBM_PARTITION
 	bool "IBM disk label and partition support"
-	depends on PARTITION_ADVANCED && ARCH_S390
+	depends on PARTITION_ADVANCED && S390
 	help
 	  Say Y here if you would like to be able to read the hard disk
 	  partition table format used by IBM DASD disks operating under CMS.
@@ -224,5 +230,3 @@ config EFI_PARTITION
 	  Say Y here if you would like to use hard disks under Linux which
 	  were partitioned using EFI GPT.  Presently only useful on the
 	  IA-64 platform.
-
-#      define_bool CONFIG_ACORN_PARTITION_CUMANA y
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 6327bcb2d73d..78010ad60e47 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -56,7 +56,10 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	struct hd_geometry *geo;
 	char type[5] = {0,};
 	char name[7] = {0,};
-	struct vtoc_volume_label *vlabel;
+	union label_t {
+		struct vtoc_volume_label vol;
+		struct vtoc_cms_label cms;
+	} *label;
 	unsigned char *data;
 	Sector sect;
 
@@ -64,9 +67,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 		goto out_noinfo;
 	if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL)
 		goto out_nogeo;
-	if ((vlabel = kmalloc(sizeof(struct vtoc_volume_label),
-			      GFP_KERNEL)) == NULL)
-		goto out_novlab;
+	if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL)
+		goto out_nolab;
 	
 	if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 ||
 	    ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
@@ -87,7 +89,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 		strncpy(name, data + 8, 6);
 	else
 		strncpy(name, data + 4, 6);
-	memcpy (vlabel, data, sizeof(struct vtoc_volume_label));
+	memcpy(label, data, sizeof(union label_t));
 	put_dev_sector(sect);
 
 	EBCASC(type, 4);
@@ -100,14 +102,12 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 		/*
 		 * VM style CMS1 labeled disk
 		 */
-		int *label = (int *) vlabel;
-
-		if (label[13] != 0) {
+		if (label->cms.disk_offset != 0) {
 			printk("CMS1/%8s(MDSK):", name);
 			/* disk is reserved minidisk */
-			blocksize = label[3];
-			offset = label[13];
-			size = (label[7] - 1)*(blocksize >> 9);
+			blocksize = label->cms.block_size;
+			offset = label->cms.disk_offset;
+			size = (label->cms.block_count - 1) * (blocksize >> 9);
 		} else {
 			printk("CMS1/%8s:", name);
 			offset = (info->label_block + 1);
@@ -126,7 +126,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 		printk("VOL1/%8s:", name);
 
 		/* get block number and read then go through format1 labels */
-		blk = cchhb2blk(&vlabel->vtoc, geo) + 1;
+		blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
 		counter = 0;
 		while ((data = read_dev_sector(bdev, blk*(blocksize/512),
 					       &sect)) != NULL) {
@@ -174,7 +174,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	}
 
 	printk("\n");
-	kfree(vlabel);
+	kfree(label);
 	kfree(geo);
 	kfree(info);
 	return 1;
@@ -182,8 +182,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 out_readerr:
 out_badsect:
 out_noioctl:
-	kfree(vlabel);
-out_novlab:
+	kfree(label);
+out_nolab:
 	kfree(geo);
 out_nogeo:
 	kfree(info);
diff --git a/fs/pipe.c b/fs/pipe.c
index 66aa0b938d6a..eef0f29e86ef 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -44,10 +44,10 @@ void pipe_wait(struct inode * inode)
 	 * is considered a noninteractive wait:
 	 */
 	prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 	schedule();
 	finish_wait(PIPE_WAIT(*inode), &wait);
-	down(PIPE_SEM(*inode));
+	mutex_lock(PIPE_MUTEX(*inode));
 }
 
 static inline int
@@ -136,7 +136,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
 
 	do_wakeup = 0;
 	ret = 0;
-	down(PIPE_SEM(*inode));
+	mutex_lock(PIPE_MUTEX(*inode));
 	info = inode->i_pipe;
 	for (;;) {
 		int bufs = info->nrbufs;
@@ -200,7 +200,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
 		}
 		pipe_wait(inode);
 	}
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 	/* Signal writers asynchronously that there is more room.  */
 	if (do_wakeup) {
 		wake_up_interruptible(PIPE_WAIT(*inode));
@@ -237,7 +237,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 
 	do_wakeup = 0;
 	ret = 0;
-	down(PIPE_SEM(*inode));
+	mutex_lock(PIPE_MUTEX(*inode));
 	info = inode->i_pipe;
 
 	if (!PIPE_READERS(*inode)) {
@@ -341,13 +341,13 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 		PIPE_WAITING_WRITERS(*inode)--;
 	}
 out:
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 	if (do_wakeup) {
 		wake_up_interruptible(PIPE_WAIT(*inode));
 		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
 	}
 	if (ret > 0)
-		inode_update_time(inode, 1);	/* mtime and ctime */
+		file_update_time(filp);
 	return ret;
 }
 
@@ -381,7 +381,7 @@ pipe_ioctl(struct inode *pino, struct file *filp,
 
 	switch (cmd) {
 		case FIONREAD:
-			down(PIPE_SEM(*inode));
+			mutex_lock(PIPE_MUTEX(*inode));
 			info =  inode->i_pipe;
 			count = 0;
 			buf = info->curbuf;
@@ -390,7 +390,7 @@ pipe_ioctl(struct inode *pino, struct file *filp,
 				count += info->bufs[buf].len;
 				buf = (buf+1) & (PIPE_BUFFERS-1);
 			}
-			up(PIPE_SEM(*inode));
+			mutex_unlock(PIPE_MUTEX(*inode));
 			return put_user(count, (int __user *)arg);
 		default:
 			return -EINVAL;
@@ -433,7 +433,7 @@ pipe_poll(struct file *filp, poll_table *wait)
 static int
 pipe_release(struct inode *inode, int decr, int decw)
 {
-	down(PIPE_SEM(*inode));
+	mutex_lock(PIPE_MUTEX(*inode));
 	PIPE_READERS(*inode) -= decr;
 	PIPE_WRITERS(*inode) -= decw;
 	if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
@@ -443,7 +443,7 @@ pipe_release(struct inode *inode, int decr, int decw)
 		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
 		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
 	}
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 
 	return 0;
 }
@@ -454,9 +454,9 @@ pipe_read_fasync(int fd, struct file *filp, int on)
 	struct inode *inode = filp->f_dentry->d_inode;
 	int retval;
 
-	down(PIPE_SEM(*inode));
+	mutex_lock(PIPE_MUTEX(*inode));
 	retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 
 	if (retval < 0)
 		return retval;
@@ -471,9 +471,9 @@ pipe_write_fasync(int fd, struct file *filp, int on)
 	struct inode *inode = filp->f_dentry->d_inode;
 	int retval;
 
-	down(PIPE_SEM(*inode));
+	mutex_lock(PIPE_MUTEX(*inode));
 	retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 
 	if (retval < 0)
 		return retval;
@@ -488,14 +488,14 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
 	struct inode *inode = filp->f_dentry->d_inode;
 	int retval;
 
-	down(PIPE_SEM(*inode));
+	mutex_lock(PIPE_MUTEX(*inode));
 
 	retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
 
 	if (retval >= 0)
 		retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
 
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 
 	if (retval < 0)
 		return retval;
@@ -534,9 +534,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
 {
 	/* We could have perhaps used atomic_t, but this and friends
 	   below are the only places.  So it doesn't seem worthwhile.  */
-	down(PIPE_SEM(*inode));
+	mutex_lock(PIPE_MUTEX(*inode));
 	PIPE_READERS(*inode)++;
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 
 	return 0;
 }
@@ -544,9 +544,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
 static int
 pipe_write_open(struct inode *inode, struct file *filp)
 {
-	down(PIPE_SEM(*inode));
+	mutex_lock(PIPE_MUTEX(*inode));
 	PIPE_WRITERS(*inode)++;
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 
 	return 0;
 }
@@ -554,12 +554,12 @@ pipe_write_open(struct inode *inode, struct file *filp)
 static int
 pipe_rdwr_open(struct inode *inode, struct file *filp)
 {
-	down(PIPE_SEM(*inode));
+	mutex_lock(PIPE_MUTEX(*inode));
 	if (filp->f_mode & FMODE_READ)
 		PIPE_READERS(*inode)++;
 	if (filp->f_mode & FMODE_WRITE)
 		PIPE_WRITERS(*inode)++;
-	up(PIPE_SEM(*inode));
+	mutex_unlock(PIPE_MUTEX(*inode));
 
 	return 0;
 }
diff --git a/fs/pnode.c b/fs/pnode.c
index aeeec8ba8dd2..f1871f773f64 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -103,7 +103,7 @@ static struct vfsmount *propagation_next(struct vfsmount *m,
 		struct vfsmount *next;
 		struct vfsmount *master = m->mnt_master;
 
-		if ( master == origin->mnt_master ) {
+		if (master == origin->mnt_master) {
 			next = next_peer(m);
 			return ((next == origin) ? NULL : next);
 		} else if (m->mnt_slave.next != &master->mnt_slave_list)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3e1239e4b303..7eb1bd7f800c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -308,7 +308,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
 	buffer = task_sig(task, buffer);
 	buffer = task_cap(task, buffer);
 	buffer = cpuset_task_status_allowed(task, buffer);
-#if defined(CONFIG_ARCH_S390)
+#if defined(CONFIG_S390)
 	buffer = task_show_regs(task, buffer);
 #endif
 	return buffer - orig;
@@ -330,7 +330,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 	unsigned long  min_flt = 0,  maj_flt = 0;
 	cputime_t cutime, cstime, utime, stime;
 	unsigned long rsslim = 0;
-	unsigned long it_real_value = 0;
+	DEFINE_KTIME(it_real_value);
 	struct task_struct *t;
 	char tcomm[sizeof(task->comm)];
 
@@ -386,7 +386,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 			utime = cputime_add(utime, task->signal->utime);
 			stime = cputime_add(stime, task->signal->stime);
 		}
-		it_real_value = task->signal->it_real_value;
+		it_real_value = task->signal->real_timer.expires;
 	}
 	ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
 	read_unlock(&tasklist_lock);
@@ -435,7 +435,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 		priority,
 		nice,
 		num_threads,
-		jiffies_to_clock_t(it_real_value),
+		(long) ktime_to_clock_t(it_real_value),
 		start_time,
 		vsize,
 		mm ? get_mm_rss(mm) : 0,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 72b431d0a0a4..20e5c4509a43 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -21,6 +21,8 @@
 #include <linux/bitops.h>
 #include <asm/uaccess.h>
 
+#include "internal.h"
+
 static ssize_t proc_file_read(struct file *file, char __user *buf,
 			      size_t nbytes, loff_t *ppos);
 static ssize_t proc_file_write(struct file *file, const char __user *buffer,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index e6a818a93f3d..6573f31f1fd9 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -19,7 +19,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-extern void free_proc_entry(struct proc_dir_entry *);
+#include "internal.h"
 
 static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
 {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3e55198f9806..95a1cf32b838 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -37,6 +37,10 @@ extern int proc_tgid_stat(struct task_struct *, char *);
 extern int proc_pid_status(struct task_struct *, char *);
 extern int proc_pid_statm(struct task_struct *, char *);
 
+void free_proc_entry(struct proc_dir_entry *de);
+
+int proc_init_inodecache(void);
+
 static inline struct task_struct *proc_task(struct inode *inode)
 {
 	return PROC_I(inode)->task;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5b6b0b6038a7..63bf6c00fa0c 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -323,6 +323,7 @@ static struct file_operations proc_modules_operations = {
 };
 #endif
 
+#ifdef CONFIG_SLAB
 extern struct seq_operations slabinfo_op;
 extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *);
 static int slabinfo_open(struct inode *inode, struct file *file)
@@ -336,6 +337,7 @@ static struct file_operations proc_slabinfo_operations = {
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
+#endif
 
 static int show_stat(struct seq_file *p, void *v)
 {
@@ -600,7 +602,9 @@ void __init proc_misc_init(void)
 	create_seq_entry("partitions", 0, &proc_partitions_operations);
 	create_seq_entry("stat", 0, &proc_stat_operations);
 	create_seq_entry("interrupts", 0, &proc_interrupts_operations);
+#ifdef CONFIG_SLAB
 	create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
+#endif
 	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
 	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
 	create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index aef148f099a2..68896283c8ae 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,8 @@
 #include <linux/bitops.h>
 #include <linux/smp_lock.h>
 
+#include "internal.h"
+
 struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
 
 #ifdef CONFIG_SYSCTL
@@ -36,7 +38,6 @@ static struct file_system_type proc_fs_type = {
 	.kill_sb	= kill_anon_super,
 };
 
-extern int __init proc_init_inodecache(void);
 void __init proc_root_init(void)
 {
 	int err = proc_init_inodecache();
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 50bd5a8f0446..0eaad41f4658 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -390,129 +390,12 @@ struct seq_operations proc_pid_smaps_op = {
 };
 
 #ifdef CONFIG_NUMA
-
-struct numa_maps {
-	unsigned long pages;
-	unsigned long anon;
-	unsigned long mapped;
-	unsigned long mapcount_max;
-	unsigned long node[MAX_NUMNODES];
-};
-
-/*
- * Calculate numa node maps for a vma
- */
-static struct numa_maps *get_numa_maps(struct vm_area_struct *vma)
-{
-	int i;
-	struct page *page;
-	unsigned long vaddr;
-	struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
-
-	if (!md)
-		return NULL;
-	md->pages = 0;
-	md->anon = 0;
-	md->mapped = 0;
-	md->mapcount_max = 0;
-	for_each_node(i)
-		md->node[i] =0;
-
- 	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
-		page = follow_page(vma, vaddr, 0);
-		if (page) {
-			int count = page_mapcount(page);
-
-			if (count)
-				md->mapped++;
-			if (count > md->mapcount_max)
-				md->mapcount_max = count;
-			md->pages++;
-			if (PageAnon(page))
-				md->anon++;
-			md->node[page_to_nid(page)]++;
-		}
-		cond_resched();
-	}
-	return md;
-}
-
-static int show_numa_map(struct seq_file *m, void *v)
-{
-	struct task_struct *task = m->private;
-	struct vm_area_struct *vma = v;
-	struct mempolicy *pol;
-	struct numa_maps *md;
-	struct zone **z;
-	int n;
-	int first;
-
-	if (!vma->vm_mm)
-		return 0;
-
-	md = get_numa_maps(vma);
-	if (!md)
-		return 0;
-
-	seq_printf(m, "%08lx", vma->vm_start);
-	pol = get_vma_policy(task, vma, vma->vm_start);
-	/* Print policy */
-	switch (pol->policy) {
-	case MPOL_PREFERRED:
-		seq_printf(m, " prefer=%d", pol->v.preferred_node);
-		break;
-	case MPOL_BIND:
-		seq_printf(m, " bind={");
-		first = 1;
-		for (z = pol->v.zonelist->zones; *z; z++) {
-
-			if (!first)
-				seq_putc(m, ',');
-			else
-				first = 0;
-			seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id,
-					(*z)->name);
-		}
-		seq_putc(m, '}');
-		break;
-	case MPOL_INTERLEAVE:
-		seq_printf(m, " interleave={");
-		first = 1;
-		for_each_node(n) {
-			if (node_isset(n, pol->v.nodes)) {
-				if (!first)
-					seq_putc(m,',');
-				else
-					first = 0;
-				seq_printf(m, "%d",n);
-			}
-		}
-		seq_putc(m, '}');
-		break;
-	default:
-		seq_printf(m," default");
-		break;
-	}
-	seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu",
-			md->mapcount_max, md->pages, md->mapped);
-	if (md->anon)
-		seq_printf(m," Anon=%lu",md->anon);
-
-	for_each_online_node(n) {
-		if (md->node[n])
-			seq_printf(m, " N%d=%lu", n, md->node[n]);
-	}
-	seq_putc(m, '\n');
-	kfree(md);
-	if (m->count < m->size)  /* vma is copied successfully */
-		m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
-	return 0;
-}
+extern int show_numa_map(struct seq_file *m, void *v);
 
 struct seq_operations proc_pid_numa_maps_op = {
-	.start	= m_start,
-	.next	= m_next,
-	.stop	= m_stop,
-	.show	= show_numa_map
+        .start  = m_start,
+        .next   = m_next,
+        .stop   = m_stop,
+        .show   = show_numa_map
 };
 #endif
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 3b2e7b69e63a..5378d7c78419 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -35,6 +35,9 @@ static size_t elfcorebuf_sz;
 /* Total size of vmcore file. */
 static u64 vmcore_size;
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 struct proc_dir_entry *proc_vmcore = NULL;
 
 /* Reads a page from the oldmem device from given offset. */
diff --git a/fs/quota.c b/fs/quota.c
index 612e04db4b93..d14d872646d4 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -168,7 +168,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
 	sync_blockdev(sb->s_bdev);
 
 	/* Now when everything is written we can discard the pagecache so
-	 * that userspace sees the changes. We need i_sem and so we could
+	 * that userspace sees the changes. We need i_mutex and so we could
 	 * not do it inside dqonoff_sem. Moreover we need to be carefull
 	 * about races with quotaoff() (that is the reason why we have own
 	 * reference to inode). */
@@ -184,9 +184,9 @@ static void quota_sync_sb(struct super_block *sb, int type)
 	up(&sb_dqopt(sb)->dqonoff_sem);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (discard[cnt]) {
-			down(&discard[cnt]->i_sem);
+			mutex_lock(&discard[cnt]->i_mutex);
 			truncate_inode_pages(&discard[cnt]->i_data, 0);
-			up(&discard[cnt]->i_sem);
+			mutex_unlock(&discard[cnt]->i_mutex);
 			iput(discard[cnt]);
 		}
 	}
diff --git a/fs/ramfs/Makefile b/fs/ramfs/Makefile
index f096f3007091..5a0236e02ee1 100644
--- a/fs/ramfs/Makefile
+++ b/fs/ramfs/Makefile
@@ -4,4 +4,6 @@
 
 obj-$(CONFIG_RAMFS) += ramfs.o
 
-ramfs-objs := inode.o
+file-mmu-y := file-nommu.o
+file-mmu-$(CONFIG_MMU) := file-mmu.o
+ramfs-objs += inode.o $(file-mmu-y)
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
new file mode 100644
index 000000000000..2115383dcc8d
--- /dev/null
+++ b/fs/ramfs/file-mmu.c
@@ -0,0 +1,57 @@
+/* file-mmu.c: ramfs MMU-based file operations
+ *
+ * Resizable simple ram filesystem for Linux.
+ *
+ * Copyright (C) 2000 Linus Torvalds.
+ *               2000 Transmeta Corp.
+ *
+ * Usage limits added by David Gibson, Linuxcare Australia.
+ * This file is released under the GPL.
+ */
+
+/*
+ * NOTE! This filesystem is probably most useful
+ * not as a real filesystem, but as an example of
+ * how virtual filesystems can be written.
+ *
+ * It doesn't get much simpler than this. Consider
+ * that this file implements the full semantics of
+ * a POSIX-compliant read-write filesystem.
+ *
+ * Note in particular how the filesystem does not
+ * need to implement any data structures of its own
+ * to keep track of the virtual data: using the VFS
+ * caches is sufficient.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+#include <linux/ramfs.h>
+
+#include <asm/uaccess.h>
+#include "internal.h"
+
+struct address_space_operations ramfs_aops = {
+	.readpage	= simple_readpage,
+	.prepare_write	= simple_prepare_write,
+	.commit_write	= simple_commit_write
+};
+
+struct file_operations ramfs_file_operations = {
+	.read		= generic_file_read,
+	.write		= generic_file_write,
+	.mmap		= generic_file_mmap,
+	.fsync		= simple_sync_file,
+	.sendfile	= generic_file_sendfile,
+	.llseek		= generic_file_llseek,
+};
+
+struct inode_operations ramfs_file_inode_operations = {
+	.getattr	= simple_getattr,
+};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
new file mode 100644
index 000000000000..3f810acd0bfa
--- /dev/null
+++ b/fs/ramfs/file-nommu.c
@@ -0,0 +1,292 @@
+/* file-nommu.c: no-MMU version of ramfs
+ *
+ * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+#include <linux/ramfs.h>
+#include <linux/quotaops.h>
+#include <linux/pagevec.h>
+#include <linux/mman.h>
+
+#include <asm/uaccess.h>
+#include "internal.h"
+
+static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
+
+struct address_space_operations ramfs_aops = {
+	.readpage		= simple_readpage,
+	.prepare_write		= simple_prepare_write,
+	.commit_write		= simple_commit_write
+};
+
+struct file_operations ramfs_file_operations = {
+	.mmap			= ramfs_nommu_mmap,
+	.get_unmapped_area	= ramfs_nommu_get_unmapped_area,
+	.read			= generic_file_read,
+	.write			= generic_file_write,
+	.fsync			= simple_sync_file,
+	.sendfile		= generic_file_sendfile,
+	.llseek			= generic_file_llseek,
+};
+
+struct inode_operations ramfs_file_inode_operations = {
+	.setattr		= ramfs_nommu_setattr,
+	.getattr		= simple_getattr,
+};
+
+/*****************************************************************************/
+/*
+ * add a contiguous set of pages into a ramfs inode when it's truncated from
+ * size 0 on the assumption that it's going to be used for an mmap of shared
+ * memory
+ */
+static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
+{
+	struct pagevec lru_pvec;
+	unsigned long npages, xpages, loop, limit;
+	struct page *pages;
+	unsigned order;
+	void *data;
+	int ret;
+
+	/* make various checks */
+	order = get_order(newsize);
+	if (unlikely(order >= MAX_ORDER))
+		goto too_big;
+
+	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	if (limit != RLIM_INFINITY && newsize > limit)
+		goto fsize_exceeded;
+
+	if (newsize > inode->i_sb->s_maxbytes)
+		goto too_big;
+
+	i_size_write(inode, newsize);
+
+	/* allocate enough contiguous pages to be able to satisfy the
+	 * request */
+	pages = alloc_pages(mapping_gfp_mask(inode->i_mapping), order);
+	if (!pages)
+		return -ENOMEM;
+
+	/* split the high-order page into an array of single pages */
+	xpages = 1UL << order;
+	npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	for (loop = 0; loop < npages; loop++)
+		set_page_count(pages + loop, 1);
+
+	/* trim off any pages we don't actually require */
+	for (loop = npages; loop < xpages; loop++)
+		__free_page(pages + loop);
+
+	/* clear the memory we allocated */
+	newsize = PAGE_SIZE * npages;
+	data = page_address(pages);
+	memset(data, 0, newsize);
+
+	/* attach all the pages to the inode's address space */
+	pagevec_init(&lru_pvec, 0);
+	for (loop = 0; loop < npages; loop++) {
+		struct page *page = pages + loop;
+
+		ret = add_to_page_cache(page, inode->i_mapping, loop, GFP_KERNEL);
+		if (ret < 0)
+			goto add_error;
+
+		if (!pagevec_add(&lru_pvec, page))
+			__pagevec_lru_add(&lru_pvec);
+
+		unlock_page(page);
+	}
+
+	pagevec_lru_add(&lru_pvec);
+	return 0;
+
+ fsize_exceeded:
+	send_sig(SIGXFSZ, current, 0);
+ too_big:
+	return -EFBIG;
+
+ add_error:
+	page_cache_release(pages + loop);
+	for (loop++; loop < npages; loop++)
+		__free_page(pages + loop);
+	return ret;
+}
+
+/*****************************************************************************/
+/*
+ * check that file shrinkage doesn't leave any VMAs dangling in midair
+ */
+static int ramfs_nommu_check_mappings(struct inode *inode,
+				      size_t newsize, size_t size)
+{
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+
+	/* search for VMAs that fall within the dead zone */
+	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+			      newsize >> PAGE_SHIFT,
+			      (size + PAGE_SIZE - 1) >> PAGE_SHIFT
+			      ) {
+		/* found one - only interested if it's shared out of the page
+		 * cache */
+		if (vma->vm_flags & VM_SHARED)
+			return -ETXTBSY; /* not quite true, but near enough */
+	}
+
+	return 0;
+}
+
+/*****************************************************************************/
+/*
+ *
+ */
+static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
+{
+	int ret;
+
+	/* assume a truncate from zero size is going to be for the purposes of
+	 * shared mmap */
+	if (size == 0) {
+		if (unlikely(newsize >> 32))
+			return -EFBIG;
+
+		return ramfs_nommu_expand_for_mapping(inode, newsize);
+	}
+
+	/* check that a decrease in size doesn't cut off any shared mappings */
+	if (newsize < size) {
+		ret = ramfs_nommu_check_mappings(inode, newsize, size);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = vmtruncate(inode, size);
+
+	return ret;
+}
+
+/*****************************************************************************/
+/*
+ * handle a change of attributes
+ * - we're specifically interested in a change of size
+ */
+static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
+{
+	struct inode *inode = dentry->d_inode;
+	unsigned int old_ia_valid = ia->ia_valid;
+	int ret = 0;
+
+	/* by providing our own setattr() method, we skip this quotaism */
+	if ((old_ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
+	    (old_ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid))
+		ret = DQUOT_TRANSFER(inode, ia) ? -EDQUOT : 0;
+
+	/* pick out size-changing events */
+	if (ia->ia_valid & ATTR_SIZE) {
+		loff_t size = i_size_read(inode);
+		if (ia->ia_size != size) {
+			ret = ramfs_nommu_resize(inode, ia->ia_size, size);
+			if (ret < 0 || ia->ia_valid == ATTR_SIZE)
+				goto out;
+		} else {
+			/* we skipped the truncate but must still update
+			 * timestamps
+			 */
+			ia->ia_valid |= ATTR_MTIME|ATTR_CTIME;
+		}
+	}
+
+	ret = inode_setattr(inode, ia);
+ out:
+	ia->ia_valid = old_ia_valid;
+	return ret;
+}
+
+/*****************************************************************************/
+/*
+ * try to determine where a shared mapping can be made
+ * - we require that:
+ *   - the pages to be mapped must exist
+ *   - the pages be physically contiguous in sequence
+ */
+unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
+					    unsigned long addr, unsigned long len,
+					    unsigned long pgoff, unsigned long flags)
+{
+	unsigned long maxpages, lpages, nr, loop, ret;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct page **pages = NULL, **ptr, *page;
+	loff_t isize;
+
+	if (!(flags & MAP_SHARED))
+		return addr;
+
+	/* the mapping mustn't extend beyond the EOF */
+	lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	isize = i_size_read(inode);
+
+	ret = -EINVAL;
+	maxpages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	if (pgoff >= maxpages)
+		goto out;
+
+	if (maxpages - pgoff < lpages)
+		goto out;
+
+	/* gang-find the pages */
+	ret = -ENOMEM;
+	pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out;
+
+	nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages);
+	if (nr != lpages)
+		goto out; /* leave if some pages were missing */
+
+	/* check the pages for physical adjacency */
+	ptr = pages;
+	page = *ptr++;
+	page++;
+	for (loop = lpages; loop > 1; loop--)
+		if (*ptr++ != page++)
+			goto out;
+
+	/* okay - all conditions fulfilled */
+	ret = (unsigned long) page_address(pages[0]);
+
+ out:
+	if (pages) {
+		ptr = pages;
+		for (loop = lpages; loop > 0; loop--)
+			put_page(*ptr++);
+		kfree(pages);
+	}
+
+	return ret;
+}
+
+/*****************************************************************************/
+/*
+ * set up a mapping
+ */
+int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	return 0;
+}
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 0a88917605ae..c66bd5e4c05c 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -34,13 +34,12 @@
 #include <linux/ramfs.h>
 
 #include <asm/uaccess.h>
+#include "internal.h"
 
 /* some random number */
 #define RAMFS_MAGIC	0x858458f6
 
 static struct super_operations ramfs_ops;
-static struct address_space_operations ramfs_aops;
-static struct inode_operations ramfs_file_inode_operations;
 static struct inode_operations ramfs_dir_inode_operations;
 
 static struct backing_dev_info ramfs_backing_dev_info = {
@@ -142,25 +141,6 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char *
 	return error;
 }
 
-static struct address_space_operations ramfs_aops = {
-	.readpage	= simple_readpage,
-	.prepare_write	= simple_prepare_write,
-	.commit_write	= simple_commit_write
-};
-
-struct file_operations ramfs_file_operations = {
-	.read		= generic_file_read,
-	.write		= generic_file_write,
-	.mmap		= generic_file_mmap,
-	.fsync		= simple_sync_file,
-	.sendfile	= generic_file_sendfile,
-	.llseek		= generic_file_llseek,
-};
-
-static struct inode_operations ramfs_file_inode_operations = {
-	.getattr	= simple_getattr,
-};
-
 static struct inode_operations ramfs_dir_inode_operations = {
 	.create		= ramfs_create,
 	.lookup		= simple_lookup,
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
new file mode 100644
index 000000000000..272c8a7120b0
--- /dev/null
+++ b/fs/ramfs/internal.h
@@ -0,0 +1,15 @@
+/* internal.h: ramfs internal definitions
+ *
+ * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+extern struct address_space_operations ramfs_aops;
+extern struct file_operations ramfs_file_operations;
+extern struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/read_write.c b/fs/read_write.c
index df3468a22fea..3f7a1a62165f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -33,7 +33,7 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 	long long retval;
 	struct inode *inode = file->f_mapping->host;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	switch (origin) {
 		case 2:
 			offset += inode->i_size;
@@ -49,7 +49,7 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 		}
 		retval = offset;
 	}
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return retval;
 }
 
diff --git a/fs/readdir.c b/fs/readdir.c
index b03579bc0210..b6109329b607 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -30,13 +30,13 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf)
 	if (res)
 		goto out;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	res = -ENOENT;
 	if (!IS_DEADDIR(inode)) {
 		res = file->f_op->readdir(file, buf, filler);
 		file_accessed(file);
 	}
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 out:
 	return res;
 }
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 7892a865b58a..ad6fa964b0e7 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -49,7 +49,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 	}
 
 	reiserfs_write_lock(inode->i_sb);
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	/* freeing preallocation only involves relogging blocks that
 	 * are already in the current transaction.  preallocation gets
 	 * freed at the end of each transaction, so it is impossible for
@@ -100,7 +100,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 		err = reiserfs_truncate_file(inode, 0);
 	}
       out:
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	reiserfs_write_unlock(inode->i_sb);
 	return err;
 }
@@ -1342,7 +1342,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 		return -EFAULT;
 
-	down(&inode->i_sem);	// locks the entire file for just us
+	mutex_lock(&inode->i_mutex);	// locks the entire file for just us
 
 	pos = *ppos;
 
@@ -1360,7 +1360,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 	if (res)
 		goto out;
 
-	inode_update_time(inode, 1);	/* Both mtime and ctime */
+	file_update_time(file);
 
 	// Ok, we are done with all the checks.
 
@@ -1532,12 +1532,12 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 		    generic_osync_inode(inode, file->f_mapping,
 					OSYNC_METADATA | OSYNC_DATA);
 
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	reiserfs_async_progress_wait(inode->i_sb);
 	return (already_written != 0) ? already_written : res;
 
       out:
-	up(&inode->i_sem);	// unlock the file on exit.
+	mutex_unlock(&inode->i_mutex);	// unlock the file on exit.
 	return res;
 }
 
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a5e3a0ddbe53..ffa34b861bdb 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -40,12 +40,12 @@ void reiserfs_delete_inode(struct inode *inode)
 
 	/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
 	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {	/* also handles bad_inode case */
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 
 		reiserfs_delete_xattrs(inode);
 
 		if (journal_begin(&th, inode->i_sb, jbegin_count)) {
-			up(&inode->i_sem);
+			mutex_unlock(&inode->i_mutex);
 			goto out;
 		}
 		reiserfs_update_inode_transaction(inode);
@@ -59,11 +59,11 @@ void reiserfs_delete_inode(struct inode *inode)
 			DQUOT_FREE_INODE(inode);
 
 		if (journal_end(&th, inode->i_sb, jbegin_count)) {
-			up(&inode->i_sem);
+			mutex_unlock(&inode->i_mutex);
 			goto out;
 		}
 
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 
 		/* check return value from reiserfs_delete_object after
 		 * ending the transaction
@@ -551,7 +551,7 @@ static int convert_tail_for_hole(struct inode *inode,
 
 	/* we don't have to make sure the conversion did not happen while
 	 ** we were locking the page because anyone that could convert
-	 ** must first take i_sem.
+	 ** must first take i_mutex.
 	 **
 	 ** We must fix the tail page for writing because it might have buffers
 	 ** that are mapped, but have a block number of 0.  This indicates tail
@@ -586,7 +586,7 @@ static inline int _allocate_block(struct reiserfs_transaction_handle *th,
 	BUG_ON(!th->t_trans_id);
 
 #ifdef REISERFS_PREALLOCATE
-	if (!(flags & GET_BLOCK_NO_ISEM)) {
+	if (!(flags & GET_BLOCK_NO_IMUX)) {
 		return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
 						  path, block);
 	}
@@ -2318,7 +2318,7 @@ static int map_block_for_writepage(struct inode *inode,
 	/* this is where we fill in holes in the file. */
 	if (use_get_block) {
 		retval = reiserfs_get_block(inode, block, bh_result,
-					    GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM
+					    GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
 					    | GET_BLOCK_NO_DANGLE);
 		if (!retval) {
 			if (!buffer_mapped(bh_result)
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 81fc00285f60..ba8bf8df6dc7 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -120,7 +120,7 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp)
 	/* we need to make sure nobody is changing the file size beneath
 	 ** us
 	 */
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 
 	write_from = inode->i_size & (blocksize - 1);
 	/* if we are on a block boundary, we are already unpacked.  */
@@ -156,7 +156,7 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp)
 	page_cache_release(page);
 
       out:
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	reiserfs_write_unlock(inode->i_sb);
 	return retval;
 }
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 42afb5bef111..397d9590c8f2 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2211,7 +2211,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 	size_t towrite = len;
 	struct buffer_head tmp_bh, *bh;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 		    sb->s_blocksize - offset : towrite;
@@ -2250,7 +2250,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 	inode->i_version++;
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	mark_inode_dirty(inode);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return len - towrite;
 }
 
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index c92e124f628e..196e971c03c9 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -205,7 +205,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
 					 1) * p_s_sb->s_blocksize;
 	pos1 = pos;
 
-	// we are protected by i_sem. The tail can not disapper, not
+	// we are protected by i_mutex. The tail can not disapper, not
 	// append can be done either
 	// we are in truncate or packing tail in file_release
 
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 02091eaac0b4..6f99e01f94ab 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -67,11 +67,11 @@ static struct dentry *create_xa_root(struct super_block *sb)
 		goto out;
 	} else if (!xaroot->d_inode) {
 		int err;
-		down(&privroot->d_inode->i_sem);
+		mutex_lock(&privroot->d_inode->i_mutex);
 		err =
 		    privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot,
 						   0700);
-		up(&privroot->d_inode->i_sem);
+		mutex_unlock(&privroot->d_inode->i_mutex);
 
 		if (err) {
 			dput(xaroot);
@@ -219,7 +219,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode,
 	} else if (flags & XATTR_REPLACE || flags & FL_READONLY) {
 		goto out;
 	} else {
-		/* inode->i_sem is down, so nothing else can try to create
+		/* inode->i_mutex is down, so nothing else can try to create
 		 * the same xattr */
 		err = xadir->d_inode->i_op->create(xadir->d_inode, xafile,
 						   0700 | S_IFREG, NULL);
@@ -268,7 +268,7 @@ static struct file *open_xa_file(const struct inode *inode, const char *name,
  * and don't mess with f->f_pos, but the idea is the same.  Do some
  * action on each and every entry in the directory.
  *
- * we're called with i_sem held, so there are no worries about the directory
+ * we're called with i_mutex held, so there are no worries about the directory
  * changing underneath us.
  */
 static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
@@ -426,7 +426,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
 	int res = -ENOTDIR;
 	if (!file->f_op || !file->f_op->readdir)
 		goto out;
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 //        down(&inode->i_zombie);
 	res = -ENOENT;
 	if (!IS_DEADDIR(inode)) {
@@ -435,7 +435,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
 		unlock_kernel();
 	}
 //        up(&inode->i_zombie);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
       out:
 	return res;
 }
@@ -480,7 +480,7 @@ static inline __u32 xattr_hash(const char *msg, int len)
 /* Generic extended attribute operations that can be used by xa plugins */
 
 /*
- * inode->i_sem: down
+ * inode->i_mutex: down
  */
 int
 reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
@@ -497,12 +497,6 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
 	struct iattr newattrs;
 	__u32 xahash = 0;
 
-	if (IS_RDONLY(inode))
-		return -EROFS;
-
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		return -EPERM;
-
 	if (get_inode_sd_version(inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
@@ -535,7 +529,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
 	/* Resize it so we're ok to write there */
 	newattrs.ia_size = buffer_size;
 	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-	down(&xinode->i_sem);
+	mutex_lock(&xinode->i_mutex);
 	err = notify_change(fp->f_dentry, &newattrs);
 	if (err)
 		goto out_filp;
@@ -598,7 +592,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
 	}
 
       out_filp:
-	up(&xinode->i_sem);
+	mutex_unlock(&xinode->i_mutex);
 	fput(fp);
 
       out:
@@ -606,7 +600,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
 }
 
 /*
- * inode->i_sem: down
+ * inode->i_mutex: down
  */
 int
 reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
@@ -758,9 +752,6 @@ int reiserfs_xattr_del(struct inode *inode, const char *name)
 	struct dentry *dir;
 	int err;
 
-	if (IS_RDONLY(inode))
-		return -EROFS;
-
 	dir = open_xa_dir(inode, FL_READONLY);
 	if (IS_ERR(dir)) {
 		err = PTR_ERR(dir);
@@ -793,7 +784,7 @@ reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen,
 
 }
 
-/* This is called w/ inode->i_sem downed */
+/* This is called w/ inode->i_mutex downed */
 int reiserfs_delete_xattrs(struct inode *inode)
 {
 	struct file *fp;
@@ -946,7 +937,7 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
 
 /*
  * Inode operation getxattr()
- * Preliminary locking: we down dentry->d_inode->i_sem
+ * Preliminary locking: we down dentry->d_inode->i_mutex
  */
 ssize_t
 reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
@@ -970,7 +961,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
 /*
  * Inode operation setxattr()
  *
- * dentry->d_inode->i_sem down
+ * dentry->d_inode->i_mutex down
  */
 int
 reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
@@ -984,12 +975,6 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	    get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	if (IS_RDONLY(dentry->d_inode))
-		return -EROFS;
-
-	if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode))
-		return -EROFS;
-
 	reiserfs_write_lock_xattr_i(dentry->d_inode);
 	lock = !has_xattr_dir(dentry->d_inode);
 	if (lock)
@@ -1008,7 +993,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 /*
  * Inode operation removexattr()
  *
- * dentry->d_inode->i_sem down
+ * dentry->d_inode->i_mutex down
  */
 int reiserfs_removexattr(struct dentry *dentry, const char *name)
 {
@@ -1019,12 +1004,6 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name)
 	    get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	if (IS_RDONLY(dentry->d_inode))
-		return -EROFS;
-
-	if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode))
-		return -EPERM;
-
 	reiserfs_write_lock_xattr_i(dentry->d_inode);
 	reiserfs_read_lock_xattrs(dentry->d_sb);
 
@@ -1091,7 +1070,7 @@ reiserfs_listxattr_filler(void *buf, const char *name, int namelen,
 /*
  * Inode operation listxattr()
  *
- * Preliminary locking: we down dentry->d_inode->i_sem
+ * Preliminary locking: we down dentry->d_inode->i_mutex
  */
 ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
 {
@@ -1289,9 +1268,9 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
 		if (!IS_ERR(dentry)) {
 			if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
 				struct inode *inode = dentry->d_parent->d_inode;
-				down(&inode->i_sem);
+				mutex_lock(&inode->i_mutex);
 				err = inode->i_op->mkdir(inode, dentry, 0700);
-				up(&inode->i_sem);
+				mutex_unlock(&inode->i_mutex);
 				if (err) {
 					dput(dentry);
 					dentry = NULL;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index a47ac9aac8b2..2dc953504cc0 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -174,7 +174,7 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
 /*
  * Inode operation get_posix_acl().
  *
- * inode->i_sem: down
+ * inode->i_mutex: down
  * BKL held [before 2.5.x]
  */
 struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
@@ -237,7 +237,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 /*
  * Inode operation set_posix_acl().
  *
- * inode->i_sem: down
+ * inode->i_mutex: down
  * BKL held [before 2.5.x]
  */
 static int
@@ -312,7 +312,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	return error;
 }
 
-/* dir->i_sem: down,
+/* dir->i_mutex: locked,
  * inode is new and not released into the wild yet */
 int
 reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 51458048ca66..073f39364b11 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -16,18 +16,10 @@ static int
 user_get(struct inode *inode, const char *name, void *buffer, size_t size)
 {
 
-	int error;
-
 	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
 		return -EINVAL;
-
 	if (!reiserfs_xattrs_user(inode->i_sb))
 		return -EOPNOTSUPP;
-
-	error = reiserfs_permission_locked(inode, MAY_READ, NULL);
-	if (error)
-		return error;
-
 	return reiserfs_xattr_get(inode, name, buffer, size);
 }
 
@@ -36,43 +28,21 @@ user_set(struct inode *inode, const char *name, const void *buffer,
 	 size_t size, int flags)
 {
 
-	int error;
-
 	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
 		return -EINVAL;
 
 	if (!reiserfs_xattrs_user(inode->i_sb))
 		return -EOPNOTSUPP;
-
-	if (!S_ISREG(inode->i_mode) &&
-	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-		return -EPERM;
-
-	error = reiserfs_permission_locked(inode, MAY_WRITE, NULL);
-	if (error)
-		return error;
-
 	return reiserfs_xattr_set(inode, name, buffer, size, flags);
 }
 
 static int user_del(struct inode *inode, const char *name)
 {
-	int error;
-
 	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
 		return -EINVAL;
 
 	if (!reiserfs_xattrs_user(inode->i_sb))
 		return -EOPNOTSUPP;
-
-	if (!S_ISREG(inode->i_mode) &&
-	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-		return -EPERM;
-
-	error = reiserfs_permission_locked(inode, MAY_WRITE, NULL);
-	if (error)
-		return error;
-
 	return 0;
 }
 
diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c
index 84e21ffa5ca8..10187812771e 100644
--- a/fs/relayfs/buffers.c
+++ b/fs/relayfs/buffers.c
@@ -185,5 +185,6 @@ void relay_destroy_buf(struct rchan_buf *buf)
 void relay_remove_buf(struct kref *kref)
 {
 	struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
-	relayfs_remove(buf->dentry);
+	buf->chan->cb->remove_buf_file(buf->dentry);
+	relay_destroy_buf(buf);
 }
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c
index 0f7f88d067ad..383523011aad 100644
--- a/fs/relayfs/inode.c
+++ b/fs/relayfs/inode.c
@@ -26,31 +26,22 @@
 
 static struct vfsmount *		relayfs_mount;
 static int				relayfs_mount_count;
-static kmem_cache_t *			relayfs_inode_cachep;
 
 static struct backing_dev_info		relayfs_backing_dev_info = {
 	.ra_pages	= 0,	/* No readahead */
 	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
 };
 
-static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
-				       struct rchan *chan)
+static struct inode *relayfs_get_inode(struct super_block *sb,
+				       int mode,
+ 				       struct file_operations *fops,
+				       void *data)
 {
-	struct rchan_buf *buf = NULL;
 	struct inode *inode;
 
-	if (S_ISREG(mode)) {
-		BUG_ON(!chan);
-		buf = relay_create_buf(chan);
-		if (!buf)
-			return NULL;
-	}
-
 	inode = new_inode(sb);
-	if (!inode) {
-		relay_destroy_buf(buf);
+	if (!inode)
 		return NULL;
-	}
 
 	inode->i_mode = mode;
 	inode->i_uid = 0;
@@ -61,8 +52,9 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	switch (mode & S_IFMT) {
 	case S_IFREG:
-		inode->i_fop = &relayfs_file_operations;
-		RELAYFS_I(inode)->buf = buf;
+		inode->i_fop = fops;
+		if (data)
+			inode->u.generic_ip = data;
 		break;
 	case S_IFDIR:
 		inode->i_op = &simple_dir_inode_operations;
@@ -83,7 +75,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
  *	@name: the name of the file to create
  *	@parent: parent directory
  *	@mode: mode
- *	@chan: relay channel associated with the file
+ *	@fops: file operations to use for the file
+ *	@data: user-associated data for this file
  *
  *	Returns the new dentry, NULL on failure
  *
@@ -92,7 +85,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
 static struct dentry *relayfs_create_entry(const char *name,
 					   struct dentry *parent,
 					   int mode,
-					   struct rchan *chan)
+					   struct file_operations *fops,
+					   void *data)
 {
 	struct dentry *d;
 	struct inode *inode;
@@ -115,7 +109,7 @@ static struct dentry *relayfs_create_entry(const char *name,
 	}
 
 	parent = dget(parent);
-	down(&parent->d_inode->i_sem);
+	mutex_lock(&parent->d_inode->i_mutex);
 	d = lookup_one_len(name, parent, strlen(name));
 	if (IS_ERR(d)) {
 		d = NULL;
@@ -127,7 +121,7 @@ static struct dentry *relayfs_create_entry(const char *name,
 		goto release_mount;
 	}
 
-	inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan);
+	inode = relayfs_get_inode(parent->d_inode->i_sb, mode, fops, data);
 	if (!inode) {
 		d = NULL;
 		goto release_mount;
@@ -145,7 +139,7 @@ release_mount:
 	simple_release_fs(&relayfs_mount, &relayfs_mount_count);
 
 exit:
-	up(&parent->d_inode->i_sem);
+	mutex_unlock(&parent->d_inode->i_mutex);
 	dput(parent);
 	return d;
 }
@@ -155,20 +149,26 @@ exit:
  *	@name: the name of the file to create
  *	@parent: parent directory
  *	@mode: mode, if not specied the default perms are used
- *	@chan: channel associated with the file
+ *	@fops: file operations to use for the file
+ *	@data: user-associated data for this file
  *
  *	Returns file dentry if successful, NULL otherwise.
  *
  *	The file will be created user r on behalf of current user.
  */
-struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
-				   int mode, struct rchan *chan)
+struct dentry *relayfs_create_file(const char *name,
+				   struct dentry *parent,
+				   int mode,
+				   struct file_operations *fops,
+				   void *data)
 {
+	BUG_ON(!fops);
+
 	if (!mode)
 		mode = S_IRUSR;
 	mode = (mode & S_IALLUGO) | S_IFREG;
 
-	return relayfs_create_entry(name, parent, mode, chan);
+	return relayfs_create_entry(name, parent, mode, fops, data);
 }
 
 /**
@@ -183,7 +183,7 @@ struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
 struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
 {
 	int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-	return relayfs_create_entry(name, parent, mode, NULL);
+	return relayfs_create_entry(name, parent, mode, NULL, NULL);
 }
 
 /**
@@ -204,7 +204,7 @@ int relayfs_remove(struct dentry *dentry)
 		return -EINVAL;
 
 	parent = dget(parent);
-	down(&parent->d_inode->i_sem);
+	mutex_lock(&parent->d_inode->i_mutex);
 	if (dentry->d_inode) {
 		if (S_ISDIR(dentry->d_inode->i_mode))
 			error = simple_rmdir(parent->d_inode, dentry);
@@ -215,7 +215,7 @@ int relayfs_remove(struct dentry *dentry)
 	}
 	if (!error)
 		dput(dentry);
-	up(&parent->d_inode->i_sem);
+	mutex_unlock(&parent->d_inode->i_mutex);
 	dput(parent);
 
 	if (!error)
@@ -225,6 +225,17 @@ int relayfs_remove(struct dentry *dentry)
 }
 
 /**
+ *	relayfs_remove_file - remove a file from relay filesystem
+ *	@dentry: directory dentry
+ *
+ *	Returns 0 if successful, negative otherwise.
+ */
+int relayfs_remove_file(struct dentry *dentry)
+{
+	return relayfs_remove(dentry);
+}
+
+/**
  *	relayfs_remove_dir - remove a directory in the relay filesystem
  *	@dentry: directory dentry
  *
@@ -236,45 +247,45 @@ int relayfs_remove_dir(struct dentry *dentry)
 }
 
 /**
- *	relayfs_open - open file op for relayfs files
+ *	relay_file_open - open file op for relay files
  *	@inode: the inode
  *	@filp: the file
  *
  *	Increments the channel buffer refcount.
  */
-static int relayfs_open(struct inode *inode, struct file *filp)
+static int relay_file_open(struct inode *inode, struct file *filp)
 {
-	struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+	struct rchan_buf *buf = inode->u.generic_ip;
 	kref_get(&buf->kref);
+	filp->private_data = buf;
 
 	return 0;
 }
 
 /**
- *	relayfs_mmap - mmap file op for relayfs files
+ *	relay_file_mmap - mmap file op for relay files
  *	@filp: the file
  *	@vma: the vma describing what to map
  *
  *	Calls upon relay_mmap_buf to map the file into user space.
  */
-static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma)
+static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
-	return relay_mmap_buf(RELAYFS_I(inode)->buf, vma);
+	struct rchan_buf *buf = filp->private_data;
+	return relay_mmap_buf(buf, vma);
 }
 
 /**
- *	relayfs_poll - poll file op for relayfs files
+ *	relay_file_poll - poll file op for relay files
  *	@filp: the file
  *	@wait: poll table
  *
  *	Poll implemention.
  */
-static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
+static unsigned int relay_file_poll(struct file *filp, poll_table *wait)
 {
 	unsigned int mask = 0;
-	struct inode *inode = filp->f_dentry->d_inode;
-	struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+	struct rchan_buf *buf = filp->private_data;
 
 	if (buf->finalized)
 		return POLLERR;
@@ -289,27 +300,27 @@ static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
 }
 
 /**
- *	relayfs_release - release file op for relayfs files
+ *	relay_file_release - release file op for relay files
  *	@inode: the inode
  *	@filp: the file
  *
  *	Decrements the channel refcount, as the filesystem is
  *	no longer using it.
  */
-static int relayfs_release(struct inode *inode, struct file *filp)
+static int relay_file_release(struct inode *inode, struct file *filp)
 {
-	struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+	struct rchan_buf *buf = filp->private_data;
 	kref_put(&buf->kref, relay_remove_buf);
 
 	return 0;
 }
 
 /**
- *	relayfs_read_consume - update the consumed count for the buffer
+ *	relay_file_read_consume - update the consumed count for the buffer
  */
-static void relayfs_read_consume(struct rchan_buf *buf,
-				 size_t read_pos,
-				 size_t bytes_consumed)
+static void relay_file_read_consume(struct rchan_buf *buf,
+				    size_t read_pos,
+				    size_t bytes_consumed)
 {
 	size_t subbuf_size = buf->chan->subbuf_size;
 	size_t n_subbufs = buf->chan->n_subbufs;
@@ -332,9 +343,9 @@ static void relayfs_read_consume(struct rchan_buf *buf,
 }
 
 /**
- *	relayfs_read_avail - boolean, are there unconsumed bytes available?
+ *	relay_file_read_avail - boolean, are there unconsumed bytes available?
  */
-static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
+static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
 {
 	size_t bytes_produced, bytes_consumed, write_offset;
 	size_t subbuf_size = buf->chan->subbuf_size;
@@ -365,16 +376,16 @@ static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
 	if (bytes_produced == bytes_consumed)
 		return 0;
 
-	relayfs_read_consume(buf, read_pos, 0);
+	relay_file_read_consume(buf, read_pos, 0);
 
 	return 1;
 }
 
 /**
- *	relayfs_read_subbuf_avail - return bytes available in sub-buffer
+ *	relay_file_read_subbuf_avail - return bytes available in sub-buffer
  */
-static size_t relayfs_read_subbuf_avail(size_t read_pos,
-					struct rchan_buf *buf)
+static size_t relay_file_read_subbuf_avail(size_t read_pos,
+					   struct rchan_buf *buf)
 {
 	size_t padding, avail = 0;
 	size_t read_subbuf, read_offset, write_subbuf, write_offset;
@@ -396,14 +407,14 @@ static size_t relayfs_read_subbuf_avail(size_t read_pos,
 }
 
 /**
- *	relayfs_read_start_pos - find the first available byte to read
+ *	relay_file_read_start_pos - find the first available byte to read
  *
  *	If the read_pos is in the middle of padding, return the
  *	position of the first actually available byte, otherwise
  *	return the original value.
  */
-static size_t relayfs_read_start_pos(size_t read_pos,
-				     struct rchan_buf *buf)
+static size_t relay_file_read_start_pos(size_t read_pos,
+					struct rchan_buf *buf)
 {
 	size_t read_subbuf, padding, padding_start, padding_end;
 	size_t subbuf_size = buf->chan->subbuf_size;
@@ -422,11 +433,11 @@ static size_t relayfs_read_start_pos(size_t read_pos,
 }
 
 /**
- *	relayfs_read_end_pos - return the new read position
+ *	relay_file_read_end_pos - return the new read position
  */
-static size_t relayfs_read_end_pos(struct rchan_buf *buf,
-				   size_t read_pos,
-				   size_t count)
+static size_t relay_file_read_end_pos(struct rchan_buf *buf,
+				      size_t read_pos,
+				      size_t count)
 {
 	size_t read_subbuf, padding, end_pos;
 	size_t subbuf_size = buf->chan->subbuf_size;
@@ -445,7 +456,7 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf,
 }
 
 /**
- *	relayfs_read - read file op for relayfs files
+ *	relay_file_read - read file op for relay files
  *	@filp: the file
  *	@buffer: the userspace buffer
  *	@count: number of bytes to read
@@ -454,23 +465,23 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf,
  *	Reads count bytes or the number of bytes available in the
  *	current sub-buffer being read, whichever is smaller.
  */
-static ssize_t relayfs_read(struct file *filp,
-			    char __user *buffer,
-			    size_t count,
-			    loff_t *ppos)
+static ssize_t relay_file_read(struct file *filp,
+			       char __user *buffer,
+			       size_t count,
+			       loff_t *ppos)
 {
+	struct rchan_buf *buf = filp->private_data;
 	struct inode *inode = filp->f_dentry->d_inode;
-	struct rchan_buf *buf = RELAYFS_I(inode)->buf;
 	size_t read_start, avail;
 	ssize_t ret = 0;
 	void *from;
 
-	down(&inode->i_sem);
-	if(!relayfs_read_avail(buf, *ppos))
+	mutex_lock(&inode->i_mutex);
+	if(!relay_file_read_avail(buf, *ppos))
 		goto out;
 
-	read_start = relayfs_read_start_pos(*ppos, buf);
-	avail = relayfs_read_subbuf_avail(read_start, buf);
+	read_start = relay_file_read_start_pos(*ppos, buf);
+	avail = relay_file_read_subbuf_avail(read_start, buf);
 	if (!avail)
 		goto out;
 
@@ -480,58 +491,25 @@ static ssize_t relayfs_read(struct file *filp,
 		ret = -EFAULT;
 		goto out;
 	}
-	relayfs_read_consume(buf, read_start, count);
-	*ppos = relayfs_read_end_pos(buf, read_start, count);
+	relay_file_read_consume(buf, read_start, count);
+	*ppos = relay_file_read_end_pos(buf, read_start, count);
 out:
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
 
-/**
- *	relayfs alloc_inode() implementation
- */
-static struct inode *relayfs_alloc_inode(struct super_block *sb)
-{
-	struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL);
-	if (!p)
-		return NULL;
-	p->buf = NULL;
-
-	return &p->vfs_inode;
-}
-
-/**
- *	relayfs destroy_inode() implementation
- */
-static void relayfs_destroy_inode(struct inode *inode)
-{
-	if (RELAYFS_I(inode)->buf)
-		relay_destroy_buf(RELAYFS_I(inode)->buf);
-
-	kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode));
-}
-
-static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
-{
-	struct relayfs_inode_info *i = p;
-	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
-		inode_init_once(&i->vfs_inode);
-}
-
-struct file_operations relayfs_file_operations = {
-	.open		= relayfs_open,
-	.poll		= relayfs_poll,
-	.mmap		= relayfs_mmap,
-	.read		= relayfs_read,
+struct file_operations relay_file_operations = {
+	.open		= relay_file_open,
+	.poll		= relay_file_poll,
+	.mmap		= relay_file_mmap,
+	.read		= relay_file_read,
 	.llseek		= no_llseek,
-	.release	= relayfs_release,
+	.release	= relay_file_release,
 };
 
 static struct super_operations relayfs_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= generic_delete_inode,
-	.alloc_inode	= relayfs_alloc_inode,
-	.destroy_inode	= relayfs_destroy_inode,
 };
 
 static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
@@ -544,7 +522,7 @@ static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = RELAYFS_MAGIC;
 	sb->s_op = &relayfs_ops;
-	inode = relayfs_get_inode(sb, mode, NULL);
+	inode = relayfs_get_inode(sb, mode, NULL, NULL);
 
 	if (!inode)
 		return -ENOMEM;
@@ -575,33 +553,27 @@ static struct file_system_type relayfs_fs_type = {
 
 static int __init init_relayfs_fs(void)
 {
-	int err;
-
-	relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache",
-				sizeof(struct relayfs_inode_info), 0,
-				0, init_once, NULL);
-	if (!relayfs_inode_cachep)
-		return -ENOMEM;
-
-	err = register_filesystem(&relayfs_fs_type);
-	if (err)
-		kmem_cache_destroy(relayfs_inode_cachep);
-
-	return err;
+	return register_filesystem(&relayfs_fs_type);
 }
 
 static void __exit exit_relayfs_fs(void)
 {
+
+
+
+
+
 	unregister_filesystem(&relayfs_fs_type);
-	kmem_cache_destroy(relayfs_inode_cachep);
 }
 
 module_init(init_relayfs_fs)
 module_exit(exit_relayfs_fs)
 
-EXPORT_SYMBOL_GPL(relayfs_file_operations);
+EXPORT_SYMBOL_GPL(relay_file_operations);
 EXPORT_SYMBOL_GPL(relayfs_create_dir);
 EXPORT_SYMBOL_GPL(relayfs_remove_dir);
+EXPORT_SYMBOL_GPL(relayfs_create_file);
+EXPORT_SYMBOL_GPL(relayfs_remove_file);
 
 MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
 MODULE_DESCRIPTION("Relay Filesystem");
diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c
index 2a6f7f12b7f9..abf3ceaace49 100644
--- a/fs/relayfs/relay.c
+++ b/fs/relayfs/relay.c
@@ -80,11 +80,34 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf,
 {
 }
 
+/*
+ * create_buf_file_create() default callback.  Creates file to represent buf.
+ */
+static struct dentry *create_buf_file_default_callback(const char *filename,
+						       struct dentry *parent,
+						       int mode,
+						       struct rchan_buf *buf,
+						       int *is_global)
+{
+	return relayfs_create_file(filename, parent, mode,
+				   &relay_file_operations, buf);
+}
+
+/*
+ * remove_buf_file() default callback.  Removes file representing relay buffer.
+ */
+static int remove_buf_file_default_callback(struct dentry *dentry)
+{
+	return relayfs_remove(dentry);
+}
+
 /* relay channel default callbacks */
 static struct rchan_callbacks default_channel_callbacks = {
 	.subbuf_start = subbuf_start_default_callback,
 	.buf_mapped = buf_mapped_default_callback,
 	.buf_unmapped = buf_unmapped_default_callback,
+	.create_buf_file = create_buf_file_default_callback,
+	.remove_buf_file = remove_buf_file_default_callback,
 };
 
 /**
@@ -148,14 +171,16 @@ static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
 void relay_reset(struct rchan *chan)
 {
 	unsigned int i;
+	struct rchan_buf *prev = NULL;
 
 	if (!chan)
 		return;
 
 	for (i = 0; i < NR_CPUS; i++) {
-		if (!chan->buf[i])
-			continue;
+		if (!chan->buf[i] || chan->buf[i] == prev)
+			break;
 		__relay_reset(chan->buf[i], 0);
+		prev = chan->buf[i];
 	}
 }
 
@@ -166,17 +191,27 @@ void relay_reset(struct rchan *chan)
  */
 static struct rchan_buf *relay_open_buf(struct rchan *chan,
 					const char *filename,
-					struct dentry *parent)
+					struct dentry *parent,
+					int *is_global)
 {
 	struct rchan_buf *buf;
 	struct dentry *dentry;
 
+	if (*is_global)
+		return chan->buf[0];
+
+ 	buf = relay_create_buf(chan);
+ 	if (!buf)
+ 		return NULL;
+
 	/* Create file in fs */
-	dentry = relayfs_create_file(filename, parent, S_IRUSR, chan);
-	if (!dentry)
+ 	dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR,
+ 					   buf, is_global);
+ 	if (!dentry) {
+ 		relay_destroy_buf(buf);
 		return NULL;
+ 	}
 
-	buf = RELAYFS_I(dentry->d_inode)->buf;
 	buf->dentry = dentry;
 	__relay_reset(buf, 1);
 
@@ -214,6 +249,10 @@ static inline void setup_callbacks(struct rchan *chan,
 		cb->buf_mapped = buf_mapped_default_callback;
 	if (!cb->buf_unmapped)
 		cb->buf_unmapped = buf_unmapped_default_callback;
+	if (!cb->create_buf_file)
+		cb->create_buf_file = create_buf_file_default_callback;
+	if (!cb->remove_buf_file)
+		cb->remove_buf_file = remove_buf_file_default_callback;
 	chan->cb = cb;
 }
 
@@ -241,6 +280,7 @@ struct rchan *relay_open(const char *base_filename,
 	unsigned int i;
 	struct rchan *chan;
 	char *tmpname;
+	int is_global = 0;
 
 	if (!base_filename)
 		return NULL;
@@ -265,7 +305,8 @@ struct rchan *relay_open(const char *base_filename,
 
 	for_each_online_cpu(i) {
 		sprintf(tmpname, "%s%d", base_filename, i);
-		chan->buf[i] = relay_open_buf(chan, tmpname, parent);
+		chan->buf[i] = relay_open_buf(chan, tmpname, parent,
+					      &is_global);
 		chan->buf[i]->cpu = i;
 		if (!chan->buf[i])
 			goto free_bufs;
@@ -279,6 +320,8 @@ free_bufs:
 		if (!chan->buf[i])
 			break;
 		relay_close_buf(chan->buf[i]);
+		if (is_global)
+			break;
 	}
 	kfree(tmpname);
 
@@ -388,14 +431,16 @@ void relay_destroy_channel(struct kref *kref)
 void relay_close(struct rchan *chan)
 {
 	unsigned int i;
+	struct rchan_buf *prev = NULL;
 
 	if (!chan)
 		return;
 
 	for (i = 0; i < NR_CPUS; i++) {
-		if (!chan->buf[i])
-			continue;
+		if (!chan->buf[i] || chan->buf[i] == prev)
+			break;
 		relay_close_buf(chan->buf[i]);
+		prev = chan->buf[i];
 	}
 
 	if (chan->last_toobig)
@@ -415,14 +460,16 @@ void relay_close(struct rchan *chan)
 void relay_flush(struct rchan *chan)
 {
 	unsigned int i;
+	struct rchan_buf *prev = NULL;
 
 	if (!chan)
 		return;
 
 	for (i = 0; i < NR_CPUS; i++) {
-		if (!chan->buf[i])
-			continue;
+		if (!chan->buf[i] || chan->buf[i] == prev)
+			break;
 		relay_switch_subbuf(chan->buf[i], 0);
+		prev = chan->buf[i];
 	}
 }
 
diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h
index 703503fa22b6..0993d3e5753b 100644
--- a/fs/relayfs/relay.h
+++ b/fs/relayfs/relay.h
@@ -1,10 +1,6 @@
 #ifndef _RELAY_H
 #define _RELAY_H
 
-struct dentry *relayfs_create_file(const char *name,
-				   struct dentry *parent,
-				   int mode,
-				   struct rchan *chan);
 extern int relayfs_remove(struct dentry *dentry);
 extern int relay_buf_empty(struct rchan_buf *buf);
 extern void relay_destroy_channel(struct kref *kref);
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index c74f382dabba..0a13859fd57b 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,7 @@ static int
 romfs_readpage(struct file *file, struct page * page)
 {
 	struct inode *inode = page->mapping->host;
-	unsigned long offset, avail, readlen;
+	loff_t offset, avail, readlen;
 	void *buf;
 	int result = -EIO;
 
@@ -429,8 +429,8 @@ romfs_readpage(struct file *file, struct page * page)
 		goto err_out;
 
 	/* 32 bit warning -- but not for us :) */
-	offset = page->index << PAGE_CACHE_SHIFT;
-	if (offset < inode->i_size) {
+	offset = page_offset(page);
+	if (offset < i_size_read(inode)) {
 		avail = inode->i_size-offset;
 		readlen = min_t(unsigned long, avail, PAGE_SIZE);
 		if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) {
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index f3e6b81288ab..74b86d9725a6 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -66,7 +66,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
 	spin_lock(&dcache_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
-		dentry = list_entry(next, struct dentry, d_child);
+		dentry = list_entry(next, struct dentry, d_u.d_child);
 		dentry->d_fsdata = NULL;
 		smb_age_dentry(server, dentry);
 		next = next->next;
@@ -100,7 +100,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 	spin_lock(&dcache_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
-		dent = list_entry(next, struct dentry, d_child);
+		dent = list_entry(next, struct dentry, d_u.d_child);
 		if ((unsigned long)dent->d_fsdata == fpos) {
 			if (dent->d_inode)
 				dget_locked(dent);
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index b4fcfa8b55a1..7042e62726a4 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -209,8 +209,8 @@ smb_updatepage(struct file *file, struct page *page, unsigned long offset,
 {
 	struct dentry *dentry = file->f_dentry;
 
-	DEBUG1("(%s/%s %d@%ld)\n", DENTRY_PATH(dentry), 
-	       count, (page->index << PAGE_CACHE_SHIFT)+offset);
+	DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
+		((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
 
 	return smb_writepage_sync(dentry->d_inode, page, offset, count);
 }
@@ -374,8 +374,7 @@ smb_file_release(struct inode *inode, struct file * file)
 		/* We must flush any dirty pages now as we won't be able to
 		   write anything after close. mmap can trigger this.
 		   "openers" should perhaps include mmap'ers ... */
-		filemap_fdatawrite(inode->i_mapping);
-		filemap_fdatawait(inode->i_mapping);
+		filemap_write_and_wait(inode->i_mapping);
 		smb_close(inode);
 	}
 	unlock_kernel();
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 10b994428fef..6ec88bf59b2d 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -697,8 +697,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
 			DENTRY_PATH(dentry),
 			(long) inode->i_size, (long) attr->ia_size);
 
-		filemap_fdatawrite(inode->i_mapping);
-		filemap_fdatawait(inode->i_mapping);
+		filemap_write_and_wait(inode->i_mapping);
 
 		error = smb_open(dentry, O_WRONLY);
 		if (error)
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 38ab558835c4..d6baec0f24ad 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -3113,7 +3113,7 @@ smb_proc_setattr_unix(struct dentry *d, struct iattr *attr,
 	LSET(data, 32, SMB_TIME_NO_CHANGE);
 	LSET(data, 40, SMB_UID_NO_CHANGE);
 	LSET(data, 48, SMB_GID_NO_CHANGE);
-	LSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
+	DSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
 	LSET(data, 60, major);
 	LSET(data, 68, minor);
 	LSET(data, 76, 0);
diff --git a/fs/super.c b/fs/super.c
index 5a347a4f673a..c177b92419c5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -72,7 +72,7 @@ static struct super_block *alloc_super(void)
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		init_rwsem(&s->s_umount);
-		sema_init(&s->s_lock, 1);
+		mutex_init(&s->s_lock);
 		down_write(&s->s_umount);
 		s->s_count = S_BIAS;
 		atomic_set(&s->s_active, 1);
@@ -700,8 +700,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 
 		s->s_flags = flags;
 		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
-		s->s_old_blocksize = block_size(bdev);
-		sb_set_blocksize(s, s->s_old_blocksize);
+		sb_set_blocksize(s, block_size(bdev));
 		error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
 		if (error) {
 			up_write(&s->s_umount);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index d36780382176..49bd219275db 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -99,7 +99,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
 	int error;
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
 
-	down(&p->d_inode->i_sem);
+	mutex_lock(&p->d_inode->i_mutex);
 	*d = lookup_one_len(n, p, strlen(n));
 	if (!IS_ERR(*d)) {
 		error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
@@ -122,7 +122,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
 		dput(*d);
 	} else
 		error = PTR_ERR(*d);
-	up(&p->d_inode->i_sem);
+	mutex_unlock(&p->d_inode->i_mutex);
 	return error;
 }
 
@@ -246,7 +246,7 @@ static void remove_dir(struct dentry * d)
 	struct dentry * parent = dget(d->d_parent);
 	struct sysfs_dirent * sd;
 
-	down(&parent->d_inode->i_sem);
+	mutex_lock(&parent->d_inode->i_mutex);
 	d_delete(d);
 	sd = d->d_fsdata;
  	list_del_init(&sd->s_sibling);
@@ -257,7 +257,7 @@ static void remove_dir(struct dentry * d)
 	pr_debug(" o %s removing done (%d)\n",d->d_name.name,
 		 atomic_read(&d->d_count));
 
-	up(&parent->d_inode->i_sem);
+	mutex_unlock(&parent->d_inode->i_mutex);
 	dput(parent);
 }
 
@@ -286,7 +286,7 @@ void sysfs_remove_dir(struct kobject * kobj)
 		return;
 
 	pr_debug("sysfs %s: removing dir\n",dentry->d_name.name);
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	parent_sd = dentry->d_fsdata;
 	list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
 		if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED))
@@ -295,7 +295,7 @@ void sysfs_remove_dir(struct kobject * kobj)
 		sysfs_drop_dentry(sd, dentry);
 		sysfs_put(sd);
 	}
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	remove_dir(dentry);
 	/**
@@ -318,7 +318,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 	down_write(&sysfs_rename_sem);
 	parent = kobj->parent->dentry;
 
-	down(&parent->d_inode->i_sem);
+	mutex_lock(&parent->d_inode->i_mutex);
 
 	new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
 	if (!IS_ERR(new_dentry)) {
@@ -334,7 +334,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 			error = -EEXIST;
 		dput(new_dentry);
 	}
-	up(&parent->d_inode->i_sem);	
+	mutex_unlock(&parent->d_inode->i_mutex);
 	up_write(&sysfs_rename_sem);
 
 	return error;
@@ -345,9 +345,9 @@ static int sysfs_dir_open(struct inode *inode, struct file *file)
 	struct dentry * dentry = file->f_dentry;
 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
 
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	file->private_data = sysfs_new_dirent(parent_sd, NULL);
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	return file->private_data ? 0 : -ENOMEM;
 
@@ -358,9 +358,9 @@ static int sysfs_dir_close(struct inode *inode, struct file *file)
 	struct dentry * dentry = file->f_dentry;
 	struct sysfs_dirent * cursor = file->private_data;
 
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	list_del_init(&cursor->s_sibling);
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	release_sysfs_dirent(cursor);
 
@@ -436,7 +436,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 {
 	struct dentry * dentry = file->f_dentry;
 
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	switch (origin) {
 		case 1:
 			offset += file->f_pos;
@@ -444,7 +444,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 			if (offset >= 0)
 				break;
 		default:
-			up(&file->f_dentry->d_inode->i_sem);
+			mutex_unlock(&file->f_dentry->d_inode->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
@@ -468,7 +468,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 			list_add_tail(&cursor->s_sibling, p);
 		}
 	}
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 	return offset;
 }
 
@@ -483,4 +483,3 @@ struct file_operations sysfs_dir_operations = {
 EXPORT_SYMBOL_GPL(sysfs_create_dir);
 EXPORT_SYMBOL_GPL(sysfs_remove_dir);
 EXPORT_SYMBOL_GPL(sysfs_rename_dir);
-
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 4013d7905e84..d0e3d8495165 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -364,9 +364,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
 	umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
 	int error = 0;
 
-	down(&dir->d_inode->i_sem);
+	mutex_lock(&dir->d_inode->i_mutex);
 	error = sysfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
-	up(&dir->d_inode->i_sem);
+	mutex_unlock(&dir->d_inode->i_mutex);
 
 	return error;
 }
@@ -398,7 +398,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 	struct dentry * victim;
 	int res = -ENOENT;
 
-	down(&dir->d_inode->i_sem);
+	mutex_lock(&dir->d_inode->i_mutex);
 	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
 		/* make sure dentry is really there */
@@ -420,7 +420,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 		 */
 		dput(victim);
 	}
-	up(&dir->d_inode->i_sem);
+	mutex_unlock(&dir->d_inode->i_mutex);
 
 	return res;
 }
@@ -441,22 +441,22 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
 	struct iattr newattrs;
 	int res = -ENOENT;
 
-	down(&dir->d_inode->i_sem);
+	mutex_lock(&dir->d_inode->i_mutex);
 	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
 		if (victim->d_inode &&
 		    (victim->d_parent->d_inode == dir->d_inode)) {
 			inode = victim->d_inode;
-			down(&inode->i_sem);
+			mutex_lock(&inode->i_mutex);
 			newattrs.ia_mode = (mode & S_IALLUGO) |
 						(inode->i_mode & ~S_IALLUGO);
 			newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 			res = notify_change(victim, &newattrs);
-			up(&inode->i_sem);
+			mutex_unlock(&inode->i_mutex);
 		}
 		dput(victim);
 	}
-	up(&dir->d_inode->i_sem);
+	mutex_unlock(&dir->d_inode->i_mutex);
 
 	return res;
 }
@@ -480,4 +480,3 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 EXPORT_SYMBOL_GPL(sysfs_create_file);
 EXPORT_SYMBOL_GPL(sysfs_remove_file);
 EXPORT_SYMBOL_GPL(sysfs_update_file);
-
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 970a33f03299..c3133219941c 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -201,7 +201,7 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
 
 /*
  * Unhashes the dentry corresponding to given sysfs_dirent
- * Called with parent inode's i_sem held.
+ * Called with parent inode's i_mutex held.
  */
 void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
 {
@@ -232,7 +232,7 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
 		/* no inode means this hasn't been made visible yet */
 		return;
 
-	down(&dir->d_inode->i_sem);
+	mutex_lock(&dir->d_inode->i_mutex);
 	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
 		if (!sd->s_element)
 			continue;
@@ -243,7 +243,5 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
 			break;
 		}
 	}
-	up(&dir->d_inode->i_sem);
+	mutex_unlock(&dir->d_inode->i_mutex);
 }
-
-
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index de402fa915f2..e38d6338a20d 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -86,9 +86,9 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
 
 	BUG_ON(!kobj || !kobj->dentry || !name);
 
-	down(&dentry->d_inode->i_sem);
+	mutex_lock(&dentry->d_inode->i_mutex);
 	error = sysfs_add_link(dentry, name, target);
-	up(&dentry->d_inode->i_sem);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 	return error;
 }
 
@@ -177,4 +177,3 @@ struct inode_operations sysfs_symlink_inode_operations = {
 
 EXPORT_SYMBOL_GPL(sysfs_create_link);
 EXPORT_SYMBOL_GPL(sysfs_remove_link);
-
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 69a085abad6f..cce8b05cba5a 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -103,7 +103,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			offset = (char *)de - kaddr;
 
 			over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN),
-					(n<<PAGE_CACHE_SHIFT) | offset,
+					((loff_t)n<<PAGE_CACHE_SHIFT) | offset,
 					fs16_to_cpu(SYSV_SB(sb), de->inode),
 					DT_UNKNOWN);
 			if (over) {
@@ -115,7 +115,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	}
 
 done:
-	filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
+	filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset;
 	unlock_kernel();
 	return 0;
 }
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 6598a5037ac8..4fae57d9d115 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -41,7 +41,7 @@
 #define uint(x) xuint(x)
 #define xuint(x) __le ## x
 
-extern inline int find_next_one_bit (void * addr, int size, int offset)
+static inline int find_next_one_bit (void * addr, int size, int offset)
 {
 	uintBPL_t * p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG);
 	int result = offset & ~(BITS_PER_LONG-1);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4014f17d382e..395e582ee542 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1957,11 +1957,6 @@ int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t
 		printk(KERN_ERR "udf: inode_bmap: block < 0\n");
 		return -1;
 	}
-	if (!inode)
-	{
-		printk(KERN_ERR "udf: inode_bmap: NULL inode\n");
-		return -1;
-	}
 
 	*extoffset = 0;
 	*elen = 0;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index d0915fba155a..7c10c68902ae 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -491,7 +491,7 @@ int ufs_delete_entry (struct inode * inode, struct ufs_dir_entry * dir,
 	
 	UFSD(("ino %u, reclen %u, namlen %u, name %s\n",
 		fs32_to_cpu(sb, de->d_ino),
-		fs16to_cpu(sb, de->d_reclen),
+		fs16_to_cpu(sb, de->d_reclen),
 		ufs_get_de_namlen(sb, de), de->d_name))
 
 	while (i < bh->b_size) {
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 54828ebcf1ba..e9a42c711a9e 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1275,7 +1275,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
 	size_t towrite = len;
 	struct buffer_head *bh;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
@@ -1296,14 +1296,16 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
 		blk++;
 	}
 out:
-	if (len == towrite)
+	if (len == towrite) {
+		mutex_unlock(&inode->i_mutex);
 		return err;
+	}
 	if (inode->i_size < off+len-towrite)
 		i_size_write(inode, off+len-towrite);
 	inode->i_version++;
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
 	mark_inode_dirty(inode);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return len - towrite;
 }
 
diff --git a/fs/xattr.c b/fs/xattr.c
index bcc2156d4d28..80eca7d3d69f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -19,6 +19,149 @@
 #include <linux/fsnotify.h>
 #include <asm/uaccess.h>
 
+
+/*
+ * Check permissions for extended attribute access.  This is a bit complicated
+ * because different namespaces have very different rules.
+ */
+static int
+xattr_permission(struct inode *inode, const char *name, int mask)
+{
+	/*
+	 * We can never set or remove an extended attribute on a read-only
+	 * filesystem  or on an immutable / append-only inode.
+	 */
+	if (mask & MAY_WRITE) {
+		if (IS_RDONLY(inode))
+			return -EROFS;
+		if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+			return -EPERM;
+	}
+
+	/*
+	 * No restriction for security.* and system.* from the VFS.  Decision
+	 * on these is left to the underlying filesystem / security module.
+	 */
+	if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
+	    !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		return 0;
+
+	/*
+	 * The trusted.* namespace can only accessed by a privilegued user.
+	 */
+	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+		return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
+
+	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
+		if (!S_ISREG(inode->i_mode) &&
+		    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+			return -EPERM;
+	}
+
+	return permission(inode, mask, NULL);
+}
+
+int
+vfs_setxattr(struct dentry *dentry, char *name, void *value,
+		size_t size, int flags)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = xattr_permission(inode, name, MAY_WRITE);
+	if (error)
+		return error;
+
+	mutex_lock(&inode->i_mutex);
+	error = security_inode_setxattr(dentry, name, value, size, flags);
+	if (error)
+		goto out;
+	error = -EOPNOTSUPP;
+	if (inode->i_op->setxattr) {
+		error = inode->i_op->setxattr(dentry, name, value, size, flags);
+		if (!error) {
+			fsnotify_xattr(dentry);
+			security_inode_post_setxattr(dentry, name, value,
+						     size, flags);
+		}
+	} else if (!strncmp(name, XATTR_SECURITY_PREFIX,
+				XATTR_SECURITY_PREFIX_LEN)) {
+		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+		error = security_inode_setsecurity(inode, suffix, value,
+						   size, flags);
+		if (!error)
+			fsnotify_xattr(dentry);
+	}
+out:
+	mutex_unlock(&inode->i_mutex);
+	return error;
+}
+EXPORT_SYMBOL_GPL(vfs_setxattr);
+
+ssize_t
+vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = xattr_permission(inode, name, MAY_READ);
+	if (error)
+		return error;
+
+	error = security_inode_getxattr(dentry, name);
+	if (error)
+		return error;
+
+	if (inode->i_op->getxattr)
+		error = inode->i_op->getxattr(dentry, name, value, size);
+	else
+		error = -EOPNOTSUPP;
+
+	if (!strncmp(name, XATTR_SECURITY_PREFIX,
+				XATTR_SECURITY_PREFIX_LEN)) {
+		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+		int ret = security_inode_getsecurity(inode, suffix, value,
+						     size, error);
+		/*
+		 * Only overwrite the return value if a security module
+		 * is actually active.
+		 */
+		if (ret != -EOPNOTSUPP)
+			error = ret;
+	}
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(vfs_getxattr);
+
+int
+vfs_removexattr(struct dentry *dentry, char *name)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	if (!inode->i_op->removexattr)
+		return -EOPNOTSUPP;
+
+	error = xattr_permission(inode, name, MAY_WRITE);
+	if (error)
+		return error;
+
+	error = security_inode_removexattr(dentry, name);
+	if (error)
+		return error;
+
+	mutex_lock(&inode->i_mutex);
+	error = inode->i_op->removexattr(dentry, name);
+	mutex_unlock(&inode->i_mutex);
+
+	if (!error)
+		fsnotify_xattr(dentry);
+	return error;
+}
+EXPORT_SYMBOL_GPL(vfs_removexattr);
+
+
 /*
  * Extended attribute SET operations
  */
@@ -51,29 +194,7 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
 		}
 	}
 
-	down(&d->d_inode->i_sem);
-	error = security_inode_setxattr(d, kname, kvalue, size, flags);
-	if (error)
-		goto out;
-	error = -EOPNOTSUPP;
-	if (d->d_inode->i_op && d->d_inode->i_op->setxattr) {
-		error = d->d_inode->i_op->setxattr(d, kname, kvalue,
-						   size, flags);
-		if (!error) {
-			fsnotify_xattr(d);
-			security_inode_post_setxattr(d, kname, kvalue,
-						     size, flags);
-		}
-	} else if (!strncmp(kname, XATTR_SECURITY_PREFIX,
-			    sizeof XATTR_SECURITY_PREFIX - 1)) {
-		const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
-		error = security_inode_setsecurity(d->d_inode, suffix, kvalue,
-						   size, flags);
-		if (!error)
-			fsnotify_xattr(d);
-	}
-out:
-	up(&d->d_inode->i_sem);
+	error = vfs_setxattr(d, kname, kvalue, size, flags);
 	kfree(kvalue);
 	return error;
 }
@@ -147,22 +268,7 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
 			return -ENOMEM;
 	}
 
-	error = security_inode_getxattr(d, kname);
-	if (error)
-		goto out;
-	error = -EOPNOTSUPP;
-	if (d->d_inode->i_op && d->d_inode->i_op->getxattr)
-		error = d->d_inode->i_op->getxattr(d, kname, kvalue, size);
-
-	if (!strncmp(kname, XATTR_SECURITY_PREFIX,
-		     sizeof XATTR_SECURITY_PREFIX - 1)) {
-		const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
-		int rv = security_inode_getsecurity(d->d_inode, suffix, kvalue,
-						    size, error);
-		/* Security module active: overwrite error value */
-		if (rv != -EOPNOTSUPP)
-			error = rv;
-	}
+	error = vfs_getxattr(d, kname, kvalue, size);
 	if (error > 0) {
 		if (size && copy_to_user(value, kvalue, error))
 			error = -EFAULT;
@@ -171,7 +277,6 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
 		   than XATTR_SIZE_MAX bytes. Not possible. */
 		error = -E2BIG;
 	}
-out:
 	kfree(kvalue);
 	return error;
 }
@@ -318,19 +423,7 @@ removexattr(struct dentry *d, char __user *name)
 	if (error < 0)
 		return error;
 
-	error = -EOPNOTSUPP;
-	if (d->d_inode->i_op && d->d_inode->i_op->removexattr) {
-		error = security_inode_removexattr(d, kname);
-		if (error)
-			goto out;
-		down(&d->d_inode->i_sem);
-		error = d->d_inode->i_op->removexattr(d, kname);
-		up(&d->d_inode->i_sem);
-		if (!error)
-			fsnotify_xattr(d);
-	}
-out:
-	return error;
+	return vfs_removexattr(d, kname);
 }
 
 asmlinkage long
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h
index ce773d89a923..d3369b6ca168 100644
--- a/fs/xfs/linux-2.6/mutex.h
+++ b/fs/xfs/linux-2.6/mutex.h
@@ -19,7 +19,7 @@
 #define __XFS_SUPPORT_MUTEX_H__
 
 #include <linux/spinlock.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
 
 /*
  * Map the mutex'es from IRIX to Linux semaphores.
@@ -28,12 +28,8 @@
  * callers.
  */
 #define MUTEX_DEFAULT		0x0
-typedef struct semaphore	mutex_t;
 
-#define mutex_init(lock, type, name)		sema_init(lock, 1)
-#define mutex_destroy(lock)			sema_init(lock, -99)
-#define mutex_lock(lock, num)			down(lock)
-#define mutex_trylock(lock)			(down_trylock(lock) ? 0 : 1)
-#define mutex_unlock(lock)			up(lock)
+typedef struct mutex		mutex_t;
+//#define mutex_destroy(lock)			do{}while(0)
 
 #endif /* __XFS_SUPPORT_MUTEX_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index f89340c61bf2..4fa4b1a5187e 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -79,8 +79,7 @@ fs_flushinval_pages(
 	struct inode	*ip = LINVFS_GET_IP(vp);
 
 	if (VN_CACHED(vp)) {
-		filemap_fdatawrite(ip->i_mapping);
-		filemap_fdatawait(ip->i_mapping);
+		filemap_write_and_wait(ip->i_mapping);
 
 		truncate_inode_pages(ip->i_mapping, first);
 	}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index b78b5eb9e96c..f98c5be3dfe7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -530,6 +530,8 @@ xfs_attrmulti_attr_set(
 	char			*kbuf;
 	int			error = EFAULT;
 
+	if (IS_RDONLY(&vp->v_inode))
+		return -EROFS;
 	if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
 		return EPERM;
 	if (len > XATTR_SIZE_MAX)
@@ -557,6 +559,9 @@ xfs_attrmulti_attr_remove(
 {
 	int			error;
 
+
+	if (IS_RDONLY(&vp->v_inode))
+		return -EROFS;
 	if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
 		return EPERM;
 
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index c83ae15bb0e6..a7c9ba1a9f7b 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -19,7 +19,6 @@
 #include <linux/compat.h>
 #include <linux/init.h>
 #include <linux/ioctl.h>
-#include <linux/ioctl32.h>
 #include <linux/syscalls.h>
 #include <linux/types.h>
 #include <linux/fs.h>
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 14215a7db59f..97fb1470cf28 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -54,6 +54,9 @@
 #include <linux/xattr.h>
 #include <linux/namei.h>
 
+#define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) ||	\
+	(S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME))
+
 /*
  * Change the requested timestamp in the given inode.
  * We don't lock across timestamp updates, and we don't log them but
@@ -203,7 +206,7 @@ validate_fields(
 		ip->i_nlink = va.va_nlink;
 		ip->i_blocks = va.va_nblocks;
 
-		/* we're under i_sem so i_size can't change under us */
+		/* we're under i_mutex so i_size can't change under us */
 		if (i_size_read(ip) != va.va_size)
 			i_size_write(ip, va.va_size);
 	}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 279e9bc92aba..885dfafeabee 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -254,7 +254,7 @@ xfs_read(
 	}
 
 	if (unlikely(ioflags & IO_ISDIRECT))
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
 	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
@@ -286,7 +286,7 @@ xfs_read(
 
 unlock_isem:
 	if (unlikely(ioflags & IO_ISDIRECT))
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 	return ret;
 }
 
@@ -655,7 +655,7 @@ relock:
 		iolock = XFS_IOLOCK_EXCL;
 		locktype = VRWLOCK_WRITE;
 
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 	} else {
 		iolock = XFS_IOLOCK_SHARED;
 		locktype = VRWLOCK_WRITE_DIRECT;
@@ -686,7 +686,7 @@ start:
 		int		dmflags = FILP_DELAY_FLAG(file);
 
 		if (need_isem)
-			dmflags |= DM_FLAGS_ISEM;
+			dmflags |= DM_FLAGS_IMUX;
 
 		xfs_iunlock(xip, XFS_ILOCK_EXCL);
 		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
@@ -713,7 +713,7 @@ start:
 	}
 
 	if (likely(!(ioflags & IO_INVIS))) {
-		inode_update_time(inode, 1);
+		file_update_time(file);
 		xfs_ichgtime_fast(xip, inode,
 				  XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 	}
@@ -772,7 +772,7 @@ retry:
 		if (need_isem) {
 			/* demote the lock now the cached pages are gone */
 			XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
-			up(&inode->i_sem);
+			mutex_unlock(&inode->i_mutex);
 
 			iolock = XFS_IOLOCK_SHARED;
 			locktype = VRWLOCK_WRITE_DIRECT;
@@ -817,14 +817,14 @@ retry:
 
 		xfs_rwunlock(bdp, locktype);
 		if (need_isem)
-			up(&inode->i_sem);
+			mutex_unlock(&inode->i_mutex);
 		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
 				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
 				0, 0, 0); /* Delay flag intentionally  unused */
 		if (error)
 			goto out_nounlocks;
 		if (need_isem)
-			down(&inode->i_sem);
+			mutex_lock(&inode->i_mutex);
 		xfs_rwlock(bdp, locktype);
 		pos = xip->i_d.di_size;
 		ret = 0;
@@ -926,7 +926,7 @@ retry:
 	
 		xfs_rwunlock(bdp, locktype);
 		if (need_isem)
-			up(&inode->i_sem);
+			mutex_unlock(&inode->i_mutex);
 
 		error = sync_page_range(inode, mapping, pos, ret);
 		if (!error)
@@ -938,7 +938,7 @@ retry:
 	xfs_rwunlock(bdp, locktype);
  out_unlock_isem:
 	if (need_isem)
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
  out_nounlocks:
 	return -error;
 }
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 00b5043dfa5a..772ac48329ea 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -104,7 +104,7 @@ xfs_qm_dqinit(
 	 */
 	if (brandnewdquot) {
 		dqp->dq_flnext = dqp->dq_flprev = dqp;
-		mutex_init(&dqp->q_qlock,  MUTEX_DEFAULT, "xdq");
+		mutex_init(&dqp->q_qlock);
 		initnsema(&dqp->q_flock, 1, "fdq");
 		sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
 
@@ -1382,7 +1382,7 @@ void
 xfs_dqlock(
 	xfs_dquot_t *dqp)
 {
-	mutex_lock(&(dqp->q_qlock), PINOD);
+	mutex_lock(&(dqp->q_qlock));
 }
 
 void
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 5328a2937127..bb6991a7a617 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -167,7 +167,7 @@ xfs_Gqm_init(void)
 	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
 	xqm->qm_nrefs = 0;
 #ifdef DEBUG
-	mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk");
+	xfs_mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk");
 #endif
 	return xqm;
 }
@@ -1166,7 +1166,7 @@ xfs_qm_init_quotainfo(
 	qinf->qi_dqreclaims = 0;
 
 	/* mutex used to serialize quotaoffs */
-	mutex_init(&qinf->qi_quotaofflock, MUTEX_DEFAULT, "qoff");
+	mutex_init(&qinf->qi_quotaofflock);
 
 	/* Precalc some constants */
 	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -1285,7 +1285,7 @@ xfs_qm_list_init(
 	char		*str,
 	int		n)
 {
-	mutex_init(&list->qh_lock, MUTEX_DEFAULT, str);
+	mutex_init(&list->qh_lock);
 	list->qh_next = NULL;
 	list->qh_version = 0;
 	list->qh_nelems = 0;
@@ -2762,7 +2762,7 @@ STATIC void
 xfs_qm_freelist_init(xfs_frlist_t *ql)
 {
 	ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
-	mutex_init(&ql->qh_lock, MUTEX_DEFAULT, "dqf");
+	mutex_init(&ql->qh_lock);
 	ql->qh_version = 0;
 	ql->qh_nelems = 0;
 }
@@ -2772,7 +2772,7 @@ xfs_qm_freelist_destroy(xfs_frlist_t *ql)
 {
 	xfs_dquot_t	*dqp, *nextdqp;
 
-	mutex_lock(&ql->qh_lock, PINOD);
+	mutex_lock(&ql->qh_lock);
 	for (dqp = ql->qh_next;
 	     dqp != (xfs_dquot_t *)ql; ) {
 		xfs_dqlock(dqp);
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 12da259f2fcb..4568deb6da86 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct {
 #define XFS_QM_IWARNLIMIT	5
 #define XFS_QM_RTBWARNLIMIT	5
 
-#define XFS_QM_LOCK(xqm)	(mutex_lock(&xqm##_lock, PINOD))
+#define XFS_QM_LOCK(xqm)	(mutex_lock(&xqm##_lock))
 #define XFS_QM_UNLOCK(xqm)	(mutex_unlock(&xqm##_lock))
 #define XFS_QM_HOLD(xqm)	((xqm)->qm_nrefs++)
 #define XFS_QM_RELE(xqm)	((xqm)->qm_nrefs--)
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index d9d2993de435..90402a1c3983 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -363,7 +363,7 @@ xfs_qm_init(void)
 		KERN_INFO "SGI XFS Quota Management subsystem\n";
 
 	printk(message);
-	mutex_init(&xfs_Gqm_lock, MUTEX_DEFAULT, "xfs_qmlock");
+	mutex_init(&xfs_Gqm_lock);
 	vfs_bhv_set_custom(&xfs_qmops, &xfs_qmcore_xfs);
 	xfs_qm_init_procfs();
 }
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 24690e1af659..86a1d09f48d5 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -233,7 +233,7 @@ xfs_qm_scall_quotaoff(
 	 */
 	ASSERT(mp->m_quotainfo);
 	if (mp->m_quotainfo)
-		mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+		mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
 
 	ASSERT(mp->m_quotainfo);
 
@@ -508,7 +508,7 @@ xfs_qm_scall_quotaon(
 	/*
 	 * Switch on quota enforcement in core.
 	 */
-	mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+	mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
 	mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
 	mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
 
@@ -617,7 +617,7 @@ xfs_qm_scall_setqlim(
 	 * a quotaoff from happening). (XXXThis doesn't currently happen
 	 * because we take the vfslock before calling xfs_qm_sysent).
 	 */
-	mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+	mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
 
 	/*
 	 * Get the dquot (locked), and join it to the transaction.
@@ -1426,7 +1426,7 @@ xfs_qm_internalqcheck(
 	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
 	XFS_bflush(mp->m_ddev_targp);
 
-	mutex_lock(&qcheck_lock, PINOD);
+	mutex_lock(&qcheck_lock);
 	/* There should be absolutely no quota activity while this
 	   is going on. */
 	qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 7a9f3beb818c..b7ddd04aae32 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -51,7 +51,7 @@
 #define XFS_QI_MPLNEXT(mp)	((mp)->m_quotainfo->qi_dqlist.qh_next)
 #define XFS_QI_MPLNDQUOTS(mp)	((mp)->m_quotainfo->qi_dqlist.qh_nelems)
 
-#define XQMLCK(h)			(mutex_lock(&((h)->qh_lock), PINOD))
+#define XQMLCK(h)			(mutex_lock(&((h)->qh_lock)))
 #define XQMUNLCK(h)			(mutex_unlock(&((h)->qh_lock)))
 #ifdef DEBUG
 struct xfs_dqhash;
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 70ce40914c8a..69ec4f540c3a 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -24,7 +24,7 @@ static uuid_t	*uuid_table;
 void
 uuid_init(void)
 {
-	mutex_init(&uuid_monitor, MUTEX_DEFAULT, "uuid_monitor");
+	mutex_init(&uuid_monitor);
 }
 
 /*
@@ -94,7 +94,7 @@ uuid_table_insert(uuid_t *uuid)
 {
 	int	i, hole;
 
-	mutex_lock(&uuid_monitor, PVFS);
+	mutex_lock(&uuid_monitor);
 	for (i = 0, hole = -1; i < uuid_table_size; i++) {
 		if (uuid_is_nil(&uuid_table[i])) {
 			hole = i;
@@ -122,7 +122,7 @@ uuid_table_remove(uuid_t *uuid)
 {
 	int	i;
 
-	mutex_lock(&uuid_monitor, PVFS);
+	mutex_lock(&uuid_monitor);
 	for (i = 0; i < uuid_table_size; i++) {
 		if (uuid_is_nil(&uuid_table[i]))
 			continue;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5484eeb460c8..1a11c2b51701 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -117,11 +117,6 @@ xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
 	     ip->i_d.di_anextents == 0))
 		return(ENOATTR);
 
-	if (!(flags & (ATTR_KERNACCESS|ATTR_SECURE))) {
-		if ((error = xfs_iaccess(ip, S_IRUSR, cred)))
-			return(XFS_ERROR(error));
-	}
-
 	/*
 	 * Fill in the arg structure for this request.
 	 */
@@ -425,7 +420,7 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f
 	     struct cred *cred)
 {
 	xfs_inode_t	*dp;
-	int             namelen, error;
+	int             namelen;
 
 	namelen = strlen(name);
 	if (namelen >= MAXNAMELEN)
@@ -437,14 +432,6 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f
 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 		return (EIO);
 
-	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if (!(flags & ATTR_SECURE) &&
-	     (error = xfs_iaccess(dp, S_IWUSR, cred))) {
-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return(XFS_ERROR(error));
-	}
-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
-
 	return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags);
 }
 
@@ -579,7 +566,7 @@ int
 xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
 {
 	xfs_inode_t         *dp;
-	int                 namelen, error;
+	int                 namelen;
 
 	namelen = strlen(name);
 	if (namelen >= MAXNAMELEN)
@@ -592,11 +579,7 @@ xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
 		return (EIO);
 
 	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if (!(flags & ATTR_SECURE) &&
-	     (error = xfs_iaccess(dp, S_IWUSR, cred))) {
-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return(XFS_ERROR(error));
-	} else if (XFS_IFORK_Q(dp) == 0 ||
+	if (XFS_IFORK_Q(dp) == 0 ||
 		   (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
 		    dp->i_d.di_anextents == 0)) {
 		xfs_iunlock(dp, XFS_ILOCK_SHARED);
@@ -668,12 +651,6 @@ xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags,
 		return (EIO);
 
 	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if (!(flags & ATTR_SECURE) &&
-	     (error = xfs_iaccess(dp, S_IRUSR, cred))) {
-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return(XFS_ERROR(error));
-	}
-
 	/*
 	 * Decide on what work routines to call based on the inode size.
 	 */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 864bf6955689..b4c7f2bc55a0 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -152,7 +152,7 @@ typedef enum {
 
 #define DM_FLAGS_NDELAY		0x001	/* return EAGAIN after dm_pending() */
 #define DM_FLAGS_UNWANTED	0x002	/* event not in fsys dm_eventset_t */
-#define DM_FLAGS_ISEM		0x004	/* thread holds i_sem */
+#define DM_FLAGS_IMUX		0x004	/* thread holds i_mutex */
 #define DM_FLAGS_IALLOCSEM_RD	0x010	/* thread holds i_alloc_sem rd */
 #define DM_FLAGS_IALLOCSEM_WR	0x020	/* thread holds i_alloc_sem wr */
 
@@ -161,21 +161,21 @@ typedef enum {
  */
 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
 #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-			      DM_FLAGS_ISEM : 0)
-#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM)
+			      DM_FLAGS_IMUX : 0)
+#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
 #endif
 
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
     (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22))
 #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-			      DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_ISEM)
-#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM)
+			      DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_IMUX)
+#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
 #endif
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)
 #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-			      0 : DM_FLAGS_ISEM)
-#define DM_SEM_FLAG_WR	(DM_FLAGS_ISEM)
+			      0 : DM_FLAGS_IMUX)
+#define DM_SEM_FLAG_WR	(DM_FLAGS_IMUX)
 #endif
 
 
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 158829ca56f6..f40d4391fcfc 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -30,13 +30,7 @@
  * By comparing each compnent, we don't have to worry about extra
  * endian issues in treating two 32 bit numbers as one 64 bit number
  */
-static
-#if defined(__GNUC__) && (__GNUC__ == 2) && ( (__GNUC_MINOR__ == 95) || (__GNUC_MINOR__ == 96))
-__attribute__((unused))	/* gcc 2.95, 2.96 miscompile this when inlined */
-#else
-__inline__
-#endif
-xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
+static inline xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 {
 	if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2))
 		return (CYCLE_LSN(lsn1)<CYCLE_LSN(lsn2))? -999 : 999;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 541d5dd474be..303af86739bf 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -117,7 +117,7 @@ xfs_mount_init(void)
 
 	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
 	spinlock_init(&mp->m_sb_lock, "xfs_sb");
-	mutex_init(&mp->m_ilock, MUTEX_DEFAULT, "xfs_ilock");
+	mutex_init(&mp->m_ilock);
 	initnsema(&mp->m_growlock, 1, "xfs_grow");
 	/*
 	 * Initialize the AIL.
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 08b2e0a5d807..3432fd5a3986 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -533,7 +533,7 @@ typedef struct xfs_mod_sb {
 	int		msb_delta;	/* Change to make to specified field */
 } xfs_mod_sb_t;
 
-#define	XFS_MOUNT_ILOCK(mp)	mutex_lock(&((mp)->m_ilock), PINOD)
+#define	XFS_MOUNT_ILOCK(mp)	mutex_lock(&((mp)->m_ilock))
 #define	XFS_MOUNT_IUNLOCK(mp)	mutex_unlock(&((mp)->m_ilock))
 #define	XFS_SB_LOCK(mp)		mutex_spinlock(&(mp)->m_sb_lock)
 #define	XFS_SB_UNLOCK(mp,s)	mutex_spinunlock(&(mp)->m_sb_lock,(s))