From 8f6cff98477edbcd8ae4976734ba7edd07bdd244 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Fri, 13 Oct 2006 12:42:36 -0500
Subject: JFS: pageno needs to be long

diRead and diWrite are representing the page number as an unsigned int.
This causes file system corruption on volumes larger than 16TB.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_imap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 489a3d63002d..ee9b473b7b80 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -318,7 +318,7 @@ int diRead(struct inode *ip)
 	struct inomap *imap;
 	int block_offset;
 	int inodes_left;
-	uint pageno;
+	unsigned long pageno;
 	int rel_inode;
 
 	jfs_info("diRead: ino = %ld", ip->i_ino);
@@ -606,7 +606,7 @@ int diWrite(tid_t tid, struct inode *ip)
 	int block_offset;
 	int inodes_left;
 	struct metapage *mp;
-	uint pageno;
+	unsigned long pageno;
 	int rel_inode;
 	int dioffset;
 	struct inode *ipimap;
-- 
cgit v1.2.3


From 62752ee198dca9209b7dee504763e51b11e9e0ca Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 17 Oct 2006 10:31:38 +0200
Subject: [PATCH] Take i_mutex in splice_from_pipe()

The splice_actor may be calling ->prepare_write() and ->commit_write(). We
want i_mutex on the inode being written to before calling those so that we
don't race i_size changes.

The double locking behavior is done elsewhere in splice.c, and if we
eventually want _nolock variants of generic_file_splice_write(), fs modules
might have to replicate the nasty locking code. We introduce
inode_double_lock() and inode_double_unlock() to consolidate the locking
rules into one set of functions.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/inode.c         | 36 ++++++++++++++++++++++++++++++++++++
 fs/splice.c        | 24 +++++++++++-------------
 include/linux/fs.h |  3 +++
 3 files changed, 50 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index d9a21d122926..26cdb115ce67 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode)
 	wake_up_bit(&inode->i_state, __I_LOCK);
 }
 
+/*
+ * We rarely want to lock two inodes that do not have a parent/child
+ * relationship (such as directory, child inode) simultaneously. The
+ * vast majority of file systems should be able to get along fine
+ * without this. Do not use these functions except as a last resort.
+ */
+void inode_double_lock(struct inode *inode1, struct inode *inode2)
+{
+	if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
+		if (inode1)
+			mutex_lock(&inode1->i_mutex);
+		else if (inode2)
+			mutex_lock(&inode2->i_mutex);
+		return;
+	}
+
+	if (inode1 < inode2) {
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+	} else {
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
+	}
+}
+EXPORT_SYMBOL(inode_double_lock);
+
+void inode_double_unlock(struct inode *inode1, struct inode *inode2)
+{
+	if (inode1)
+		mutex_unlock(&inode1->i_mutex);
+
+	if (inode2 && inode2 != inode1)
+		mutex_unlock(&inode2->i_mutex);
+}
+EXPORT_SYMBOL(inode_double_unlock);
+
 static __initdata unsigned long ihash_entries;
 static int __init set_ihash_entries(char *str)
 {
diff --git a/fs/splice.c b/fs/splice.c
index a567010b62ac..c1072b6940c3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -713,6 +713,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 {
 	int ret, do_wakeup, err;
 	struct splice_desc sd;
+	struct inode *inode = out->f_mapping->host;
 
 	ret = 0;
 	do_wakeup = 0;
@@ -722,8 +723,13 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	sd.file = out;
 	sd.pos = *ppos;
 
-	if (pipe->inode)
-		mutex_lock(&pipe->inode->i_mutex);
+	/*
+	 * The actor worker might be calling ->prepare_write and
+	 * ->commit_write. Most of the time, these expect i_mutex to
+	 * be held. Since this may result in an ABBA deadlock with
+	 * pipe->inode, we have to order lock acquiry here.
+	 */
+	inode_double_lock(inode, pipe->inode);
 
 	for (;;) {
 		if (pipe->nrbufs) {
@@ -797,8 +803,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 		pipe_wait(pipe);
 	}
 
-	if (pipe->inode)
-		mutex_unlock(&pipe->inode->i_mutex);
+	inode_double_unlock(inode, pipe->inode);
 
 	if (do_wakeup) {
 		smp_mb();
@@ -1400,13 +1405,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 	 * grabbing by inode address. Otherwise two different processes
 	 * could deadlock (one doing tee from A -> B, the other from B -> A).
 	 */
-	if (ipipe->inode < opipe->inode) {
-		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
-		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
-	} else {
-		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
-		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
-	}
+	inode_double_lock(ipipe->inode, opipe->inode);
 
 	do {
 		if (!opipe->readers) {
@@ -1450,8 +1449,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		i++;
 	} while (len);
 
-	mutex_unlock(&ipipe->inode->i_mutex);
-	mutex_unlock(&opipe->inode->i_mutex);
+	inode_double_unlock(ipipe->inode, opipe->inode);
 
 	/*
 	 * If we put data in the output pipe, wakeup any potential readers.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 661c7c572149..853a02f23936 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class
 	I_MUTEX_QUOTA
 };
 
+extern void inode_double_lock(struct inode *inode1, struct inode *inode2);
+extern void inode_double_unlock(struct inode *inode1, struct inode *inode2);
+
 /*
  * NOTE: in a 32bit arch with a preemptable kernel and
  * an UP compile the i_size_read/write must be atomic
-- 
cgit v1.2.3


From 6da61809822c22634a3de2dcb3c60283b836a88a Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 17 Oct 2006 18:43:07 +0200
Subject: [PATCH] Introduce generic_file_splice_write_nolock()

This allows file systems to manage their own i_mutex locking while
still re-using the generic_file_splice_write() logic.

OCFS2 in particular wants this so that it can order cluster locks within
i_mutex.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c        | 80 ++++++++++++++++++++++++++++++++++++++++++++----------
 include/linux/fs.h |  2 ++
 2 files changed, 68 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index c1072b6940c3..68e20e65c6e1 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -707,13 +707,12 @@ out_ret:
  * key here is the 'actor' worker passed in that actually moves the data
  * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
  */
-ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
-			 loff_t *ppos, size_t len, unsigned int flags,
-			 splice_actor *actor)
+static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
+				  struct file *out, loff_t *ppos, size_t len,
+				  unsigned int flags, splice_actor *actor)
 {
 	int ret, do_wakeup, err;
 	struct splice_desc sd;
-	struct inode *inode = out->f_mapping->host;
 
 	ret = 0;
 	do_wakeup = 0;
@@ -723,14 +722,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	sd.file = out;
 	sd.pos = *ppos;
 
-	/*
-	 * The actor worker might be calling ->prepare_write and
-	 * ->commit_write. Most of the time, these expect i_mutex to
-	 * be held. Since this may result in an ABBA deadlock with
-	 * pipe->inode, we have to order lock acquiry here.
-	 */
-	inode_double_lock(inode, pipe->inode);
-
 	for (;;) {
 		if (pipe->nrbufs) {
 			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
@@ -803,8 +794,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 		pipe_wait(pipe);
 	}
 
-	inode_double_unlock(inode, pipe->inode);
-
 	if (do_wakeup) {
 		smp_mb();
 		if (waitqueue_active(&pipe->wait))
@@ -815,6 +804,69 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	return ret;
 }
 
+ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
+			 loff_t *ppos, size_t len, unsigned int flags,
+			 splice_actor *actor)
+{
+	ssize_t ret;
+	struct inode *inode = out->f_mapping->host;
+
+	/*
+	 * The actor worker might be calling ->prepare_write and
+	 * ->commit_write. Most of the time, these expect i_mutex to
+	 * be held. Since this may result in an ABBA deadlock with
+	 * pipe->inode, we have to order lock acquiry here.
+	 */
+	inode_double_lock(inode, pipe->inode);
+	ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor);
+	inode_double_unlock(inode, pipe->inode);
+
+	return ret;
+}
+
+/**
+ * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
+ * @pipe:	pipe info
+ * @out:	file to write to
+ * @len:	number of bytes to splice
+ * @flags:	splice modifier flags
+ *
+ * Will either move or copy pages (determined by @flags options) from
+ * the given pipe inode to the given file. The caller is responsible
+ * for acquiring i_mutex on both inodes.
+ *
+ */
+ssize_t
+generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
+				 loff_t *ppos, size_t len, unsigned int flags)
+{
+	struct address_space *mapping = out->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+	int err;
+
+	ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+	if (ret > 0) {
+		*ppos += ret;
+
+		/*
+		 * If file or inode is SYNC and we actually wrote some data,
+		 * sync it.
+		 */
+		if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
+			err = generic_osync_inode(inode, mapping,
+						  OSYNC_METADATA|OSYNC_DATA);
+
+			if (err)
+				ret = err;
+		}
+	}
+
+	return ret;
+}
+
+EXPORT_SYMBOL(generic_file_splice_write_nolock);
+
 /**
  * generic_file_splice_write - splice data from a pipe to a file
  * @pipe:	pipe info
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 853a02f23936..d695ba2346a3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1758,6 +1758,8 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
+extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *,
+		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
 		struct file *out, loff_t *, size_t len, unsigned int flags);
 extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
-- 
cgit v1.2.3


From 8c34e2d63231d4bf4852bac8521883944d770fe3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 17 Oct 2006 19:43:22 +0200
Subject: [PATCH] Remove SUID when splicing into an inode

Originally from Mark Fasheh <mark.fasheh@oracle.com>

generic_file_splice_write() does not remove S_ISUID or S_ISGID. This is
inconsistent with the way we generally write to files.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 68e20e65c6e1..49fb9f129938 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -845,6 +845,10 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
 	ssize_t ret;
 	int err;
 
+	err = remove_suid(out->f_dentry);
+	if (unlikely(err))
+		return err;
+
 	ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
 	if (ret > 0) {
 		*ppos += ret;
@@ -883,12 +887,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 			  loff_t *ppos, size_t len, unsigned int flags)
 {
 	struct address_space *mapping = out->f_mapping;
+	struct inode *inode = mapping->host;
 	ssize_t ret;
+	int err;
+
+	err = should_remove_suid(out->f_dentry);
+	if (unlikely(err)) {
+		mutex_lock(&inode->i_mutex);
+		err = __remove_suid(out->f_dentry, err);
+		mutex_unlock(&inode->i_mutex);
+		if (err)
+			return err;
+	}
 
 	ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
 	if (ret > 0) {
-		struct inode *inode = mapping->host;
-
 		*ppos += ret;
 
 		/*
@@ -896,8 +909,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 		 * sync it.
 		 */
 		if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
-			int err;
-
 			mutex_lock(&inode->i_mutex);
 			err = generic_osync_inode(inode, mapping,
 						  OSYNC_METADATA|OSYNC_DATA);
-- 
cgit v1.2.3


From 9eaef27b36a6b716384948da94b8fc5bfba7b712 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 21 Oct 2006 10:24:20 -0700
Subject: [PATCH] VFS: Make d_materialise_unique() enforce directory uniqueness

If the caller tries to instantiate a directory using an inode that already
has a dentry alias, then we attempt to rename the existing dentry instead
of instantiating a new one.  Fail with an ELOOP error if the rename would
affect one of our parent directories.

This behaviour is needed in order to avoid issues such as

  http://bugzilla.kernel.org/show_bug.cgi?id=7178

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c  | 137 +++++++++++++++++++++++++++++++++++++++++++----------------
 fs/nfs/dir.c |   7 ++-
 2 files changed, 106 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 2bac4ba1d1d3..a1ff91eef108 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1469,23 +1469,21 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
  * deleted it.
  */
  
-/**
- * d_move - move a dentry
+/*
+ * d_move_locked - move a dentry
  * @dentry: entry to move
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
  * dcache entries should not be moved in this way.
  */
-
-void d_move(struct dentry * dentry, struct dentry * target)
+static void d_move_locked(struct dentry * dentry, struct dentry * target)
 {
 	struct hlist_head *list;
 
 	if (!dentry->d_inode)
 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
-	spin_lock(&dcache_lock);
 	write_seqlock(&rename_lock);
 	/*
 	 * XXXX: do we really need to take target->d_lock?
@@ -1536,9 +1534,83 @@ already_unhashed:
 	fsnotify_d_move(dentry);
 	spin_unlock(&dentry->d_lock);
 	write_sequnlock(&rename_lock);
+}
+
+/**
+ * d_move - move a dentry
+ * @dentry: entry to move
+ * @target: new dentry
+ *
+ * Update the dcache to reflect the move of a file name. Negative
+ * dcache entries should not be moved in this way.
+ */
+
+void d_move(struct dentry * dentry, struct dentry * target)
+{
+	spin_lock(&dcache_lock);
+	d_move_locked(dentry, target);
 	spin_unlock(&dcache_lock);
 }
 
+/*
+ * Helper that returns 1 if p1 is a parent of p2, else 0
+ */
+static int d_isparent(struct dentry *p1, struct dentry *p2)
+{
+	struct dentry *p;
+
+	for (p = p2; p->d_parent != p; p = p->d_parent) {
+		if (p->d_parent == p1)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * This helper attempts to cope with remotely renamed directories
+ *
+ * It assumes that the caller is already holding
+ * dentry->d_parent->d_inode->i_mutex and the dcache_lock
+ *
+ * Note: If ever the locking in lock_rename() changes, then please
+ * remember to update this too...
+ *
+ * On return, dcache_lock will have been unlocked.
+ */
+static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
+{
+	struct mutex *m1 = NULL, *m2 = NULL;
+	struct dentry *ret;
+
+	/* If alias and dentry share a parent, then no extra locks required */
+	if (alias->d_parent == dentry->d_parent)
+		goto out_unalias;
+
+	/* Check for loops */
+	ret = ERR_PTR(-ELOOP);
+	if (d_isparent(alias, dentry))
+		goto out_err;
+
+	/* See lock_rename() */
+	ret = ERR_PTR(-EBUSY);
+	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
+		goto out_err;
+	m1 = &dentry->d_sb->s_vfs_rename_mutex;
+	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+		goto out_err;
+	m2 = &alias->d_parent->d_inode->i_mutex;
+out_unalias:
+	d_move_locked(alias, dentry);
+	ret = alias;
+out_err:
+	spin_unlock(&dcache_lock);
+	if (m2)
+		mutex_unlock(m2);
+	if (m1)
+		mutex_unlock(m1);
+	return ret;
+}
+
 /*
  * Prepare an anonymous dentry for life in the superblock's dentry tree as a
  * named dentry in place of the dentry to be replaced.
@@ -1581,7 +1653,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
  */
 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 {
-	struct dentry *alias, *actual;
+	struct dentry *actual;
 
 	BUG_ON(!d_unhashed(dentry));
 
@@ -1593,26 +1665,27 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 		goto found_lock;
 	}
 
-	/* See if a disconnected directory already exists as an anonymous root
-	 * that we should splice into the tree instead */
-	if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
-		spin_lock(&alias->d_lock);
-
-		/* Is this a mountpoint that we could splice into our tree? */
-		if (IS_ROOT(alias))
-			goto connect_mountpoint;
-
-		if (alias->d_name.len == dentry->d_name.len &&
-		    alias->d_parent == dentry->d_parent &&
-		    memcmp(alias->d_name.name,
-			   dentry->d_name.name,
-			   dentry->d_name.len) == 0)
-			goto replace_with_alias;
-
-		spin_unlock(&alias->d_lock);
-
-		/* Doh! Seem to be aliasing directories for some reason... */
-		dput(alias);
+	if (S_ISDIR(inode->i_mode)) {
+		struct dentry *alias;
+
+		/* Does an aliased dentry already exist? */
+		alias = __d_find_alias(inode, 0);
+		if (alias) {
+			actual = alias;
+			/* Is this an anonymous mountpoint that we could splice
+			 * into our tree? */
+			if (IS_ROOT(alias)) {
+				spin_lock(&alias->d_lock);
+				__d_materialise_dentry(dentry, alias);
+				__d_drop(alias);
+				goto found;
+			}
+			/* Nope, but we must(!) avoid directory aliasing */
+			actual = __d_unalias(dentry, alias);
+			if (IS_ERR(actual))
+				dput(alias);
+			goto out_nolock;
+		}
 	}
 
 	/* Add a unique reference */
@@ -1628,7 +1701,7 @@ found:
 	_d_rehash(actual);
 	spin_unlock(&actual->d_lock);
 	spin_unlock(&dcache_lock);
-
+out_nolock:
 	if (actual == dentry) {
 		security_d_instantiate(dentry, inode);
 		return NULL;
@@ -1637,16 +1710,6 @@ found:
 	iput(inode);
 	return actual;
 
-	/* Convert the anonymous/root alias into an ordinary dentry */
-connect_mountpoint:
-	__d_materialise_dentry(dentry, alias);
-
-	/* Replace the candidate dentry with the alias in the tree */
-replace_with_alias:
-	__d_drop(alias);
-	actual = alias;
-	goto found;
-
 shouldnt_be_hashed:
 	spin_unlock(&dcache_lock);
 	BUG();
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4133ef5264e5..27b5a1051b1c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -935,8 +935,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 
 no_entry:
 	res = d_materialise_unique(dentry, inode);
-	if (res != NULL)
+	if (res != NULL) {
+		if (IS_ERR(res))
+			goto out_unlock;
 		dentry = res;
+	}
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out_unlock:
@@ -1132,6 +1135,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 	alias = d_materialise_unique(dentry, inode);
 	if (alias != NULL) {
 		dput(dentry);
+		if (IS_ERR(alias))
+			return NULL;
 		dentry = alias;
 	}
 
-- 
cgit v1.2.3


From fc22617e451f23b466d4d63bb016f5f6111b69e4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 21 Oct 2006 10:24:24 -0700
Subject: [PATCH] NFS: Cache invalidation fixup

If someone has renamed a directory on the server, triggering the d_move
code in d_materialise_unique(), then we need to invalidate the cached
directory information in the source parent directory.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/dir.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 27b5a1051b1c..b34cd16f472f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -936,8 +936,14 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 no_entry:
 	res = d_materialise_unique(dentry, inode);
 	if (res != NULL) {
+		struct dentry *parent;
 		if (IS_ERR(res))
 			goto out_unlock;
+		/* Was a directory renamed! */
+		parent = dget_parent(res);
+		if (!IS_ROOT(parent))
+			nfs_mark_for_revalidate(parent->d_inode);
+		dput(parent);
 		dentry = res;
 	}
 	nfs_renew_times(dentry);
-- 
cgit v1.2.3


From bcbaecbb9968750d4bfb2686a97e396f681f88ef Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Oct 2006 16:49:36 +1000
Subject: [CRYPTO] users: Select ECB/CBC where needed

CRYPTO_MANAGER is selected automatically by CONFIG_ECB and CONFIG_CBC.

config CRYPTO_ECB
        tristate "ECB support"
        select CRYPTO_BLKCIPHER
        select CRYPTO_MANAGER


I've added CONFIG_ECB to the ones you mentioned and CONFIG_CBC to
gssapi.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/net/Kconfig   | 1 +
 fs/Kconfig            | 2 ++
 net/ieee80211/Kconfig | 2 ++
 3 files changed, 5 insertions(+)

(limited to 'fs')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index e2ed24918a58..e38846eb51fa 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2717,6 +2717,7 @@ config PPP_MPPE
        select CRYPTO
        select CRYPTO_SHA1
        select CRYPTO_ARC4
+       select CRYPTO_ECB
        ---help---
          Support for the MPPE Encryption protocol, as employed by the
 	 Microsoft Point-to-Point Tunneling Protocol.
diff --git a/fs/Kconfig b/fs/Kconfig
index fee318e6f4bb..133dcc8a4150 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1777,6 +1777,7 @@ config RPCSEC_GSS_KRB5
 	select CRYPTO
 	select CRYPTO_MD5
 	select CRYPTO_DES
+	select CRYPTO_CBC
 	help
 	  Provides for secure RPC calls by means of a gss-api
 	  mechanism based on Kerberos V5. This is required for
@@ -1795,6 +1796,7 @@ config RPCSEC_GSS_SPKM3
 	select CRYPTO_MD5
 	select CRYPTO_DES
 	select CRYPTO_CAST5
+	select CRYPTO_CBC
 	help
 	  Provides for secure RPC calls by means of a gss-api
 	  mechanism based on the SPKM3 public-key mechanism.
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index f7e84e9d13ad..a64be6cdf078 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -32,6 +32,7 @@ config IEEE80211_CRYPT_WEP
 	depends on IEEE80211
 	select CRYPTO
 	select CRYPTO_ARC4
+	select CRYPTO_ECB
 	select CRC32
 	---help---
 	Include software based cipher suites in support of IEEE
@@ -58,6 +59,7 @@ config IEEE80211_CRYPT_TKIP
 	depends on IEEE80211 && NET_RADIO
 	select CRYPTO
 	select CRYPTO_MICHAEL_MIC
+	select CRYPTO_ECB
 	select CRC32
 	---help---
 	Include software based cipher suites in support of IEEE 802.11i
-- 
cgit v1.2.3


From 2ae88149a27cadf2840e0ab8155bef13be285c03 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 28 Oct 2006 10:38:23 -0700
Subject: [PATCH] mm: clean up pagecache allocation

- Consolidate page_cache_alloc

- Fix splice: only the pagecache pages and filesystem data need to use
  mapping_gfp_mask.

- Fix grab_cache_page_nowait: same as splice, also honour NUMA placement.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/splice.c             |  9 ++++-----
 include/linux/pagemap.h | 14 +++++++++-----
 mm/filemap.c            | 24 ++++++------------------
 3 files changed, 19 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 49fb9f129938..8d705954d294 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -74,7 +74,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
 		wait_on_page_writeback(page);
 
 		if (PagePrivate(page))
-			try_to_release_page(page, mapping_gfp_mask(mapping));
+			try_to_release_page(page, GFP_KERNEL);
 
 		/*
 		 * If we succeeded in removing the mapping, set LRU flag
@@ -333,7 +333,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 				break;
 
 			error = add_to_page_cache_lru(page, mapping, index,
-					      mapping_gfp_mask(mapping));
+					      GFP_KERNEL);
 			if (unlikely(error)) {
 				page_cache_release(page);
 				if (error == -EEXIST)
@@ -557,7 +557,6 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 {
 	struct file *file = sd->file;
 	struct address_space *mapping = file->f_mapping;
-	gfp_t gfp_mask = mapping_gfp_mask(mapping);
 	unsigned int offset, this_len;
 	struct page *page;
 	pgoff_t index;
@@ -591,7 +590,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 			goto find_page;
 
 		page = buf->page;
-		if (add_to_page_cache(page, mapping, index, gfp_mask)) {
+		if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) {
 			unlock_page(page);
 			goto find_page;
 		}
@@ -613,7 +612,7 @@ find_page:
 			 * This will also lock the page
 			 */
 			ret = add_to_page_cache_lru(page, mapping, index,
-						    gfp_mask);
+						    GFP_KERNEL);
 			if (unlikely(ret))
 				goto out;
 		}
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 64f950925151..c3e255bf8594 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -52,19 +52,23 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 void release_pages(struct page **pages, int nr, int cold);
 
 #ifdef CONFIG_NUMA
-extern struct page *page_cache_alloc(struct address_space *x);
-extern struct page *page_cache_alloc_cold(struct address_space *x);
+extern struct page *__page_cache_alloc(gfp_t gfp);
 #else
+static inline struct page *__page_cache_alloc(gfp_t gfp)
+{
+	return alloc_pages(gfp, 0);
+}
+#endif
+
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
-	return alloc_pages(mapping_gfp_mask(x), 0);
+	return __page_cache_alloc(mapping_gfp_mask(x));
 }
 
 static inline struct page *page_cache_alloc_cold(struct address_space *x)
 {
-	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
+	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
 }
-#endif
 
 typedef int filler_t(void *, struct page *);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index cb26e33fd0ff..7b84dc814347 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -467,25 +467,15 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 }
 
 #ifdef CONFIG_NUMA
-struct page *page_cache_alloc(struct address_space *x)
+struct page *__page_cache_alloc(gfp_t gfp)
 {
 	if (cpuset_do_page_mem_spread()) {
 		int n = cpuset_mem_spread_node();
-		return alloc_pages_node(n, mapping_gfp_mask(x), 0);
+		return alloc_pages_node(n, gfp, 0);
 	}
-	return alloc_pages(mapping_gfp_mask(x), 0);
+	return alloc_pages(gfp, 0);
 }
-EXPORT_SYMBOL(page_cache_alloc);
-
-struct page *page_cache_alloc_cold(struct address_space *x)
-{
-	if (cpuset_do_page_mem_spread()) {
-		int n = cpuset_mem_spread_node();
-		return alloc_pages_node(n, mapping_gfp_mask(x)|__GFP_COLD, 0);
-	}
-	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
-}
-EXPORT_SYMBOL(page_cache_alloc_cold);
+EXPORT_SYMBOL(__page_cache_alloc);
 #endif
 
 static int __sleep_on_page_lock(void *word)
@@ -826,7 +816,6 @@ struct page *
 grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
 {
 	struct page *page = find_get_page(mapping, index);
-	gfp_t gfp_mask;
 
 	if (page) {
 		if (!TestSetPageLocked(page))
@@ -834,9 +823,8 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
 		page_cache_release(page);
 		return NULL;
 	}
-	gfp_mask = mapping_gfp_mask(mapping) & ~__GFP_FS;
-	page = alloc_pages(gfp_mask, 0);
-	if (page && add_to_page_cache_lru(page, mapping, index, gfp_mask)) {
+	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
+	if (page && add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) {
 		page_cache_release(page);
 		page = NULL;
 	}
-- 
cgit v1.2.3


From 1939e49a0cb9d73785857bf312f4f65661b4b513 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sat, 28 Oct 2006 10:38:26 -0700
Subject: [PATCH] ext4: fix printk format warnings

fs/ext4/resize.c:72: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:76: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:81: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:85: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:89: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:89: warning: long long unsigned int format, __u64 arg (arg 5)
fs/ext4/resize.c:93: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:93: warning: long long unsigned int format, __u64 arg (arg 5)
fs/ext4/resize.c:98: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:103: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:109: warning: long long unsigned int format, __u64 arg (arg 4)

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/resize.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 1e9578052cd3..4fe49c3661b2 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -69,44 +69,49 @@ static int verify_group_input(struct super_block *sb,
 	else if (outside(input->block_bitmap, start, end))
 		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap not in group (block %llu)",
-			     input->block_bitmap);
+			     (unsigned long long)input->block_bitmap);
 	else if (outside(input->inode_bitmap, start, end))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode bitmap not in group (block %llu)",
-			     input->inode_bitmap);
+			     (unsigned long long)input->inode_bitmap);
 	else if (outside(input->inode_table, start, end) ||
 	         outside(itend - 1, start, end))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode table not in group (blocks %llu-%llu)",
-			     input->inode_table, itend - 1);
+			     (unsigned long long)input->inode_table, itend - 1);
 	else if (input->inode_bitmap == input->block_bitmap)
 		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap same as inode bitmap (%llu)",
-			     input->block_bitmap);
+			     (unsigned long long)input->block_bitmap);
 	else if (inside(input->block_bitmap, input->inode_table, itend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap (%llu) in inode table (%llu-%llu)",
-			     input->block_bitmap, input->inode_table, itend-1);
+			     (unsigned long long)input->block_bitmap,
+			     (unsigned long long)input->inode_table, itend - 1);
 	else if (inside(input->inode_bitmap, input->inode_table, itend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode bitmap (%llu) in inode table (%llu-%llu)",
-			     input->inode_bitmap, input->inode_table, itend-1);
+			     (unsigned long long)input->inode_bitmap,
+			     (unsigned long long)input->inode_table, itend - 1);
 	else if (inside(input->block_bitmap, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap (%llu) in GDT table"
 			     " (%llu-%llu)",
-			     input->block_bitmap, start, metaend - 1);
+			     (unsigned long long)input->block_bitmap,
+			     start, metaend - 1);
 	else if (inside(input->inode_bitmap, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode bitmap (%llu) in GDT table"
 			     " (%llu-%llu)",
-			     input->inode_bitmap, start, metaend - 1);
+			     (unsigned long long)input->inode_bitmap,
+			     start, metaend - 1);
 	else if (inside(input->inode_table, start, metaend) ||
 	         inside(itend - 1, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode table (%llu-%llu) overlaps"
 			     "GDT table (%llu-%llu)",
-			     input->inode_table, itend - 1, start, metaend - 1);
+			     (unsigned long long)input->inode_table,
+			     itend - 1, start, metaend - 1);
 	else
 		err = 0;
 	brelse(bh);
-- 
cgit v1.2.3


From f58a74dca88d48b0669609b4957f3dd757bdc898 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Sat, 28 Oct 2006 10:38:27 -0700
Subject: [PATCH] jbd: journal_dirty_data re-check for unmapped buffers

When running several fsx's and other filesystem stress tests, we found
cases where an unmapped buffer was still being sent to submit_bh by the
ext3 dirty data journaling code.

I saw this happen in two ways, both related to another thread doing a
truncate which would unmap the buffer in question.

Either we would get into journal_dirty_data with a bh which was already
unmapped (although journal_dirty_data_fn had checked for this earlier, the
state was not locked at that point), or it would get unmapped in the middle
of journal_dirty_data when we dropped locks to call sync_dirty_buffer.

By re-checking for mapped state after we've acquired the bh state lock, we
should avoid these races.  If we find a buffer which is no longer mapped,
we essentially ignore it, because journal_unmap_buffer has already decided
that this buffer can go away.

I've also added tracepoints in these two cases, and made a couple other
tracepoint changes that I found useful in debugging this.

Signed-off-by: Eric Sandeen <esandeen@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/transaction.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index d5c63047a8b3..4f82bcd63e48 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -967,6 +967,13 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	 */
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
+
+	/* Now that we have bh_state locked, are we really still mapped? */
+	if (!buffer_mapped(bh)) {
+		JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
+		goto no_journal;
+	}
+
 	if (jh->b_transaction) {
 		JBUFFER_TRACE(jh, "has transaction");
 		if (jh->b_transaction != handle->h_transaction) {
@@ -1028,6 +1035,11 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 				sync_dirty_buffer(bh);
 				jbd_lock_bh_state(bh);
 				spin_lock(&journal->j_list_lock);
+				/* Since we dropped the lock... */
+				if (!buffer_mapped(bh)) {
+					JBUFFER_TRACE(jh, "buffer got unmapped");
+					goto no_journal;
+				}
 				/* The buffer may become locked again at any
 				   time if it is redirtied */
 			}
@@ -1824,6 +1836,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 			}
 		}
 	} else if (transaction == journal->j_committing_transaction) {
+		JBUFFER_TRACE(jh, "on committing transaction");
 		if (jh->b_jlist == BJ_Locked) {
 			/*
 			 * The buffer is on the committing transaction's locked
@@ -1838,7 +1851,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		 * can remove it's next_transaction pointer from the
 		 * running transaction if that is set, but nothing
 		 * else. */
-		JBUFFER_TRACE(jh, "on committing transaction");
 		set_buffer_freed(bh);
 		if (jh->b_next_transaction) {
 			J_ASSERT(jh->b_next_transaction ==
@@ -1858,6 +1870,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		 * i_size already for this truncate so recovery will not
 		 * expose the disk blocks we are discarding here.) */
 		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
+		JBUFFER_TRACE(jh, "on running transaction");
 		may_free = __dispose_buffer(jh, transaction);
 	}
 
-- 
cgit v1.2.3


From 9b57988db9b2c81794546cb792133f0cfd064ea8 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Sat, 28 Oct 2006 10:38:28 -0700
Subject: [PATCH] jbd2: journal_dirty_data re-check for unmapped buffers

When running several fsx's and other filesystem stress tests, we found
cases where an unmapped buffer was still being sent to submit_bh by the
ext3 dirty data journaling code.

I saw this happen in two ways, both related to another thread doing a
truncate which would unmap the buffer in question.

Either we would get into journal_dirty_data with a bh which was already
unmapped (although journal_dirty_data_fn had checked for this earlier, the
state was not locked at that point), or it would get unmapped in the middle
of journal_dirty_data when we dropped locks to call sync_dirty_buffer.

By re-checking for mapped state after we've acquired the bh state lock, we
should avoid these races.  If we find a buffer which is no longer mapped,
we essentially ignore it, because journal_unmap_buffer has already decided
that this buffer can go away.

I've also added tracepoints in these two cases, and made a couple other
tracepoint changes that I found useful in debugging this.

Signed-off-by: Eric Sandeen <esandeen@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/transaction.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b6cf2be845a1..c051a94c8a97 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -967,6 +967,13 @@ int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	 */
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
+
+	/* Now that we have bh_state locked, are we really still mapped? */
+	if (!buffer_mapped(bh)) {
+		JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
+		goto no_journal;
+	}
+
 	if (jh->b_transaction) {
 		JBUFFER_TRACE(jh, "has transaction");
 		if (jh->b_transaction != handle->h_transaction) {
@@ -1028,6 +1035,11 @@ int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 				sync_dirty_buffer(bh);
 				jbd_lock_bh_state(bh);
 				spin_lock(&journal->j_list_lock);
+				/* Since we dropped the lock... */
+				if (!buffer_mapped(bh)) {
+					JBUFFER_TRACE(jh, "buffer got unmapped");
+					goto no_journal;
+				}
 				/* The buffer may become locked again at any
 				   time if it is redirtied */
 			}
@@ -1824,6 +1836,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 			}
 		}
 	} else if (transaction == journal->j_committing_transaction) {
+		JBUFFER_TRACE(jh, "on committing transaction");
 		if (jh->b_jlist == BJ_Locked) {
 			/*
 			 * The buffer is on the committing transaction's locked
@@ -1838,7 +1851,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		 * can remove it's next_transaction pointer from the
 		 * running transaction if that is set, but nothing
 		 * else. */
-		JBUFFER_TRACE(jh, "on committing transaction");
 		set_buffer_freed(bh);
 		if (jh->b_next_transaction) {
 			J_ASSERT(jh->b_next_transaction ==
@@ -1858,6 +1870,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		 * i_size already for this truncate so recovery will not
 		 * expose the disk blocks we are discarding here.) */
 		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
+		JBUFFER_TRACE(jh, "on running transaction");
 		may_free = __dispose_buffer(jh, transaction);
 	}
 
-- 
cgit v1.2.3


From 6a2aae06cc1e87e9712a26a639f6a2f3442e2027 Mon Sep 17 00:00:00 2001
From: Pavel Emelianov <xemul@openvz.org>
Date: Sat, 28 Oct 2006 10:38:33 -0700
Subject: [PATCH] Fix potential OOPs in blkdev_open()

blkdev_open() calls bc_acquire() to get a struct block_device.  Since
bc_acquire() may return NULL when system is out of memory an appropriate
check is required.

Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index bc8f27cc4483..702b88cbd91d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1131,6 +1131,8 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	filp->f_flags |= O_LARGEFILE;
 
 	bdev = bd_acquire(inode);
+	if (bdev == NULL)
+		return -ENOMEM;
 
 	res = do_open(bdev, filp, BD_MUTEX_NORMAL);
 	if (res)
-- 
cgit v1.2.3


From b9d7e6ae82da124dc9c579fe1061264ef2a69407 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 28 Oct 2006 10:38:41 -0700
Subject: [PATCH] hugetlb: fix size=4G parsing

On 32-bit machines, mount -t hugetlbfs -o size=4G gave a 0GB filesystem,
size=5G gave a 1GB filesystem etc: there's no point in masking size with
HPAGE_MASK just before shifting its lower bits away, and since HPAGE_MASK is a
UL, that removed all the higher bits of the unsigned long long size.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4ee3f006b861..0b23b963bb44 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -624,7 +624,6 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 				do_div(size, 100);
 				rest++;
 			}
-			size &= HPAGE_MASK;
 			pconfig->nr_blocks = (size >> HPAGE_SHIFT);
 			value = rest;
 		} else if (!strcmp(opt,"nr_inodes")) {
-- 
cgit v1.2.3


From 856fc29505556cf263f3dcda2533cf3766c14ab6 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 28 Oct 2006 10:38:43 -0700
Subject: [PATCH] hugetlb: fix prio_tree unit

hugetlb_vmtruncate_list was misconverted to prio_tree: its prio_tree is in
units of PAGE_SIZE (PAGE_CACHE_SIZE) like any other, not HPAGE_SIZE (whereas
its radix_tree is kept in units of HPAGE_SIZE, otherwise slots would be
absurdly sparse).

At first I thought the error benign, just calling __unmap_hugepage_range on
more vmas than necessary; but on 32-bit machines, when the prio_tree is
searched correctly, it happens to ensure the v_offset calculation won't
overflow.  As it stood, when truncating at or beyond 4GB, it was liable to
discard pages COWed from lower offsets; or even to clear pmd entries of
preceding vmas, triggering exit_mmap's BUG_ON(nr_ptes).

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0b23b963bb44..0bea6a619e10 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -271,26 +271,24 @@ static void hugetlbfs_drop_inode(struct inode *inode)
 		hugetlbfs_forget_inode(inode);
 }
 
-/*
- * h_pgoff is in HPAGE_SIZE units.
- * vma->vm_pgoff is in PAGE_SIZE units.
- */
 static inline void
-hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
+hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
 {
 	struct vm_area_struct *vma;
 	struct prio_tree_iter iter;
 
-	vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
-		unsigned long h_vm_pgoff;
+	vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
 		unsigned long v_offset;
 
-		h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
-		v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT;
 		/*
-		 * Is this VMA fully outside the truncation point?
+		 * Can the expression below overflow on 32-bit arches?
+		 * No, because the prio_tree returns us only those vmas
+		 * which overlap the truncated area starting at pgoff,
+		 * and no vma on a 32-bit arch can span beyond the 4GB.
 		 */
-		if (h_vm_pgoff >= h_pgoff)
+		if (vma->vm_pgoff < pgoff)
+			v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT;
+		else
 			v_offset = 0;
 
 		__unmap_hugepage_range(vma,
@@ -303,14 +301,14 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
  */
 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
-	unsigned long pgoff;
+	pgoff_t pgoff;
 	struct address_space *mapping = inode->i_mapping;
 
 	if (offset > inode->i_size)
 		return -EINVAL;
 
 	BUG_ON(offset & ~HPAGE_MASK);
-	pgoff = offset >> HPAGE_SHIFT;
+	pgoff = offset >> PAGE_SHIFT;
 
 	inode->i_size = offset;
 	spin_lock(&mapping->i_mmap_lock);
-- 
cgit v1.2.3


From 6eac3f93f5e6b7256fb20b7608d62ec192da12be Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@sw.ru>
Date: Sat, 28 Oct 2006 10:38:44 -0700
Subject: [PATCH] missing unused dentry in prune_dcache()?

On the the following patch:
http://linux.bkbits.net:8080/linux-2.6/gnupatch@449b144ecSF1rYskg3q-SeR2vf88zg

# ChangeSet
#   2006/06/22 15:05:57-07:00 neilb@suse.de
#   [PATCH] Fix dcache race during umount

#   If prune_dcache finds a dentry that it cannot free, it leaves it where it
#   is (at the tail of the list) and exits, on the assumption that some other
#   thread will be removing that dentry soon.

However as far as I see this comment is not correct: when we cannot take
s_umount rw_semaphore (for example because it was taken in do_remount) this
dentry is already extracted from dentry_unused list and we do not add it
into the list again.  Therefore dentry will not be found by prune_dcache()
and shrink_dcache_sb() and will leave in memory very long time until the
partition will be unmounted.

The patch adds this dentry into tail of the dentry_unused list.

Signed-off-by: Vasily Averin <vvs@sw.ru>
Cc: Neil Brown <neilb@suse.de>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index a1ff91eef108..a5b76b647c6d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -478,11 +478,12 @@ static void prune_dcache(int count, struct super_block *sb)
 			up_read(s_umount);
 		}
 		spin_unlock(&dentry->d_lock);
-		/* Cannot remove the first dentry, and it isn't appropriate
-		 * to move it to the head of the list, so give up, and try
-		 * later
+		/*
+		 * Insert dentry at the head of the list as inserting at the
+		 * tail leads to a cycle.
 		 */
-		break;
+ 		list_add(&dentry->d_lru, &dentry_unused);
+		dentry_stat.nr_unused++;
 	}
 	spin_unlock(&dcache_lock);
 }
-- 
cgit v1.2.3


From f87135762de4328c6f17897e803e6909bc056feb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 28 Oct 2006 10:38:46 -0700
Subject: [PATCH] VFS: Fix an error in unused dentry counting

With Vasily Averin <vvs@sw.ru>

Fix an error in unused dentry counting in shrink_dcache_for_umount_subtree()
in which the count is modified without the dcache_lock held.

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Vasily Averin <vvs@sw.ru>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index a5b76b647c6d..fd4a428998ef 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -557,6 +557,7 @@ repeat:
 static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 {
 	struct dentry *parent;
+	unsigned detached = 0;
 
 	BUG_ON(!IS_ROOT(dentry));
 
@@ -621,7 +622,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 				atomic_dec(&parent->d_count);
 
 			list_del(&dentry->d_u.d_child);
-			dentry_stat.nr_dentry--;	/* For d_free, below */
+			detached++;
 
 			inode = dentry->d_inode;
 			if (inode) {
@@ -639,7 +640,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 			 * otherwise we ascend to the parent and move to the
 			 * next sibling if there is one */
 			if (!parent)
-				return;
+				goto out;
 
 			dentry = parent;
 
@@ -648,6 +649,11 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 		dentry = list_entry(dentry->d_subdirs.next,
 				    struct dentry, d_u.d_child);
 	}
+out:
+	/* several dentries were freed, need to correct nr_dentry */
+	spin_lock(&dcache_lock);
+	dentry_stat.nr_dentry -= detached;
+	spin_unlock(&dcache_lock);
 }
 
 /*
-- 
cgit v1.2.3


From 89f68225876db7df638de2884b561facb1870239 Mon Sep 17 00:00:00 2001
From: Daniel Drake <ddrake@brontes3d.com>
Date: Mon, 30 Oct 2006 11:47:02 -0600
Subject: jfs: Add splice support

This allows the splice() and tee() syscalls to be used with JFS.

Signed-off-by: Daniel Drake <ddrake@brontes3d.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/file.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 34181b8f5a0a..aa9132d04920 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -109,6 +109,8 @@ const struct file_operations jfs_file_operations = {
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
 	.fsync		= jfs_fsync,
 	.release	= jfs_release,
 	.ioctl		= jfs_ioctl,
-- 
cgit v1.2.3


From 7ca85ba752e521f1b5ead1f3b91c562cc3910c7b Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Mon, 30 Oct 2006 21:42:57 +0000
Subject: [CIFS] Fix readdir breakage when blocksize set too small

Do not treat filldir running out of space as an error that needs
to be returned.

Fixes Redhat bugzilla bug # 211070

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/readdir.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b5b0a2a41bef..ed18c3965f7b 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -896,6 +896,10 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 		     tmp_inode->i_ino,obj_type);
 	if(rc) {
 		cFYI(1,("filldir rc = %d",rc));
+		/* we can not return filldir errors to the caller
+		since they are "normal" when the stat blocksize
+		is too small - we return remapped error instead */
+		rc = -EOVERFLOW;
 	}
 
 	dput(tmp_dentry);
@@ -1074,6 +1078,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 			we want to check for that here? */
 			rc = cifs_filldir(current_entry, file,
 					filldir, direntry, tmp_buf, max_len);
+			if(rc == -EOVERFLOW) {
+				rc = 0;
+				break;
+			}
+
 			file->f_pos++;
 			if(file->f_pos == 
 				cifsFile->srch_inf.index_of_last_entry) {
-- 
cgit v1.2.3


From 4b952a9b0877dbe8f0f69b2747abe79e3bbd2865 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Mon, 30 Oct 2006 21:46:13 +0000
Subject: [CIFS] Allow null user connections

Some servers are configured to only allow null user mounts for
guest access.  Allow nul user (anonymous) mounts e.g.
	mount -t cifs //server/share /mnt -o username=

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES   | 3 +++
 fs/cifs/connect.c | 7 ++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1eb9a2ec0a3b..50afab81a59b 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,6 +1,9 @@
 Version 1.46
 ------------
 Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
+Allow null user to be specified on mount ("username="). Do not return
+EINVAL on readdir when filldir fails due to overwritten blocksize
+(fixes FC problem)
 
 Version 1.45
 ------------
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4093d5332930..71f77914ce93 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -822,10 +822,13 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 		} else if (strnicmp(data, "nouser_xattr",12) == 0) {
 			vol->no_xattr = 1;
 		} else if (strnicmp(data, "user", 4) == 0) {
-			if (!value || !*value) {
+			if (!value) {
 				printk(KERN_WARNING
 				       "CIFS: invalid or missing username\n");
 				return 1;	/* needs_arg; */
+			} else if(!*value) {
+				/* null user, ie anonymous, authentication */
+				vol->nullauth = 1;
 			}
 			if (strnlen(value, 200) < 200) {
 				vol->username = value;
@@ -1642,6 +1645,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		/* BB fixme parse for domain name here */
 		cFYI(1, ("Username: %s ", volume_info.username));
 
+	} else if (volume_info.nullauth) {
+		cFYI(1,("null user"));
 	} else {
 		cifserror("No username specified");
         /* In userspace mount helper we can get user name from alternate
-- 
cgit v1.2.3


From bcb55165d3d1ae3ec95807d118fd6d5956cd127b Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 30 Oct 2006 16:23:45 -0500
Subject: [PATCH] fix bd_claim_by_kobject error handling

This fixes bd_claim_by_kobject to release bdev correctly in case that
bd_claim succeeds but following add_bd_holder fails.

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 702b88cbd91d..b54b0a1b7c68 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -751,8 +751,11 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 
 	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	res = bd_claim(bdev, holder);
-	if (res == 0)
+	if (res == 0) {
 		res = add_bd_holder(bdev, bo);
+		if (res)
+			bd_release(bdev);
+	}
 	if (res)
 		free_bd_holder(bo);
 	mutex_unlock(&bdev->bd_mutex);
-- 
cgit v1.2.3


From df6c0cd9a872ebf2298f5d66d8c789f62dbe35fc Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 30 Oct 2006 16:23:56 -0500
Subject: [PATCH] clean up add_bd_holder()

add_bd_holder() is called from bd_claim_by_kobject to put a given struct
bd_holder in the list if there is no matching entry.

There are 3 possible results of add_bd_holder():
  1. there is no matching entry and add the given one to the list
  2. there is matching entry, so just increment reference count of
     the existing one
  3. something failed during its course

1 and 2 are successful cases.  But for case 2, someone has to free the
unused struct bd_holder.

The current code frees it inside of add_bd_holder and returns same value
0 for both cases 1 and 2.  However, it's natural and less error-prone if
caller frees it since it's allocated by the caller.

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 53 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index b54b0a1b7c68..aaa8301f43f1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -641,17 +641,39 @@ static void free_bd_holder(struct bd_holder *bo)
 	kfree(bo);
 }
 
+/**
+ * find_bd_holder - find matching struct bd_holder from the block device
+ *
+ * @bdev:	struct block device to be searched
+ * @bo:		target struct bd_holder
+ *
+ * Returns matching entry with @bo in @bdev->bd_holder_list.
+ * If found, increment the reference count and return the pointer.
+ * If not found, returns NULL.
+ */
+static int find_bd_holder(struct block_device *bdev, struct bd_holder *bo)
+{
+	struct bd_holder *tmp;
+
+	list_for_each_entry(tmp, &bdev->bd_holder_list, list)
+		if (tmp->sdir == bo->sdir) {
+			tmp->count++;
+			return tmp;
+		}
+
+	return NULL;
+}
+
 /**
  * add_bd_holder - create sysfs symlinks for bd_claim() relationship
  *
  * @bdev:	block device to be bd_claimed
  * @bo:		preallocated and initialized by alloc_bd_holder()
  *
- * If there is no matching entry with @bo in @bdev->bd_holder_list,
- * add @bo to the list, create symlinks.
+ * Add @bo to @bdev->bd_holder_list, create symlinks.
  *
- * Returns 0 if symlinks are created or already there.
- * Returns -ve if something fails and @bo can be freed.
+ * Returns 0 if symlinks are created.
+ * Returns -ve if something fails.
  */
 static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
 {
@@ -661,15 +683,6 @@ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
 	if (!bo)
 		return -EINVAL;
 
-	list_for_each_entry(tmp, &bdev->bd_holder_list, list) {
-		if (tmp->sdir == bo->sdir) {
-			tmp->count++;
-			/* We've already done what we need to do here. */
-			free_bd_holder(bo);
-			return 0;
-		}
-	}
-
 	if (!bd_holder_grab_dirs(bdev, bo))
 		return -EBUSY;
 
@@ -740,7 +753,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 				struct kobject *kobj)
 {
 	int res;
-	struct bd_holder *bo;
+	struct bd_holder *bo, *found;
 
 	if (!kobj)
 		return -EINVAL;
@@ -752,11 +765,15 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	res = bd_claim(bdev, holder);
 	if (res == 0) {
-		res = add_bd_holder(bdev, bo);
-		if (res)
-			bd_release(bdev);
+		found = find_bd_holder(bdev, bo);
+		if (found == NULL) {
+			res = add_bd_holder(bdev, bo);
+			if (res)
+				bd_release(bdev);
+		}
 	}
-	if (res)
+
+	if (res || found)
 		free_bd_holder(bo);
 	mutex_unlock(&bdev->bd_mutex);
 
-- 
cgit v1.2.3


From 36a561d6a95c4b89ae4845bf91456b4f784b6eec Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 30 Oct 2006 22:07:03 -0800
Subject: [PATCH] find_bd_holder() fix

fs/block_dev.c: In function 'find_bd_holder':
fs/block_dev.c:666: warning: return makes integer from pointer without a cast
fs/block_dev.c:669: warning: return makes integer from pointer without a cast
fs/block_dev.c: In function 'add_bd_holder':
fs/block_dev.c:685: warning: unused variable 'tmp'
fs/block_dev.c: In function 'bd_claim_by_kobject':
fs/block_dev.c:773: warning: assignment makes pointer from integer without a cast

Acked-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index aaa8301f43f1..36c0e7af9d0f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -651,7 +651,8 @@ static void free_bd_holder(struct bd_holder *bo)
  * If found, increment the reference count and return the pointer.
  * If not found, returns NULL.
  */
-static int find_bd_holder(struct block_device *bdev, struct bd_holder *bo)
+static struct bd_holder *find_bd_holder(struct block_device *bdev,
+					struct bd_holder *bo)
 {
 	struct bd_holder *tmp;
 
@@ -677,7 +678,6 @@ static int find_bd_holder(struct block_device *bdev, struct bd_holder *bo)
  */
 static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
 {
-	struct bd_holder *tmp;
 	int ret;
 
 	if (!bo)
-- 
cgit v1.2.3


From e5d9cbde6ce0001e49994df5fcdcbeff8be8037b Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:16 -0800
Subject: [PATCH] eCryptfs: Clean up crypto initialization

Clean up the crypto initialization code; let the crypto API take care of the
key size checks.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c          | 66 ++++++++-----------------------------------
 fs/ecryptfs/ecryptfs_kernel.h |  4 +--
 fs/ecryptfs/keystore.c        | 19 ++++++-------
 fs/ecryptfs/main.c            | 13 ++-------
 4 files changed, 24 insertions(+), 78 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index ed35a9712fa1..82e7d02cefae 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1573,35 +1573,26 @@ out:
 
 /**
  * ecryptfs_process_cipher - Perform cipher initialization.
- * @tfm: Crypto context set by this function
  * @key_tfm: Crypto context for key material, set by this function
- * @cipher_name: Name of the cipher.
- * @key_size: Size of the key in bytes.
+ * @cipher_name: Name of the cipher
+ * @key_size: Size of the key in bytes
  *
  * Returns zero on success. Any crypto_tfm structs allocated here
  * should be released by other functions, such as on a superblock put
  * event, regardless of whether this function succeeds for fails.
  */
 int
-ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
-			char *cipher_name, size_t key_size)
+ecryptfs_process_cipher(struct crypto_tfm **key_tfm, char *cipher_name,
+			size_t *key_size)
 {
 	char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
 	int rc;
 
-	*tfm = *key_tfm = NULL;
-	if (key_size > ECRYPTFS_MAX_KEY_BYTES) {
+	*key_tfm = NULL;
+	if (*key_size > ECRYPTFS_MAX_KEY_BYTES) {
 		rc = -EINVAL;
 		printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum "
-		       "allowable is [%d]\n", key_size, ECRYPTFS_MAX_KEY_BYTES);
-		goto out;
-	}
-	*tfm = crypto_alloc_tfm(cipher_name, (ECRYPTFS_DEFAULT_CHAINING_MODE
-					      | CRYPTO_TFM_REQ_WEAK_KEY));
-	if (!(*tfm)) {
-		rc = -EINVAL;
-		printk(KERN_ERR "Unable to allocate crypto cipher with name "
-		       "[%s]\n", cipher_name);
+		      "allowable is [%d]\n", *key_size, ECRYPTFS_MAX_KEY_BYTES);
 		goto out;
 	}
 	*key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY);
@@ -1611,46 +1602,13 @@ ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
 		       "[%s]\n", cipher_name);
 		goto out;
 	}
-	if (key_size < crypto_tfm_alg_min_keysize(*tfm)) {
-		rc = -EINVAL;
-		printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
-		       "supported by cipher [%s] is [%d]\n", key_size,
-		       cipher_name, crypto_tfm_alg_min_keysize(*tfm));
-		goto out;
-	}
-	if (key_size < crypto_tfm_alg_min_keysize(*key_tfm)) {
-		rc = -EINVAL;
-		printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
-		       "supported by cipher [%s] is [%d]\n", key_size,
-		       cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
-		goto out;
-	}
-	if (key_size > crypto_tfm_alg_max_keysize(*tfm)) {
-		rc = -EINVAL;
-		printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
-		       "supported by cipher [%s] is [%d]\n", key_size,
-		       cipher_name, crypto_tfm_alg_min_keysize(*tfm));
-		goto out;
-	}
-	if (key_size > crypto_tfm_alg_max_keysize(*key_tfm)) {
-		rc = -EINVAL;
-		printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
-		       "supported by cipher [%s] is [%d]\n", key_size,
-		       cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
-		goto out;
-	}
-	get_random_bytes(dummy_key, key_size);
-	rc = crypto_cipher_setkey(*tfm, dummy_key, key_size);
-	if (rc) {
-		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
-		       "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
-		rc = -EINVAL;
-		goto out;
-	}
-	rc = crypto_cipher_setkey(*key_tfm, dummy_key, key_size);
+	if (*key_size == 0)
+		*key_size = crypto_tfm_alg_max_keysize(*key_tfm);
+	get_random_bytes(dummy_key, *key_size);
+	rc = crypto_cipher_setkey(*key_tfm, dummy_key, *key_size);
 	if (rc) {
 		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
-		       "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
+		       "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc);
 		rc = -EINVAL;
 		goto out;
 	}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 872c9958531a..4112df9dec50 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -473,8 +473,8 @@ ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 			  unsigned char *src, struct dentry *ecryptfs_dentry);
 int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
 int
-ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
-			char *cipher_name, size_t key_size);
+ecryptfs_process_cipher(struct crypto_tfm **key_tfm, char *cipher_name,
+			size_t *key_size);
 int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
 int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
 void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index ba454785a0c5..bc706d33559a 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -493,19 +493,16 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 			goto out;
 		}
 	}
-	if (password_s_ptr->session_key_encryption_key_bytes
-	    < crypto_tfm_alg_min_keysize(tfm)) {
-		printk(KERN_WARNING "Session key encryption key is [%d] bytes; "
-		       "minimum keysize for selected cipher is [%d] bytes.\n",
-		       password_s_ptr->session_key_encryption_key_bytes,
-		       crypto_tfm_alg_min_keysize(tfm));
-		rc = -EINVAL;
-		goto out;
-	}
 	if (tfm_mutex)
 		mutex_lock(tfm_mutex);
-	crypto_cipher_setkey(tfm, password_s_ptr->session_key_encryption_key,
-			     crypt_stat->key_size);
+	rc = crypto_cipher_setkey(tfm,
+				  password_s_ptr->session_key_encryption_key,
+				  crypt_stat->key_size);
+	if (rc < 0) {
+		printk(KERN_ERR "Error setting key for crypto context\n");
+		rc = -EINVAL;
+		goto out_free_tfm;
+	}
 	/* TODO: virt_to_scatterlist */
 	encrypted_session_key = (char *)__get_free_page(GFP_KERNEL);
 	if (!encrypted_session_key) {
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 5938a232d11b..a65f4865182c 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -208,7 +208,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 	char *cipher_name_dst;
 	char *cipher_name_src;
 	char *cipher_key_bytes_src;
-	struct crypto_tfm *tmp_tfm;
 	int cipher_name_len;
 
 	if (!options) {
@@ -305,20 +304,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 		    = '\0';
 	}
 	if (!cipher_key_bytes_set) {
-		mount_crypt_stat->global_default_cipher_key_size =
-			ECRYPTFS_DEFAULT_KEY_BYTES;
-		ecryptfs_printk(KERN_DEBUG, "Cipher key size was not "
-				"specified.  Defaulting to [%d]\n",
-				mount_crypt_stat->
-				global_default_cipher_key_size);
+		mount_crypt_stat->global_default_cipher_key_size = 0;
 	}
 	rc = ecryptfs_process_cipher(
-		&tmp_tfm,
 		&mount_crypt_stat->global_key_tfm,
 		mount_crypt_stat->global_default_cipher_name,
-		mount_crypt_stat->global_default_cipher_key_size);
-	if (tmp_tfm)
-		crypto_free_tfm(tmp_tfm);
+		&mount_crypt_stat->global_default_cipher_key_size);
 	if (rc) {
 		printk(KERN_ERR "Error attempting to initialize cipher [%s] "
 		       "with key size [%Zd] bytes; rc = [%d]\n",
-- 
cgit v1.2.3


From 565d9724b8ce49b530287de34aa17f45f21624d5 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:17 -0800
Subject: [PATCH] eCryptfs: Hash code to new crypto API

Update eCryptfs hash code to the new kernel crypto API.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c          | 36 +++++++++++++++++++++---------------
 fs/ecryptfs/ecryptfs_kernel.h |  7 ++++---
 2 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 82e7d02cefae..f14c5a38215e 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -94,25 +94,31 @@ static int ecryptfs_calculate_md5(char *dst,
 				  struct ecryptfs_crypt_stat *crypt_stat,
 				  char *src, int len)
 {
-	int rc = 0;
 	struct scatterlist sg;
+	struct hash_desc desc = {
+		.tfm = crypt_stat->hash_tfm,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
+	};
+	int rc = 0;
 
-	mutex_lock(&crypt_stat->cs_md5_tfm_mutex);
+	mutex_lock(&crypt_stat->cs_hash_tfm_mutex);
 	sg_init_one(&sg, (u8 *)src, len);
-	if (!crypt_stat->md5_tfm) {
-		crypt_stat->md5_tfm =
-			crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
-		if (!crypt_stat->md5_tfm) {
-			rc = -ENOMEM;
+	if (!desc.tfm) {
+		desc.tfm = crypto_alloc_hash(ECRYPTFS_DEFAULT_HASH, 0,
+					     CRYPTO_ALG_ASYNC);
+		if (IS_ERR(desc.tfm)) {
+			rc = PTR_ERR(desc.tfm);
 			ecryptfs_printk(KERN_ERR, "Error attempting to "
-					"allocate crypto context\n");
+					"allocate crypto context; rc = [%d]\n",
+					rc);
 			goto out;
 		}
+		crypt_stat->hash_tfm = desc.tfm;
 	}
-	crypto_digest_init(crypt_stat->md5_tfm);
-	crypto_digest_update(crypt_stat->md5_tfm, &sg, 1);
-	crypto_digest_final(crypt_stat->md5_tfm, dst);
-	mutex_unlock(&crypt_stat->cs_md5_tfm_mutex);
+	crypto_hash_init(&desc);
+	crypto_hash_update(&desc, &sg, len);
+	crypto_hash_final(&desc, dst);
+	mutex_unlock(&crypt_stat->cs_hash_tfm_mutex);
 out:
 	return rc;
 }
@@ -178,7 +184,7 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 	memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
 	mutex_init(&crypt_stat->cs_mutex);
 	mutex_init(&crypt_stat->cs_tfm_mutex);
-	mutex_init(&crypt_stat->cs_md5_tfm_mutex);
+	mutex_init(&crypt_stat->cs_hash_tfm_mutex);
 	ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED);
 }
 
@@ -192,8 +198,8 @@ void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 {
 	if (crypt_stat->tfm)
 		crypto_free_tfm(crypt_stat->tfm);
-	if (crypt_stat->md5_tfm)
-		crypto_free_tfm(crypt_stat->md5_tfm);
+	if (crypt_stat->hash_tfm)
+		crypto_free_hash(crypt_stat->hash_tfm);
 	memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
 }
 
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 4112df9dec50..840aa010e0d3 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -175,6 +175,7 @@ ecryptfs_get_key_payload_data(struct key *key)
 #define ECRYPTFS_DEFAULT_CIPHER "aes"
 #define ECRYPTFS_DEFAULT_KEY_BYTES 16
 #define ECRYPTFS_DEFAULT_CHAINING_MODE CRYPTO_TFM_MODE_CBC
+#define ECRYPTFS_DEFAULT_HASH "md5"
 #define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
 #define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
 #define MD5_DIGEST_SIZE 16
@@ -205,14 +206,14 @@ struct ecryptfs_crypt_stat {
 	unsigned int extent_mask;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
 	struct crypto_tfm *tfm;
-	struct crypto_tfm *md5_tfm; /* Crypto context for generating
-				     * the initialization vectors */
+	struct crypto_hash *hash_tfm; /* Crypto context for generating
+				       * the initialization vectors */
 	unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
 	unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
 	unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
 	unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX];
 	struct mutex cs_tfm_mutex;
-	struct mutex cs_md5_tfm_mutex;
+	struct mutex cs_hash_tfm_mutex;
 	struct mutex cs_mutex;
 };
 
-- 
cgit v1.2.3


From 8bba066f4e3854755a303cee37ea37bd080a46b3 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:18 -0800
Subject: [PATCH] eCryptfs: Cipher code to new crypto API

Update cipher block encryption code to the new crypto API.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c          |  92 +++++++++++++++++++++++++++++---------
 fs/ecryptfs/ecryptfs_kernel.h |   9 ++--
 fs/ecryptfs/keystore.c        | 101 ++++++++++++++++++++++++++++--------------
 fs/ecryptfs/main.c            |   2 +
 4 files changed, 146 insertions(+), 58 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f14c5a38215e..2a1b6aa1a4a1 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -123,6 +123,28 @@ out:
 	return rc;
 }
 
+int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
+					   char *cipher_name,
+					   char *chaining_modifier)
+{
+	int cipher_name_len = strlen(cipher_name);
+	int chaining_modifier_len = strlen(chaining_modifier);
+	int algified_name_len;
+	int rc;
+
+	algified_name_len = (chaining_modifier_len + cipher_name_len + 3);
+	(*algified_name) = kmalloc(algified_name_len, GFP_KERNEL);
+	if (!(algified_name)) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	snprintf((*algified_name), algified_name_len, "%s(%s)",
+		 chaining_modifier, cipher_name);
+	rc = 0;
+out:
+	return rc;
+}
+
 /**
  * ecryptfs_derive_iv
  * @iv: destination for the derived iv vale
@@ -197,7 +219,7 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 {
 	if (crypt_stat->tfm)
-		crypto_free_tfm(crypt_stat->tfm);
+		crypto_free_blkcipher(crypt_stat->tfm);
 	if (crypt_stat->hash_tfm)
 		crypto_free_hash(crypt_stat->hash_tfm);
 	memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
@@ -209,7 +231,7 @@ void ecryptfs_destruct_mount_crypt_stat(
 	if (mount_crypt_stat->global_auth_tok_key)
 		key_put(mount_crypt_stat->global_auth_tok_key);
 	if (mount_crypt_stat->global_key_tfm)
-		crypto_free_tfm(mount_crypt_stat->global_key_tfm);
+		crypto_free_blkcipher(mount_crypt_stat->global_key_tfm);
 	memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat));
 }
 
@@ -275,6 +297,11 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 			       struct scatterlist *src_sg, int size,
 			       unsigned char *iv)
 {
+	struct blkcipher_desc desc = {
+		.tfm = crypt_stat->tfm,
+		.info = iv,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
+	};
 	int rc = 0;
 
 	BUG_ON(!crypt_stat || !crypt_stat->tfm
@@ -288,8 +315,8 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 	}
 	/* Consider doing this once, when the file is opened */
 	mutex_lock(&crypt_stat->cs_tfm_mutex);
-	rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
-				  crypt_stat->key_size);
+	rc = crypto_blkcipher_setkey(crypt_stat->tfm, crypt_stat->key,
+				     crypt_stat->key_size);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
 				rc);
@@ -298,7 +325,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 		goto out;
 	}
 	ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size);
-	crypto_cipher_encrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, iv);
+	crypto_blkcipher_encrypt_iv(&desc, dest_sg, src_sg, size);
 	mutex_unlock(&crypt_stat->cs_tfm_mutex);
 out:
 	return rc;
@@ -681,12 +708,17 @@ static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 			       struct scatterlist *src_sg, int size,
 			       unsigned char *iv)
 {
+	struct blkcipher_desc desc = {
+		.tfm = crypt_stat->tfm,
+		.info = iv,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
+	};
 	int rc = 0;
 
 	/* Consider doing this once, when the file is opened */
 	mutex_lock(&crypt_stat->cs_tfm_mutex);
-	rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
-				  crypt_stat->key_size);
+	rc = crypto_blkcipher_setkey(crypt_stat->tfm, crypt_stat->key,
+				     crypt_stat->key_size);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
 				rc);
@@ -695,8 +727,7 @@ static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 		goto out;
 	}
 	ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size);
-	rc = crypto_cipher_decrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size,
-				      iv);
+	rc = crypto_blkcipher_decrypt_iv(&desc, dest_sg, src_sg, size);
 	mutex_unlock(&crypt_stat->cs_tfm_mutex);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error decrypting; rc = [%d]\n",
@@ -765,6 +796,7 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
  */
 int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
 {
+	char *full_alg_name;
 	int rc = -EINVAL;
 
 	if (!crypt_stat->cipher) {
@@ -781,16 +813,24 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
 		goto out;
 	}
 	mutex_lock(&crypt_stat->cs_tfm_mutex);
-	crypt_stat->tfm = crypto_alloc_tfm(crypt_stat->cipher,
-					   ECRYPTFS_DEFAULT_CHAINING_MODE
-					   | CRYPTO_TFM_REQ_WEAK_KEY);
-	mutex_unlock(&crypt_stat->cs_tfm_mutex);
+	rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
+						    crypt_stat->cipher, "cbc");
+	if (rc)
+		goto out;
+	crypt_stat->tfm = crypto_alloc_blkcipher(full_alg_name, 0,
+						 CRYPTO_ALG_ASYNC);
+	kfree(full_alg_name);
 	if (!crypt_stat->tfm) {
 		ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
 				"Error initializing cipher [%s]\n",
 				crypt_stat->cipher);
+		mutex_unlock(&crypt_stat->cs_tfm_mutex);
 		goto out;
 	}
+	crypto_blkcipher_set_flags(crypt_stat->tfm,
+				   (ECRYPTFS_DEFAULT_CHAINING_MODE
+				    | CRYPTO_TFM_REQ_WEAK_KEY));
+	mutex_unlock(&crypt_stat->cs_tfm_mutex);
 	rc = 0;
 out:
 	return rc;
@@ -1588,10 +1628,11 @@ out:
  * event, regardless of whether this function succeeds for fails.
  */
 int
-ecryptfs_process_cipher(struct crypto_tfm **key_tfm, char *cipher_name,
+ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
 			size_t *key_size)
 {
 	char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
+	char *full_alg_name;
 	int rc;
 
 	*key_tfm = NULL;
@@ -1601,17 +1642,26 @@ ecryptfs_process_cipher(struct crypto_tfm **key_tfm, char *cipher_name,
 		      "allowable is [%d]\n", *key_size, ECRYPTFS_MAX_KEY_BYTES);
 		goto out;
 	}
-	*key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY);
-	if (!(*key_tfm)) {
-		rc = -EINVAL;
+	rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, cipher_name,
+						    "ecb");
+	if (rc)
+		goto out;
+	*key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
+	kfree(full_alg_name);
+	if (IS_ERR(*key_tfm)) {
+		rc = PTR_ERR(*key_tfm);
 		printk(KERN_ERR "Unable to allocate crypto cipher with name "
-		       "[%s]\n", cipher_name);
+		       "[%s]; rc = [%d]\n", cipher_name, rc);
 		goto out;
 	}
-	if (*key_size == 0)
-		*key_size = crypto_tfm_alg_max_keysize(*key_tfm);
+	crypto_blkcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+	if (*key_size == 0) {
+		struct blkcipher_alg *alg = crypto_blkcipher_alg(*key_tfm);
+
+		*key_size = alg->max_keysize;
+	}
 	get_random_bytes(dummy_key, *key_size);
-	rc = crypto_cipher_setkey(*key_tfm, dummy_key, *key_size);
+	rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size);
 	if (rc) {
 		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
 		       "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 840aa010e0d3..199fcda50e1b 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -205,7 +205,7 @@ struct ecryptfs_crypt_stat {
 	size_t extent_shift;
 	unsigned int extent_mask;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
 	struct crypto_hash *hash_tfm; /* Crypto context for generating
 				       * the initialization vectors */
 	unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
@@ -245,7 +245,7 @@ struct ecryptfs_mount_crypt_stat {
 	struct ecryptfs_auth_tok *global_auth_tok;
 	struct key *global_auth_tok_key;
 	size_t global_default_cipher_key_size;
-	struct crypto_tfm *global_key_tfm;
+	struct crypto_blkcipher *global_key_tfm;
 	struct mutex global_key_tfm_mutex;
 	unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
 						 + 1];
@@ -426,6 +426,9 @@ void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
 void ecryptfs_destruct_mount_crypt_stat(
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
 int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
+int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
+					   char *cipher_name,
+					   char *chaining_modifier);
 int ecryptfs_write_inode_size_to_header(struct file *lower_file,
 					struct inode *lower_inode,
 					struct inode *inode);
@@ -474,7 +477,7 @@ ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 			  unsigned char *src, struct dentry *ecryptfs_dentry);
 int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
 int
-ecryptfs_process_cipher(struct crypto_tfm **key_tfm, char *cipher_name,
+ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
 			size_t *key_size);
 int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
 int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index bc706d33559a..c3746f56d162 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -458,14 +458,16 @@ out:
 static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 			       struct ecryptfs_crypt_stat *crypt_stat)
 {
-	int rc = 0;
 	struct ecryptfs_password *password_s_ptr;
-	struct crypto_tfm *tfm = NULL;
 	struct scatterlist src_sg[2], dst_sg[2];
 	struct mutex *tfm_mutex = NULL;
 	/* TODO: Use virt_to_scatterlist for these */
 	char *encrypted_session_key;
 	char *session_key;
+	struct blkcipher_desc desc = {
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
+	};
+	int rc = 0;
 
 	password_s_ptr = &auth_tok->token.password;
 	if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags,
@@ -482,22 +484,32 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 	if (!strcmp(crypt_stat->cipher,
 		    crypt_stat->mount_crypt_stat->global_default_cipher_name)
 	    && crypt_stat->mount_crypt_stat->global_key_tfm) {
-		tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
+		desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
 		tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
 	} else {
-		tfm = crypto_alloc_tfm(crypt_stat->cipher,
-				       CRYPTO_TFM_REQ_WEAK_KEY);
-		if (!tfm) {
-			printk(KERN_ERR "Error allocating crypto context\n");
-			rc = -ENOMEM;
+		char *full_alg_name;
+
+		rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
+							    crypt_stat->cipher,
+							    "ecb");
+		if (rc)
+			goto out;
+		desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0,
+						  CRYPTO_ALG_ASYNC);
+		kfree(full_alg_name);
+		if (IS_ERR(desc.tfm)) {
+			rc = PTR_ERR(desc.tfm);
+			printk(KERN_ERR "Error allocating crypto context; "
+			       "rc = [%d]\n", rc);
 			goto out;
 		}
+		crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY);
 	}
 	if (tfm_mutex)
 		mutex_lock(tfm_mutex);
-	rc = crypto_cipher_setkey(tfm,
-				  password_s_ptr->session_key_encryption_key,
-				  crypt_stat->key_size);
+	rc = crypto_blkcipher_setkey(desc.tfm,
+				     password_s_ptr->session_key_encryption_key,
+				     crypt_stat->key_size);
 	if (rc < 0) {
 		printk(KERN_ERR "Error setting key for crypto context\n");
 		rc = -EINVAL;
@@ -528,9 +540,12 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 	auth_tok->session_key.decrypted_key_size =
 	    auth_tok->session_key.encrypted_key_size;
 	dst_sg[0].length = auth_tok->session_key.encrypted_key_size;
-	/* TODO: Handle error condition */
-	crypto_cipher_decrypt(tfm, dst_sg, src_sg,
-			      auth_tok->session_key.encrypted_key_size);
+	rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg,
+				      auth_tok->session_key.encrypted_key_size);
+	if (rc) {
+		printk(KERN_ERR "Error decrypting; rc = [%d]\n", rc);
+		goto out_free_memory;
+	}
 	auth_tok->session_key.decrypted_key_size =
 	    auth_tok->session_key.encrypted_key_size;
 	memcpy(auth_tok->session_key.decrypted_key, session_key,
@@ -543,6 +558,7 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 	if (ecryptfs_verbosity > 0)
 		ecryptfs_dump_hex(crypt_stat->key,
 				  crypt_stat->key_size);
+out_free_memory:
 	memset(encrypted_session_key, 0, PAGE_CACHE_SIZE);
 	free_page((unsigned long)encrypted_session_key);
 	memset(session_key, 0, PAGE_CACHE_SIZE);
@@ -551,7 +567,7 @@ out_free_tfm:
 	if (tfm_mutex)
 		mutex_unlock(tfm_mutex);
 	else
-		crypto_free_tfm(tfm);
+		crypto_free_blkcipher(desc.tfm);
 out:
 	return rc;
 }
@@ -800,19 +816,21 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
 		   struct ecryptfs_crypt_stat *crypt_stat,
 		   struct ecryptfs_key_record *key_rec, size_t *packet_size)
 {
-	int rc = 0;
-
 	size_t i;
 	size_t signature_is_valid = 0;
 	size_t encrypted_session_key_valid = 0;
 	char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
 	struct scatterlist dest_sg[2];
 	struct scatterlist src_sg[2];
-	struct crypto_tfm *tfm = NULL;
 	struct mutex *tfm_mutex = NULL;
 	size_t key_rec_size;
 	size_t packet_size_length;
 	size_t cipher_code;
+	struct blkcipher_desc desc = {
+		.tfm = NULL,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
+	};
+	int rc = 0;
 
 	(*packet_size) = 0;
 	/* Check for a valid signature on the auth_tok */
@@ -879,33 +897,48 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
 	if (!strcmp(crypt_stat->cipher,
 		    crypt_stat->mount_crypt_stat->global_default_cipher_name)
 	    && crypt_stat->mount_crypt_stat->global_key_tfm) {
-		tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
+		desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
 		tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
-	} else
-		tfm = crypto_alloc_tfm(crypt_stat->cipher, 0);
-	if (!tfm) {
-		ecryptfs_printk(KERN_ERR, "Could not initialize crypto "
-				"context for cipher [%s]\n",
-				crypt_stat->cipher);
-		rc = -EINVAL;
-		goto out;
+	} else {
+		char *full_alg_name;
+
+		rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
+							    crypt_stat->cipher,
+							    "ecb");
+		if (rc)
+			goto out;
+		desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0,
+						  CRYPTO_ALG_ASYNC);
+		kfree(full_alg_name);
+		if (IS_ERR(desc.tfm)) {
+			rc = PTR_ERR(desc.tfm);
+			ecryptfs_printk(KERN_ERR, "Could not initialize crypto "
+					"context for cipher [%s]; rc = [%d]\n",
+					crypt_stat->cipher, rc);
+			goto out;
+		}
+		crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY);
 	}
 	if (tfm_mutex)
 		mutex_lock(tfm_mutex);
-	rc = crypto_cipher_setkey(tfm, session_key_encryption_key,
-				  crypt_stat->key_size);
+	rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key,
+				     crypt_stat->key_size);
 	if (rc < 0) {
 		if (tfm_mutex)
 			mutex_unlock(tfm_mutex);
 		ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
-				"context\n");
+				"context; rc = [%d]\n", rc);
 		goto out;
 	}
 	rc = 0;
 	ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
 			crypt_stat->key_size);
-	crypto_cipher_encrypt(tfm, dest_sg, src_sg,
-			      (*key_rec).enc_key_size);
+	rc = crypto_blkcipher_encrypt(&desc, dest_sg, src_sg,
+				      (*key_rec).enc_key_size);
+	if (rc) {
+		printk(KERN_ERR "Error encrypting; rc = [%d]\n", rc);
+		goto out;
+	}
 	if (tfm_mutex)
 		mutex_unlock(tfm_mutex);
 	ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
@@ -968,8 +1001,8 @@ encrypted_session_key_set:
 	       (*key_rec).enc_key_size);
 	(*packet_size) += (*key_rec).enc_key_size;
 out:
-	if (tfm && !tfm_mutex)
-		crypto_free_tfm(tfm);
+	if (desc.tfm && !tfm_mutex)
+		crypto_free_blkcipher(desc.tfm);
 	if (rc)
 		(*packet_size) = 0;
 	return rc;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a65f4865182c..a78d87d14baf 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -315,6 +315,8 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 		       "with key size [%Zd] bytes; rc = [%d]\n",
 		       mount_crypt_stat->global_default_cipher_name,
 		       mount_crypt_stat->global_default_cipher_key_size, rc);
+		mount_crypt_stat->global_key_tfm = NULL;
+		mount_crypt_stat->global_auth_tok_key = NULL;
 		rc = -EINVAL;
 		goto out;
 	}
-- 
cgit v1.2.3


From 7ff1d74f5670329ac4b5959a675f8698ba95be20 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:19 -0800
Subject: [PATCH] eCryptfs: Consolidate lower dentry_open's

Opens on lower dentry objects happen in several places in eCryptfs, and they
all involve the same steps (dget, mntget, dentry_open).  This patch
consolidates the lower open events into a single function call.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c          | 24 +++++++++++------------
 fs/ecryptfs/ecryptfs_kernel.h |  4 ++++
 fs/ecryptfs/file.c            | 44 +++++++++++++++++++++++++++++++++++--------
 fs/ecryptfs/inode.c           | 33 +++++++++++---------------------
 4 files changed, 63 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 2a1b6aa1a4a1..f49f105394b7 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1191,28 +1191,28 @@ int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
 int ecryptfs_read_header_region(char *data, struct dentry *dentry,
 				struct vfsmount *mnt)
 {
-	struct file *file;
+	struct file *lower_file;
 	mm_segment_t oldfs;
 	int rc;
 
-	mnt = mntget(mnt);
-	file = dentry_open(dentry, mnt, O_RDONLY);
-	if (IS_ERR(file)) {
-		ecryptfs_printk(KERN_DEBUG, "Error opening file to "
-				"read header region\n");
-		mntput(mnt);
-		rc = PTR_ERR(file);
+	if ((rc = ecryptfs_open_lower_file(&lower_file, dentry, mnt,
+					   O_RDONLY))) {
+		printk(KERN_ERR
+		       "Error opening lower_file to read header region\n");
 		goto out;
 	}
-	file->f_pos = 0;
+	lower_file->f_pos = 0;
 	oldfs = get_fs();
 	set_fs(get_ds());
 	/* For releases 0.1 and 0.2, all of the header information
 	 * fits in the first data extent-sized region. */
-	rc = file->f_op->read(file, (char __user *)data,
-			      ECRYPTFS_DEFAULT_EXTENT_SIZE, &file->f_pos);
+	rc = lower_file->f_op->read(lower_file, (char __user *)data,
+			      ECRYPTFS_DEFAULT_EXTENT_SIZE, &lower_file->f_pos);
 	set_fs(oldfs);
-	fput(file);
+	if ((rc = ecryptfs_close_lower_file(lower_file))) {
+		printk(KERN_ERR "Error closing lower_file\n");
+		goto out;
+	}
 	rc = 0;
 out:
 	return rc;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 199fcda50e1b..f992533d1692 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -482,5 +482,9 @@ ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
 int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
 int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
 void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
+int ecryptfs_open_lower_file(struct file **lower_file,
+			     struct dentry *lower_dentry,
+			     struct vfsmount *lower_mnt, int flags);
+int ecryptfs_close_lower_file(struct file *lower_file);
 
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index c8550c9f9cd2..a92ef05eff8f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -198,6 +198,33 @@ retry:
 
 struct kmem_cache *ecryptfs_file_info_cache;
 
+int ecryptfs_open_lower_file(struct file **lower_file,
+			     struct dentry *lower_dentry,
+			     struct vfsmount *lower_mnt, int flags)
+{
+	int rc = 0;
+
+	dget(lower_dentry);
+	mntget(lower_mnt);
+	*lower_file = dentry_open(lower_dentry, lower_mnt, flags);
+	if (IS_ERR(*lower_file)) {
+		printk(KERN_ERR "Error opening lower file for lower_dentry "
+		       "[0x%p], lower_mnt [0x%p], and flags [0x%x]\n",
+		       lower_dentry, lower_mnt, flags);
+		rc = PTR_ERR(*lower_file);
+		*lower_file = NULL;
+		goto out;
+	}
+out:
+	return rc;
+}
+
+int ecryptfs_close_lower_file(struct file *lower_file)
+{
+	fput(lower_file);
+	return 0;
+}
+
 /**
  * ecryptfs_open
  * @inode: inode speciying file to open
@@ -244,19 +271,15 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 		ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
-	/* This mntget & dget is undone via fput when the file is released */
-	dget(lower_dentry);
 	lower_flags = file->f_flags;
 	if ((lower_flags & O_ACCMODE) == O_WRONLY)
 		lower_flags = (lower_flags & O_ACCMODE) | O_RDWR;
 	if (file->f_flags & O_APPEND)
 		lower_flags &= ~O_APPEND;
 	lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
-	mntget(lower_mnt);
 	/* Corresponding fput() in ecryptfs_release() */
-	lower_file = dentry_open(lower_dentry, lower_mnt, lower_flags);
-	if (IS_ERR(lower_file)) {
-		rc = PTR_ERR(lower_file);
+	if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
+					   lower_flags))) {
 		ecryptfs_printk(KERN_ERR, "Error opening lower file\n");
 		goto out_puts;
 	}
@@ -341,11 +364,16 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
 	struct file *lower_file = ecryptfs_file_to_lower(file);
 	struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file);
 	struct inode *lower_inode = ecryptfs_inode_to_lower(inode);
+	int rc;
 
-	fput(lower_file);
+	if ((rc = ecryptfs_close_lower_file(lower_file))) {
+		printk(KERN_ERR "Error closing lower_file\n");
+		goto out;
+	}
 	inode->i_blocks = lower_inode->i_blocks;
 	kmem_cache_free(ecryptfs_file_info_cache, file_info);
-	return 0;
+out:
+	return rc;
 }
 
 static int
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index efdd2b7b62d7..2f2c6cf972f7 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -231,7 +231,6 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 	int lower_flags;
 	struct ecryptfs_crypt_stat *crypt_stat;
 	struct dentry *lower_dentry;
-	struct dentry *tlower_dentry = NULL;
 	struct file *lower_file;
 	struct inode *inode, *lower_inode;
 	struct vfsmount *lower_mnt;
@@ -241,30 +240,19 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 			lower_dentry->d_name.name);
 	inode = ecryptfs_dentry->d_inode;
 	crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
-	tlower_dentry = dget(lower_dentry);
-	if (!tlower_dentry) {
-		rc = -ENOMEM;
-		ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry\n");
-		goto out;
-	}
 	lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR;
 #if BITS_PER_LONG != 32
 	lower_flags |= O_LARGEFILE;
 #endif
 	lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
-	mntget(lower_mnt);
 	/* Corresponding fput() at end of this function */
-	lower_file = dentry_open(tlower_dentry, lower_mnt, lower_flags);
-	if (IS_ERR(lower_file)) {
-		rc = PTR_ERR(lower_file);
+	if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
+					   lower_flags))) {
 		ecryptfs_printk(KERN_ERR,
 				"Error opening dentry; rc = [%i]\n", rc);
 		goto out;
 	}
-	/* fput(lower_file) should handle the puts if we do this */
-	lower_file->f_dentry = tlower_dentry;
-	lower_file->f_vfsmnt = lower_mnt;
-	lower_inode = tlower_dentry->d_inode;
+	lower_inode = lower_dentry->d_inode;
 	if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
 		ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
 		ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
@@ -285,7 +273,8 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 	}
 	rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode);
 out_fput:
-	fput(lower_file);
+	if ((rc = ecryptfs_close_lower_file(lower_file)))
+		printk(KERN_ERR "Error closing lower_file\n");
 out:
 	return rc;
 }
@@ -832,12 +821,11 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 	}
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	/* This dget & mntget is released through fput at out_fput: */
-	dget(lower_dentry);
 	lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
-	mntget(lower_mnt);
-	lower_file = dentry_open(lower_dentry, lower_mnt, O_RDWR);
-	if (unlikely(IS_ERR(lower_file))) {
-		rc = PTR_ERR(lower_file);
+	if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
+					   O_RDWR))) {
+		ecryptfs_printk(KERN_ERR,
+				"Error opening dentry; rc = [%i]\n", rc);
 		goto out_free;
 	}
 	ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file);
@@ -879,7 +867,8 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 		= CURRENT_TIME;
 	mark_inode_dirty_sync(inode);
 out_fput:
-	fput(lower_file);
+	if ((rc = ecryptfs_close_lower_file(lower_file)))
+		printk(KERN_ERR "Error closing lower_file\n");
 out_free:
 	if (ecryptfs_file_to_private(&fake_ecryptfs_file))
 		kmem_cache_free(ecryptfs_file_info_cache,
-- 
cgit v1.2.3


From 316bb95e8ed0ddcd767e8aa54264b6c6190f150c Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:20 -0800
Subject: [PATCH] eCryptfs: Remove ecryptfs_umount_begin

There is no point to calling the lower umount_begin when the eCryptfs
umount_begin is called.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/super.c | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index c337c0410fb1..825757ae4867 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -137,23 +137,6 @@ static void ecryptfs_clear_inode(struct inode *inode)
 	iput(ecryptfs_inode_to_lower(inode));
 }
 
-/**
- * ecryptfs_umount_begin
- *
- * Called in do_umount().
- */
-static void ecryptfs_umount_begin(struct vfsmount *vfsmnt, int flags)
-{
-	struct vfsmount *lower_mnt =
-		ecryptfs_dentry_to_lower_mnt(vfsmnt->mnt_sb->s_root);
-	struct super_block *lower_sb;
-
-	mntput(lower_mnt);
-	lower_sb = lower_mnt->mnt_sb;
-	if (lower_sb->s_op->umount_begin)
-		lower_sb->s_op->umount_begin(lower_mnt, flags);
-}
-
 /**
  * ecryptfs_show_options
  *
@@ -193,6 +176,5 @@ struct super_operations ecryptfs_sops = {
 	.statfs = ecryptfs_statfs,
 	.remount_fs = NULL,
 	.clear_inode = ecryptfs_clear_inode,
-	.umount_begin = ecryptfs_umount_begin,
 	.show_options = ecryptfs_show_options
 };
-- 
cgit v1.2.3


From 45ec4ababe999cb95f9c0cad03b2689cb0b77a2b Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:20 -0800
Subject: [PATCH] eCryptfs: Fix handling of lower d_count

Fix the use of dget/dput calls to balance out on the lower filesystem.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/dentry.c |  8 ++++++-
 fs/ecryptfs/inode.c  | 62 +++++++++++++++++-----------------------------------
 2 files changed, 27 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index f0d2a433242b..0b9992ab990f 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -24,6 +24,7 @@
 
 #include <linux/dcache.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
 #include "ecryptfs_kernel.h"
 
 /**
@@ -76,8 +77,13 @@ static void ecryptfs_d_release(struct dentry *dentry)
 	if (ecryptfs_dentry_to_private(dentry))
 		kmem_cache_free(ecryptfs_dentry_info_cache,
 				ecryptfs_dentry_to_private(dentry));
-	if (lower_dentry)
+	if (lower_dentry) {
+		struct vfsmount *lower_mnt =
+			ecryptfs_dentry_to_lower_mnt(dentry);
+
+		mntput(lower_mnt);
 		dput(lower_dentry);
+	}
 	return;
 }
 
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 2f2c6cf972f7..ff4865d24f0f 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -325,7 +325,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct dentry *lower_dir_dentry;
 	struct dentry *lower_dentry;
 	struct vfsmount *lower_mnt;
-	struct dentry *tlower_dentry = NULL;
 	char *encoded_name;
 	unsigned int encoded_namelen;
 	struct ecryptfs_crypt_stat *crypt_stat = NULL;
@@ -336,27 +335,32 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 	lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
 	dentry->d_op = &ecryptfs_dops;
 	if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, "."))
-	    || (dentry->d_name.len == 2 && !strcmp(dentry->d_name.name, "..")))
-		goto out_drop;
+	    || (dentry->d_name.len == 2
+		&& !strcmp(dentry->d_name.name, ".."))) {
+		d_drop(dentry);
+		goto out;
+	}
 	encoded_namelen = ecryptfs_encode_filename(crypt_stat,
 						   dentry->d_name.name,
 						   dentry->d_name.len,
 						   &encoded_name);
 	if (encoded_namelen < 0) {
 		rc = encoded_namelen;
-		goto out_drop;
+		d_drop(dentry);
+		goto out;
 	}
 	ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
 			"= [%d]\n", encoded_name, encoded_namelen);
 	lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
 				      encoded_namelen - 1);
 	kfree(encoded_name);
-	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
 	if (IS_ERR(lower_dentry)) {
 		ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
 		rc = PTR_ERR(lower_dentry);
-		goto out_drop;
+		d_drop(dentry);
+		goto out;
 	}
+	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
 	ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->"
        		"d_name.name = [%s]\n", lower_dentry,
 		lower_dentry->d_name.name);
@@ -397,12 +401,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 				"as we *think* we are about to unlink\n");
 		goto out;
 	}
-	tlower_dentry = dget(lower_dentry);
-	if (!tlower_dentry || IS_ERR(tlower_dentry)) {
-		rc = -ENOMEM;
-		ecryptfs_printk(KERN_ERR, "Cannot dget lower_dentry\n");
-		goto out_dput;
-	}
 	/* Released in this function */
 	page_virt =
 	    (char *)kmem_cache_alloc(ecryptfs_header_cache_2,
@@ -414,7 +412,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 		goto out_dput;
 	}
 	memset(page_virt, 0, PAGE_CACHE_SIZE);
-	rc = ecryptfs_read_header_region(page_virt, tlower_dentry, nd->mnt);
+	rc = ecryptfs_read_header_region(page_virt, lower_dentry, nd->mnt);
 	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
 	if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED))
 		ecryptfs_set_default_sizes(crypt_stat);
@@ -437,9 +435,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 
 out_dput:
 	dput(lower_dentry);
-	if (tlower_dentry)
-		dput(tlower_dentry);
-out_drop:
 	d_drop(dentry);
 out:
 	return ERR_PTR(rc);
@@ -475,8 +470,8 @@ out_lock:
 	unlock_dir(lower_dir_dentry);
 	dput(lower_new_dentry);
 	dput(lower_old_dentry);
-	if (!new_dentry->d_inode)
-		d_drop(new_dentry);
+	d_drop(new_dentry);
+	d_drop(old_dentry);
 	return rc;
 }
 
@@ -565,41 +560,24 @@ out:
 
 static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	int rc = 0;
-	struct dentry *tdentry = NULL;
 	struct dentry *lower_dentry;
-	struct dentry *tlower_dentry = NULL;
 	struct dentry *lower_dir_dentry;
+	int rc;
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	if (!(tdentry = dget(dentry))) {
-		rc = -EINVAL;
-		ecryptfs_printk(KERN_ERR, "Error dget'ing dentry [%p]\n",
-				dentry);
-		goto out;
-	}
+	dget(dentry);
 	lower_dir_dentry = lock_parent(lower_dentry);
-	if (!(tlower_dentry = dget(lower_dentry))) {
-		rc = -EINVAL;
-		ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry "
-				"[%p]\n", lower_dentry);
-		goto out;
-	}
+	dget(lower_dentry);
 	rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
-	if (!rc) {
-		d_delete(tlower_dentry);
-		tlower_dentry = NULL;
-	}
+	dput(lower_dentry);
+	if (!rc)
+		d_delete(lower_dentry);
 	ecryptfs_copy_attr_times(dir, lower_dir_dentry->d_inode);
 	dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
 	unlock_dir(lower_dir_dentry);
 	if (!rc)
 		d_drop(dentry);
-out:
-	if (tdentry)
-		dput(tdentry);
-	if (tlower_dentry)
-		dput(tlower_dentry);
+	dput(dentry);
 	return rc;
 }
 
-- 
cgit v1.2.3


From 8e87d4dc159148f04f515bc072df22a2c089e7f2 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 2 Nov 2006 03:45:24 +0000
Subject: [CIFS] report rename failure when target file is locked by Windows

Fixes Samba bugzilla bug # 4182

Rename by handle failures (retry after rename by path) were not
being returned back.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES |  4 +++-
 fs/cifs/inode.c | 14 +++++++++-----
 2 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 50afab81a59b..0b3c37ef52e0 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -3,7 +3,9 @@ Version 1.46
 Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
 Allow null user to be specified on mount ("username="). Do not return
 EINVAL on readdir when filldir fails due to overwritten blocksize
-(fixes FC problem)
+(fixes FC problem).  Return error in rename 2nd attempt retry (ie report
+if rename by handle also fails, after rename by path fails, we were
+not reporting whether the retry worked or not).
 
 Version 1.45
 ------------
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 35d54bb0869a..dffe295825f4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -885,10 +885,14 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
 			kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
 		if (info_buf_source != NULL) {
 			info_buf_target = info_buf_source + 1;
-			rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName,
-				info_buf_source, cifs_sb_source->local_nls, 
-				cifs_sb_source->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
+			if (pTcon->ses->capabilities & CAP_UNIX)
+				rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName,
+					info_buf_source, 
+					cifs_sb_source->local_nls,
+					cifs_sb_source->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
+			/* else rc is still EEXIST so will fall through to
+			   unlink the target and retry rename */
 			if (rc == 0) {
 				rc = CIFSSMBUnixQPathInfo(xid, pTcon, toName,
 						info_buf_target,
@@ -937,7 +941,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
 				 cifs_sb_source->mnt_cifs_flags & 
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 		if (rc==0) {
-			CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName,
+			rc = CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName,
 					      cifs_sb_source->local_nls, 
 					      cifs_sb_source->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
-- 
cgit v1.2.3


From d572b87946f8c598b3cad86a7913862dd48daadb Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Thu, 2 Nov 2006 10:50:40 -0600
Subject: JFS: Remove redundant xattr permission checking

The vfs handles most permissions for setting and retrieving xattrs.
This patch removes a redundant and wrong check so that it won't override
the correct behavior which is being fixed in the vfs.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/xattr.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 4c7985ebca92..b753ba216450 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -756,6 +756,11 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 	return -EOPNOTSUPP;
 }
 
+/*
+ * Most of the permission checking is done by xattr_permission in the vfs.
+ * The local file system is responsible for handling the system.* namespace.
+ * We also need to verify that this is a namespace that we recognize.
+ */
 static int can_set_xattr(struct inode *inode, const char *name,
 			 const void *value, size_t value_len)
 {
@@ -771,10 +776,6 @@ static int can_set_xattr(struct inode *inode, const char *name,
 	    strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
 		return -EOPNOTSUPP;
 
-	if (!S_ISREG(inode->i_mode) &&
-	    (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX))
-		return -EPERM;
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7bd473fcc217adec000f213e8864bf9a161d57e1 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Thu, 2 Nov 2006 22:06:56 -0800
Subject: [PATCH] eCryptfs: Fix pointer deref

I missed a pointer dereference in this kmalloc result check.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f49f105394b7..136175a69332 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -134,7 +134,7 @@ int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
 
 	algified_name_len = (chaining_modifier_len + cipher_name_len + 3);
 	(*algified_name) = kmalloc(algified_name_len, GFP_KERNEL);
-	if (!(algified_name)) {
+	if (!(*algified_name)) {
 		rc = -ENOMEM;
 		goto out;
 	}
-- 
cgit v1.2.3


From 87c2b7c045a44f6c1c7af23e64f2b286e6f7130a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 2 Nov 2006 22:06:58 -0800
Subject: [PATCH] sys_pselect7 vs compat_sys_pselect7 uaccess error handling

758333458aa719bfc26ec16eafd4ad3a9e96014d fixes the not checked copy_to_user
return value of compat_sys_pselect7.  I ran into this too because of an old
source tree, but my fix would look quite a bit different to Andi's fix.

The reason is that the compat function IMHO should behave the very same as
the non-compat function if possible.  Since sys_pselect7 does not return
-EFAULT in this specific case, change the compat code so it behaves like
sys_pselect7.

Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 50624d4a70c6..8d0a0018a7d2 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1835,9 +1835,12 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 
 	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
 
-	if (ret == 0 && tsp && !(current->personality & STICKY_TIMEOUTS)) {
+	if (tsp) {
 		struct compat_timespec rts;
 
+		if (current->personality & STICKY_TIMEOUTS)
+			goto sticky;
+
 		rts.tv_sec = timeout / HZ;
 		rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
 		if (rts.tv_nsec >= NSEC_PER_SEC) {
@@ -1846,8 +1849,19 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 		}
 		if (compat_timespec_compare(&rts, &ts) >= 0)
 			rts = ts;
-		if (copy_to_user(tsp, &rts, sizeof(rts)))
-			ret = -EFAULT;
+		if (copy_to_user(tsp, &rts, sizeof(rts))) {
+sticky:
+			/*
+			 * If an application puts its timeval in read-only
+			 * memory, we don't want the Linux-specific update to
+			 * the timeval to cause a fault after the select has
+			 * completed successfully. However, because we're not
+			 * updating the timeval, we can't restart the system
+			 * call.
+			 */
+			if (ret == -ERESTARTNOHAND)
+				ret = -EINTR;
+		}
 	}
 
 	if (ret == -ERESTARTNOHAND) {
-- 
cgit v1.2.3


From 05ac9d4b3d7eac9e8542c83341a0e22d09aecf8f Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 2 Nov 2006 22:07:08 -0800
Subject: [PATCH] cifs: ->readpages() fixes

This just ignore the remaining pages, and will fix a forgot put_pages_list().

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Steven French <sfrench@us.ibm.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/file.c | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 976a691c5a68..7e056b9b49e8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1806,13 +1806,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		}
 		if ((rc < 0) || (smb_read_data == NULL)) {
 			cFYI(1, ("Read error in readpages: %d", rc));
-			/* clean up remaing pages off list */
-			while (!list_empty(page_list) && (i < num_pages)) {
-				page = list_entry(page_list->prev, struct page,
-						  lru);
-				list_del(&page->lru);
-				page_cache_release(page);
-			}
 			break;
 		} else if (bytes_read > 0) {
 			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
@@ -1831,13 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 				   this case is ok - if we are at server EOF 
 				   we will hit it on next read */
 
-			/* while (!list_empty(page_list) && (i < num_pages)) {
-					page = list_entry(page_list->prev, 
-							  struct page, list);
-					list_del(&page->list);
-					page_cache_release(page);
-				}
-				break; */
+				/* break; */
 			}
 		} else {
 			cFYI(1, ("No bytes read (%d) at offset %lld . "
@@ -1845,14 +1832,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 				 bytes_read, offset));
 			/* BB turn off caching and do new lookup on 
 			   file size at server? */
-			while (!list_empty(page_list) && (i < num_pages)) {
-				page = list_entry(page_list->prev, struct page,
-						  lru);
-				list_del(&page->lru);
-
-				/* BB removeme - replace with zero of page? */
-				page_cache_release(page);
-			}
 			break;
 		}
 		if (smb_read_data) {
-- 
cgit v1.2.3


From 2e990021bfc65b1a3778479a9e6b4811f9c1ff0e Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 2 Nov 2006 22:07:09 -0800
Subject: [PATCH] fuse: ->readpages() cleanup

This just ignore the remaining pages.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Steven French <sfrench@us.ibm.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/file.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bb5ace3882d..763a50daf1c0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -397,14 +397,14 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 
 	err = -EIO;
 	if (is_bad_inode(inode))
-		goto clean_pages_up;
+		goto out;
 
 	data.file = file;
 	data.inode = inode;
 	data.req = fuse_get_req(fc);
 	err = PTR_ERR(data.req);
 	if (IS_ERR(data.req))
-		goto clean_pages_up;
+		goto out;
 
 	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
 	if (!err) {
@@ -413,10 +413,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 		else
 			fuse_put_request(fc, data.req);
 	}
-	return err;
-
-clean_pages_up:
-	put_pages_list(pages);
+out:
 	return err;
 }
 
-- 
cgit v1.2.3


From 7011774db8afca43be466f0f0428434a9edf053e Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 2 Nov 2006 22:07:10 -0800
Subject: [PATCH] gfs2: ->readpages() fixes

This just ignore the remaining pages, and remove unneeded unlock_pages().

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Steven French <sfrench@us.ibm.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/gfs2/ops_address.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 8d5963c7e123..015640b3f123 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -337,13 +337,6 @@ out:
 out_noerror:
 	ret = 0;
 out_unlock:
-	/* unlock all pages, we can't do any I/O right now */
-	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
-		struct page *page = list_entry(pages->prev, struct page, lru);
-		list_del(&page->lru);
-		unlock_page(page);
-		page_cache_release(page);
-	}
 	if (do_unlock)
 		gfs2_holder_uninit(&gh);
 	goto out;
-- 
cgit v1.2.3


From 7ef55b8a05c02db7c07d81827c69fe8f124e8654 Mon Sep 17 00:00:00 2001
From: Srinivasa Ds <srinivasa@in.ibm.com>
Date: Thu, 2 Nov 2006 22:07:12 -0800
Subject: [PATCH] NFS4: fix for recursive locking problem

When I was performing some operations on NFS, I got below error on server
side.

  =============================================
  [ INFO: possible recursive locking detected ]
  2.6.19-prep #1
  ---------------------------------------------
  nfsd4/3525 is trying to acquire lock:
   (&inode->i_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24

  but task is already holding lock:
   (&inode->i_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24

  other info that might help us debug this:
  2 locks held by nfsd4/3525:
   #0:  (client_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24
   #1:  (&inode->i_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24

  stack backtrace:
   [<c04051ed>] show_trace_log_lvl+0x58/0x16a
   [<c04057fa>] show_trace+0xd/0x10
   [<c0405913>] dump_stack+0x19/0x1b
   [<c043b6f1>] __lock_acquire+0x778/0x99c
   [<c043be86>] lock_acquire+0x4b/0x6d
   [<c0611ceb>] __mutex_lock_slowpath+0xbc/0x20a
   [<c0611e5a>] mutex_lock+0x21/0x24
   [<c047fd7e>] vfs_rmdir+0x76/0xf8
   [<f94b7ce9>] nfsd4_clear_clid_dir+0x2c/0x41 [nfsd]
   [<f94b7de9>] nfsd4_remove_clid_dir+0xb1/0xe8 [nfsd]
   [<f94b307b>] laundromat_main+0x9b/0x1c3 [nfsd]
   [<c04333d6>] run_workqueue+0x7a/0xbb
   [<c0433d0b>] worker_thread+0xd2/0x107
   [<c0436285>] kthread+0xc3/0xf2
   [<c0402005>] kernel_thread_helper+0x5/0xb
  ===================================================================

Cause for this problem was,2 successive mutex_lock calls on 2 diffrent inodes ,as shown below

	static int
	nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
	{
	        int status;

	        /* For now this directory should already be empty, but we empty it of
        	 * any regular files anyway, just in case the directory was created by
	         * a kernel from the future.... */
        	nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
	        mutex_lock(&dir->d_inode->i_mutex);
	        status = vfs_rmdir(dir->d_inode, dentry);
	...

	int vfs_rmdir(struct inode *dir, struct dentry *dentry)
	{
	        int error = may_delete(dir, dentry, 1);

	        if (error)
	                return error;

	        if (!dir->i_op || !dir->i_op->rmdir)
        	        return -EPERM;

	        DQUOT_INIT(dir);

	        mutex_lock(&dentry->d_inode->i_mutex);
	...

So I have developed the patch to overcome this problem.

Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4recover.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e9d07704680e..81b8565d3837 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -274,7 +274,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
 	 * any regular files anyway, just in case the directory was created by
 	 * a kernel from the future.... */
 	nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
-	mutex_lock(&dir->d_inode->i_mutex);
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
 	status = vfs_rmdir(dir->d_inode, dentry);
 	mutex_unlock(&dir->d_inode->i_mutex);
 	return status;
-- 
cgit v1.2.3


From d2c89a4284ea4ecfba77c6f2d7d6f96d52e801e5 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Thu, 2 Nov 2006 22:07:20 -0800
Subject: [PATCH] reiserfs: reset errval after initializing bitmap cache

Callers after reiserfs_init_bitmap_cache() expect errval to contain -EINVAL
until much later.  If a condition fails before errval is reset later,
reiserfs_fill_super() will mistakenly return 0, causing an Oops in
do_add_mount().  This patch resets errval to -EINVAL after the call.

I view this as a temporary fix and real error codes should be used
throughout reiserfs_fill_super().

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9041802df832..17249994110f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1619,6 +1619,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 		      "jmacd-8: reiserfs_fill_super: unable to read bitmap");
 		goto error;
 	}
+	errval = -EINVAL;
 #ifdef CONFIG_REISERFS_CHECK
 	SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON");
 	SWARN(silent, s, "- it is slow mode for debugging.");
-- 
cgit v1.2.3


From f1f2d8713d16a1e198880bbc716eb24fae09c858 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 2 Nov 2006 22:07:29 -0800
Subject: [PATCH] Fix user.* xattr permission check for sticky dirs

The user.* extended attributes are only allowed on regular files and
directories.  Sticky directories further restrict write access to the owner
and privileged users.  (See the attr(5) man page for an explanation.)

The original check in ext2/ext3 when user.* xattrs were merged was more
restrictive than intended, and when the xattr permission checks were moved
into the VFS, read access to user.* attributes on sticky directores ended
up being denied in addition.

Originally-from: Gerard Neil <xyzzy@devferret.org>
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Jan Engelhardt <jengelh@linux01.gwdg.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/xattr.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xattr.c b/fs/xattr.c
index 395635100f77..0901bdc2ce24 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -48,14 +48,21 @@ xattr_permission(struct inode *inode, const char *name, int mask)
 		return 0;
 
 	/*
-	 * The trusted.* namespace can only accessed by a privilegued user.
+	 * The trusted.* namespace can only be accessed by a privileged user.
 	 */
 	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
 		return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
 
+	/* In user.* namespace, only regular files and directories can have
+	 * extended attributes. For sticky directories, only the owner and
+	 * privileged user can write attributes.
+	 */
 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
-		if (!S_ISREG(inode->i_mode) &&
-		    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+			return -EPERM;
+		if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
+		    (mask & MAY_WRITE) && (current->fsuid != inode->i_uid) &&
+		    !capable(CAP_FOWNER))
 			return -EPERM;
 	}
 
-- 
cgit v1.2.3


From ddac0d39cf437d02fde9795ae57d9c4b4c146de9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Sat, 4 Nov 2006 12:49:32 +0100
Subject: [PATCH] splice: fix problem introduced with inode diet

After the inode slimming patch that unionised i_pipe/i_bdev/i_cdev, it's
no longer enough to check for existance of ->i_pipe to verify that this
is a pipe.

Original patch from Eric Dumazet <dada1@cosmosbay.com>
Final solution suggested by Linus.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/splice.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 8d705954d294..da74583a00ee 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1108,6 +1108,19 @@ out_release:
 
 EXPORT_SYMBOL(do_splice_direct);
 
+/*
+ * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
+ * location, so checking ->i_pipe is not enough to verify that this is a
+ * pipe.
+ */
+static inline struct pipe_inode_info *pipe_info(struct inode *inode)
+{
+	if (S_ISFIFO(inode->i_mode))
+		return inode->i_pipe;
+
+	return NULL;
+}
+
 /*
  * Determine where to splice to/from.
  */
@@ -1119,7 +1132,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 	loff_t offset, *off;
 	long ret;
 
-	pipe = in->f_dentry->d_inode->i_pipe;
+	pipe = pipe_info(in->f_dentry->d_inode);
 	if (pipe) {
 		if (off_in)
 			return -ESPIPE;
@@ -1140,7 +1153,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 		return ret;
 	}
 
-	pipe = out->f_dentry->d_inode->i_pipe;
+	pipe = pipe_info(out->f_dentry->d_inode);
 	if (pipe) {
 		if (off_out)
 			return -ESPIPE;
@@ -1298,7 +1311,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 			unsigned long nr_segs, unsigned int flags)
 {
-	struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
+	struct pipe_inode_info *pipe;
 	struct page *pages[PIPE_BUFFERS];
 	struct partial_page partial[PIPE_BUFFERS];
 	struct splice_pipe_desc spd = {
@@ -1308,7 +1321,8 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 		.ops = &user_page_pipe_buf_ops,
 	};
 
-	if (unlikely(!pipe))
+	pipe = pipe_info(file->f_dentry->d_inode);
+	if (!pipe)
 		return -EBADF;
 	if (unlikely(nr_segs > UIO_MAXIOV))
 		return -EINVAL;
@@ -1535,8 +1549,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 static long do_tee(struct file *in, struct file *out, size_t len,
 		   unsigned int flags)
 {
-	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
-	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
+	struct pipe_inode_info *ipipe = pipe_info(in->f_dentry->d_inode);
+	struct pipe_inode_info *opipe = pipe_info(out->f_dentry->d_inode);
 	int ret = -EINVAL;
 
 	/*
-- 
cgit v1.2.3


From 7cc13edc139108bb527b692f0548dce6bc648572 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 5 Nov 2006 23:52:13 -0800
Subject: [PATCH] sysctl: implement CTL_UNNUMBERED

This patch takes the CTL_UNNUMBERD concept from NFS and makes it available to
all new sysctl users.

At the same time the sysctl binary interface maintenance documentation is
updated to mention and to describe what is needed to successfully maintain the
sysctl binary interface.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/svc.c         |  3 ---
 fs/nfs/sysctl.c        |  5 -----
 include/linux/sysctl.h | 14 +++++++++++---
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 634139232aaf..8ca18085e68d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -353,9 +353,6 @@ EXPORT_SYMBOL(lockd_down);
  * Sysctl parameters (same as module parameters, different interface).
  */
 
-/* Something that isn't CTL_ANY, CTL_NONE or a value that may clash. */
-#define CTL_UNNUMBERED		-2
-
 static ctl_table nlm_sysctls[] = {
 	{
 		.ctl_name	= CTL_UNNUMBERED,
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 2fe3403c2409..3ea50ac64820 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -18,11 +18,6 @@
 static const int nfs_set_port_min = 0;
 static const int nfs_set_port_max = 65535;
 static struct ctl_table_header *nfs_callback_sysctl_table;
-/*
- * Something that isn't CTL_ANY, CTL_NONE or a value that may clash.
- * Use the same values as fs/lockd/svc.c
- */
-#define CTL_UNNUMBERED -2
 
 static ctl_table nfs_cb_sysctls[] = {
 #ifdef CONFIG_NFS_V4
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index c184732a70fc..d98562f1df76 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -6,10 +6,17 @@
  ****************************************************************
  ****************************************************************
  **
+ **  WARNING:
  **  The values in this file are exported to user space via 
- **  the sysctl() binary interface.  However this interface
- **  is unstable and deprecated and will be removed in the future. 
- **  For a stable interface use /proc/sys.
+ **  the sysctl() binary interface.  Do *NOT* change the
+ **  numbering of any existing values here, and do not change
+ **  any numbers within any one set of values.  If you have to
+ **  have to redefine an existing interface, use a new number for it.
+ **  The kernel will then return -ENOTDIR to any application using
+ **  the old binary interface.
+ **
+ **  For new interfaces unless you really need a binary number
+ **  please use CTL_UNNUMBERED.
  **
  ****************************************************************
  ****************************************************************
@@ -48,6 +55,7 @@ struct __sysctl_args {
 #ifdef __KERNEL__
 #define CTL_ANY		-1	/* Matches any name */
 #define CTL_NONE	0
+#define CTL_UNNUMBERED	CTL_NONE	/* sysctl without a binary number */
 #endif
 
 enum
-- 
cgit v1.2.3


From eb1dc33aa235b0e44ada6716cda385883c6e6bff Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 28 Oct 2006 03:03:48 +0400
Subject: [GFS2] don't panic needlessly

First, SLAB_PANIC is unjustified. Second, all error propagating and backing out
is in place.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 21508a13bb78..9889c1eacec1 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -84,8 +84,8 @@ static int __init init_gfs2_fs(void)
 
 	gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
 					      sizeof(struct gfs2_inode),
-					      0, (SLAB_RECLAIM_ACCOUNT|
-					      SLAB_PANIC|SLAB_MEM_SPREAD),
+					      0,  SLAB_RECLAIM_ACCOUNT|
+					          SLAB_MEM_SPREAD,
 					      gfs2_init_inode_once, NULL);
 	if (!gfs2_inode_cachep)
 		goto fail;
-- 
cgit v1.2.3


From 4a221953ed121692aa25998451a57c7f4be8b4f6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 09:57:57 -0500
Subject: [GFS2] Fix incorrect fs sync behaviour.

This adds a sync_fs superblock operation for GFS2 and removes
the journal flush from write_super in favour of sync_fs where it
ought to be. This is more or less identical to the way in which ext3
does this.

This bug was pointed out by Russell Cattelan <cattelan@redhat.com>

Cc: Russell Cattelan <cattelan@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_super.c | 44 ++++++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 06f06f7773d0..b47d9598c047 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -138,16 +138,27 @@ static void gfs2_put_super(struct super_block *sb)
 }
 
 /**
- * gfs2_write_super - disk commit all incore transactions
- * @sb: the filesystem
+ * gfs2_write_super
+ * @sb: the superblock
  *
- * This function is called every time sync(2) is called.
- * After this exits, all dirty buffers are synced.
  */
 
 static void gfs2_write_super(struct super_block *sb)
 {
+	sb->s_dirt = 0;
+}
+
+/**
+ * gfs2_sync_fs - sync the filesystem
+ * @sb: the superblock
+ *
+ * Flushes the log to disk.
+ */
+static int gfs2_sync_fs(struct super_block *sb, int wait)
+{
+	sb->s_dirt = 0;
 	gfs2_log_flush(sb->s_fs_info, NULL);
+	return 0;
 }
 
 /**
@@ -452,17 +463,18 @@ static void gfs2_destroy_inode(struct inode *inode)
 }
 
 struct super_operations gfs2_super_ops = {
-	.alloc_inode = gfs2_alloc_inode,
-	.destroy_inode = gfs2_destroy_inode,
-	.write_inode = gfs2_write_inode,
-	.delete_inode = gfs2_delete_inode,
-	.put_super = gfs2_put_super,
-	.write_super = gfs2_write_super,
-	.write_super_lockfs = gfs2_write_super_lockfs,
-	.unlockfs = gfs2_unlockfs,
-	.statfs = gfs2_statfs,
-	.remount_fs = gfs2_remount_fs,
-	.clear_inode = gfs2_clear_inode,
-	.show_options = gfs2_show_options,
+	.alloc_inode		= gfs2_alloc_inode,
+	.destroy_inode		= gfs2_destroy_inode,
+	.write_inode		= gfs2_write_inode,
+	.delete_inode		= gfs2_delete_inode,
+	.put_super		= gfs2_put_super,
+	.write_super		= gfs2_write_super,
+	.sync_fs		= gfs2_sync_fs,
+	.write_super_lockfs 	= gfs2_write_super_lockfs,
+	.unlockfs		= gfs2_unlockfs,
+	.statfs			= gfs2_statfs,
+	.remount_fs		= gfs2_remount_fs,
+	.clear_inode		= gfs2_clear_inode,
+	.show_options		= gfs2_show_options,
 };
 
-- 
cgit v1.2.3


From 26d83dedf61d26d85f10bc34b92f4de7660fd746 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 30 Oct 2006 16:59:08 -0500
Subject: [GFS2] Fix OOM error handling

Fix the OOM error handling in inode.c where it was possible for
a NULL pointer to be dereferenced.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 57c43ac47925..d470e5286ecd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -157,6 +157,9 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum,
 	struct gfs2_glock *io_gl;
 	int error;
 
+	if (!inode)
+		return ERR_PTR(-ENOBUFS);
+
 	if (inode->i_state & I_NEW) {
 		struct gfs2_sbd *sdp = GFS2_SB(inode);
 		umode_t mode = DT2IF(type);
-- 
cgit v1.2.3


From ba542e3b92f9ea7c482ae56b68b9122eebc53a39 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Thu, 2 Nov 2006 14:41:23 +0000
Subject: [DLM] Fix kref_put oops

This patch fixes the recounting on the lockspace kobject. Previously the lockspace was freed while userspace could have had a
reference to one of its sysfs files, causing an oops in kref_put.

Now the lockspace kfree is moved into the kobject release() function

Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lockspace.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 109333c8ecb9..499ee1156365 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -143,6 +143,12 @@ static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
 	return a->store ? a->store(ls, buf, len) : len;
 }
 
+static void lockspace_kobj_release(struct kobject *k)
+{
+	struct dlm_ls *ls  = container_of(k, struct dlm_ls, ls_kobj);
+	kfree(ls);
+}
+
 static struct sysfs_ops dlm_attr_ops = {
 	.show  = dlm_attr_show,
 	.store = dlm_attr_store,
@@ -151,6 +157,7 @@ static struct sysfs_ops dlm_attr_ops = {
 static struct kobj_type dlm_ktype = {
 	.default_attrs = dlm_attrs,
 	.sysfs_ops     = &dlm_attr_ops,
+	.release       = lockspace_kobj_release,
 };
 
 static struct kset dlm_kset = {
@@ -678,7 +685,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 	dlm_clear_members_gone(ls);
 	kfree(ls->ls_node_array);
 	kobject_unregister(&ls->ls_kobj);
-	kfree(ls);
+        /* The ls structure will be freed when the kobject is done with */
 
 	mutex_lock(&ls_lock);
 	ls_count--;
-- 
cgit v1.2.3


From e2de7f565521a76fbbb927f701c5a1d381c71a93 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Mon, 6 Nov 2006 08:53:28 +0000
Subject: [DLM] fix oops in kref_put when removing a lockspace

Now that the lockspace struct is freed when the last sysfs object is released
this patch prevents use of that lockspace by sysfs. We attempt to re-get the
lockspace from the lockspace list and fail the request if it has been removed.

Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lockspace.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 499ee1156365..f8842ca443c2 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -43,6 +43,10 @@ static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
 	ssize_t ret = len;
 	int n = simple_strtol(buf, NULL, 0);
 
+	ls = dlm_find_lockspace_local(ls->ls_local_handle);
+	if (!ls)
+		return -EINVAL;
+
 	switch (n) {
 	case 0:
 		dlm_ls_stop(ls);
@@ -53,6 +57,7 @@ static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
 	default:
 		ret = -EINVAL;
 	}
+	dlm_put_lockspace(ls);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 4891d53956abd4ad9d94127c50d931124319a324 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Tue, 7 Nov 2006 16:31:16 +0000
Subject: [CIFS] NFS stress test generates flood of "close with pending write"
 messages

Informational/debug message was being logged too often. The error
case of logging having to send a close with (presumably stuck on buggy
server) pending writes is still logged.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/file.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7e056b9b49e8..4b07a8cc4633 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -492,10 +492,13 @@ int cifs_close(struct inode *inode, struct file *file)
 					the struct would be in each open file,
 					but this should give enough time to 
 					clear the socket */
-					cERROR(1,("close with pending writes"));
+#ifdef CONFIG_CIFS_DEBUG2
+					cFYI(1,("close delay, write pending"));
+#endif /* DEBUG2 */
 					msleep(timeout);
 					timeout *= 4;
-				} 
+				}
+				cERROR(1,("close with pending writes")); 
 				rc = CIFSSMBClose(xid, pTcon,
 						  pSMBFile->netfid);
 			}
-- 
cgit v1.2.3


From 5fe14c851efedf95b0e7652a3a7b93ec899d1599 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Tue, 7 Nov 2006 19:26:33 +0000
Subject: [CIFS] Explicitly set stat->blksize

CIFS may perform I/O over the network in larger chunks than the page size,
so it should explicitly set stat->blksize to ensure optimal I/O bandwidth

Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index dffe295825f4..1ad8c9fcc742 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1089,8 +1089,10 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	struct kstat *stat)
 {
 	int err = cifs_revalidate(dentry);
-	if (!err)
+	if (!err) {
 		generic_fillattr(dentry->d_inode, stat);
+		stat->blksize = CIFS_MAX_MSGSIZE;
+	}
 	return err;
 }
 
-- 
cgit v1.2.3


From 6e659c63998881e8f4a842edbe86ac8c5cdaee41 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Nov 2006 23:10:46 +0000
Subject: [CIFS]  Fix mount failure when domain not specified

    Fixes Samba bugzilla #4176

    When users do not specify their domain on mount, 2.6.18 started sending
    default domain instead of a null domain (which was the only way on some
    servers to use a default domain).  Users of 2.6.18 who did not specify
    their domain name on mounts to certain common Windows servers that were
    members of a domain, but not the domain controller, would get mount
    failures which they did not get in 2.6.18

    This fixes that issue and should remove complaints about mount
    behavior changing.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/sess.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index a8a083543ba0..bbdda99dce61 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -90,7 +90,9 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
 	} */
 	/* copy user */
 	if(ses->userName == NULL) {
-		/* BB what about null user mounts - check that we do this BB */
+		/* null user mount */
+		*bcc_ptr = 0;
+		*(bcc_ptr+1) = 0;
 	} else { /* 300 should be long enough for any conceivable user name */
 		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
 					  300, nls_cp);
@@ -98,10 +100,13 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
 	bcc_ptr += 2 * bytes_ret;
 	bcc_ptr += 2; /* account for null termination */
 	/* copy domain */
-	if(ses->domainName == NULL)
-		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr,
-					  "CIFS_LINUX_DOM", 32, nls_cp);
-	else
+	if(ses->domainName == NULL) {
+		/* Sending null domain better than using a bogus domain name (as
+		we did briefly in 2.6.18) since server will use its default */
+		*bcc_ptr = 0;
+		*(bcc_ptr+1) = 0;
+		bytes_ret = 0;
+	} else
 		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName, 
 					  256, nls_cp);
 	bcc_ptr += 2 * bytes_ret;
@@ -144,13 +149,11 @@ static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
 
         /* copy domain */
 	
-        if(ses->domainName == NULL) {
-                strcpy(bcc_ptr, "CIFS_LINUX_DOM");
-		bcc_ptr += 14;  /* strlen(CIFS_LINUX_DOM) */
- 	} else {
+        if(ses->domainName != NULL) {
                 strncpy(bcc_ptr, ses->domainName, 256); 
 		bcc_ptr += strnlen(ses->domainName, 256);
-	}
+	} /* else we will send a null domain name 
+	     so the server will default to its own domain */
 	*bcc_ptr = 0;
 	bcc_ptr++;
 
-- 
cgit v1.2.3


From af85852de0b32d92b14295aa6f5ba3a9ad044cf6 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Wed, 8 Nov 2006 17:44:39 -0800
Subject: [PATCH] nfsd4: reindent do_open_lookup()

Minor rearrangement, cleanup of do_open_lookup().  No change in behavior.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Acked-by: Neil Brown <neilb@suse.de>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0a7bbdc4a10a..4a73f5b2546f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -106,27 +106,25 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 					open->op_fname.len, &open->op_iattr,
 					&resfh, open->op_createmode,
 					(u32 *)open->op_verf.data, &open->op_truncate);
-	}
-	else {
+	} else {
 		status = nfsd_lookup(rqstp, current_fh,
 				     open->op_fname.data, open->op_fname.len, &resfh);
 		fh_unlock(current_fh);
 	}
+	if (status)
+		goto out;
 
-	if (!status) {
-		set_change_info(&open->op_cinfo, current_fh);
+	set_change_info(&open->op_cinfo, current_fh);
 
-		/* set reply cache */
-		fh_dup2(current_fh, &resfh);
-		open->op_stateowner->so_replay.rp_openfh_len =
-			resfh.fh_handle.fh_size;
-		memcpy(open->op_stateowner->so_replay.rp_openfh,
-				&resfh.fh_handle.fh_base,
-				resfh.fh_handle.fh_size);
+	/* set reply cache */
+	fh_dup2(current_fh, &resfh);
+	open->op_stateowner->so_replay.rp_openfh_len = resfh.fh_handle.fh_size;
+	memcpy(open->op_stateowner->so_replay.rp_openfh,
+			&resfh.fh_handle.fh_base, resfh.fh_handle.fh_size);
 
-		status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
-	}
+	status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
 
+out:
 	fh_put(&resfh);
 	return status;
 }
-- 
cgit v1.2.3


From 81ac95c5569d7a60ab5db6c1ccec56c12b3ebcb5 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Wed, 8 Nov 2006 17:44:40 -0800
Subject: [PATCH] nfsd4: fix open-create permissions

In the case where an open creates the file, we shouldn't be rechecking
permissions to open the file; the open succeeds regardless of what the new
file's mode bits say.

This patch fixes the problem, but only by introducing yet another parameter
to nfsd_create_v3.  This is ugly.  This will be fixed by later patches.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Acked-by: Neil Brown <neilb@suse.de>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs3proc.c        | 2 +-
 fs/nfsd/nfs4proc.c        | 6 ++++--
 fs/nfsd/vfs.c             | 4 +++-
 include/linux/nfsd/nfsd.h | 2 +-
 4 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 64db601c2bd2..7f5bad0393b1 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -258,7 +258,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 	/* Now create the file and set attributes */
 	nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len,
 				attr, newfhp,
-				argp->createmode, argp->verf, NULL);
+				argp->createmode, argp->verf, NULL, NULL);
 
 	RETURN_STATUS(nfserr);
 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4a73f5b2546f..50bc94243ca1 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -93,6 +93,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 {
 	struct svc_fh resfh;
 	__be32 status;
+	int created = 0;
 
 	fh_init(&resfh, NFS4_FHSIZE);
 	open->op_truncate = 0;
@@ -105,7 +106,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 		status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data,
 					open->op_fname.len, &open->op_iattr,
 					&resfh, open->op_createmode,
-					(u32 *)open->op_verf.data, &open->op_truncate);
+					(u32 *)open->op_verf.data, &open->op_truncate, &created);
 	} else {
 		status = nfsd_lookup(rqstp, current_fh,
 				     open->op_fname.data, open->op_fname.len, &resfh);
@@ -122,7 +123,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 	memcpy(open->op_stateowner->so_replay.rp_openfh,
 			&resfh.fh_handle.fh_base, resfh.fh_handle.fh_size);
 
-	status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
+	if (!created)
+		status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
 
 out:
 	fh_put(&resfh);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index f21e917bb8ed..1a7ad8c983d1 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1237,7 +1237,7 @@ __be32
 nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		char *fname, int flen, struct iattr *iap,
 		struct svc_fh *resfhp, int createmode, u32 *verifier,
-	        int *truncp)
+	        int *truncp, int *created)
 {
 	struct dentry	*dentry, *dchild = NULL;
 	struct inode	*dirp;
@@ -1331,6 +1331,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
 	if (host_err < 0)
 		goto out_nfserr;
+	if (created)
+		*created = 1;
 
 	if (EX_ISSYNC(fhp->fh_export)) {
 		err = nfserrno(nfsd_sync_dir(dentry));
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index eb231143d579..edb54c3171b3 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -89,7 +89,7 @@ __be32		nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
 __be32		nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
 				char *name, int len, struct iattr *attrs,
 				struct svc_fh *res, int createmode,
-				u32 *verifier, int *truncp);
+				u32 *verifier, int *truncp, int *created);
 __be32		nfsd_commit(struct svc_rqst *, struct svc_fh *,
 				loff_t, unsigned long);
 #endif /* CONFIG_NFSD_V3 */
-- 
cgit v1.2.3


From 088406bcf66d6c7fd8a5c04c00aa410ae9077403 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Wed, 8 Nov 2006 17:44:59 -0800
Subject: [PATCH] nfsd: fix spurious error return from nfsd_create in async
 case

Commit 6264d69d7df654ca64f625e9409189a0e50734e9 modified the nfsd_create()
error handling in such a way that nfsd_create will usually return
nfserr_perm even when succesful, if the export has the async export option.

This introduced a regression that could cause mkdir() to always return a
permissions error, even though the directory in question was actually
succesfully created.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/vfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 1a7ad8c983d1..bb4d926e4487 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1177,7 +1177,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	/*
 	 * Get the dir op function pointer.
 	 */
-	err = nfserr_perm;
+	err = 0;
 	switch (type) {
 	case S_IFREG:
 		host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
-- 
cgit v1.2.3


From f7b2e8c76b3423a1d2501b9399261e9c9a33e100 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 9 Nov 2006 21:25:37 +0000
Subject: [CIFS] Fix minor problem with previous patch

The patch
	NFS stress test generates flood of "close with pending write

was missing an if

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4b07a8cc4633..2436ed8fc840 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -498,7 +498,8 @@ int cifs_close(struct inode *inode, struct file *file)
 					msleep(timeout);
 					timeout *= 4;
 				}
-				cERROR(1,("close with pending writes")); 
+				if(atomic_read(&pSMBFile->wrtPending))
+					cERROR(1,("close with pending writes"));
 				rc = CIFSSMBClose(xid, pTcon,
 						  pSMBFile->netfid);
 			}
-- 
cgit v1.2.3


From 93c189c1148a5e39bcc8f62568f42a77f93477c5 Mon Sep 17 00:00:00 2001
From: Vlad Apostolov <vapo@sgi.com>
Date: Sat, 11 Nov 2006 18:03:49 +1100
Subject: [XFS] 956618: Linux crashes on boot with XFS-DMAPI filesystem when
 CONFIG_XFS_TRACE is on

SGI-PV: 956618
SGI-Modid: xfs-linux-melb:xfs-kern:27196a

Signed-off-by: Vlad Apostolov <vapo@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/Makefile-linux-2.6  | 17 +----------------
 fs/xfs/linux-2.6/xfs_buf.c |  4 ++--
 fs/xfs/support/debug.c     |  4 +---
 fs/xfs/xfs.h               | 23 +++++++++++++++++++++++
 4 files changed, 27 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index 291948d5085a..b49989bb89ad 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -21,22 +21,7 @@ EXTRA_CFLAGS +=	 -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
 XFS_LINUX := linux-2.6
 
 ifeq ($(CONFIG_XFS_DEBUG),y)
-	EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
-	EXTRA_CFLAGS += -DXFS_BUF_LOCK_TRACKING
-endif
-ifeq ($(CONFIG_XFS_TRACE),y)
-	EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
-	EXTRA_CFLAGS += -DXFS_ATTR_TRACE
-	EXTRA_CFLAGS += -DXFS_BLI_TRACE
-	EXTRA_CFLAGS += -DXFS_BMAP_TRACE
-	EXTRA_CFLAGS += -DXFS_BMBT_TRACE
-	EXTRA_CFLAGS += -DXFS_DIR2_TRACE
-	EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
-	EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
-	EXTRA_CFLAGS += -DXFS_LOG_TRACE
-	EXTRA_CFLAGS += -DXFS_RW_TRACE
-	EXTRA_CFLAGS += -DXFS_BUF_TRACE
-	EXTRA_CFLAGS += -DXFS_VNODE_TRACE
+	EXTRA_CFLAGS += -g
 endif
 
 obj-$(CONFIG_XFS_FS)		+= xfs.o
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index db5f5a3608ca..d3382843698e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -15,6 +15,7 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include "xfs.h"
 #include <linux/stddef.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
@@ -31,7 +32,6 @@
 #include <linux/kthread.h>
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
-#include "xfs_linux.h"
 
 STATIC kmem_zone_t *xfs_buf_zone;
 STATIC kmem_shaker_t xfs_buf_shake;
@@ -1406,7 +1406,7 @@ xfs_alloc_bufhash(
 	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
 	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
 	btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
-					sizeof(xfs_bufhash_t), KM_SLEEP);
+					sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE);
 	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
 		spin_lock_init(&btp->bt_hash[i].bh_lock);
 		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index c75f68361e33..4363512d2f90 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -15,11 +15,9 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include <xfs.h>
 #include "debug.h"
 #include "spin.h"
-#include <asm/page.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
 
 static char		message[256];	/* keep it off the stack */
 static DEFINE_SPINLOCK(xfs_err_lock);
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 1a48dbb902a7..bf0a12040b13 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -17,5 +17,28 @@
  */
 #ifndef __XFS_H__
 #define __XFS_H__
+
+#ifdef CONFIG_XFS_DEBUG
+#define STATIC
+#define DEBUG 1
+#define XFS_BUF_LOCK_TRACKING 1
+/* #define QUOTADEBUG 1 */
+#endif
+
+#ifdef CONFIG_XFS_TRACE
+#define XFS_ALLOC_TRACE 1
+#define XFS_ATTR_TRACE 1
+#define XFS_BLI_TRACE 1
+#define XFS_BMAP_TRACE 1
+#define XFS_BMBT_TRACE 1
+#define XFS_DIR2_TRACE 1
+#define XFS_DQUOT_TRACE 1
+#define XFS_ILOCK_TRACE 1
+#define XFS_LOG_TRACE 1
+#define XFS_RW_TRACE 1
+#define XFS_BUF_TRACE 1
+#define XFS_VNODE_TRACE 1
+#endif
+
 #include <linux-2.6/xfs_linux.h>
 #endif	/* __XFS_H__ */
-- 
cgit v1.2.3


From 439b8434792d0b62e32ab1416f214a18a640cc03 Mon Sep 17 00:00:00 2001
From: Tim Shimmin <tes@sgi.com>
Date: Sat, 11 Nov 2006 18:04:34 +1100
Subject: [XFS] Keep lockdep happy.

SGI-PV: 956964
SGI-Modid: xfs-linux-melb:xfs-kern:27200a

Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/xfs_iget.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b73d216ecaf9..1562ac2dd67c 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -683,6 +683,7 @@ xfs_ireclaim(xfs_inode_t *ip)
 	/*
 	 * Free all memory associated with the inode.
 	 */
+	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 	xfs_idestroy(ip);
 }
 
-- 
cgit v1.2.3


From 70a505285f9859f77e07f7c12371b0d29ecf3d82 Mon Sep 17 00:00:00 2001
From: Vlad Apostolov <vapo@sgi.com>
Date: Sat, 11 Nov 2006 18:04:41 +1100
Subject: [XFS] rename uio_read() to xfs_uio_read()

SGI-PV: 957004
SGI-Modid: xfs-linux-melb:xfs-kern:27231a

Signed-off-by: Vlad Apostolov <vapo@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/support/move.c | 2 +-
 fs/xfs/support/move.h | 2 +-
 fs/xfs/xfs_dir2.c     | 2 +-
 fs/xfs/xfs_vnodeops.c | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/support/move.c b/fs/xfs/support/move.c
index caefa17b80fe..ac8617ca3909 100644
--- a/fs/xfs/support/move.c
+++ b/fs/xfs/support/move.c
@@ -22,7 +22,7 @@
  * as we go.
  */
 int
-uio_read(caddr_t src, size_t len, struct uio *uio)
+xfs_uio_read(caddr_t src, size_t len, struct uio *uio)
 {
 	size_t	count;
 
diff --git a/fs/xfs/support/move.h b/fs/xfs/support/move.h
index 97a2498d2da3..977879c24ff5 100644
--- a/fs/xfs/support/move.h
+++ b/fs/xfs/support/move.h
@@ -65,6 +65,6 @@ struct uio {
 typedef struct uio uio_t;
 typedef struct iovec iovec_t;
 
-extern int	uio_read (caddr_t, size_t, uio_t *);
+extern int	xfs_uio_read (caddr_t, size_t, uio_t *);
 
 #endif  /* __XFS_SUPPORT_MOVE_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 8edbe1adb95b..8e8e5279334a 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -678,7 +678,7 @@ xfs_dir2_put_dirent64_uio(
 	idbp->d_off = pa->cook;
 	idbp->d_name[namelen] = '\0';
 	memcpy(idbp->d_name, pa->name, namelen);
-	rval = uio_read((caddr_t)idbp, reclen, uio);
+	rval = xfs_uio_read((caddr_t)idbp, reclen, uio);
 	pa->done = (rval == 0);
 	return rval;
 }
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 061e2ffdd1de..2c79c3670589 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1013,7 +1013,7 @@ xfs_readlink(
 	pathlen = (int)ip->i_d.di_size;
 
 	if (ip->i_df.if_flags & XFS_IFINLINE) {
-		error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
+		error = xfs_uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
 	}
 	else {
 		/*
@@ -1044,7 +1044,7 @@ xfs_readlink(
 				byte_cnt = pathlen;
 			pathlen -= byte_cnt;
 
-			error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
+			error = xfs_uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
 			xfs_buf_relse (bp);
 		}
 
-- 
cgit v1.2.3


From 2e2e7bb1fd857b9fc83b0cd77b6b647ebb423301 Mon Sep 17 00:00:00 2001
From: Vlad Apostolov <vapo@sgi.com>
Date: Sat, 11 Nov 2006 18:04:47 +1100
Subject: [XFS] 956664: dm_read_invis() changes i_atime

SGI-PV: 956664
SGI-Modid: xfs-linux-melb:xfs-kern:27315a

Signed-off-by: Vlad Apostolov <vapo@sgi.com>
Signed-off-by: Sam Vaughan <sjv@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a74f854d91e6..74d094829a4d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -341,8 +341,11 @@ xfs_open_by_handle(
 		put_unused_fd(new_fd);
 		return -XFS_ERROR(-PTR_ERR(filp));
 	}
-	if (inode->i_mode & S_IFREG)
+	if (inode->i_mode & S_IFREG) {
+		/* invisible operation should not change atime */
+		filp->f_flags |= O_NOATIME;
 		filp->f_op = &xfs_invis_file_operations;
+	}
 
 	fd_install(new_fd, filp);
 	return new_fd;
-- 
cgit v1.2.3


From 7a18c386078eaf17ae54595f66c0d64d9c1cb29c Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Sat, 11 Nov 2006 18:04:54 +1100
Subject: [XFS] Clean up i_flags and i_flags_lock handling.

SGI-PV: 956832
SGI-Modid: xfs-linux-melb:xfs-kern:27358a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nscott@aconex.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c |  4 +---
 fs/xfs/xfs_iget.c            | 20 ++++++--------------
 fs/xfs/xfs_inode.c           | 17 +++++------------
 fs/xfs/xfs_inode.h           | 41 +++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_vnodeops.c        | 10 ++++------
 5 files changed, 57 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 38c4d128a8c0..de05abbbe7fd 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -227,9 +227,7 @@ xfs_initialize_vnode(
 		xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
 		xfs_set_inodeops(inode);
 
-		spin_lock(&ip->i_flags_lock);
-		ip->i_flags &= ~XFS_INEW;
-		spin_unlock(&ip->i_flags_lock);
+		xfs_iflags_clear(ip, XFS_INEW);
 		barrier();
 
 		unlock_new_inode(inode);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 1562ac2dd67c..4b0c1881d6d5 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -215,7 +215,7 @@ again:
 			 * If INEW is set this inode is being set up
 			 * we need to pause and try again.
 			 */
-			if (ip->i_flags & XFS_INEW) {
+			if (xfs_iflags_test(ip, XFS_INEW)) {
 				read_unlock(&ih->ih_lock);
 				delay(1);
 				XFS_STATS_INC(xs_ig_frecycle);
@@ -230,7 +230,7 @@ again:
 				 * on its way out of the system,
 				 * we need to pause and try again.
 				 */
-				if (ip->i_flags & XFS_IRECLAIM) {
+				if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
 					read_unlock(&ih->ih_lock);
 					delay(1);
 					XFS_STATS_INC(xs_ig_frecycle);
@@ -243,9 +243,7 @@ again:
 
 				XFS_STATS_INC(xs_ig_found);
 
-				spin_lock(&ip->i_flags_lock);
-				ip->i_flags &= ~XFS_IRECLAIMABLE;
-				spin_unlock(&ip->i_flags_lock);
+				xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
 				version = ih->ih_version;
 				read_unlock(&ih->ih_lock);
 				xfs_ihash_promote(ih, ip, version);
@@ -299,10 +297,7 @@ finish_inode:
 			if (lock_flags != 0)
 				xfs_ilock(ip, lock_flags);
 
-			spin_lock(&ip->i_flags_lock);
-			ip->i_flags &= ~XFS_ISTALE;
-			spin_unlock(&ip->i_flags_lock);
-
+			xfs_iflags_clear(ip, XFS_ISTALE);
 			vn_trace_exit(vp, "xfs_iget.found",
 						(inst_t *)__return_address);
 			goto return_ip;
@@ -371,10 +366,7 @@ finish_inode:
 	ih->ih_next = ip;
 	ip->i_udquot = ip->i_gdquot = NULL;
 	ih->ih_version++;
-	spin_lock(&ip->i_flags_lock);
-	ip->i_flags |= XFS_INEW;
-	spin_unlock(&ip->i_flags_lock);
-
+	xfs_iflags_set(ip, XFS_INEW);
 	write_unlock(&ih->ih_lock);
 
 	/*
@@ -625,7 +617,7 @@ xfs_iput_new(xfs_inode_t	*ip,
 	vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);
 
 	if ((ip->i_d.di_mode == 0)) {
-		ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE));
+		ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
 		vn_mark_bad(vp);
 	}
 	if (inode->i_state & I_NEW)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c27d7d495aa0..17d2a471751e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2193,7 +2193,7 @@ xfs_ifree_cluster(
 			/* Inode not in memory or we found it already,
 			 * nothing to do
 			 */
-			if (!ip || (ip->i_flags & XFS_ISTALE)) {
+			if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
 				read_unlock(&ih->ih_lock);
 				continue;
 			}
@@ -2215,10 +2215,7 @@ xfs_ifree_cluster(
 
 			if (ip == free_ip) {
 				if (xfs_iflock_nowait(ip)) {
-					spin_lock(&ip->i_flags_lock);
-					ip->i_flags |= XFS_ISTALE;
-					spin_unlock(&ip->i_flags_lock);
-
+					xfs_iflags_set(ip, XFS_ISTALE);
 					if (xfs_inode_clean(ip)) {
 						xfs_ifunlock(ip);
 					} else {
@@ -2231,9 +2228,7 @@ xfs_ifree_cluster(
 
 			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
 				if (xfs_iflock_nowait(ip)) {
-					spin_lock(&ip->i_flags_lock);
-					ip->i_flags |= XFS_ISTALE;
-					spin_unlock(&ip->i_flags_lock);
+					xfs_iflags_set(ip, XFS_ISTALE);
 
 					if (xfs_inode_clean(ip)) {
 						xfs_ifunlock(ip);
@@ -2263,9 +2258,7 @@ xfs_ifree_cluster(
 				AIL_LOCK(mp,s);
 				iip->ili_flush_lsn = iip->ili_item.li_lsn;
 				AIL_UNLOCK(mp, s);
-				spin_lock(&iip->ili_inode->i_flags_lock);
-				iip->ili_inode->i_flags |= XFS_ISTALE;
-				spin_unlock(&iip->ili_inode->i_flags_lock);
+				xfs_iflags_set(ip, XFS_ISTALE);
 				pre_flushed++;
 			}
 			lip = lip->li_bio_list;
@@ -2764,7 +2757,7 @@ xfs_iunpin(
 		struct inode *inode = NULL;
 
 		spin_lock(&ip->i_flags_lock);
-		if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
+		if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) {
 			bhv_vnode_t	*vp = XFS_ITOV_NULL(ip);
 
 			/* make sync come back and flush this inode */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index e96eb0835fe6..bc823720d88f 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -305,6 +305,47 @@ typedef struct xfs_inode {
 #endif
 } xfs_inode_t;
 
+
+/*
+ * i_flags helper functions
+ */
+static inline void
+__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
+{
+	ip->i_flags |= flags;
+}
+
+static inline void
+xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
+{
+	spin_lock(&ip->i_flags_lock);
+	__xfs_iflags_set(ip, flags);
+	spin_unlock(&ip->i_flags_lock);
+}
+
+static inline void
+xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
+{
+	spin_lock(&ip->i_flags_lock);
+	ip->i_flags &= ~flags;
+	spin_unlock(&ip->i_flags_lock);
+}
+
+static inline int
+__xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
+{
+	return (ip->i_flags & flags);
+}
+
+static inline int
+xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
+{
+	int ret;
+	spin_lock(&ip->i_flags_lock);
+	ret = __xfs_iflags_test(ip, flags);
+	spin_unlock(&ip->i_flags_lock);
+	return ret;
+}
 #endif	/* __KERNEL__ */
 
 
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 2c79c3670589..4c5d73cbb901 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3844,9 +3844,7 @@ xfs_reclaim(
 		XFS_MOUNT_ILOCK(mp);
 		vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
 		list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
-		spin_lock(&ip->i_flags_lock);
-		ip->i_flags |= XFS_IRECLAIMABLE;
-		spin_unlock(&ip->i_flags_lock);
+		xfs_iflags_set(ip, XFS_IRECLAIMABLE);
 		XFS_MOUNT_IUNLOCK(mp);
 	}
 	return 0;
@@ -3872,8 +3870,8 @@ xfs_finish_reclaim(
 	 */
 	write_lock(&ih->ih_lock);
 	spin_lock(&ip->i_flags_lock);
-	if ((ip->i_flags & XFS_IRECLAIM) ||
-	    (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) {
+	if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
+	    (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) {
 		spin_unlock(&ip->i_flags_lock);
 		write_unlock(&ih->ih_lock);
 		if (locked) {
@@ -3882,7 +3880,7 @@ xfs_finish_reclaim(
 		}
 		return 1;
 	}
-	ip->i_flags |= XFS_IRECLAIM;
+	__xfs_iflags_set(ip, XFS_IRECLAIM);
 	spin_unlock(&ip->i_flags_lock);
 	write_unlock(&ih->ih_lock);
 
-- 
cgit v1.2.3


From 4c60658e0f4e253cf275f12b7c76bf128515a774 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Sat, 11 Nov 2006 18:05:00 +1100
Subject: [XFS] Prevent a deadlock when xfslogd unpins inodes.

The previous fixes for the use after free in xfs_iunpin left a nasty log
deadlock when xfslogd unpinned the inode and dropped the last reference to
the inode. the ->clear_inode() method can issue transactions, and if the
log was full, the transaction could push on the log and get stuck trying
to push the inode it was currently unpinning.

To fix this, we provide xfs_iunpin a guarantee that it will always have a
valid xfs_inode <-> linux inode link or a particular flag will be set on
the inode. We then use log forces during lookup to ensure transactions are
completed before we recycle the inode. This ensures that xfs_iunpin will
never use the linux inode after it is being freed, and any lookup on an
inode on the reclaim list will wait until it is safe to attach a new linux
inode to the xfs inode.

SGI-PV: 956832
SGI-Modid: xfs-linux-melb:xfs-kern:27359a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Shailendra Tripathi <stripathi@agami.com>
Signed-off-by: Takenori Nagano <t-nagano@ah.jp.nec.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_iget.c     | 30 ++++++++++++++++++++++++++++++
 fs/xfs/xfs_inode.c    | 47 ++++++++++++++++++++++-------------------------
 fs/xfs/xfs_vnodeops.c | 21 ++++++++++++++-------
 3 files changed, 66 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 4b0c1881d6d5..c1c89dac19cc 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -237,6 +237,36 @@ again:
 
 					goto again;
 				}
+				ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+
+				/*
+				 * If lookup is racing with unlink, then we
+				 * should return an error immediately so we
+				 * don't remove it from the reclaim list and
+				 * potentially leak the inode.
+				 */
+				if ((ip->i_d.di_mode == 0) &&
+				    !(flags & XFS_IGET_CREATE)) {
+					read_unlock(&ih->ih_lock);
+					return ENOENT;
+				}
+
+				/*
+				 * There may be transactions sitting in the
+				 * incore log buffers or being flushed to disk
+				 * at this time.  We can't clear the
+				 * XFS_IRECLAIMABLE flag until these
+				 * transactions have hit the disk, otherwise we
+				 * will void the guarantee the flag provides
+				 * xfs_iunpin()
+				 */
+				if (xfs_ipincount(ip)) {
+					read_unlock(&ih->ih_lock);
+					xfs_log_force(mp, 0,
+						XFS_LOG_FORCE|XFS_LOG_SYNC);
+					XFS_STATS_INC(xs_ig_frecycle);
+					goto again;
+				}
 
 				vn_trace_exit(vp, "xfs_iget.alloc",
 					(inst_t *)__return_address);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 17d2a471751e..d72c80dbfbb1 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2741,42 +2741,39 @@ xfs_iunpin(
 {
 	ASSERT(atomic_read(&ip->i_pincount) > 0);
 
-	if (atomic_dec_and_test(&ip->i_pincount)) {
+	if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) {
+
 		/*
-		 * If the inode is currently being reclaimed, the
-		 * linux inode _and_ the xfs vnode may have been
-		 * freed so we cannot reference either of them safely.
-		 * Hence we should not try to do anything to them
-		 * if the xfs inode is currently in the reclaim
-		 * path.
+		 * If the inode is currently being reclaimed, the link between
+		 * the bhv_vnode and the xfs_inode will be broken after the
+		 * XFS_IRECLAIM* flag is set. Hence, if these flags are not
+		 * set, then we can move forward and mark the linux inode dirty
+		 * knowing that it is still valid as it won't freed until after
+		 * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The
+		 * i_flags_lock is used to synchronise the setting of the
+		 * XFS_IRECLAIM* flags and the breaking of the link, and so we
+		 * can execute atomically w.r.t to reclaim by holding this lock
+		 * here.
 		 *
-		 * However, we still need to issue the unpin wakeup
-		 * call as the inode reclaim may be blocked waiting for
-		 * the inode to become unpinned.
+		 * However, we still need to issue the unpin wakeup call as the
+		 * inode reclaim may be blocked waiting for the inode to become
+		 * unpinned.
 		 */
-		struct inode *inode = NULL;
 
-		spin_lock(&ip->i_flags_lock);
 		if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) {
 			bhv_vnode_t	*vp = XFS_ITOV_NULL(ip);
+			struct inode *inode = NULL;
+
+			BUG_ON(vp == NULL);
+			inode = vn_to_inode(vp);
+			BUG_ON(inode->i_state & I_CLEAR);
 
 			/* make sync come back and flush this inode */
-			if (vp) {
-				inode = vn_to_inode(vp);
-
-				if (!(inode->i_state &
-						(I_NEW|I_FREEING|I_CLEAR))) {
-					inode = igrab(inode);
-					if (inode)
-						mark_inode_dirty_sync(inode);
-				} else
-					inode = NULL;
-			}
+			if (!(inode->i_state & (I_NEW|I_FREEING)))
+				mark_inode_dirty_sync(inode);
 		}
 		spin_unlock(&ip->i_flags_lock);
 		wake_up(&ip->i_ipin_wait);
-		if (inode)
-			iput(inode);
 	}
 }
 
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 4c5d73cbb901..bda774a04b8f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3827,11 +3827,16 @@ xfs_reclaim(
 	 */
 	xfs_synchronize_atime(ip);
 
-	/* If we have nothing to flush with this inode then complete the
-	 * teardown now, otherwise break the link between the xfs inode
-	 * and the linux inode and clean up the xfs inode later. This
-	 * avoids flushing the inode to disk during the delete operation
-	 * itself.
+	/*
+	 * If we have nothing to flush with this inode then complete the
+	 * teardown now, otherwise break the link between the xfs inode and the
+	 * linux inode and clean up the xfs inode later. This avoids flushing
+	 * the inode to disk during the delete operation itself.
+	 *
+	 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
+	 * first to ensure that xfs_iunpin() will never see an xfs inode
+	 * that has a linux inode being reclaimed. Synchronisation is provided
+	 * by the i_flags_lock.
 	 */
 	if (!ip->i_update_core && (ip->i_itemp == NULL)) {
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -3840,11 +3845,13 @@ xfs_reclaim(
 	} else {
 		xfs_mount_t	*mp = ip->i_mount;
 
-		/* Protect sync from us */
+		/* Protect sync and unpin from us */
 		XFS_MOUNT_ILOCK(mp);
+		spin_lock(&ip->i_flags_lock);
+		__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
 		vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
+		spin_unlock(&ip->i_flags_lock);
 		list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
-		xfs_iflags_set(ip, XFS_IRECLAIMABLE);
 		XFS_MOUNT_IUNLOCK(mp);
 	}
 	return 0;
-- 
cgit v1.2.3


From 050e714eb2bc662e9df6bf048ce86b4fbdd9bcd3 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Sat, 11 Nov 2006 18:05:06 +1100
Subject: [XFS] Remove KERNEL_VERSION macros from xfs_dmapi.h

SGI-PV: 957005
SGI-Modid: xfs-linux-melb:xfs-kern:27398a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_dmapi_priv.h | 28 ++++++++++++++++++++++++++++
 fs/xfs/xfs_dmapi.h                | 22 ++--------------------
 2 files changed, 30 insertions(+), 20 deletions(-)
 create mode 100644 fs/xfs/linux-2.6/xfs_dmapi_priv.h

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_dmapi_priv.h b/fs/xfs/linux-2.6/xfs_dmapi_priv.h
new file mode 100644
index 000000000000..a8b0b1685eed
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_dmapi_priv.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DMAPI_PRIV_H__
+#define __XFS_DMAPI_PRIV_H__
+
+/*
+ *	Based on IO_ISDIRECT, decide which i_ flag is set.
+ */
+#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
+			      DM_FLAGS_IMUX : 0)
+#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
+
+#endif /*__XFS_DMAPI_PRIV_H__*/
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 4e7865ad6f0e..adc3d251240d 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -157,27 +157,9 @@ typedef enum {
 #define DM_FLAGS_IALLOCSEM_WR	0x020	/* thread holds i_alloc_sem wr */
 
 /*
- *	Based on IO_ISDIRECT, decide which i_ flag is set.
+ *	Pull in platform specific event flags defines
  */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-			      DM_FLAGS_IMUX : 0)
-#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
-    (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22))
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-			      DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_IMUX)
-#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
-#endif
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-			      0 : DM_FLAGS_IMUX)
-#define DM_SEM_FLAG_WR	(DM_FLAGS_IMUX)
-#endif
-
+#include "xfs_dmapi_priv.h"
 
 /*
  *	Macros to turn caller specified delay/block flags into
-- 
cgit v1.2.3


From ba8df43c0ee93ec05fc526278a80aaf4cb5ab1fa Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 14 Nov 2006 02:03:29 -0800
Subject: [PATCH] autofs4: panic after mount fail

Resolve the panic on failed mount of an autofs filesystem originally
reported by Mao Bibo.

It addresses two issues that happen after the mount fail.  The first a NULL
pointer reference to a field (pipe) in the autofs superblock info structure
and second the lack of super block cleanup by the autofs and autofs4
modules.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs/inode.c  | 14 +++++++++++++-
 fs/autofs/waitq.c  |  1 +
 fs/autofs4/inode.c | 17 ++++++++++++++++-
 fs/autofs4/waitq.c |  6 ++----
 4 files changed, 32 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 54c518c89e4c..38ede5c9d6fd 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -25,6 +25,14 @@ void autofs_kill_sb(struct super_block *sb)
 	struct autofs_sb_info *sbi = autofs_sbi(sb);
 	unsigned int n;
 
+	/*
+	 * In the event of a failure in get_sb_nodev the superblock
+	 * info is not present so nothing else has been setup, so
+	 * just exit when we are called from deactivate_super.
+	 */
+	if (!sbi)
+		return;
+
 	if ( !sbi->catatonic )
 		autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
 
@@ -136,7 +144,8 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
 
 	s->s_fs_info = sbi;
 	sbi->magic = AUTOFS_SBI_MAGIC;
-	sbi->catatonic = 0;
+	sbi->pipe = NULL;
+	sbi->catatonic = 1;
 	sbi->exp_timeout = 0;
 	sbi->oz_pgrp = process_group(current);
 	autofs_initialize_hash(&sbi->dirhash);
@@ -180,6 +189,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
 	if ( !pipe->f_op || !pipe->f_op->write )
 		goto fail_fput;
 	sbi->pipe = pipe;
+	sbi->catatonic = 0;
 
 	/*
 	 * Success! Install the root dentry now to indicate completion.
@@ -198,6 +208,8 @@ fail_iput:
 	iput(root_inode);
 fail_free:
 	kfree(sbi);
+	s->s_fs_info = NULL;
+	kill_anon_super(s);
 fail_unlock:
 	return -EINVAL;
 }
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 633f628005b4..19a9cafb5ddf 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -41,6 +41,7 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
 		wq = nwq;
 	}
 	fput(sbi->pipe);	/* Close the pipe */
+	sbi->pipe = NULL;
 	autofs_hash_dputall(&sbi->dirhash); /* Remove all dentry pointers */
 }
 
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 51fd8595bf85..ce7c0f1dd529 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -99,6 +99,9 @@ static void autofs4_force_release(struct autofs_sb_info *sbi)
 	struct dentry *this_parent = sbi->sb->s_root;
 	struct list_head *next;
 
+	if (!sbi->sb->s_root)
+		return;
+
 	spin_lock(&dcache_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
@@ -146,6 +149,14 @@ void autofs4_kill_sb(struct super_block *sb)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(sb);
 
+	/*
+	 * In the event of a failure in get_sb_nodev the superblock
+	 * info is not present so nothing else has been setup, so
+	 * just exit when we are called from deactivate_super.
+	 */
+	if (!sbi)
+		return;
+
 	sb->s_fs_info = NULL;
 
 	if ( !sbi->catatonic )
@@ -310,7 +321,8 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	s->s_fs_info = sbi;
 	sbi->magic = AUTOFS_SBI_MAGIC;
 	sbi->pipefd = -1;
-	sbi->catatonic = 0;
+	sbi->pipe = NULL;
+	sbi->catatonic = 1;
 	sbi->exp_timeout = 0;
 	sbi->oz_pgrp = process_group(current);
 	sbi->sb = s;
@@ -388,6 +400,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 		goto fail_fput;
 	sbi->pipe = pipe;
 	sbi->pipefd = pipefd;
+	sbi->catatonic = 0;
 
 	/*
 	 * Success! Install the root dentry now to indicate completion.
@@ -412,6 +425,8 @@ fail_ino:
 	kfree(ino);
 fail_free:
 	kfree(sbi);
+	s->s_fs_info = NULL;
+	kill_anon_super(s);
 fail_unlock:
 	return -EINVAL;
 }
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index c0a6c8d445c7..1e4a539f4417 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -41,10 +41,8 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 		wake_up_interruptible(&wq->queue);
 		wq = nwq;
 	}
-	if (sbi->pipe) {
-		fput(sbi->pipe);	/* Close the pipe */
-		sbi->pipe = NULL;
-	}
+	fput(sbi->pipe);	/* Close the pipe */
+	sbi->pipe = NULL;
 }
 
 static int autofs4_write(struct file *file, const void *addr, int bytes)
-- 
cgit v1.2.3


From 68589bc353037f233fe510ad9ff432338c95db66 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 14 Nov 2006 02:03:32 -0800
Subject: [PATCH] hugetlb: prepare_hugepage_range check offset too

(David:)

If hugetlbfs_file_mmap() returns a failure to do_mmap_pgoff() - for example,
because the given file offset is not hugepage aligned - then do_mmap_pgoff
will go to the unmap_and_free_vma backout path.

But at this stage the vma hasn't been marked as hugepage, and the backout path
will call unmap_region() on it.  That will eventually call down to the
non-hugepage version of unmap_page_range().  On ppc64, at least, that will
cause serious problems if there are any existing hugepage pagetable entries in
the vicinity - for example if there are any other hugepage mappings under the
same PUD.  unmap_page_range() will trigger a bad_pud() on the hugepage pud
entries.  I suspect this will also cause bad problems on ia64, though I don't
have a machine to test it on.

(Hugh:)

prepare_hugepage_range() should check file offset alignment when it checks
virtual address and length, to stop MAP_FIXED with a bad huge offset from
unmapping before it fails further down.  PowerPC should apply the same
prepare_hugepage_range alignment checks as ia64 and all the others do.

Then none of the alignment checks in hugetlbfs_file_mmap are required (nor
is the check for too small a mapping); but even so, move up setting of
VM_HUGETLB and add a comment to warn of what David Gibson discovered - if
hugetlbfs_file_mmap fails before setting it, do_mmap_pgoff's unmap_region
when unwinding from error will go the non-huge way, which may cause bad
behaviour on architectures (powerpc and ia64) which segregate their huge
mappings into a separate region of the address space.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ia64/mm/hugetlbpage.c    |  4 +++-
 arch/powerpc/mm/hugetlbpage.c |  8 ++++++--
 fs/hugetlbfs/inode.c          | 21 ++++++++-------------
 include/linux/hugetlb.h       | 10 +++++++---
 mm/mmap.c                     |  2 +-
 5 files changed, 25 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index eee5c1cfbe32..f3a9585e98a8 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -70,8 +70,10 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
  * Don't actually need to do any preparation, but need to make sure
  * the address is in the right region.
  */
-int prepare_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
 {
+	if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
+		return -EINVAL;
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
 	if (addr & ~HPAGE_MASK)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fd68b74c07c3..506d89768d45 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -491,11 +491,15 @@ static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
 	return 0;
 }
 
-int prepare_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
 {
 	int err = 0;
 
-	if ( (addr+len) < addr )
+	if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
+		return -EINVAL;
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (addr & ~HPAGE_MASK)
 		return -EINVAL;
 
 	if (addr < 0x100000000UL)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0bea6a619e10..7f4756963d05 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -62,24 +62,19 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	loff_t len, vma_len;
 	int ret;
 
-	if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1))
-		return -EINVAL;
-
-	if (vma->vm_start & ~HPAGE_MASK)
-		return -EINVAL;
-
-	if (vma->vm_end & ~HPAGE_MASK)
-		return -EINVAL;
-
-	if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
-		return -EINVAL;
+	/*
+	 * vma alignment has already been checked by prepare_hugepage_range.
+	 * If you add any error returns here, do so after setting VM_HUGETLB,
+	 * so is_vm_hugetlb_page tests below unmap_region go the right way
+	 * when do_mmap_pgoff unwinds (may be important on powerpc and ia64).
+	 */
+	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
+	vma->vm_ops = &hugetlb_vm_ops;
 
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 
 	mutex_lock(&inode->i_mutex);
 	file_accessed(file);
-	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
-	vma->vm_ops = &hugetlb_vm_ops;
 
 	ret = -ENOMEM;
 	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5081d27bfa27..ace64e57e17f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -60,8 +60,11 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(unsigned long addr, unsigned long len,
+						pgoff_t pgoff)
 {
+	if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
+		return -EINVAL;
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
 	if (addr & ~HPAGE_MASK)
@@ -69,7 +72,8 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 #else
-int prepare_hugepage_range(unsigned long addr, unsigned long len);
+int prepare_hugepage_range(unsigned long addr, unsigned long len,
+						pgoff_t pgoff);
 #endif
 
 #ifndef ARCH_HAS_SETCLEAR_HUGE_PTE
@@ -107,7 +111,7 @@ static inline unsigned long hugetlb_total_pages(void)
 #define hugetlb_report_meminfo(buf)		0
 #define hugetlb_report_node_meminfo(n, buf)	0
 #define follow_huge_pmd(mm, addr, pmd, write)	NULL
-#define prepare_hugepage_range(addr, len)	(-EINVAL)
+#define prepare_hugepage_range(addr,len,pgoff)	(-EINVAL)
 #define pmd_huge(x)	0
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
diff --git a/mm/mmap.c b/mm/mmap.c
index 497e502dfd6b..bdace87d7c01 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1379,7 +1379,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 		 * Check if the given range is hugepage aligned, and
 		 * can be made suitable for hugepages.
 		 */
-		ret = prepare_hugepage_range(addr, len);
+		ret = prepare_hugepage_range(addr, len, pgoff);
 	} else {
 		/*
 		 * Ensure that a normal request is not falling in a
-- 
cgit v1.2.3


From a9083081b56800691e79d541638fd2fc6366db0f Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Thu, 16 Nov 2006 01:19:16 -0800
Subject: [PATCH] eCryptfs: dput() lower d_parent on rename

On rename, for both the old and new lower dentry objects, eCryptfs is
missing a dput on the lower parent directory dentry.  This patch will
prevent the BUG() at fs/dcache.c:613 from being hit after renaming a file
inside eCryptfs and then doing a umount on the lower filesystem.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/inode.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ff4865d24f0f..ebec8cfc189b 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -630,6 +630,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		ecryptfs_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
 out_lock:
 	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	dput(lower_new_dentry->d_parent);
+	dput(lower_old_dentry->d_parent);
 	dput(lower_new_dentry);
 	dput(lower_old_dentry);
 	return rc;
-- 
cgit v1.2.3


From d6ddf55440833fd9404138026af246c51ebeef22 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Thu, 16 Nov 2006 01:19:22 -0800
Subject: [PATCH] hfs_fill_super returns success even if no root inode

http://kernelfun.blogspot.com/2006/11/mokb-14-11-2006-linux-26x-selinux.html

mount that image...
fs: filesystem was not cleanly unmounted, running fsck.hfs is recommended.  mounting read-only.
hfs: get root inode failed.
BUG: unable to handle kernel NULL pointer dereference at virtual address 00000018
 printing eip
...
EIP is at superblock_doinit+0x21/0x767
...
 [] selinux_sb_kern_mount+0xc/0x4b
 [] vfs_kern_mount+0x99/0xf6
 [] do_kern_mount+0x2d/0x3e
 [] do_mount+0x5fa/0x66d
 [] sys_mount+0x77/0xae
 [] syscall_call+0x7/0xb
DWARF2 unwinder stuck at syscall_call+0x7/0xb

hfs_fill_super() returns success even if
  root_inode = hfs_iget(sb, &fd.search_key->cat, &rec);
or
  sb->s_root = d_alloc_root(root_inode);

fails.  This superblock finds its way to superblock_doinit() which does:

        struct dentry *root = sb->s_root;
        struct inode *inode = root->d_inode;

and boom.  Need to make sure the error cases return an error, I think.

[akpm@osdl.org: return -ENOMEM on oom]
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hfs/super.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index d43b4fcc8ad3..85b17b3fa4a0 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -390,11 +390,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 		hfs_find_exit(&fd);
 		goto bail_no_root;
 	}
+	res = -EINVAL;
 	root_inode = hfs_iget(sb, &fd.search_key->cat, &rec);
 	hfs_find_exit(&fd);
 	if (!root_inode)
 		goto bail_no_root;
 
+	res = -ENOMEM;
 	sb->s_root = d_alloc_root(root_inode);
 	if (!sb->s_root)
 		goto bail_iput;
-- 
cgit v1.2.3


From 64aaa4f8b7e676ccd3f6448df6867097b873b0a6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 16 Nov 2006 01:19:27 -0800
Subject: [PATCH] AFS: Amend the AFS configuration options

Amend the text of AFS configuration options.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 133dcc8a4150..7b1511d50b05 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2060,8 +2060,7 @@ config CODA_FS_OLD_API
 	  For most cases you probably want to say N.
 
 config AFS_FS
-# for fs/nls/Config.in
-	tristate "Andrew File System support (AFS) (Experimental)"
+	tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
 	depends on INET && EXPERIMENTAL
 	select RXRPC
 	help
-- 
cgit v1.2.3


From da63fc7ce63b43426dc3c69c05e28de2872c159a Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 16 Nov 2006 01:19:28 -0800
Subject: [PATCH] fat: add fat_getattr()

This adds fat_getattr() for setting stat->blksize. (FAT uses the size
of cluster for proper I/O)

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fat/file.c            | 10 ++++++++++
 fs/msdos/namei.c         |  1 +
 fs/vfat/namei.c          |  1 +
 include/linux/msdos_fs.h |  2 ++
 4 files changed, 14 insertions(+)

(limited to 'fs')

diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8337451e7897..0aa813d944a6 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -303,7 +303,17 @@ void fat_truncate(struct inode *inode)
 	fat_flush_inodes(inode->i_sb, inode, NULL);
 }
 
+int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	generic_fillattr(inode, stat);
+	stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fat_getattr);
+
 struct inode_operations fat_file_inode_operations = {
 	.truncate	= fat_truncate,
 	.setattr	= fat_notify_change,
+	.getattr	= fat_getattr,
 };
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index b0f01b3b0536..452461955cbd 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -654,6 +654,7 @@ static struct inode_operations msdos_dir_inode_operations = {
 	.rmdir		= msdos_rmdir,
 	.rename		= msdos_rename,
 	.setattr	= fat_notify_change,
+	.getattr	= fat_getattr,
 };
 
 static int msdos_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index edb711ff7b05..0afd745a37cd 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -1004,6 +1004,7 @@ static struct inode_operations vfat_dir_inode_operations = {
 	.rmdir		= vfat_rmdir,
 	.rename		= vfat_rename,
 	.setattr	= fat_notify_change,
+	.getattr	= fat_getattr,
 };
 
 static int vfat_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index ce6c85815cbd..24a9ef1506b6 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -402,6 +402,8 @@ extern const struct file_operations fat_file_operations;
 extern struct inode_operations fat_file_inode_operations;
 extern int fat_notify_change(struct dentry * dentry, struct iattr * attr);
 extern void fat_truncate(struct inode *inode);
+extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		       struct kstat *stat);
 
 /* fat/inode.c */
 extern void fat_attach(struct inode *inode, loff_t i_pos);
-- 
cgit v1.2.3


From ae56fb16337c882c52806508f93ead4034004c7a Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Thu, 16 Nov 2006 01:19:30 -0800
Subject: [PATCH] eCryptfs: CIFS nlink fixes

When CIFS is the lower filesystem, the old lower dentry needs to be explicitly
dropped from inside eCryptfs to force a revalidate.  In addition, when CIFS is
the lower filesystem, the inode attributes need to be copied back up from the
lower inode to the eCryptfs inode on an eCryptfs revalidate.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/dentry.c | 6 ++++++
 fs/ecryptfs/inode.c  | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 0b9992ab990f..52d1e36dc746 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -57,6 +57,12 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
 	nd->dentry = dentry_save;
 	nd->mnt = vfsmount_save;
+	if (dentry->d_inode) {
+		struct inode *lower_inode =
+			ecryptfs_inode_to_lower(dentry->d_inode);
+
+		ecryptfs_copy_attr_all(dentry->d_inode, lower_inode);
+	}
 out:
 	return rc;
 }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ebec8cfc189b..dfcc68484f47 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -470,6 +470,7 @@ out_lock:
 	unlock_dir(lower_dir_dentry);
 	dput(lower_new_dentry);
 	dput(lower_old_dentry);
+	d_drop(lower_old_dentry);
 	d_drop(new_dentry);
 	d_drop(old_dentry);
 	return rc;
@@ -484,7 +485,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
 	lock_parent(lower_dentry);
 	rc = vfs_unlink(lower_dir_inode, lower_dentry);
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Error in vfs_unlink\n");
+		printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
 		goto out_unlock;
 	}
 	ecryptfs_copy_attr_times(dir, lower_dir_inode);
-- 
cgit v1.2.3


From 7bb0386f102ece8819182ccf7fffe8bbebc32b19 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Thu, 19 Oct 2006 12:24:42 +0900
Subject: debugfs: check return value correctly

The return value is stored in "*dentry", not in "dentry".

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e77676df6713..a736d44989c4 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -147,13 +147,13 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 	*dentry = NULL;
 	mutex_lock(&parent->d_inode->i_mutex);
 	*dentry = lookup_one_len(name, parent, strlen(name));
-	if (!IS_ERR(dentry)) {
+	if (!IS_ERR(*dentry)) {
 		if ((mode & S_IFMT) == S_IFDIR)
 			error = debugfs_mkdir(parent->d_inode, *dentry, mode);
 		else 
 			error = debugfs_create(parent->d_inode, *dentry, mode);
 	} else
-		error = PTR_ERR(dentry);
+		error = PTR_ERR(*dentry);
 	mutex_unlock(&parent->d_inode->i_mutex);
 
 	return error;
-- 
cgit v1.2.3


From d2133717d5f994cca970b5aeb9d4664feeb92ff4 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@sgi.com>
Date: Tue, 21 Nov 2006 18:55:16 +1100
Subject: [XFS] Fix uninitialized br_state and br_startoff in
 xfs_bmap_add_extent_delay_real()

SGI-PV: 957008
SGI-Modid: xfs-linux-melb:xfs-kern:27457a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: Shailendra Tripathi <stripathi@agami.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_bmap.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 5b050c06795f..498ad50d1f45 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1171,6 +1171,8 @@ xfs_bmap_add_extent_delay_real(
 		xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep, temp);
 		r[0] = *new;
+		r[1].br_state = PREV.br_state;
+		r[1].br_startblock = 0;
 		r[1].br_startoff = new_endoff;
 		temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
 		r[1].br_blockcount = temp2;
-- 
cgit v1.2.3


From e5ffd2bb62c3f2c0d9f34e0d16fab6e2c8b056fb Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Tue, 21 Nov 2006 18:55:33 +1100
Subject: [XFS] Stale the correct inode when freeing clusters.

SGI-PV: 958376
SGI-Modid: xfs-linux-melb:xfs-kern:27503a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d72c80dbfbb1..44dfac521285 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2258,7 +2258,7 @@ xfs_ifree_cluster(
 				AIL_LOCK(mp,s);
 				iip->ili_flush_lsn = iip->ili_item.li_lsn;
 				AIL_UNLOCK(mp, s);
-				xfs_iflags_set(ip, XFS_ISTALE);
+				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
 				pre_flushed++;
 			}
 			lip = lip->li_bio_list;
-- 
cgit v1.2.3


From 2d51013ed2f2b6a5d2369b7fbbd989df1f6369e2 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Sat, 25 Nov 2006 11:09:20 -0800
Subject: [PATCH] fuse: fix Oops in lookup

Fix bug in certain error paths of lookup routines.  The request object was
reused for sending FORGET, which is illegal.  This bug could cause an Oops
in 2.6.18.  In earlier versions it might silently corrupt memory, but this
is very unlikely.

These error paths are never triggered by libfuse, so this wasn't noticed
even with the 2.6.18 kernel, only with a filesystem using the raw kernel
interface.

Thanks to Russ Cox for the bug report and test filesystem.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c | 52 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index cfc8f81e60d0..c71a6c092ad9 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -138,6 +138,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		struct fuse_entry_out outarg;
 		struct fuse_conn *fc;
 		struct fuse_req *req;
+		struct fuse_req *forget_req;
 		struct dentry *parent;
 
 		/* Doesn't hurt to "reset" the validity timeout */
@@ -152,25 +153,33 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		if (IS_ERR(req))
 			return 0;
 
+		forget_req = fuse_get_req(fc);
+		if (IS_ERR(forget_req)) {
+			fuse_put_request(fc, req);
+			return 0;
+		}
+
 		parent = dget_parent(entry);
 		fuse_lookup_init(req, parent->d_inode, entry, &outarg);
 		request_send(fc, req);
 		dput(parent);
 		err = req->out.h.error;
+		fuse_put_request(fc, req);
 		/* Zero nodeid is same as -ENOENT */
 		if (!err && !outarg.nodeid)
 			err = -ENOENT;
 		if (!err) {
 			struct fuse_inode *fi = get_fuse_inode(inode);
 			if (outarg.nodeid != get_node_id(inode)) {
-				fuse_send_forget(fc, req, outarg.nodeid, 1);
+				fuse_send_forget(fc, forget_req,
+						 outarg.nodeid, 1);
 				return 0;
 			}
 			spin_lock(&fc->lock);
 			fi->nlookup ++;
 			spin_unlock(&fc->lock);
 		}
-		fuse_put_request(fc, req);
+		fuse_put_request(fc, forget_req);
 		if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
 			return 0;
 
@@ -221,6 +230,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	struct inode *inode = NULL;
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	struct fuse_req *req;
+	struct fuse_req *forget_req;
 
 	if (entry->d_name.len > FUSE_NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
@@ -229,9 +239,16 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	if (IS_ERR(req))
 		return ERR_PTR(PTR_ERR(req));
 
+	forget_req = fuse_get_req(fc);
+	if (IS_ERR(forget_req)) {
+		fuse_put_request(fc, req);
+		return ERR_PTR(PTR_ERR(forget_req));
+	}
+
 	fuse_lookup_init(req, dir, entry, &outarg);
 	request_send(fc, req);
 	err = req->out.h.error;
+	fuse_put_request(fc, req);
 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
 	if (!err && outarg.nodeid &&
 	    (invalid_nodeid(outarg.nodeid) || !valid_mode(outarg.attr.mode)))
@@ -240,11 +257,11 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 		inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
 				  &outarg.attr);
 		if (!inode) {
-			fuse_send_forget(fc, req, outarg.nodeid, 1);
+			fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
 			return ERR_PTR(-ENOMEM);
 		}
 	}
-	fuse_put_request(fc, req);
+	fuse_put_request(fc, forget_req);
 	if (err && err != -ENOENT)
 		return ERR_PTR(err);
 
@@ -388,6 +405,13 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 	struct fuse_entry_out outarg;
 	struct inode *inode;
 	int err;
+	struct fuse_req *forget_req;
+
+	forget_req = fuse_get_req(fc);
+	if (IS_ERR(forget_req)) {
+		fuse_put_request(fc, req);
+		return PTR_ERR(forget_req);
+	}
 
 	req->in.h.nodeid = get_node_id(dir);
 	req->out.numargs = 1;
@@ -395,24 +419,24 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 	req->out.args[0].value = &outarg;
 	request_send(fc, req);
 	err = req->out.h.error;
-	if (err) {
-		fuse_put_request(fc, req);
-		return err;
-	}
+	fuse_put_request(fc, req);
+	if (err)
+		goto out_put_forget_req;
+
 	err = -EIO;
 	if (invalid_nodeid(outarg.nodeid))
-		goto out_put_request;
+		goto out_put_forget_req;
 
 	if ((outarg.attr.mode ^ mode) & S_IFMT)
-		goto out_put_request;
+		goto out_put_forget_req;
 
 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
 			  &outarg.attr);
 	if (!inode) {
-		fuse_send_forget(fc, req, outarg.nodeid, 1);
+		fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
 		return -ENOMEM;
 	}
-	fuse_put_request(fc, req);
+	fuse_put_request(fc, forget_req);
 
 	if (S_ISDIR(inode->i_mode)) {
 		struct dentry *alias;
@@ -434,8 +458,8 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 	fuse_invalidate_attr(dir);
 	return 0;
 
- out_put_request:
-	fuse_put_request(fc, req);
+ out_put_forget_req:
+	fuse_put_request(fc, forget_req);
 	return err;
 }
 
-- 
cgit v1.2.3


From 701e054e0c2db82359f0454c7ed4fd24346d52eb Mon Sep 17 00:00:00 2001
From: Vasily Tarasov <vtaras@openvz.org>
Date: Sat, 25 Nov 2006 11:09:22 -0800
Subject: [PATCH] mounstats NULL pointer dereference

OpenVZ developers team has encountered the following problem in 2.6.19-rc6
kernel. After some seconds of running script

while [[ 1 ]]
do
	find  /proc -name mountstats | xargs cat
done

this Oops appears:

BUG: unable to handle kernel NULL pointer dereference at virtual address
00000010
 printing eip:
c01a6b70
*pde = 00000000
Oops: 0000 [#1]
SMP
Modules linked in: xt_length ipt_ttl xt_tcpmss ipt_TCPMSS iptable_mangle
iptable_filter xt_multiport xt_limit ipt_tos ipt_REJECT ip_tables x_tables
parport_pc lp parport sunrpc af_packet thermal processor fan button battery
asus_acpi ac ohci_hcd ehci_hcd usbcore i2c_nforce2 i2c_core tg3 floppy
pata_amd
ide_cd cdrom sata_nv libata
CPU:    1
EIP:    0060:[<c01a6b70>]    Not tainted VLI
EFLAGS: 00010246   (2.6.19-rc6 #2)
EIP is at mountstats_open+0x70/0xf0
eax: 00000000   ebx: e6247030   ecx: e62470f8   edx: 00000000
esi: 00000000   edi: c01a6b00   ebp: c33b83c0   esp: f4105eb4
ds: 007b   es: 007b   ss: 0068
Process cat (pid: 6044, ti=f4105000 task=f4104a70 task.ti=f4105000)
Stack: c33b83c0 c04ee940 f46a4a80 c33b83c0 e4df31b4 c01a6b00 f4105000 c0169231
       e4df31b4 c33b83c0 c33b83c0 f4105f20 00000003 f4105000 c0169445 f2503cf0
       f7f8c4c0 00008000 c33b83c0 00000000 00008000 c0169350 f4105f20 00008000
Call Trace:
 [<c01a6b00>] mountstats_open+0x0/0xf0
 [<c0169231>] __dentry_open+0x181/0x250
 [<c0169445>] nameidata_to_filp+0x35/0x50
 [<c0169350>] do_filp_open+0x50/0x60
 [<c01873d6>] seq_read+0xc6/0x300
 [<c0169511>] get_unused_fd+0x31/0xc0
 [<c01696d3>] do_sys_open+0x63/0x110
 [<c01697a7>] sys_open+0x27/0x30
 [<c01030bd>] sysenter_past_esp+0x56/0x79
 =======================
Code: 45 74 8b 54 24 20 89 44 24 08 8b 42 f0 31 d2 e8 47 cb f8 ff 85 c0 89 c3
74 51 8d 80 a0 04 00 00 e8 46 06 2c 00 8b 83 48 04 00 00 <8b> 78 10 85 ff 74
03
f0 ff 07 b0 01 86 83 a0 04 00 00 f0 ff 4b
EIP: [<c01a6b70>] mountstats_open+0x70/0xf0 SS:ESP 0068:f4105eb4

The problem is that task->nsproxy can be equal NULL for some time during
task exit. This patch fixes the BUG.

Signed-off-by: Vasily Tarasov <vtaras@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: "Serge E. Hallyn" <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c          | 3 ++-
 include/linux/nsproxy.h | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8df27401d292..795319c54f72 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -442,7 +442,8 @@ static int mountstats_open(struct inode *inode, struct file *file)
 
 		if (task) {
 			task_lock(task);
-			namespace = task->nsproxy->namespace;
+			if (task->nsproxy)
+				namespace = task->nsproxy->namespace;
 			if (namespace)
 				get_namespace(namespace);
 			task_unlock(task);
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index f6baecdeecd6..971d1c6dfc4b 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -45,8 +45,10 @@ static inline void exit_task_namespaces(struct task_struct *p)
 {
 	struct nsproxy *ns = p->nsproxy;
 	if (ns) {
-		put_nsproxy(ns);
+		task_lock(p);
 		p->nsproxy = NULL;
+		task_unlock(p);
+		put_nsproxy(ns);
 	}
 }
 #endif
-- 
cgit v1.2.3


From 4d8ebddcc525a5800dab5880946cecffe73e9dca Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sat, 25 Nov 2006 11:09:26 -0800
Subject: [PATCH] debugfs: add header file

debugfs needs include/linux/kobject.h for <kernel_subsys>.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/debugfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a736d44989c4..137d76c3f90a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -21,6 +21,7 @@
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
+#include <linux/kobject.h>
 #include <linux/namei.h>
 #include <linux/debugfs.h>
 
-- 
cgit v1.2.3


From 533221fbaf001692d5db646f84f7d033fac78cc7 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 25 Nov 2006 11:09:30 -0800
Subject: [PATCH] reiserfs: fmt bugfix

One reiserfs_warning() call uses %lu, but doesn't supply what to print.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index b67ce9354048..ac14318c81ba 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -74,7 +74,8 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 			igrab(inode);
 			reiserfs_warning(inode->i_sb,
 					 "pinning inode %lu because the "
-					 "preallocation can't be freed");
+					 "preallocation can't be freed",
+					 inode->i_ino);
 			goto out;
 		}
 	}
-- 
cgit v1.2.3


From de88777e6942de76410ad2eb2858f5fbb6eb9c35 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 28 Nov 2006 12:29:49 -0800
Subject: [PATCH] ecryptfs: fix crypto_alloc_blkcipher() error check

The return value of crypto_alloc_blkcipher() should be checked by IS_ERR().

Cc: Mike Halcrow <mhalcrow@us.ibm.com>
Cc: Phillip Hellewell <phillip@hellewell.homeip.net>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 136175a69332..f63a7755fe86 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -820,7 +820,8 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
 	crypt_stat->tfm = crypto_alloc_blkcipher(full_alg_name, 0,
 						 CRYPTO_ALG_ASYNC);
 	kfree(full_alg_name);
-	if (!crypt_stat->tfm) {
+	if (IS_ERR(crypt_stat->tfm)) {
+		rc = PTR_ERR(crypt_stat->tfm);
 		ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
 				"Error initializing cipher [%s]\n",
 				crypt_stat->cipher);
-- 
cgit v1.2.3