From dd8a306ac0c918268bd2ae89da2dea627f6e352d Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Thu, 10 Nov 2005 07:50:03 -0600 Subject: JFS: Add back directory i_size calculations for legacy partitions Linux-formatted jfs partitions have a different idea about what i_size represents than partitions formatted on OS/2. The i_size calculation is now based on the size of the directory index. For legacy partitions, which have no directory index, the i_size is never being updated. This patch adds back the original i_size calculations for legacy partitions. Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_dtree.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 404f33eae507..6c3f08319846 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -1005,6 +1005,9 @@ static int dtSplitUp(tid_t tid, DT_PUTPAGE(smp); + if (!DO_INDEX(ip)) + ip->i_size = xlen << sbi->l2bsize; + goto freeKeyName; } @@ -1055,7 +1058,9 @@ static int dtSplitUp(tid_t tid, xaddr = addressPXD(pxd) + xlen; dbFree(ip, xaddr, (s64) n); } - } + } else if (!DO_INDEX(ip)) + ip->i_size = lengthPXD(pxd) << sbi->l2bsize; + extendOut: DT_PUTPAGE(smp); @@ -1098,6 +1103,9 @@ static int dtSplitUp(tid_t tid, goto splitOut; } + if (!DO_INDEX(ip)) + ip->i_size += PSIZE; + /* * propagate up the router entry for the leaf page just split * @@ -2424,6 +2432,9 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, break; } + if (!DO_INDEX(ip)) + ip->i_size -= PSIZE; + return 0; } -- cgit v1.2.3 From 4d5dbd0945d9e0833dd7964a3d6ee33157f7cc7a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 29 Nov 2005 08:28:58 -0600 Subject: JFS: make buddy table static Idea is to reduce false cacheline sharing and stuff Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_dmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 68000a50ceb6..adb9f05093b2 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -125,7 +125,7 @@ static int dbGetL2AGSize(s64 nblocks); * into the table, with the table elements yielding the maximum * binary buddy of free bits within the character. */ -static s8 budtab[256] = { +static const s8 budtab[256] = { 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -- cgit v1.2.3 From 1de87444f8f91009b726108c9a56600645ee8751 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 24 Jan 2006 15:22:50 -0600 Subject: JFS: semaphore to mutex conversion. the conversion was generated via scripts, and the result was validated automatically via a script as well. build and boot tested. 
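Condensed for reference, the substitutions the conversion script applies throughout the diff below boil down to the following table (a restatement of what the hunks show, not additional code from the patch):

    struct semaphore x;   becomes   struct mutex x;
    init_MUTEX(&x);       becomes   mutex_init(&x);
    down(&x);             becomes   mutex_lock(&x);
    up(&x);               becomes   mutex_unlock(&x);
    down_trylock(&x)      becomes   !mutex_trylock(&x)

The last substitution inverts the return convention (down_trylock() returns 0 when it acquires the semaphore, while mutex_trylock() returns 1 on success), which is why the jfs_txnmgr.c hunk below ends up with the double negation "! !mutex_trylock(...)".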
Signed-off-by: Ingo Molnar Signed-off-by: Dave Kleikamp --- fs/jfs/acl.c | 4 +-- fs/jfs/inode.c | 14 ++++---- fs/jfs/jfs_dmap.c | 6 ++-- fs/jfs/jfs_dmap.h | 2 +- fs/jfs/jfs_extent.c | 20 +++++------ fs/jfs/jfs_imap.c | 22 ++++++------ fs/jfs/jfs_imap.h | 4 +-- fs/jfs/jfs_incore.h | 5 +-- fs/jfs/jfs_lock.h | 1 + fs/jfs/jfs_logmgr.c | 6 ++-- fs/jfs/jfs_logmgr.h | 2 +- fs/jfs/jfs_txnmgr.c | 10 +++--- fs/jfs/namei.c | 98 ++++++++++++++++++++++++++--------------------------- fs/jfs/super.c | 2 +- fs/jfs/xattr.c | 8 ++--- 15 files changed, 103 insertions(+), 101 deletions(-) (limited to 'fs') diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 461e4934ca7c..e0b6fdab200c 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -207,12 +207,12 @@ static int jfs_acl_chmod(struct inode *inode) rc = posix_acl_chmod_masq(clone, inode->i_mode); if (!rc) { tid_t tid = txBegin(inode->i_sb, 0); - down(&JFS_IP(inode)->commit_sem); + mutex_lock(&JFS_IP(inode)->commit_mutex); rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone); if (!rc) rc = txCommit(tid, 1, &inode, 0); txEnd(tid); - up(&JFS_IP(inode)->commit_sem); + mutex_unlock(&JFS_IP(inode)->commit_mutex); } posix_acl_release(clone); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 9f942ca8e4e3..d7834a9117ce 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -89,16 +89,16 @@ int jfs_commit_inode(struct inode *inode, int wait) } tid = txBegin(inode->i_sb, COMMIT_INODE); - down(&JFS_IP(inode)->commit_sem); + mutex_lock(&JFS_IP(inode)->commit_mutex); /* - * Retest inode state after taking commit_sem + * Retest inode state after taking commit_mutex */ if (inode->i_nlink && test_cflag(COMMIT_Dirty, inode)) rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0); txEnd(tid); - up(&JFS_IP(inode)->commit_sem); + mutex_unlock(&JFS_IP(inode)->commit_mutex); return rc; } @@ -335,18 +335,18 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length) tid = txBegin(ip->i_sb, 0); /* - * The commit_sem cannot be taken before txBegin. + * The commit_mutex cannot be taken before txBegin. * txBegin may block and there is a chance the inode * could be marked dirty and need to be committed * before txBegin unblocks */ - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); newsize = xtTruncate(tid, ip, length, COMMIT_TRUNCATE | COMMIT_PWMAP); if (newsize < 0) { txEnd(tid); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); break; } @@ -355,7 +355,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length) txCommit(tid, 1, &ip, 0); txEnd(tid); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); } while (newsize > length); /* Truncate isn't always atomic */ } diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 038d8b76d113..4fb3ed184925 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -64,9 +64,9 @@ * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. 
*/ -#define BMAP_LOCK_INIT(bmp) init_MUTEX(&bmp->db_bmaplock) -#define BMAP_LOCK(bmp) down(&bmp->db_bmaplock) -#define BMAP_UNLOCK(bmp) up(&bmp->db_bmaplock) +#define BMAP_LOCK_INIT(bmp) mutex_init(&bmp->db_bmaplock) +#define BMAP_LOCK(bmp) mutex_lock(&bmp->db_bmaplock) +#define BMAP_UNLOCK(bmp) mutex_unlock(&bmp->db_bmaplock) /* * forward references diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 32e25884e7e8..8b14cc8e0228 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -243,7 +243,7 @@ struct dbmap { struct bmap { struct dbmap db_bmap; /* on-disk aggregate map descriptor */ struct inode *db_ipbmap; /* ptr to aggregate map incore inode */ - struct semaphore db_bmaplock; /* aggregate map lock */ + struct mutex db_bmaplock; /* aggregate map lock */ atomic_t db_active[MAXAG]; /* count of active, open files in AG */ u32 *db_DBmap; }; diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 4879603daa1c..5549378358bf 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -94,7 +94,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) txBeginAnon(ip->i_sb); /* Avoid race with jfs_commit_inode() */ - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* validate extent length */ if (xlen > MAXXLEN) @@ -136,14 +136,14 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) */ nxlen = xlen; if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) { - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } /* Allocate blocks to quota. */ if (DQUOT_ALLOC_BLOCK(ip, nxlen)) { dbFree(ip, nxaddr, (s64) nxlen); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return -EDQUOT; } @@ -165,7 +165,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) if (rc) { dbFree(ip, nxaddr, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } @@ -177,7 +177,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) mark_inode_dirty(ip); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); /* * COMMIT_SyncList flags an anonymous tlock on page that is on * sync list. @@ -222,7 +222,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) /* This blocks if we are low on resources */ txBeginAnon(ip->i_sb); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* validate extent length */ if (nxlen > MAXXLEN) nxlen = MAXXLEN; @@ -258,7 +258,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) /* Allocat blocks to quota. 
*/ if (DQUOT_ALLOC_BLOCK(ip, nxlen)) { dbFree(ip, nxaddr, (s64) nxlen); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return -EDQUOT; } @@ -338,7 +338,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) mark_inode_dirty(ip); exit: - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } #endif /* _NOTYET */ @@ -439,12 +439,12 @@ int extRecord(struct inode *ip, xad_t * xp) txBeginAnon(ip->i_sb); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* update the extent */ rc = xtUpdate(0, ip, xp); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return rc; } diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 31b4aa13dd4b..87dd86c34c22 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -66,14 +66,14 @@ static HLIST_HEAD(aggregate_hash); * imap locks */ /* iag free list lock */ -#define IAGFREE_LOCK_INIT(imap) init_MUTEX(&imap->im_freelock) -#define IAGFREE_LOCK(imap) down(&imap->im_freelock) -#define IAGFREE_UNLOCK(imap) up(&imap->im_freelock) +#define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock) +#define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock) +#define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock) /* per ag iag list locks */ -#define AG_LOCK_INIT(imap,index) init_MUTEX(&(imap->im_aglock[index])) -#define AG_LOCK(imap,agno) down(&imap->im_aglock[agno]) -#define AG_UNLOCK(imap,agno) up(&imap->im_aglock[agno]) +#define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index])) +#define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno]) +#define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno]) /* * forward references @@ -1261,7 +1261,7 @@ int diFree(struct inode *ip) * to be freed by the transaction; */ tid = txBegin(ipimap->i_sb, COMMIT_FORCE); - down(&JFS_IP(ipimap)->commit_sem); + mutex_lock(&JFS_IP(ipimap)->commit_mutex); /* acquire tlock of the iag page of the freed ixad * to force the page NOHOMEOK (even though no data is @@ -1294,7 +1294,7 @@ int diFree(struct inode *ip) rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); + mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* unlock the AG inode map information */ AG_UNLOCK(imap, agno); @@ -2554,13 +2554,13 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) * addressing structure pointing to the new iag page; */ tid = txBegin(sb, COMMIT_FORCE); - down(&JFS_IP(ipimap)->commit_sem); + mutex_lock(&JFS_IP(ipimap)->commit_mutex); /* update the inode map addressing structure to point to it */ if ((rc = xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); + mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* Free the blocks allocated for the iag since it was * not successfully added to the inode map */ @@ -2626,7 +2626,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); + mutex_unlock(&JFS_IP(ipimap)->commit_mutex); duplicateIXtree(sb, blkno, xlen, &xaddr); diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h index 6b59adec036a..6e24465f0f98 100644 --- a/fs/jfs/jfs_imap.h +++ b/fs/jfs/jfs_imap.h @@ -140,8 +140,8 @@ struct dinomap { struct inomap { struct dinomap im_imap; /* 4096: inode allocation control */ struct inode *im_ipimap; /* 4: ptr to inode for imap */ - struct semaphore im_freelock; /* 4: iag free list lock */ - 
struct semaphore im_aglock[MAXAG]; /* 512: per AG locks */ + struct mutex im_freelock; /* 4: iag free list lock */ + struct mutex im_aglock[MAXAG]; /* 512: per AG locks */ u32 *im_DBGdimap; atomic_t im_numinos; /* num of backed inodes */ atomic_t im_numfree; /* num of free backed inodes */ diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index dc21a5bd54d4..a97ead889a68 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -19,6 +19,7 @@ #ifndef _H_JFS_INCORE #define _H_JFS_INCORE +#include #include #include #include @@ -62,12 +63,12 @@ struct jfs_inode_info { */ struct rw_semaphore rdwrlock; /* - * commit_sem serializes transaction processing on an inode. + * commit_mutex serializes transaction processing on an inode. * It must be taken after beginning a transaction (txBegin), since * dirty inodes may be committed while a new transaction on the * inode is blocked in txBegin or TxBeginAnon */ - struct semaphore commit_sem; + struct mutex commit_mutex; /* xattr_sem allows us to access the xattrs without taking i_mutex */ struct rw_semaphore xattr_sem; lid_t xtlid; /* lid of xtree lock on directory */ diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h index 10ad1d086685..70ac9f7d1e00 100644 --- a/fs/jfs/jfs_lock.h +++ b/fs/jfs/jfs_lock.h @@ -20,6 +20,7 @@ #define _H_JFS_LOCK #include +#include #include /* diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index d27bac6acaa3..06bded6c12b9 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -87,9 +87,9 @@ DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait); /* * log read/write serialization (per log) */ -#define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock) -#define LOG_LOCK(log) down(&((log)->loglock)) -#define LOG_UNLOCK(log) up(&((log)->loglock)) +#define LOG_LOCK_INIT(log) mutex_init(&(log)->loglock) +#define LOG_LOCK(log) mutex_lock(&((log)->loglock)) +#define LOG_UNLOCK(log) mutex_unlock(&((log)->loglock)) /* diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index e4978b5b65ee..8c6909b80014 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -389,7 +389,7 @@ struct jfs_log { int eor; /* 4: eor of last record in eol page */ struct lbuf *bp; /* 4: current log page buffer */ - struct semaphore loglock; /* 4: log write serialization lock */ + struct mutex loglock; /* 4: log write serialization lock */ /* syncpt */ int nextsync; /* 4: bytes to write before next syncpt */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 2ddb6b892bcf..d38f605d9481 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2876,10 +2876,10 @@ restart: */ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); - down(&jfs_ip->commit_sem); + mutex_lock(&jfs_ip->commit_mutex); txCommit(tid, 1, &ip, 0); txEnd(tid); - up(&jfs_ip->commit_sem); + mutex_unlock(&jfs_ip->commit_mutex); /* * Just to be safe. I don't know how * long we can run without blocking @@ -2952,7 +2952,7 @@ int jfs_sync(void *arg) * Inode is being freed */ list_del_init(&jfs_ip->anon_inode_list); - } else if (! down_trylock(&jfs_ip->commit_sem)) { + } else if (! !mutex_trylock(&jfs_ip->commit_mutex)) { /* * inode will be removed from anonymous list * when it is committed @@ -2961,7 +2961,7 @@ int jfs_sync(void *arg) tid = txBegin(ip->i_sb, COMMIT_INODE); rc = txCommit(tid, 1, &ip, 0); txEnd(tid); - up(&jfs_ip->commit_sem); + mutex_unlock(&jfs_ip->commit_mutex); iput(ip); /* @@ -2971,7 +2971,7 @@ int jfs_sync(void *arg) cond_resched(); TXN_LOCK(); } else { - /* We can't get the commit semaphore. It may + /* We can't get the commit mutex. 
It may * be held by a thread waiting for tlock's * so let's not block here. Save it to * put back on the anon_list. diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 4abbe8604302..ed4d170c212d 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -104,8 +104,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dip)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); rc = jfs_init_acl(tid, ip, dip); if (rc) @@ -165,8 +165,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, out3: txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; @@ -238,8 +238,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dip)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); rc = jfs_init_acl(tid, ip, dip); if (rc) @@ -300,8 +300,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) out3: txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; @@ -365,8 +365,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dip)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); iplist[0] = dip; iplist[1] = ip; @@ -384,8 +384,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) if (rc == -EIO) txAbort(tid, 1); txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); goto out2; } @@ -422,8 +422,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); /* * Truncating the directory index table is not guaranteed. 
It @@ -488,8 +488,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dip)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); iplist[0] = dip; iplist[1] = ip; @@ -503,8 +503,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) if (rc == -EIO) txAbort(tid, 1); /* Marks FS Dirty */ txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); IWRITE_UNLOCK(ip); goto out1; } @@ -527,8 +527,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) if ((new_size = commitZeroLink(tid, ip)) < 0) { txAbort(tid, 1); /* Marks FS Dirty */ txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); IWRITE_UNLOCK(ip); rc = new_size; goto out1; @@ -556,13 +556,13 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); while (new_size && (rc == 0)) { tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); new_size = xtTruncate_pmap(tid, ip, new_size); if (new_size < 0) { txAbort(tid, 1); /* Marks FS Dirty */ @@ -570,7 +570,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) } else rc = txCommit(tid, 2, &iplist[0], COMMIT_SYNC); txEnd(tid); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); } if (ip->i_nlink == 0) @@ -805,8 +805,8 @@ static int jfs_link(struct dentry *old_dentry, tid = txBegin(ip->i_sb, 0); - down(&JFS_IP(dir)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dir)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* * scan parent directory for entry/freespace @@ -847,8 +847,8 @@ static int jfs_link(struct dentry *old_dentry, out: txEnd(tid); - up(&JFS_IP(dir)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dir)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); jfs_info("jfs_link: rc:%d", rc); return rc; @@ -916,8 +916,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dip)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); rc = jfs_init_security(tid, ip, dip); if (rc) @@ -1037,8 +1037,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, out3: txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; @@ -1141,13 +1141,13 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, */ tid = txBegin(new_dir->i_sb, 0); - down(&JFS_IP(new_dir)->commit_sem); - down(&JFS_IP(old_ip)->commit_sem); + mutex_lock(&JFS_IP(new_dir)->commit_mutex); + mutex_lock(&JFS_IP(old_ip)->commit_mutex); if (old_dir != new_dir) - down(&JFS_IP(old_dir)->commit_sem); + mutex_lock(&JFS_IP(old_dir)->commit_mutex); if (new_ip) { - down(&JFS_IP(new_ip)->commit_sem); + mutex_lock(&JFS_IP(new_ip)->commit_mutex); /* * Change existing directory entry to new inode number */ @@ -1160,10 +1160,10 @@ static int jfs_rename(struct inode 
*old_dir, struct dentry *old_dentry, if (S_ISDIR(new_ip->i_mode)) { new_ip->i_nlink--; if (new_ip->i_nlink) { - up(&JFS_IP(new_dir)->commit_sem); - up(&JFS_IP(old_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_dir)->commit_mutex); + mutex_unlock(&JFS_IP(old_ip)->commit_mutex); if (old_dir != new_dir) - up(&JFS_IP(old_dir)->commit_sem); + mutex_unlock(&JFS_IP(old_dir)->commit_mutex); if (!S_ISDIR(old_ip->i_mode) && new_ip) IWRITE_UNLOCK(new_ip); jfs_error(new_ip->i_sb, @@ -1282,16 +1282,16 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, out4: txEnd(tid); - up(&JFS_IP(new_dir)->commit_sem); - up(&JFS_IP(old_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_dir)->commit_mutex); + mutex_unlock(&JFS_IP(old_ip)->commit_mutex); if (old_dir != new_dir) - up(&JFS_IP(old_dir)->commit_sem); + mutex_unlock(&JFS_IP(old_dir)->commit_mutex); if (new_ip) - up(&JFS_IP(new_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_ip)->commit_mutex); while (new_size && (rc == 0)) { tid = txBegin(new_ip->i_sb, 0); - down(&JFS_IP(new_ip)->commit_sem); + mutex_lock(&JFS_IP(new_ip)->commit_mutex); new_size = xtTruncate_pmap(tid, new_ip, new_size); if (new_size < 0) { txAbort(tid, 1); @@ -1299,7 +1299,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, } else rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC); txEnd(tid); - up(&JFS_IP(new_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_ip)->commit_mutex); } if (new_ip && (new_ip->i_nlink == 0)) set_cflag(COMMIT_Nolink, new_ip); @@ -1361,8 +1361,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, tid = txBegin(dir->i_sb, 0); - down(&JFS_IP(dir)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dir)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); rc = jfs_init_acl(tid, ip, dir); if (rc) @@ -1407,8 +1407,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, out3: txEnd(tid); - up(&JFS_IP(ip)->commit_sem); - up(&JFS_IP(dir)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dir)->commit_mutex); if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 8d31f1336431..1639d2cd371f 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -617,7 +617,7 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); INIT_LIST_HEAD(&jfs_ip->anon_inode_list); init_rwsem(&jfs_ip->rdwrlock); - init_MUTEX(&jfs_ip->commit_sem); + mutex_init(&jfs_ip->commit_mutex); init_rwsem(&jfs_ip->xattr_sem); spin_lock_init(&jfs_ip->ag_lock); jfs_ip->active_ag = -1; diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index f23048f9471f..9bc5b7c055ce 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -934,13 +934,13 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, } tid = txBegin(inode->i_sb, 0); - down(&ji->commit_sem); + mutex_lock(&ji->commit_mutex); rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len, flags); if (!rc) rc = txCommit(tid, 1, &inode, 0); txEnd(tid); - up(&ji->commit_sem); + mutex_unlock(&ji->commit_mutex); return rc; } @@ -1093,12 +1093,12 @@ int jfs_removexattr(struct dentry *dentry, const char *name) return rc; tid = txBegin(inode->i_sb, 0); - down(&ji->commit_sem); + mutex_lock(&ji->commit_mutex); rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE); if (!rc) rc = txCommit(tid, 1, &inode, 0); txEnd(tid); - up(&ji->commit_sem); + mutex_unlock(&ji->commit_mutex); return rc; } -- cgit v1.2.3 From 
fa3241d24cf1182b0ffb6e4d412c3bc2a2ab7bf6 Mon Sep 17 00:00:00 2001 From: Herbert Poetzl Date: Thu, 9 Feb 2006 09:09:16 -0600 Subject: JFS: ext2 inode attributes for jfs ext2 inode attributes with relevance for jfs: 'a' EXT2_APPEND_FL -> append only 'i' EXT2_IMMUTABLE_FL -> immutable file 's' EXT2_SECRM_FL -> zero file 'u' EXT2_UNRM_FL -> allow for unrm 'A' EXT2_NOATIME_FL -> no access time 'D' EXT2_DIRSYNC_FL -> dirsync 'S' EXT2_SYNC_FL -> sync overview of jfs flags (partially for OS/2) value (OS/2) Linux ext2 attrs ------------------------------------------------ 0x00010000 IFJOURNAL - 0x00020000 ISPARSE used 0x00040000 INLINEEA used 0x00080000 - - JFS_NOATIME_FL 0x00100000 - - JFS_DIRSYNC_FL 0x00200000 - - JFS_SYNC_FL 0x00400000 - - JFS_SECRM_FL 0x00800000 ISWAPFILE - JFS_UNRM_FL 0x01000000 - - JFS_APPEND_FL 0x02000000 IREADONLY - JFS_IMMUTABLE_FL 0x04000000 IHIDDEN - - 0x08000000 ISYSTEM - - 0x10000000 - - 0x20000000 IDIRECTORY used 0x40000000 IARCHIVE - 0x80000000 INEWNAME - the implementation is straight forward, except for the fact that the attributes have to be mapped to match with the ext2 ones to avoid a separate tool for manipulating them (this could be avoided when using a separate flag field in the on-disk representation, but the overhead is minimal) a special jfs_ioctl is added to allow for the new JFS_IOC_GETFLAGS and JFS_IOC_SETFLAGS calls. a helper function jfs_set_inode_flags() to transfer the flags from the on-disk version to the inode minor changes to allow flag inheritance on inode creation, as well as a cleanup of the on-disk flags (including the new ones) beforementioned helper to map between ext2 and jfs versions of the new flags ... the JFS_SECRM_FL and JFS_UNRM_FL are not done yet and I'm not 100% sure they are worth the effort, the rest seems to work out of the box ... 
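For illustration of the mapping described above: when a tool such as chattr sets the ext2-style append-only bit ('a', EXT2_APPEND_FL, 0x00000020) through JFS_IOC_SETFLAGS, jfs_map_ext2() translates it to JFS_APPEND_FL (0x01000000) before it is stored in the inode's mode2 word, and jfs_set_inode_flags() then raises S_APPEND on the in-core inode so the VFS enforces append-only behaviour.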
Signed-off-by: Herbert Poetzl Signed-off-by: Dave Kleikamp --- fs/jfs/Makefile | 3 ++- fs/jfs/file.c | 1 + fs/jfs/inode.c | 1 + fs/jfs/jfs_dinode.h | 31 +++++++++++++++++++++++++++---- fs/jfs/jfs_inode.c | 37 ++++++++++++++++++++++++++++++++++--- fs/jfs/jfs_inode.h | 3 +++ fs/jfs/namei.c | 1 + 7 files changed, 69 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile index 6f1e0e95587a..3adb6395e42d 100644 --- a/fs/jfs/Makefile +++ b/fs/jfs/Makefile @@ -8,7 +8,8 @@ jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ jfs_unicode.o jfs_dtree.o jfs_inode.o \ jfs_extent.o symlink.o jfs_metapage.o \ - jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o resize.o xattr.o + jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \ + resize.o xattr.o ioctl.o jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o diff --git a/fs/jfs/file.c b/fs/jfs/file.c index c2c19c9ed9a4..e1ac6e497e2b 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -113,4 +113,5 @@ struct file_operations jfs_file_operations = { .sendfile = generic_file_sendfile, .fsync = jfs_fsync, .release = jfs_release, + .ioctl = jfs_ioctl, }; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index d7834a9117ce..51a5fed90cca 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -55,6 +55,7 @@ void jfs_read_inode(struct inode *inode) inode->i_op = &jfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); } + jfs_set_inode_flags(inode); } /* diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 580a3258449b..9f2572aea561 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -139,13 +139,36 @@ struct dinode { /* more extended mode bits: attributes for OS/2 */ #define IREADONLY 0x02000000 /* no write access to file */ -#define IARCHIVE 0x40000000 /* file archive bit */ -#define ISYSTEM 0x08000000 /* system file */ #define IHIDDEN 0x04000000 /* hidden file */ -#define IRASH 0x4E000000 /* mask for changeable attributes */ -#define INEWNAME 0x80000000 /* non-8.3 filename format */ +#define ISYSTEM 0x08000000 /* system file */ + #define IDIRECTORY 0x20000000 /* directory (shadow of real bit) */ +#define IARCHIVE 0x40000000 /* file archive bit */ +#define INEWNAME 0x80000000 /* non-8.3 filename format */ + +#define IRASH 0x4E000000 /* mask for changeable attributes */ #define ATTRSHIFT 25 /* bits to shift to move attribute specification to mode position */ +/* extended attributes for Linux */ + +#define JFS_NOATIME_FL 0x00080000 /* do not update atime */ + +#define JFS_DIRSYNC_FL 0x00100000 /* dirsync behaviour */ +#define JFS_SYNC_FL 0x00200000 /* Synchronous updates */ +#define JFS_SECRM_FL 0x00400000 /* Secure deletion */ +#define JFS_UNRM_FL 0x00800000 /* allow for undelete */ + +#define JFS_APPEND_FL 0x01000000 /* writes to file may only append */ +#define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */ + +#define JFS_FL_USER_VISIBLE 0x03F80000 +#define JFS_FL_USER_MODIFIABLE 0x03F80000 +#define JFS_FL_INHERIT 0x03C80000 + +/* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */ +#define JFS_IOC_GETFLAGS _IOR('f', 1, long) +#define JFS_IOC_SETFLAGS _IOW('f', 2, long) + + #endif /*_H_JFS_DINODE */ diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 2af5efbfd06f..ae2772cba2d1 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -25,6 +25,26 @@ #include "jfs_dinode.h" #include "jfs_debug.h" + +void jfs_set_inode_flags(struct inode *inode) +{ + unsigned int flags = JFS_IP(inode)->mode2; + + inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | + 
S_NOATIME | S_DIRSYNC | S_SYNC); + + if (flags & JFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & JFS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & JFS_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & JFS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; + if (flags & JFS_SYNC_FL) + inode->i_flags |= S_SYNC; +} + /* * NAME: ialloc() * @@ -74,10 +94,20 @@ struct inode *ialloc(struct inode *parent, umode_t mode) } inode->i_mode = mode; - if (S_ISDIR(mode)) - jfs_inode->mode2 = IDIRECTORY | mode; + /* inherit flags from parent */ + jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT; + + if (S_ISDIR(mode)) { + jfs_inode->mode2 |= IDIRECTORY; + jfs_inode->mode2 &= ~JFS_DIRSYNC_FL; + } + else if (S_ISLNK(mode)) + jfs_inode->mode2 &= + ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); else - jfs_inode->mode2 = INLINEEA | ISPARSE | mode; + jfs_inode->mode2 |= INLINEEA | ISPARSE; + jfs_inode->mode2 |= mode; + inode->i_blksize = sb->s_blocksize; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; @@ -98,6 +128,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_inode->atlhead = 0; jfs_inode->atltail = 0; jfs_inode->xtlid = 0; + jfs_set_inode_flags(inode); jfs_info("ialloc returns inode = 0x%p\n", inode); diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index b54bac576cb3..095d471b9f9a 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -20,6 +20,8 @@ extern struct inode *ialloc(struct inode *, umode_t); extern int jfs_fsync(struct file *, struct dentry *, int); +extern int jfs_ioctl(struct inode *, struct file *, + unsigned int, unsigned long); extern void jfs_read_inode(struct inode *); extern int jfs_commit_inode(struct inode *, int); extern int jfs_write_inode(struct inode*, int); @@ -29,6 +31,7 @@ extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); +extern void jfs_set_inode_flags(struct inode *); extern struct address_space_operations jfs_aops; extern struct inode_operations jfs_dir_inode_operations; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index ed4d170c212d..309cee575f7d 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1523,6 +1523,7 @@ struct file_operations jfs_dir_operations = { .read = generic_read_dir, .readdir = jfs_readdir, .fsync = jfs_fsync, + .ioctl = jfs_ioctl, }; static int jfs_ci_hash(struct dentry *dir, struct qstr *this) -- cgit v1.2.3 From 4837c672fd4d43c519d6b53308ee68d45b91b872 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 10 Feb 2006 08:11:53 -0600 Subject: JFS: Fix regression. 
fsck complains if symlinks do not have INLINEEA attribute Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index ae2772cba2d1..ffd2a8a00787 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -101,11 +101,11 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_inode->mode2 |= IDIRECTORY; jfs_inode->mode2 &= ~JFS_DIRSYNC_FL; } - else if (S_ISLNK(mode)) - jfs_inode->mode2 &= - ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); - else + else { jfs_inode->mode2 |= INLINEEA | ISPARSE; + if (S_ISLNK(mode)) + jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); + } jfs_inode->mode2 |= mode; inode->i_blksize = sb->s_blocksize; -- cgit v1.2.3 From 91dbb4deb30e817efc8d6bed89b1190a489ca776 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Feb 2006 12:49:04 -0600 Subject: JFS: Use the kthread_ API Use the kthread_ API instead of opencoding lots of hairy code for kernel thread creation and teardown. Signed-off-by: Christoph Hellwig Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_logmgr.c | 20 ++++++------------ fs/jfs/jfs_superblock.h | 9 +++----- fs/jfs/jfs_txnmgr.c | 26 +++++++---------------- fs/jfs/super.c | 55 ++++++++++++++++++++----------------------------- 4 files changed, 38 insertions(+), 72 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 06bded6c12b9..3113ff53bd55 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -64,6 +64,7 @@ #include #include #include +#include #include /* for sync_blockdev() */ #include #include @@ -81,7 +82,6 @@ */ static struct lbuf *log_redrive_list; static DEFINE_SPINLOCK(log_redrive_lock); -DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait); /* @@ -1980,7 +1980,7 @@ static inline void lbmRedrive(struct lbuf *bp) log_redrive_list = bp; spin_unlock_irqrestore(&log_redrive_lock, flags); - wake_up(&jfs_IO_thread_wait); + wake_up_process(jfsIOthread); } @@ -2347,13 +2347,7 @@ int jfsIOWait(void *arg) { struct lbuf *bp; - daemonize("jfsIO"); - - complete(&jfsIOwait); - do { - DECLARE_WAITQUEUE(wq, current); - spin_lock_irq(&log_redrive_lock); while ((bp = log_redrive_list) != 0) { log_redrive_list = bp->l_redrive_next; @@ -2362,21 +2356,19 @@ int jfsIOWait(void *arg) lbmStartIO(bp); spin_lock_irq(&log_redrive_lock); } + spin_unlock_irq(&log_redrive_lock); + if (freezing(current)) { - spin_unlock_irq(&log_redrive_lock); refrigerator(); } else { - add_wait_queue(&jfs_IO_thread_wait, &wq); set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irq(&log_redrive_lock); schedule(); current->state = TASK_RUNNING; - remove_wait_queue(&jfs_IO_thread_wait, &wq); } - } while (!jfs_stop_threads); + } while (!kthread_should_stop()); jfs_info("jfsIOWait being killed!"); - complete_and_exit(&jfsIOwait, 0); + return 0; } /* diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index fcf781bf31cb..682cf1a68a18 100644 --- a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h @@ -113,12 +113,9 @@ extern int jfs_mount(struct super_block *); extern int jfs_mount_rw(struct super_block *, int); extern int jfs_umount(struct super_block *); extern int jfs_umount_rw(struct super_block *); - -extern int jfs_stop_threads; -extern struct completion jfsIOwait; -extern wait_queue_head_t jfs_IO_thread_wait; -extern wait_queue_head_t jfs_commit_thread_wait; -extern wait_queue_head_t jfs_sync_thread_wait; extern int jfs_extendfs(struct super_block *, s64, int); +extern struct task_struct *jfsIOthread; 
+extern struct task_struct *jfsSyncThread; + #endif /*_H_JFS_SUPERBLOCK */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index d38f605d9481..ac3d66948e8c 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -121,8 +122,7 @@ static DEFINE_SPINLOCK(jfsTxnLock); #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) -DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait); -DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); +static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); static int jfs_commit_thread_waking; /* @@ -207,7 +207,7 @@ static lid_t txLockAlloc(void) if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { jfs_info("txLockAlloc tlocks low"); jfs_tlocks_low = 1; - wake_up(&jfs_sync_thread_wait); + wake_up_process(jfsSyncThread); } return lid; @@ -2743,10 +2743,6 @@ int jfs_lazycommit(void *arg) unsigned long flags; struct jfs_sb_info *sbi; - daemonize("jfsCommit"); - - complete(&jfsIOwait); - do { LAZY_LOCK(flags); jfs_commit_thread_waking = 0; /* OK to wake another thread */ @@ -2806,13 +2802,13 @@ int jfs_lazycommit(void *arg) current->state = TASK_RUNNING; remove_wait_queue(&jfs_commit_thread_wait, &wq); } - } while (!jfs_stop_threads); + } while (!kthread_should_stop()); if (!list_empty(&TxAnchor.unlock_queue)) jfs_err("jfs_lazycommit being killed w/pending transactions!"); else jfs_info("jfs_lazycommit being killed\n"); - complete_and_exit(&jfsIOwait, 0); + return 0; } void txLazyUnlock(struct tblock * tblk) @@ -2932,10 +2928,6 @@ int jfs_sync(void *arg) int rc; tid_t tid; - daemonize("jfsSync"); - - complete(&jfsIOwait); - do { /* * write each inode on the anonymous inode list @@ -2996,19 +2988,15 @@ int jfs_sync(void *arg) TXN_UNLOCK(); refrigerator(); } else { - DECLARE_WAITQUEUE(wq, current); - - add_wait_queue(&jfs_sync_thread_wait, &wq); set_current_state(TASK_INTERRUPTIBLE); TXN_UNLOCK(); schedule(); current->state = TASK_RUNNING; - remove_wait_queue(&jfs_sync_thread_wait, &wq); } - } while (!jfs_stop_threads); + } while (!kthread_should_stop()); jfs_info("jfs_sync being killed"); - complete_and_exit(&jfsIOwait, 0); + return 0; } #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 1639d2cd371f..bd6720d807a6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -54,11 +55,9 @@ static int commit_threads = 0; module_param(commit_threads, int, 0); MODULE_PARM_DESC(commit_threads, "Number of commit threads"); -int jfs_stop_threads; -static pid_t jfsIOthread; -static pid_t jfsCommitThread[MAX_COMMIT_THREADS]; -static pid_t jfsSyncThread; -DECLARE_COMPLETION(jfsIOwait); +static struct task_struct *jfsCommitThread[MAX_COMMIT_THREADS]; +struct task_struct *jfsIOthread; +struct task_struct *jfsSyncThread; #ifdef CONFIG_JFS_DEBUG int jfsloglevel = JFS_LOGLEVEL_WARN; @@ -661,12 +660,12 @@ static int __init init_jfs_fs(void) /* * I/O completion thread (endio) */ - jfsIOthread = kernel_thread(jfsIOWait, NULL, CLONE_KERNEL); - if (jfsIOthread < 0) { - jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsIOthread); + jfsIOthread = kthread_run(jfsIOWait, NULL, "jfsIO"); + if (IS_ERR(jfsIOthread)) { + rc = PTR_ERR(jfsIOthread); + jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); goto end_txmngr; } - 
wait_for_completion(&jfsIOwait); /* Wait until thread starts */ if (commit_threads < 1) commit_threads = num_online_cpus(); @@ -674,24 +673,21 @@ static int __init init_jfs_fs(void) commit_threads = MAX_COMMIT_THREADS; for (i = 0; i < commit_threads; i++) { - jfsCommitThread[i] = kernel_thread(jfs_lazycommit, NULL, - CLONE_KERNEL); - if (jfsCommitThread[i] < 0) { - jfs_err("init_jfs_fs: fork failed w/rc = %d", - jfsCommitThread[i]); + jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL, "jfsCommit"); + if (IS_ERR(jfsCommitThread[i])) { + rc = PTR_ERR(jfsCommitThread[i]); + jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); commit_threads = i; goto kill_committask; } - /* Wait until thread starts */ - wait_for_completion(&jfsIOwait); } - jfsSyncThread = kernel_thread(jfs_sync, NULL, CLONE_KERNEL); - if (jfsSyncThread < 0) { - jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsSyncThread); + jfsSyncThread = kthread_run(jfs_sync, NULL, "jfsSync"); + if (IS_ERR(jfsSyncThread)) { + rc = PTR_ERR(jfsSyncThread); + jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); goto kill_committask; } - wait_for_completion(&jfsIOwait); /* Wait until thread starts */ #ifdef PROC_FS_JFS jfs_proc_init(); @@ -700,13 +696,9 @@ static int __init init_jfs_fs(void) return register_filesystem(&jfs_fs_type); kill_committask: - jfs_stop_threads = 1; - wake_up_all(&jfs_commit_thread_wait); for (i = 0; i < commit_threads; i++) - wait_for_completion(&jfsIOwait); - - wake_up(&jfs_IO_thread_wait); - wait_for_completion(&jfsIOwait); /* Wait for thread exit */ + kthread_stop(jfsCommitThread[i]); + kthread_stop(jfsIOthread); end_txmngr: txExit(); free_metapage: @@ -722,16 +714,13 @@ static void __exit exit_jfs_fs(void) jfs_info("exit_jfs_fs called"); - jfs_stop_threads = 1; txExit(); metapage_exit(); - wake_up(&jfs_IO_thread_wait); - wait_for_completion(&jfsIOwait); /* Wait until IO thread exits */ - wake_up_all(&jfs_commit_thread_wait); + + kthread_stop(jfsIOthread); for (i = 0; i < commit_threads; i++) - wait_for_completion(&jfsIOwait); - wake_up(&jfs_sync_thread_wait); - wait_for_completion(&jfsIOwait); /* Wait until Sync thread exits */ + kthread_stop(jfsCommitThread[i]); + kthread_stop(jfsSyncThread); #ifdef PROC_FS_JFS jfs_proc_clean(); #endif -- cgit v1.2.3 From d9e902668e815f9f33ba5056089684b0704eeac6 Mon Sep 17 00:00:00 2001 From: Herbert Poetzl Date: Wed, 22 Feb 2006 14:14:58 -0600 Subject: JFS: Add missing file from fa3241d24cf1182b0ffb6e4d412c3bc2a2ab7bf6 My mistake here. 
I failed to checkin fs/jfs/ioctl.c Signed-off-by: Dave Kleikamp --- fs/jfs/ioctl.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 fs/jfs/ioctl.c (limited to 'fs') diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c new file mode 100644 index 000000000000..67b3774820eb --- /dev/null +++ b/fs/jfs/ioctl.c @@ -0,0 +1,107 @@ +/* + * linux/fs/jfs/ioctl.c + * + * Copyright (C) 2006 Herbert Poetzl + * adapted from Remy Card's ext2/ioctl.c + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "jfs_incore.h" +#include "jfs_dinode.h" +#include "jfs_inode.h" + + +static struct { + long jfs_flag; + long ext2_flag; +} jfs_map[] = { + {JFS_NOATIME_FL, EXT2_NOATIME_FL}, + {JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL}, + {JFS_SYNC_FL, EXT2_SYNC_FL}, + {JFS_SECRM_FL, EXT2_SECRM_FL}, + {JFS_UNRM_FL, EXT2_UNRM_FL}, + {JFS_APPEND_FL, EXT2_APPEND_FL}, + {JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL}, + {0, 0}, +}; + +static long jfs_map_ext2(unsigned long flags, int from) +{ + int index=0; + long mapped=0; + + while (jfs_map[index].jfs_flag) { + if (from) { + if (jfs_map[index].ext2_flag & flags) + mapped |= jfs_map[index].jfs_flag; + } else { + if (jfs_map[index].jfs_flag & flags) + mapped |= jfs_map[index].ext2_flag; + } + index++; + } + return mapped; +} + + +int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, + unsigned long arg) +{ + struct jfs_inode_info *jfs_inode = JFS_IP(inode); + unsigned int flags; + + switch (cmd) { + case JFS_IOC_GETFLAGS: + flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; + flags = jfs_map_ext2(flags, 0); + return put_user(flags, (int __user *) arg); + case JFS_IOC_SETFLAGS: { + unsigned int oldflags; + + if (IS_RDONLY(inode)) + return -EROFS; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EACCES; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + flags = jfs_map_ext2(flags, 1); + if (!S_ISDIR(inode->i_mode)) + flags &= ~JFS_DIRSYNC_FL; + + oldflags = jfs_inode->mode2; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + */ + if ((oldflags & JFS_IMMUTABLE_FL) || + ((flags ^ oldflags) & + (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + } + + flags = flags & JFS_FL_USER_MODIFIABLE; + flags |= oldflags & ~JFS_FL_USER_MODIFIABLE; + jfs_inode->mode2 = flags; + + jfs_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + return 0; + } + default: + return -ENOTTY; + } +} + -- cgit v1.2.3 From 5b3030e39049212c975665cdb3eeabcfaf7c94ca Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Thu, 23 Feb 2006 09:47:13 -0600 Subject: JFS: kzalloc conversion this converts fs/jfs to kzalloc() usage. 
compile tested with make allyesconfig Signed-off-by: Eric Sesterhenn Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_logmgr.c | 9 +++------ fs/jfs/jfs_metapage.c | 3 +-- fs/jfs/super.c | 3 +-- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 3113ff53bd55..0b348b13b551 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1105,11 +1105,10 @@ int lmLogOpen(struct super_block *sb) } } - if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) { + if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) { up(&jfs_log_sem); return -ENOMEM; } - memset(log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&log->sb_list); init_waitqueue_head(&log->syncwait); @@ -1181,9 +1180,8 @@ static int open_inline_log(struct super_block *sb) struct jfs_log *log; int rc; - if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) + if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) return -ENOMEM; - memset(log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&log->sb_list); init_waitqueue_head(&log->syncwait); @@ -1216,12 +1214,11 @@ static int open_dummy_log(struct super_block *sb) down(&jfs_log_sem); if (!dummy_log) { - dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL); + dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL); if (!dummy_log) { up(&jfs_log_sem); return -ENOMEM; } - memset(dummy_log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&dummy_log->sb_list); init_waitqueue_head(&dummy_log->syncwait); dummy_log->no_integrity = 1; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 8a53981f9f27..5fbaeaadccd3 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -104,10 +104,9 @@ static inline int insert_metapage(struct page *page, struct metapage *mp) if (PagePrivate(page)) a = mp_anchor(page); else { - a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS); + a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS); if (!a) return -ENOMEM; - memset(a, 0, sizeof(struct meta_anchor)); set_page_private(page, (unsigned long)a); SetPagePrivate(page); kmap(page); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index bd6720d807a6..ab6333347004 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -395,10 +395,9 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) if (!new_valid_dev(sb->s_bdev->bd_dev)) return -EOVERFLOW; - sbi = kmalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); + sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); if (!sbi) return -ENOSPC; - memset(sbi, 0, sizeof (struct jfs_sb_info)); sb->s_fs_info = sbi; sbi->sb = sb; -- cgit v1.2.3 From bb8047d3540affd6b8c2adac3fe792e07143be0f Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Tue, 7 Mar 2006 11:53:46 +0000 Subject: NTFS: Fix two compiler warnings on Alpha. Thanks to Andrew Morton for reporting them. Signed-off-by: Anton Altaparmakov --- fs/ntfs/dir.c | 2 +- fs/ntfs/file.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index b0690d4c8906..9d9ed3fe371d 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1136,7 +1136,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (fpos == 1) { ntfs_debug("Calling filldir for .. 
with len 2, fpos 0x1, " "inode 0x%lx, DT_DIR.", - parent_ino(filp->f_dentry)); + (unsigned long)parent_ino(filp->f_dentry)); rc = filldir(dirent, "..", 2, fpos, parent_ino(filp->f_dentry), DT_DIR); if (rc) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 5027d3d1b3fe..2e5ba0c535d9 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -943,7 +943,8 @@ rl_not_mapped_enoent: } ni->runlist.rl = rl; status.runlist_merged = 1; - ntfs_debug("Allocated cluster, lcn 0x%llx.", lcn); + ntfs_debug("Allocated cluster, lcn 0x%llx.", + (unsigned long long)lcn); /* Map and lock the mft record and get the attribute record. */ if (!NInoAttr(ni)) base_ni = ni; -- cgit v1.2.3 From be0bf7da19135a7a0f8c275f20c819940be218d9 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 8 Mar 2006 10:59:15 -0600 Subject: JFS: Take logsync lock before testing mp->lsn This fixes a race where lsn could be cleared before taking the lock Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_dmap.c | 7 ++----- fs/jfs/jfs_imap.c | 6 ++---- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 4fb3ed184925..c161c98954e0 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -532,10 +532,10 @@ dbUpdatePMap(struct inode *ipbmap, lastlblkno = lblkno; + LOGSYNC_LOCK(log, flags); if (mp->lsn != 0) { /* inherit older/smaller lsn */ logdiff(diffp, mp->lsn, log); - LOGSYNC_LOCK(log, flags); if (difft < diffp) { mp->lsn = lsn; @@ -548,20 +548,17 @@ dbUpdatePMap(struct inode *ipbmap, logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log, flags); } else { mp->log = log; mp->lsn = lsn; /* insert bp after tblock in logsync list */ - LOGSYNC_LOCK(log, flags); - log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log, flags); } + LOGSYNC_UNLOCK(log, flags); } /* write the last buffer. */ diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 87dd86c34c22..b62a048b6881 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -2844,11 +2844,11 @@ diUpdatePMap(struct inode *ipimap, */ lsn = tblk->lsn; log = JFS_SBI(tblk->sb)->log; + LOGSYNC_LOCK(log, flags); if (mp->lsn != 0) { /* inherit older/smaller lsn */ logdiff(difft, lsn, log); logdiff(diffp, mp->lsn, log); - LOGSYNC_LOCK(log, flags); if (difft < diffp) { mp->lsn = lsn; /* move mp after tblock in logsync list */ @@ -2860,17 +2860,15 @@ diUpdatePMap(struct inode *ipimap, logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log, flags); } else { mp->log = log; mp->lsn = lsn; /* insert mp after tblock in logsync list */ - LOGSYNC_LOCK(log, flags); log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log, flags); } + LOGSYNC_UNLOCK(log, flags); write_metapage(mp); return (0); } -- cgit v1.2.3 From 69eb66d7da7dba2696281981347698e1693c2340 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Thu, 9 Mar 2006 13:59:30 -0600 Subject: JFS: add uid, gid, and umask mount options OS/2 doesn't initialize the uid, gid, or unix-style permission bits. The uid, gid, & umask mount options perform pretty much like those for the fat file system, overriding what is stored on disk. This is useful for users sharing the file system with OS/2. I implemented a little feature so that if you mask the execute bit, it will be re-enabled on directories when the appropriate read bit is unmasked. I didn't want to implement an fmask & dmask option. 
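A concrete illustration of the override described above (not part of the patch text): mounting with umask=137 presents every regular file's permission bits as 0777 & ~0137 = 0640 regardless of what is stored on disk, while a directory additionally gets back the execute bits implied by its readable bits, so 0640 becomes 0750; the common umask=022 simply yields 0755 everywhere, since no execute bit is masked.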
Signed-off-by: Dave Kleikamp --- fs/jfs/acl.c | 3 +++ fs/jfs/jfs_imap.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------ fs/jfs/jfs_incore.h | 5 +++++ fs/jfs/jfs_inode.c | 7 +++++++ fs/jfs/super.c | 38 ++++++++++++++++++++++++++++++++++---- 5 files changed, 93 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index e0b6fdab200c..e2281300979c 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -183,6 +183,9 @@ cleanup: posix_acl_release(acl); } else inode->i_mode &= ~current->fs->umask; + + JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | + inode->i_mode; return rc; } diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index b62a048b6881..ccbe60aff83d 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -3074,14 +3074,40 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno, static int copy_from_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); jfs_ip->fileset = le32_to_cpu(dip->di_fileset); jfs_ip->mode2 = le32_to_cpu(dip->di_mode); ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; + if (sbi->umask != -1) { + ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask); + /* For directories, add x permission if r is allowed by umask */ + if (S_ISDIR(ip->i_mode)) { + if (ip->i_mode & 0400) + ip->i_mode |= 0100; + if (ip->i_mode & 0040) + ip->i_mode |= 0010; + if (ip->i_mode & 0004) + ip->i_mode |= 0001; + } + } ip->i_nlink = le32_to_cpu(dip->di_nlink); - ip->i_uid = le32_to_cpu(dip->di_uid); - ip->i_gid = le32_to_cpu(dip->di_gid); + + jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); + if (sbi->uid == -1) + ip->i_uid = jfs_ip->saved_uid; + else { + ip->i_uid = sbi->uid; + } + + jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); + if (sbi->gid == -1) + ip->i_gid = jfs_ip->saved_gid; + else { + ip->i_gid = sbi->gid; + } + ip->i_size = le64_to_cpu(dip->di_size); ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); @@ -3132,21 +3158,33 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) static void copy_to_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); dip->di_fileset = cpu_to_le32(jfs_ip->fileset); - dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp); + dip->di_inostamp = cpu_to_le32(sbi->inostamp); dip->di_number = cpu_to_le32(ip->i_ino); dip->di_gen = cpu_to_le32(ip->i_generation); dip->di_size = cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - dip->di_uid = cpu_to_le32(ip->i_uid); - dip->di_gid = cpu_to_le32(ip->i_gid); + if (sbi->uid == -1) + dip->di_uid = cpu_to_le32(ip->i_uid); + else + dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); + if (sbi->gid == -1) + dip->di_gid = cpu_to_le32(ip->i_gid); + else + dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); /* * mode2 is only needed for storing the higher order bits. 
* Trust i_mode for the lower order ones */ - dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | ip->i_mode); + if (sbi->umask == -1) + dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | + ip->i_mode); + else /* Leave the original permissions alone */ + dip->di_mode = cpu_to_le32(jfs_ip->mode2); + dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index a97ead889a68..54d73716ca8c 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -38,6 +38,8 @@ struct jfs_inode_info { int fileset; /* fileset number (always 16)*/ uint mode2; /* jfs-specific mode */ + uint saved_uid; /* saved for uid mount option */ + uint saved_gid; /* saved for gid mount option */ pxd_t ixpxd; /* inode extent descriptor */ dxd_t acl; /* dxd describing acl */ dxd_t ea; /* dxd describing ea */ @@ -170,6 +172,9 @@ struct jfs_sb_info { uint state; /* mount/recovery state */ unsigned long flag; /* mount time flags */ uint p_state; /* state prior to going no integrity */ + uint uid; /* uid to override on-disk uid */ + uint gid; /* gid to override on-disk gid */ + uint umask; /* umask to override on-disk umask */ }; /* jfs_sb_info commit_state */ diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index ffd2a8a00787..495df402916d 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -82,6 +82,13 @@ struct inode *ialloc(struct inode *parent, umode_t mode) } else inode->i_gid = current->fsgid; + /* + * New inodes need to save sane values on disk when + * uid & gid mount options are used + */ + jfs_inode->saved_uid = inode->i_uid; + jfs_inode->saved_gid = inode->i_gid; + /* * Allocate inode to quota. 
*/ diff --git a/fs/jfs/super.c b/fs/jfs/super.c index ab6333347004..18f69e6aa719 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -194,7 +194,7 @@ static void jfs_put_super(struct super_block *sb) enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota + Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask }; static match_table_t tokens = { @@ -208,6 +208,9 @@ static match_table_t tokens = { {Opt_ignore, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_grpquota, "grpquota"}, + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_umask, "umask=%u"}, {Opt_err, NULL} }; @@ -312,7 +315,29 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, "JFS: quota operations not supported\n"); break; #endif - + case Opt_uid: + { + char *uid = args[0].from; + sbi->uid = simple_strtoul(uid, &uid, 0); + break; + } + case Opt_gid: + { + char *gid = args[0].from; + sbi->gid = simple_strtoul(gid, &gid, 0); + break; + } + case Opt_umask: + { + char *umask = args[0].from; + sbi->umask = simple_strtoul(umask, &umask, 8); + if (sbi->umask & ~0777) { + printk(KERN_ERR + "JFS: Invalid value of umask\n"); + goto cleanup; + } + break; + } default: printk("jfs: Unrecognized mount option \"%s\" " " or missing value\n", p); @@ -400,6 +425,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) return -ENOSPC; sb->s_fs_info = sbi; sbi->sb = sb; + sbi->uid = sbi->gid = sbi->umask = -1; /* initialize the mount flag and determine the default error handler */ flag = JFS_ERR_REMOUNT_RO; @@ -562,10 +588,14 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb); + if (sbi->uid != -1) + seq_printf(seq, ",uid=%d", sbi->uid); + if (sbi->gid != -1) + seq_printf(seq, ",gid=%d", sbi->gid); + if (sbi->umask != -1) + seq_printf(seq, ",umask=%03o", sbi->umask); if (sbi->flag & JFS_NOINTEGRITY) seq_puts(seq, ",nointegrity"); - else - seq_puts(seq, ",integrity"); #if defined(CONFIG_QUOTA) if (sbi->flag & JFS_USRQUOTA) -- cgit v1.2.3 From 0c9512d74635198d90f349acec19381e446ba2b4 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Tue, 14 Mar 2006 13:02:13 +1100 Subject: [XFS] find_exported_dentry(). XFS does not need to use this symbol as it is provided by a vector through the superblock export operations when the filesystem is exported by NFS. The fix is to call that vector instead of using the exported symbol directly. SGI-PV: 948858 SGI-Modid: xfs-linux-melb:xfs-kern:25062a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 80eb249f2fa0..821bd12dd858 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -78,7 +78,7 @@ linvfs_decode_fh( } fh = (__u32 *)&ifid; - return find_exported_dentry(sb, fh, parent, acceptable, context); + return sb->s_export_op->find_exported_dentry(sb, fh, parent, acceptable, context); } -- cgit v1.2.3 From 9f4cbecd7e5ee6390fecd6032dc04ca8c9805dc9 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:05:30 +1100 Subject: [XFS] XFS propagates MS_NOATIME through two levels internally but doesn't actually use it. Kill this dead code. 
Signed-off-by: Christoph Hellwig SGI-PV: 904196 SGI-Modid: xfs-linux-melb:xfs-kern:25086a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_super.c | 2 -- fs/xfs/xfs_clnt.h | 2 -- fs/xfs/xfs_mount.h | 2 -- fs/xfs/xfs_vfsops.c | 7 ------- 4 files changed, 13 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index f22e426d9e42..59989f6f83ef 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -76,8 +76,6 @@ xfs_args_allocate( strncpy(args->fsname, sb->s_id, MAXNAMELEN); /* Copy the already-parsed mount(2) flags we're interested in */ - if (sb->s_flags & MS_NOATIME) - args->flags |= XFSMNT_NOATIME; if (sb->s_flags & MS_DIRSYNC) args->flags |= XFSMNT_DIRSYNC; if (sb->s_flags & MS_SYNCHRONOUS) diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h index f57cc9ac875e..022fff62085b 100644 --- a/fs/xfs/xfs_clnt.h +++ b/fs/xfs/xfs_clnt.h @@ -68,8 +68,6 @@ struct xfs_mount_args { * enforcement */ #define XFSMNT_PQUOTAENF 0x00000040 /* IRIX project quota limit * enforcement */ -#define XFSMNT_NOATIME 0x00000100 /* don't modify access - * times on reads */ #define XFSMNT_NOALIGN 0x00000200 /* don't allocate at * stripe boundaries*/ #define XFSMNT_RETERR 0x00000400 /* return error to user */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index cd3cf9613a00..4c9817a80435 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -386,8 +386,6 @@ typedef struct xfs_mount { #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem operations, typically for disk errors in metadata */ -#define XFS_MOUNT_NOATIME (1ULL << 5) /* don't modify inode access - times on reads */ #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to user */ #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index b6ad370fab3d..2a0a9efb8ccb 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -258,8 +258,6 @@ xfs_start_flags( mp->m_inoadd = XFS_INO64_OFFSET; } #endif - if (ap->flags & XFSMNT_NOATIME) - mp->m_flags |= XFS_MOUNT_NOATIME; if (ap->flags & XFSMNT_RETERR) mp->m_flags |= XFS_MOUNT_RETERR; if (ap->flags & XFSMNT_NOALIGN) @@ -654,11 +652,6 @@ xfs_mntupdate( xfs_mount_t *mp = XFS_BHVTOM(bdp); int error; - if (args->flags & XFSMNT_NOATIME) - mp->m_flags |= XFS_MOUNT_NOATIME; - else - mp->m_flags &= ~XFS_MOUNT_NOATIME; - if (args->flags & XFSMNT_BARRIER) mp->m_flags |= XFS_MOUNT_BARRIER; else -- cgit v1.2.3 From 8d280b98cfe3c0b69c37d355218975c1c0279bb0 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Tue, 14 Mar 2006 13:13:09 +1100 Subject: [XFS] On machines with more than 8 cpus, when running parallel I/O threads, the incore superblock lock becomes the limiting factor for buffered write throughput. Make the contended fields in the incore superblock use per-cpu counters so that there is no global lock to limit scalability. 
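As a rough userspace model of that approach (an illustration only, not the kernel code; NCPUS, struct shard and the function names are all invented for the example): the global free-space count is split into per-CPU shards, the common case touches only the local shard under its own lock, and an accurate total is only assembled by walking every shard.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NCPUS 4

struct shard {
    pthread_mutex_t lock;
    int64_t         free_blocks;
};

static struct shard shards[NCPUS];

/* Fast path: touch only the calling CPU's shard, under its local lock. */
static int mod_free_blocks(int cpu, int64_t delta)
{
    struct shard *s = &shards[cpu];
    int ret = 0;

    pthread_mutex_lock(&s->lock);
    if (s->free_blocks + delta < 0)
        ret = -1;   /* local ENOSPC: caller falls back to a slow path */
    else
        s->free_blocks += delta;
    pthread_mutex_unlock(&s->lock);
    return ret;
}

/* Accurate total: the only operation that must visit every shard. */
static int64_t sum_free_blocks(void)
{
    int64_t total = 0;

    for (int i = 0; i < NCPUS; i++) {
        pthread_mutex_lock(&shards[i].lock);
        total += shards[i].free_blocks;
        pthread_mutex_unlock(&shards[i].lock);
    }
    return total;
}

int main(void)
{
    for (int i = 0; i < NCPUS; i++) {
        pthread_mutex_init(&shards[i].lock, NULL);
        shards[i].free_blocks = 1000;
    }
    mod_free_blocks(0, -250);
    printf("free blocks: %lld\n", (long long)sum_free_blocks());
    return 0;
}

The patch below adds the extra machinery this simple model lacks: counters can be disabled and rebalanced near ENOSPC so that callers fall back to the single locked superblock copy when the per-CPU shards run dry.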
SGI-PV: 946630 SGI-Modid: xfs-linux-melb:xfs-kern:25106a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_linux.h | 5 + fs/xfs/xfs_fsops.c | 1 + fs/xfs/xfs_mount.c | 560 +++++++++++++++++++++++++++++++++++++++++-- fs/xfs/xfs_mount.h | 34 +++ fs/xfs/xfs_vfsops.c | 3 +- 5 files changed, 586 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 67389b745526..377a9f54a049 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -100,6 +100,11 @@ */ #undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ #define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */ +#if CONFIG_SMP +#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ +#else +#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ +#endif /* * State flag for unwritten extent buffers. diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index b4d971b01588..56caa88713ab 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -462,6 +462,7 @@ xfs_fs_counts( { unsigned long s; + xfs_icsb_sync_counters_lazy(mp); s = XFS_SB_LOCK(mp); cnt->freedata = mp->m_sb.sb_fdblocks; cnt->freertx = mp->m_sb.sb_frextents; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 62188ea392c7..9b43b7b3d760 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -51,11 +51,31 @@ STATIC int xfs_uuid_mount(xfs_mount_t *); STATIC void xfs_uuid_unmount(xfs_mount_t *mp); STATIC void xfs_unmountfs_wait(xfs_mount_t *); + +#ifdef HAVE_PERCPU_SB +STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); +STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int); +STATIC void xfs_icsb_sync_counters(xfs_mount_t *); +STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, + int, int); +STATIC int xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t, + int, int); + +#else + +#define xfs_icsb_destroy_counters(mp) do { } while (0) +#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) +#define xfs_icsb_sync_counters(mp) do { } while (0) +#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) +#define xfs_icsb_modify_counters_locked(mp, a, b, c) do { } while (0) + +#endif + static const struct { - short offset; - short type; /* 0 = integer - * 1 = binary / string (no translation) - */ + short offset; + short type; /* 0 = integer + * 1 = binary / string (no translation) + */ } xfs_sb_info[] = { { offsetof(xfs_sb_t, sb_magicnum), 0 }, { offsetof(xfs_sb_t, sb_blocksize), 0 }, @@ -113,7 +133,11 @@ xfs_mount_init(void) { xfs_mount_t *mp; - mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); + mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP); + + if (xfs_icsb_init_counters(mp)) { + mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; + } AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail"); spinlock_init(&mp->m_sb_lock, "xfs_sb"); @@ -136,8 +160,8 @@ xfs_mount_init(void) */ void xfs_mount_free( - xfs_mount_t *mp, - int remove_bhv) + xfs_mount_t *mp, + int remove_bhv) { if (mp->m_ihash) xfs_ihash_free(mp); @@ -177,6 +201,7 @@ xfs_mount_free( VFS_REMOVEBHV(vfsp, &mp->m_bhv); } + xfs_icsb_destroy_counters(mp); kmem_free(mp, sizeof(xfs_mount_t)); } @@ -527,6 +552,10 @@ xfs_readsb(xfs_mount_t *mp) ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); } + xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); + mp->m_sb_bp = bp; xfs_buf_relse(bp); ASSERT(XFS_BUF_VALUSEMA(bp) > 0); @@ -1154,6 
+1183,9 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) sbp = xfs_getsb(mp, 0); if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY || XFS_FORCED_SHUTDOWN(mp))) { + + xfs_icsb_sync_counters(mp); + /* * mark shared-readonly if desired */ @@ -1227,7 +1259,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) xfs_trans_log_buf(tp, bp, first, last); } - /* * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply * a delta to a specified field in the in-core superblock. Simply @@ -1237,7 +1268,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) * * The SB_LOCK must be held when this routine is called. */ -STATIC int +int xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd) { @@ -1406,9 +1437,26 @@ xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd) unsigned long s; int status; - s = XFS_SB_LOCK(mp); - status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - XFS_SB_UNLOCK(mp, s); + /* check for per-cpu counters */ + switch (field) { +#ifdef HAVE_PERCPU_SB + case XFS_SBS_ICOUNT: + case XFS_SBS_IFREE: + case XFS_SBS_FDBLOCKS: + if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { + status = xfs_icsb_modify_counters(mp, field, + delta, rsvd); + break; + } + /* FALLTHROUGH */ +#endif + default: + s = XFS_SB_LOCK(mp); + status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); + XFS_SB_UNLOCK(mp, s); + break; + } + return status; } @@ -1445,8 +1493,26 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) * from the loop so we'll fall into the undo loop * below. */ - status = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, - msbp->msb_delta, rsvd); + switch (msbp->msb_field) { +#ifdef HAVE_PERCPU_SB + case XFS_SBS_ICOUNT: + case XFS_SBS_IFREE: + case XFS_SBS_FDBLOCKS: + if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { + status = xfs_icsb_modify_counters_locked(mp, + msbp->msb_field, + msbp->msb_delta, rsvd); + break; + } + /* FALLTHROUGH */ +#endif + default: + status = xfs_mod_incore_sb_unlocked(mp, + msbp->msb_field, + msbp->msb_delta, rsvd); + break; + } + if (status != 0) { break; } @@ -1463,8 +1529,28 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) if (status != 0) { msbp--; while (msbp >= msb) { - status = xfs_mod_incore_sb_unlocked(mp, - msbp->msb_field, -(msbp->msb_delta), rsvd); + switch (msbp->msb_field) { +#ifdef HAVE_PERCPU_SB + case XFS_SBS_ICOUNT: + case XFS_SBS_IFREE: + case XFS_SBS_FDBLOCKS: + if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { + status = + xfs_icsb_modify_counters_locked(mp, + msbp->msb_field, + -(msbp->msb_delta), + rsvd); + break; + } + /* FALLTHROUGH */ +#endif + default: + status = xfs_mod_incore_sb_unlocked(mp, + msbp->msb_field, + -(msbp->msb_delta), + rsvd); + break; + } ASSERT(status == 0); msbp--; } @@ -1577,3 +1663,445 @@ xfs_mount_log_sbunit( xfs_mod_sb(tp, fields); xfs_trans_commit(tp, 0, NULL); } + + +#ifdef HAVE_PERCPU_SB +/* + * Per-cpu incore superblock counters + * + * Simple concept, difficult implementation + * + * Basically, replace the incore superblock counters with a distributed per cpu + * counter for contended fields (e.g. free block count). + * + * Difficulties arise in that the incore sb is used for ENOSPC checking, and + * hence needs to be accurately read when we are running low on space. Hence + * there is a method to enable and disable the per-cpu counters based on how + * much "stuff" is available in them. 
+ * + * Basically, a counter is enabled if there is enough free resource to justify + * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local + * ENOSPC), then we disable the counters to synchronise all callers and + * re-distribute the available resources. + * + * If, once we redistributed the available resources, we still get a failure, + * we disable the per-cpu counter and go through the slow path. + * + * The slow path is the current xfs_mod_incore_sb() function. This means that + * when we disable a per-cpu counter, we need to drain its resources back to + * the global superblock. We do this after disabling the counter to prevent + * more threads from queueing up on the counter. + * + * Essentially, this means that we still need a lock in the fast path to enable + * synchronisation between the global counters and the per-cpu counters. This + * is not a problem because the lock will be local to a CPU almost all the time + * and have little contention except when we get to ENOSPC conditions. + * + * Basically, this lock becomes a barrier that enables us to lock out the fast + * path while we do things like enabling and disabling counters and + * synchronising the counters. + * + * Locking rules: + * + * 1. XFS_SB_LOCK() before picking up per-cpu locks + * 2. per-cpu locks always picked up via for_each_online_cpu() order + * 3. accurate counter sync requires XFS_SB_LOCK + per cpu locks + * 4. modifying per-cpu counters requires holding per-cpu lock + * 5. modifying global counters requires holding XFS_SB_LOCK + * 6. enabling or disabling a counter requires holding the XFS_SB_LOCK + * and _none_ of the per-cpu locks. + * + * Disabled counters are only ever re-enabled by a balance operation + * that results in more free resources per CPU than a given threshold. + * To ensure counters don't remain disabled, they are rebalanced when + * the global resource goes above a higher threshold (i.e. some hysteresis + * is present to prevent thrashing).
+ * + * Note: hotplug CPUs not yet supported + */ +int +xfs_icsb_init_counters( + xfs_mount_t *mp) +{ + xfs_icsb_cnts_t *cntp; + int i; + + mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t); + if (mp->m_sb_cnts == NULL) + return -ENOMEM; + + for_each_online_cpu(i) { + cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); + spin_lock_init(&cntp->icsb_lock); + } + /* + * start with all counters disabled so that the + * initial balance kicks us off correctly + */ + mp->m_icsb_counters = -1; + return 0; +} + +STATIC void +xfs_icsb_destroy_counters( + xfs_mount_t *mp) +{ + if (mp->m_sb_cnts) + free_percpu(mp->m_sb_cnts); +} + + +STATIC inline void +xfs_icsb_lock_all_counters( + xfs_mount_t *mp) +{ + xfs_icsb_cnts_t *cntp; + int i; + + for_each_online_cpu(i) { + cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); + spin_lock(&cntp->icsb_lock); + } +} + +STATIC inline void +xfs_icsb_unlock_all_counters( + xfs_mount_t *mp) +{ + xfs_icsb_cnts_t *cntp; + int i; + + for_each_online_cpu(i) { + cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); + spin_unlock(&cntp->icsb_lock); + } +} + +STATIC void +xfs_icsb_count( + xfs_mount_t *mp, + xfs_icsb_cnts_t *cnt, + int flags) +{ + xfs_icsb_cnts_t *cntp; + int i; + + memset(cnt, 0, sizeof(xfs_icsb_cnts_t)); + + if (!(flags & XFS_ICSB_LAZY_COUNT)) + xfs_icsb_lock_all_counters(mp); + + for_each_online_cpu(i) { + cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); + cnt->icsb_icount += cntp->icsb_icount; + cnt->icsb_ifree += cntp->icsb_ifree; + cnt->icsb_fdblocks += cntp->icsb_fdblocks; + } + + if (!(flags & XFS_ICSB_LAZY_COUNT)) + xfs_icsb_unlock_all_counters(mp); +} + +STATIC int +xfs_icsb_counter_disabled( + xfs_mount_t *mp, + xfs_sb_field_t field) +{ + ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); + return test_bit(field, &mp->m_icsb_counters); +} + +STATIC int +xfs_icsb_disable_counter( + xfs_mount_t *mp, + xfs_sb_field_t field) +{ + xfs_icsb_cnts_t cnt; + + ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); + + xfs_icsb_lock_all_counters(mp); + if (!test_and_set_bit(field, &mp->m_icsb_counters)) { + /* drain back to superblock */ + + xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT); + switch(field) { + case XFS_SBS_ICOUNT: + mp->m_sb.sb_icount = cnt.icsb_icount; + break; + case XFS_SBS_IFREE: + mp->m_sb.sb_ifree = cnt.icsb_ifree; + break; + case XFS_SBS_FDBLOCKS: + mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; + break; + default: + BUG(); + } + } + + xfs_icsb_unlock_all_counters(mp); + + return 0; +} + +STATIC void +xfs_icsb_enable_counter( + xfs_mount_t *mp, + xfs_sb_field_t field, + uint64_t count, + uint64_t resid) +{ + xfs_icsb_cnts_t *cntp; + int i; + + ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); + + xfs_icsb_lock_all_counters(mp); + for_each_online_cpu(i) { + cntp = per_cpu_ptr(mp->m_sb_cnts, i); + switch (field) { + case XFS_SBS_ICOUNT: + cntp->icsb_icount = count + resid; + break; + case XFS_SBS_IFREE: + cntp->icsb_ifree = count + resid; + break; + case XFS_SBS_FDBLOCKS: + cntp->icsb_fdblocks = count + resid; + break; + default: + BUG(); + break; + } + resid = 0; + } + clear_bit(field, &mp->m_icsb_counters); + xfs_icsb_unlock_all_counters(mp); +} + +STATIC void +xfs_icsb_sync_counters_int( + xfs_mount_t *mp, + int flags) +{ + xfs_icsb_cnts_t cnt; + int s; + + /* Pass 1: lock all counters */ + if ((flags & XFS_ICSB_SB_LOCKED) == 0) + s = XFS_SB_LOCK(mp); + + xfs_icsb_count(mp, &cnt, flags); + + /* Step 3: update mp->m_sb fields */ + if (!xfs_icsb_counter_disabled(mp, 
XFS_SBS_ICOUNT)) + mp->m_sb.sb_icount = cnt.icsb_icount; + if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) + mp->m_sb.sb_ifree = cnt.icsb_ifree; + if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) + mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; + + if ((flags & XFS_ICSB_SB_LOCKED) == 0) + XFS_SB_UNLOCK(mp, s); +} + +/* + * Accurate update of per-cpu counters to incore superblock + */ +STATIC void +xfs_icsb_sync_counters( + xfs_mount_t *mp) +{ + xfs_icsb_sync_counters_int(mp, 0); +} + +/* + * lazy addition used for things like df, background sb syncs, etc + */ +void +xfs_icsb_sync_counters_lazy( + xfs_mount_t *mp) +{ + xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT); +} + +/* + * Balance and enable/disable counters as necessary. + * + * Thresholds for re-enabling counters are somewhat magic. + * inode counts are chosen to be the same number as single + * on disk allocation chunk per CPU, and free blocks is + * something far enough zero that we aren't going thrash + * when we get near ENOSPC. + */ +#define XFS_ICSB_INO_CNTR_REENABLE 64 +#define XFS_ICSB_FDBLK_CNTR_REENABLE 512 +STATIC void +xfs_icsb_balance_counter( + xfs_mount_t *mp, + xfs_sb_field_t field, + int flags) +{ + uint64_t count, resid = 0; + int weight = num_online_cpus(); + int s; + + if (!(flags & XFS_ICSB_SB_LOCKED)) + s = XFS_SB_LOCK(mp); + + /* disable counter and sync counter */ + xfs_icsb_disable_counter(mp, field); + + /* update counters - first CPU gets residual*/ + switch (field) { + case XFS_SBS_ICOUNT: + count = mp->m_sb.sb_icount; + resid = do_div(count, weight); + if (count < XFS_ICSB_INO_CNTR_REENABLE) + goto out; + break; + case XFS_SBS_IFREE: + count = mp->m_sb.sb_ifree; + resid = do_div(count, weight); + if (count < XFS_ICSB_INO_CNTR_REENABLE) + goto out; + break; + case XFS_SBS_FDBLOCKS: + count = mp->m_sb.sb_fdblocks; + resid = do_div(count, weight); + if (count < XFS_ICSB_FDBLK_CNTR_REENABLE) + goto out; + break; + default: + BUG(); + break; + } + + xfs_icsb_enable_counter(mp, field, count, resid); +out: + if (!(flags & XFS_ICSB_SB_LOCKED)) + XFS_SB_UNLOCK(mp, s); +} + +STATIC int +xfs_icsb_modify_counters_int( + xfs_mount_t *mp, + xfs_sb_field_t field, + int delta, + int rsvd, + int flags) +{ + xfs_icsb_cnts_t *icsbp; + long long lcounter; /* long counter for 64 bit fields */ + int cpu, s, locked = 0; + int ret = 0, balance_done = 0; + +again: + cpu = get_cpu(); + icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu), + spin_lock(&icsbp->icsb_lock); + if (unlikely(xfs_icsb_counter_disabled(mp, field))) + goto slow_path; + + switch (field) { + case XFS_SBS_ICOUNT: + lcounter = icsbp->icsb_icount; + lcounter += delta; + if (unlikely(lcounter < 0)) + goto slow_path; + icsbp->icsb_icount = lcounter; + break; + + case XFS_SBS_IFREE: + lcounter = icsbp->icsb_ifree; + lcounter += delta; + if (unlikely(lcounter < 0)) + goto slow_path; + icsbp->icsb_ifree = lcounter; + break; + + case XFS_SBS_FDBLOCKS: + BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); + + lcounter = icsbp->icsb_fdblocks; + lcounter += delta; + if (unlikely(lcounter < 0)) + goto slow_path; + icsbp->icsb_fdblocks = lcounter; + break; + default: + BUG(); + break; + } + spin_unlock(&icsbp->icsb_lock); + put_cpu(); + if (locked) + XFS_SB_UNLOCK(mp, s); + return 0; + + /* + * The slow path needs to be run with the SBLOCK + * held so that we prevent other threads from + * attempting to run this path at the same time. 
+ * this provides exclusion for the balancing code, + * and exclusive fallback if the balance does not + * provide enough resources to continue in an unlocked + * manner. + */ +slow_path: + spin_unlock(&icsbp->icsb_lock); + put_cpu(); + + /* need to hold superblock incase we need + * to disable a counter */ + if (!(flags & XFS_ICSB_SB_LOCKED)) { + s = XFS_SB_LOCK(mp); + locked = 1; + flags |= XFS_ICSB_SB_LOCKED; + } + if (!balance_done) { + xfs_icsb_balance_counter(mp, field, flags); + balance_done = 1; + goto again; + } else { + /* + * we might not have enough on this local + * cpu to allocate for a bulk request. + * We need to drain this field from all CPUs + * and disable the counter fastpath + */ + xfs_icsb_disable_counter(mp, field); + } + + ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); + + if (locked) + XFS_SB_UNLOCK(mp, s); + return ret; +} + +STATIC int +xfs_icsb_modify_counters( + xfs_mount_t *mp, + xfs_sb_field_t field, + int delta, + int rsvd) +{ + return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0); +} + +/* + * Called when superblock is already locked + */ +STATIC int +xfs_icsb_modify_counters_locked( + xfs_mount_t *mp, + xfs_sb_field_t field, + int delta, + int rsvd) +{ + return xfs_icsb_modify_counters_int(mp, field, delta, + rsvd, XFS_ICSB_SB_LOCKED); +} +#endif diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4c9817a80435..7cca5110ca44 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -267,6 +267,32 @@ typedef struct xfs_ioops { #define XFS_IODONE(vfsp) \ (*(mp)->m_io_ops.xfs_iodone)(vfsp) +#ifdef HAVE_PERCPU_SB + +/* + * Valid per-cpu incore superblock counters. Note that if you add new counters, + * you may need to define new counter disabled bit field descriptors as there + * are more possible fields in the superblock that can fit in a bitfield on a + * 32 bit platform. The XFS_SBS_* values for the current current counters just + * fit. 
+ */ +typedef struct xfs_icsb_cnts { + uint64_t icsb_fdblocks; + uint64_t icsb_ifree; + uint64_t icsb_icount; + spinlock_t icsb_lock; +} xfs_icsb_cnts_t; + +#define XFS_ICSB_SB_LOCKED (1 << 0) /* sb already locked */ +#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */ + +extern int xfs_icsb_init_counters(struct xfs_mount *); +extern void xfs_icsb_sync_counters_lazy(struct xfs_mount *); + +#else +#define xfs_icsb_init_counters(mp) (0) +#define xfs_icsb_sync_counters_lazy(mp) do { } while (0) +#endif typedef struct xfs_mount { bhv_desc_t m_bhv; /* vfs xfs behavior */ @@ -372,6 +398,10 @@ typedef struct xfs_mount { struct xfs_qmops m_qm_ops; /* vector of XQM ops */ struct xfs_ioops m_io_ops; /* vector of I/O ops */ atomic_t m_active_trans; /* number trans frozen */ +#ifdef HAVE_PERCPU_SB + xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */ + unsigned long m_icsb_counters; /* disabled per-cpu counters */ +#endif } xfs_mount_t; /* @@ -409,6 +439,8 @@ typedef struct xfs_mount { #define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ #define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred * I/O size in stat() */ +#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock + counters */ /* @@ -546,6 +578,8 @@ extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *); extern int xfs_unmountfs_writesb(xfs_mount_t *); extern int xfs_unmount_flush(xfs_mount_t *, int); extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int); +extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, + int, int); extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int); extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 2a0a9efb8ccb..2e1045837881 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -55,7 +55,7 @@ #include "xfs_clnt.h" #include "xfs_fsops.h" -STATIC int xfs_sync(bhv_desc_t *, int, cred_t *); +STATIC int xfs_sync(bhv_desc_t *, int, cred_t *); int xfs_init(void) @@ -807,6 +807,7 @@ xfs_statvfs( statp->f_type = XFS_SB_MAGIC; + xfs_icsb_sync_counters_lazy(mp); s = XFS_SB_LOCK(mp); statp->f_bsize = sbp->sb_blocksize; lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; -- cgit v1.2.3 From 8758280fcc6129be89503efe93bb59eaf2f85d28 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:18:19 +1100 Subject: [XFS] Cleanup the use of zones/slabs, more consistent and allows flags to be passed. 
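A usage sketch of the interface this change introduces (illustrative only, not part of the patch; struct foo_item and the surrounding functions are invented, and the snippet assumes the definitions from the patched kmem.h shown below rather than being standalone code): a zone is created once with allocator hints, objects are then drawn from it and returned to it.

/* hypothetical caller of the new zone interfaces */
static kmem_zone_t *foo_zone;

struct foo_item {
    int state;
};

static int foo_init(void)
{
    foo_zone = kmem_zone_init_flags(sizeof(struct foo_item), "foo_item",
                                    KM_ZONE_HWALIGN | KM_ZONE_RECLAIM,
                                    NULL);
    return foo_zone ? 0 : -ENOMEM;
}

static struct foo_item *foo_alloc(void)
{
    /* KM_SLEEP may block; KM_NOSLEEP would be needed in atomic context */
    return kmem_zone_zalloc(foo_zone, KM_SLEEP);
}

static void foo_free(struct foo_item *item)
{
    kmem_zone_free(foo_zone, item);
}

static void foo_exit(void)
{
    kmem_zone_destroy(foo_zone);
}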
SGI-PV: 949073 SGI-Modid: xfs-linux-melb:xfs-kern:25122a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/kmem.h | 91 ++++++++++++++++++++++++++------------------ fs/xfs/linux-2.6/xfs_buf.c | 7 ++-- fs/xfs/linux-2.6/xfs_super.c | 38 +++++++++--------- fs/xfs/xfs_trans.h | 2 +- fs/xfs/xfs_vfsops.c | 34 ++++++++++++----- 5 files changed, 100 insertions(+), 72 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index c64a29cdfff3..f0268a84e6fd 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -23,17 +23,8 @@ #include /* - * memory management routines + * Process flags handling */ -#define KM_SLEEP 0x0001u -#define KM_NOSLEEP 0x0002u -#define KM_NOFS 0x0004u -#define KM_MAYFAIL 0x0008u - -#define kmem_zone kmem_cache -#define kmem_zone_t struct kmem_cache - -typedef unsigned long xfs_pflags_t; #define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO) #define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS) @@ -67,74 +58,102 @@ typedef unsigned long xfs_pflags_t; *(NSTATEP) = *(OSTATEP); \ } while (0) -static __inline gfp_t kmem_flags_convert(unsigned int __nocast flags) +/* + * General memory allocation interfaces + */ + +#define KM_SLEEP 0x0001u +#define KM_NOSLEEP 0x0002u +#define KM_NOFS 0x0004u +#define KM_MAYFAIL 0x0008u + +/* + * We use a special process flag to avoid recursive callbacks into + * the filesystem during transactions. We will also issue our own + * warnings, so we explicitly skip any generic ones (silly of us). + */ +static inline gfp_t +kmem_flags_convert(unsigned int __nocast flags) { - gfp_t lflags = __GFP_NOWARN; /* we'll report problems, if need be */ + gfp_t lflags; -#ifdef DEBUG - if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { - printk(KERN_WARNING - "XFS: memory allocation with wrong flags (%x)\n", flags); - BUG(); - } -#endif + BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); if (flags & KM_NOSLEEP) { - lflags |= GFP_ATOMIC; + lflags = GFP_ATOMIC | __GFP_NOWARN; } else { - lflags |= GFP_KERNEL; - - /* avoid recusive callbacks to filesystem during transactions */ + lflags = GFP_KERNEL | __GFP_NOWARN; if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS)) lflags &= ~__GFP_FS; } - - return lflags; + return lflags; } -static __inline kmem_zone_t * +extern void *kmem_alloc(size_t, unsigned int __nocast); +extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast); +extern void *kmem_zalloc(size_t, unsigned int __nocast); +extern void kmem_free(void *, size_t); + +/* + * Zone interfaces + */ + +#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN +#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT +#define KM_ZONE_SPREAD 0 + +#define kmem_zone kmem_cache +#define kmem_zone_t struct kmem_cache + +static inline kmem_zone_t * kmem_zone_init(int size, char *zone_name) { return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL); } -static __inline void +static inline kmem_zone_t * +kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, + void (*construct)(void *, kmem_zone_t *, unsigned long)) +{ + return kmem_cache_create(zone_name, size, 0, flags, construct, NULL); +} + +static inline void kmem_zone_free(kmem_zone_t *zone, void *ptr) { kmem_cache_free(zone, ptr); } -static __inline void +static inline void kmem_zone_destroy(kmem_zone_t *zone) { if (zone && kmem_cache_destroy(zone)) BUG(); } -extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_zalloc(kmem_zone_t *, 
unsigned int __nocast); -extern void *kmem_alloc(size_t, unsigned int __nocast); -extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast); -extern void *kmem_zalloc(size_t, unsigned int __nocast); -extern void kmem_free(void *, size_t); +/* + * Low memory cache shrinkers + */ typedef struct shrinker *kmem_shaker_t; typedef int (*kmem_shake_func_t)(int, gfp_t); -static __inline kmem_shaker_t +static inline kmem_shaker_t kmem_shake_register(kmem_shake_func_t sfunc) { return set_shrinker(DEFAULT_SEEKS, sfunc); } -static __inline void +static inline void kmem_shake_deregister(kmem_shaker_t shrinker) { remove_shrinker(shrinker); } -static __inline int +static inline int kmem_shake_allow(gfp_t gfp_mask) { return (gfp_mask & __GFP_WAIT); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index bfb4f2917bb6..cdb905ab4dba 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1805,13 +1805,12 @@ xfs_flush_buftarg( int __init xfs_buf_init(void) { - int error = -ENOMEM; - #ifdef XFS_BUF_TRACE xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP); #endif - xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf"); + xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", + KM_ZONE_HWALIGN, NULL); if (!xfs_buf_zone) goto out_free_trace_buf; @@ -1839,7 +1838,7 @@ xfs_buf_init(void) #ifdef XFS_BUF_TRACE ktrace_free(xfs_buf_trace_buf); #endif - return error; + return -ENOMEM; } void diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 59989f6f83ef..0c7ed4b29c54 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -337,8 +337,8 @@ linvfs_alloc_inode( { vnode_t *vp; - vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP)); - if (!vp) + vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); + if (unlikely(!vp)) return NULL; return LINVFS_GET_IP(vp); } @@ -352,23 +352,21 @@ linvfs_destroy_inode( STATIC void linvfs_inode_init_once( - void *data, - kmem_cache_t *cachep, + void *vnode, + kmem_zone_t *zonep, unsigned long flags) { - vnode_t *vp = (vnode_t *)data; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) - inode_init_once(LINVFS_GET_IP(vp)); + SLAB_CTOR_CONSTRUCTOR) + inode_init_once(LINVFS_GET_IP((vnode_t *)vnode)); } STATIC int -linvfs_init_zones(void) +xfs_init_zones(void) { - xfs_vnode_zone = kmem_cache_create("xfs_vnode", - sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, - linvfs_inode_init_once, NULL); + xfs_vnode_zone = kmem_zone_init_flags(sizeof(vnode_t), "xfs_vnode_t", + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM, + linvfs_inode_init_once); if (!xfs_vnode_zone) goto out; @@ -377,14 +375,12 @@ linvfs_init_zones(void) goto out_destroy_vnode_zone; xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE, - mempool_alloc_slab, mempool_free_slab, - xfs_ioend_zone); + mempool_alloc_slab, mempool_free_slab, + xfs_ioend_zone); if (!xfs_ioend_pool) goto out_free_ioend_zone; - return 0; - out_free_ioend_zone: kmem_zone_destroy(xfs_ioend_zone); out_destroy_vnode_zone: @@ -394,7 +390,7 @@ linvfs_init_zones(void) } STATIC void -linvfs_destroy_zones(void) +xfs_destroy_zones(void) { mempool_destroy(xfs_ioend_pool); kmem_zone_destroy(xfs_vnode_zone); @@ -405,7 +401,7 @@ linvfs_destroy_zones(void) * Attempt to flush the inode, this will actually fail * if the inode is pinned, but we dirty the inode again * at the point when it is unpinned after a log write, - * since this is when the inode itself becomes flushable. 
+ * since this is when the inode itself becomes flushable. */ STATIC int linvfs_write_inode( @@ -963,7 +959,7 @@ init_xfs_fs( void ) ktrace_init(64); - error = linvfs_init_zones(); + error = xfs_init_zones(); if (error < 0) goto undo_zones; @@ -986,7 +982,7 @@ undo_register: xfs_buf_terminate(); undo_buffers: - linvfs_destroy_zones(); + xfs_destroy_zones(); undo_zones: return error; @@ -1000,7 +996,7 @@ exit_xfs_fs( void ) unregister_filesystem(&xfs_fs_type); xfs_cleanup(); xfs_buf_terminate(); - linvfs_destroy_zones(); + xfs_destroy_zones(); ktrace_uninit(); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index d77901c07f63..e48befa4e337 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -380,7 +380,7 @@ typedef struct xfs_trans { xfs_trans_header_t t_header; /* header for in-log trans */ unsigned int t_busy_free; /* busy descs free */ xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ - xfs_pflags_t t_pflags; /* saved pflags state */ + unsigned long t_pflags; /* saved process flags state */ } xfs_trans_t; #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 2e1045837881..5dd84fe609cc 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -77,11 +77,12 @@ xfs_init(void) "xfs_bmap_free_item"); xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), "xfs_btree_cur"); - xfs_inode_zone = kmem_zone_init(sizeof(xfs_inode_t), "xfs_inode"); xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state"); xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); + xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); + xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); /* * The size of the zone allocated buf log item is the maximum @@ -93,17 +94,30 @@ xfs_init(void) (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) / NBWORD) * sizeof(int))), "xfs_buf_item"); - xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + - ((XFS_EFD_MAX_FAST_EXTENTS - 1) * sizeof(xfs_extent_t))), + xfs_efd_zone = + kmem_zone_init((sizeof(xfs_efd_log_item_t) + + ((XFS_EFD_MAX_FAST_EXTENTS - 1) * + sizeof(xfs_extent_t))), "xfs_efd_item"); - xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) + - ((XFS_EFI_MAX_FAST_EXTENTS - 1) * sizeof(xfs_extent_t))), + xfs_efi_zone = + kmem_zone_init((sizeof(xfs_efi_log_item_t) + + ((XFS_EFI_MAX_FAST_EXTENTS - 1) * + sizeof(xfs_extent_t))), "xfs_efi_item"); - xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); - xfs_ili_zone = kmem_zone_init(sizeof(xfs_inode_log_item_t), "xfs_ili"); - xfs_chashlist_zone = kmem_zone_init(sizeof(xfs_chashlist_t), - "xfs_chashlist"); - xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); + + /* + * These zones warrant special memory allocator hints + */ + xfs_inode_zone = + kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | + KM_ZONE_SPREAD, NULL); + xfs_ili_zone = + kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", + KM_ZONE_SPREAD, NULL); + xfs_chashlist_zone = + kmem_zone_init_flags(sizeof(xfs_chashlist_t), "xfs_chashlist", + KM_ZONE_SPREAD, NULL); /* * Allocate global trace buffers. -- cgit v1.2.3 From 20722a91921bd9e9d4ba5c8f801d143e72e58418 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:19:08 +1100 Subject: [XFS] Fix a mutex_destroy diagnostic about a locked-mutex-on-destroy from quota code. 
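The rule being enforced here also exists in userspace, which makes it easy to demonstrate outside the kernel (illustration only, not the patched code): a mutex must be unlocked before it is destroyed, and debug or error-checking implementations flag the violation instead of silently accepting it.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

int main(void)
{
    pthread_mutex_t m;
    pthread_mutexattr_t attr;

    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
    pthread_mutex_init(&m, &attr);

    pthread_mutex_lock(&m);
    /* POSIX leaves destroying a locked mutex undefined; glibc's
     * error-checking mutexes typically report EBUSY here, much like
     * the kernel's debug mutex_destroy() diagnostic. */
    printf("destroy while locked: %d (EBUSY=%d)\n",
           pthread_mutex_destroy(&m), EBUSY);

    pthread_mutex_unlock(&m);          /* the fix: unlock first */
    printf("destroy after unlock: %d\n", pthread_mutex_destroy(&m));
    return 0;
}

The patch below applies the same discipline to the quota freelist lock: unlock, then mutex_destroy().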
SGI-PV: 949149 SGI-Modid: xfs-linux-melb:xfs-kern:25123a Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_qm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 7c0e39dc6189..fd4abb8a32c5 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -2788,9 +2788,7 @@ xfs_qm_freelist_destroy(xfs_frlist_t *ql) xfs_qm_dqdestroy(dqp); dqp = nextdqp; } - /* - * Don't bother about unlocking. - */ + mutex_unlock(&ql->qh_lock); mutex_destroy(&ql->qh_lock); ASSERT(ql->qh_nelems == 0); -- cgit v1.2.3 From e0cc2325d151c3f4f3276b2deda734faf742146f Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:19:55 +1100 Subject: [XFS] Flag the XFS inode cache as in need of spreading also. SGI-PV: 949073 SGI-Modid: xfs-linux-melb:xfs-kern:25170a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 0c7ed4b29c54..4d8613f65c2c 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -365,7 +365,8 @@ STATIC int xfs_init_zones(void) { xfs_vnode_zone = kmem_zone_init_flags(sizeof(vnode_t), "xfs_vnode_t", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM, + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | + KM_ZONE_SPREAD, linvfs_inode_init_once); if (!xfs_vnode_zone) goto out; -- cgit v1.2.3 From d2c32edf64a7e9bc8dfb5cb3a8f7bf7be94c93ae Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:20:13 +1100 Subject: [XFS] When compiling with gcc 4.0 and CONFIG_SMP unset, there are many warnings along the lines: xfs_linux.h:103:5: warning: "CONFIG_SMP" is not defined. SGI-PV: 946630 SGI-Modid: xfs-linux-melb:xfs-kern:25171a Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_linux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 377a9f54a049..e2be64982bc1 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -100,7 +100,7 @@ */ #undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ #define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */ -#if CONFIG_SMP +#ifdef CONFIG_SMP #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ #else #undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ -- cgit v1.2.3 From 2d0f864be3266eb0a8b4b48f36e2f777eace00b3 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:20:33 +1100 Subject: [XFS] Make headers compile for more compiler variants; minor cleanup. SGI-PV: 949432 SGI-Modid: xfs-linux-melb:xfs-kern:25184a Signed-off-by: Nathan Scott --- fs/xfs/xfs_dir2.h | 27 +++++++++++---------------- fs/xfs/xfs_dir_sf.h | 24 +++++++++++++----------- 2 files changed, 24 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index 3158f5dc431f..7dd364b1e038 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h @@ -55,16 +55,16 @@ typedef __uint32_t xfs_dir2_db_t; /* * Byte offset in a directory. */ -typedef xfs_off_t xfs_dir2_off_t; +typedef xfs_off_t xfs_dir2_off_t; /* * For getdents, argument struct for put routines. 
*/ typedef int (*xfs_dir2_put_t)(struct xfs_dir2_put_args *pa); typedef struct xfs_dir2_put_args { - xfs_off_t cook; /* cookie of (next) entry */ + xfs_off_t cook; /* cookie of (next) entry */ xfs_intino_t ino; /* inode number */ - struct xfs_dirent *dbp; /* buffer pointer */ + xfs_dirent_t *dbp; /* buffer pointer */ char *name; /* directory entry name */ int namelen; /* length of name */ int done; /* output: set if value was stored */ @@ -75,18 +75,13 @@ typedef struct xfs_dir2_put_args { /* * Other interfaces used by the rest of the dir v2 code. */ -extern int - xfs_dir2_grow_inode(struct xfs_da_args *args, int space, - xfs_dir2_db_t *dbp); - -extern int - xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *vp); - -extern int - xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *vp); - -extern int - xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, - struct xfs_dabuf *bp); +extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, + xfs_dir2_db_t *dbp); +extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, + int *vp); +extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, + int *vp); +extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, + struct xfs_dabuf *bp); #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/xfs_dir_sf.h b/fs/xfs/xfs_dir_sf.h index fe44c6f4d560..5b20b4d3f57d 100644 --- a/fs/xfs/xfs_dir_sf.h +++ b/fs/xfs/xfs_dir_sf.h @@ -35,19 +35,21 @@ typedef struct { __uint8_t i[sizeof(xfs_ino_t)]; } xfs_dir_ino_t; * and the elements much be memcpy'd out into a work area to get correct * alignment for the inode number fields. */ +typedef struct xfs_dir_sf_hdr { /* constant-structure header block */ + xfs_dir_ino_t parent; /* parent dir inode number */ + __uint8_t count; /* count of active entries */ +} xfs_dir_sf_hdr_t; + +typedef struct xfs_dir_sf_entry { + xfs_dir_ino_t inumber; /* referenced inode number */ + __uint8_t namelen; /* actual length of name (no NULL) */ + __uint8_t name[1]; /* name */ +} xfs_dir_sf_entry_t; + typedef struct xfs_dir_shortform { - struct xfs_dir_sf_hdr { /* constant-structure header block */ - xfs_dir_ino_t parent; /* parent dir inode number */ - __uint8_t count; /* count of active entries */ - } hdr; - struct xfs_dir_sf_entry { - xfs_dir_ino_t inumber; /* referenced inode number */ - __uint8_t namelen; /* actual length of name (no NULL) */ - __uint8_t name[1]; /* name */ - } list[1]; /* variable sized array */ + xfs_dir_sf_hdr_t hdr; + xfs_dir_sf_entry_t list[1]; /* variable sized array */ } xfs_dir_shortform_t; -typedef struct xfs_dir_sf_hdr xfs_dir_sf_hdr_t; -typedef struct xfs_dir_sf_entry xfs_dir_sf_entry_t; /* * We generate this then sort it, so that readdirs are returned in -- cgit v1.2.3 From e8234a6871aa0de1ed0aeeecb5230ecf3ab414e2 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Tue, 14 Mar 2006 13:23:52 +1100 Subject: [XFS] Add support for hotplug CPUs to the per-CPU superblock counters by registering a notifier callback that listens to CPU up/down events to modify the counters appropriately. 
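The shape of that registration, reduced to its bones (a sketch against the 2.6-era notifier API, not standalone code; struct my_fs and its field are invented): the notifier_block is embedded in the per-filesystem structure so the callback can recover the owning instance with container_of().

#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/notifier.h>

struct my_fs {
    struct notifier_block   cpu_nb;
    unsigned long           online_cpus;    /* invented example state */
};

static int my_fs_cpu_notify(struct notifier_block *nfb,
                            unsigned long action, void *hcpu)
{
    /* hcpu carries the number of the CPU being brought up or torn down */
    struct my_fs *fs = container_of(nfb, struct my_fs, cpu_nb);

    switch (action) {
    case CPU_UP_PREPARE:
        /* initialize this instance's per-cpu state for the new cpu */
        break;
    case CPU_ONLINE:
        fs->online_cpus++;      /* e.g. rebalance counters onto it */
        break;
    case CPU_DEAD:
        fs->online_cpus--;      /* e.g. fold its counts back into the total */
        break;
    }
    return NOTIFY_OK;
}

static void my_fs_cpu_notifier_register(struct my_fs *fs)
{
    fs->cpu_nb.notifier_call = my_fs_cpu_notify;
    fs->cpu_nb.priority = 0;
    register_cpu_notifier(&fs->cpu_nb);
}

The actual callback in the patch below does exactly this for the xfs_mount_t, rebalancing the per-cpu superblock counters when a CPU comes online and folding its counts back into the global superblock when it dies.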
SGI-PV: 949726 SGI-Modid: xfs-linux-melb:xfs-kern:25214a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_linux.h | 2 ++ fs/xfs/xfs_mount.c | 74 ++++++++++++++++++++++++++++++++++++++++++-- fs/xfs/xfs_mount.h | 1 + 3 files changed, 75 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index e2be64982bc1..9fdc14cffb70 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -73,6 +73,8 @@ #include #include #include +#include +#include #include #include diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 9b43b7b3d760..a64110b9023b 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -60,6 +60,7 @@ STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, int, int); STATIC int xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t, int, int); +STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); #else @@ -1716,9 +1717,72 @@ xfs_mount_log_sbunit( * To ensure counters don't remain disabled, they are rebalanced when * the global resource goes above a higher threshold (i.e. some hysteresis * is present to prevent thrashing). + */ + +/* + * hot-plug CPU notifier support. * - * Note: hotplug CPUs not yet supported + * We cannot use the hotcpu_register() function because it does + * not allow notifier instances. We need a notifier per filesystem + * as we need to be able to identify the filesystem to balance + * the counters out. This is acheived by having a notifier block + * embedded in the xfs_mount_t and doing pointer magic to get the + * mount pointer from the notifier block address. */ +STATIC int +xfs_icsb_cpu_notify( + struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + xfs_icsb_cnts_t *cntp; + xfs_mount_t *mp; + int s; + + mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier); + cntp = (xfs_icsb_cnts_t *) + per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); + switch (action) { + case CPU_UP_PREPARE: + /* Easy Case - initialize the area and locks, and + * then rebalance when online does everything else for us. */ + spin_lock_init(&cntp->icsb_lock); + cntp->icsb_icount = 0; + cntp->icsb_ifree = 0; + cntp->icsb_fdblocks = 0; + break; + case CPU_ONLINE: + xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); + break; + case CPU_DEAD: + /* Disable all the counters, then fold the dead cpu's + * count into the total on the global superblock and + * re-enable the counters. 
*/ + s = XFS_SB_LOCK(mp); + xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); + xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); + xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS); + + mp->m_sb.sb_icount += cntp->icsb_icount; + mp->m_sb.sb_ifree += cntp->icsb_ifree; + mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks; + + cntp->icsb_icount = 0; + cntp->icsb_ifree = 0; + cntp->icsb_fdblocks = 0; + + xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED); + xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED); + xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED); + XFS_SB_UNLOCK(mp, s); + break; + } + + return NOTIFY_OK; +} + int xfs_icsb_init_counters( xfs_mount_t *mp) @@ -1730,6 +1794,10 @@ xfs_icsb_init_counters( if (mp->m_sb_cnts == NULL) return -ENOMEM; + mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify; + mp->m_icsb_notifier.priority = 0; + register_cpu_notifier(&mp->m_icsb_notifier); + for_each_online_cpu(i) { cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); spin_lock_init(&cntp->icsb_lock); @@ -1746,8 +1814,10 @@ STATIC void xfs_icsb_destroy_counters( xfs_mount_t *mp) { - if (mp->m_sb_cnts) + if (mp->m_sb_cnts) { + unregister_cpu_notifier(&mp->m_icsb_notifier); free_percpu(mp->m_sb_cnts); + } } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7cca5110ca44..9d2ffbdc37a9 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -401,6 +401,7 @@ typedef struct xfs_mount { #ifdef HAVE_PERCPU_SB xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */ unsigned long m_icsb_counters; /* disabled per-cpu counters */ + struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ #endif } xfs_mount_t; -- cgit v1.2.3 From a780143ea53d26362b7cfb6666c8d04fb989bb7a Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:24:46 +1100 Subject: [XFS] UUID endianess fix. uu_timelow is a 32bit field and needs to be swapped with be32_to_cpu. SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25232a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/support/uuid.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c index a3d565a67734..e157015c70ff 100644 --- a/fs/xfs/support/uuid.c +++ b/fs/xfs/support/uuid.c @@ -21,13 +21,6 @@ static mutex_t uuid_monitor; static int uuid_table_size; static uuid_t *uuid_table; -void -uuid_init(void) -{ - mutex_init(&uuid_monitor); -} - - /* IRIX interpretation of an uuid_t */ typedef struct { __be32 uu_timelow; @@ -50,7 +43,7 @@ uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) | be16_to_cpu(uup->uu_timemid); - fsid[1] = be16_to_cpu(uup->uu_timelow); + fsid[1] = be32_to_cpu(uup->uu_timelow); } void @@ -139,3 +132,9 @@ uuid_table_remove(uuid_t *uuid) ASSERT(i < uuid_table_size); mutex_unlock(&uuid_monitor); } + +void +uuid_init(void) +{ + mutex_init(&uuid_monitor); +} -- cgit v1.2.3 From fcce0f1f9ae8d49fd27d418428034a505816d395 Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Tue, 14 Mar 2006 13:25:02 +1100 Subject: [XFS] forgot a couple of calls to XLOG_VEC_SET_TYPE when porting from irix to linux. 
SGI-PV: 931456 SGI-Modid: xfs-linux-melb:xfs-kern:25238a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_dquot_item.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 2ec6b441849c..e4e5f05b841b 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -79,9 +79,11 @@ xfs_qm_dquot_logitem_format( logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; logvec->i_len = sizeof(xfs_dq_logformat_t); + XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_QFORMAT); logvec++; logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; logvec->i_len = sizeof(xfs_disk_dquot_t); + XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_DQUOT); ASSERT(2 == logitem->qli_item.li_desc->lid_size); logitem->qli_format.qlf_size = 2; -- cgit v1.2.3 From 02d7c92334c84897d7d2840fc25e5896535766f9 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:26:09 +1100 Subject: [XFS] Use XFS_VFSTOM in more places instead of open coding it. SGI-PV: 947206 SGI-Modid: xfs-linux-melb:xfs-kern:25310a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_vfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c index c855d62e5344..a6b4084bda88 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.c +++ b/fs/xfs/linux-2.6/xfs_vfs.c @@ -295,7 +295,7 @@ bhv_remove_all_vfsops( bhv_remove_vfsops(vfsp, VFS_POSITION_DM); if (!freebase) return; - mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops)); + mp = XFS_VFSTOM(vfsp); VFS_REMOVEBHV(vfsp, &mp->m_bhv); xfs_mount_free(mp, 0); } -- cgit v1.2.3 From f51623b21fe3068d12f0c5d39e02fd2549635a99 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:26:27 +1100 Subject: [XFS] Move some code around to avoid prototypes and prep for future writepages code. SGI-PV: 950211 SGI-Modid: xfs-linux-melb:xfs-kern:25311a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 367 ++++++++++++++++++++++---------------------- 1 file changed, 183 insertions(+), 184 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 74d8be87f983..58fc7ade90b5 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -43,7 +43,32 @@ #include #include -STATIC void xfs_count_page_state(struct page *, int *, int *, int *); + +STATIC void +xfs_count_page_state( + struct page *page, + int *delalloc, + int *unmapped, + int *unwritten) +{ + struct buffer_head *bh, *head; + + *delalloc = *unmapped = *unwritten = 0; + + bh = head = page_buffers(page); + do { + if (buffer_uptodate(bh) && !buffer_mapped(bh)) + (*unmapped) = 1; + else if (buffer_unwritten(bh) && !buffer_delay(bh)) + clear_buffer_unwritten(bh); + else if (buffer_unwritten(bh)) + (*unwritten) = 1; + else if (buffer_delay(bh)) + (*delalloc) = 1; + } while ((bh = bh->b_this_page) != head); +} + + #if defined(XFS_RW_TRACE) void @@ -1040,6 +1065,154 @@ error: return err; } +/* + * writepage: Called from one of two places: + * + * 1. we are flushing a delalloc buffer head. + * + * 2. we are writing out a dirty page. Typically the page dirty + * state is cleared before we get here. In this case is it + * conceivable we have no buffer heads. + * + * For delalloc space on the page we need to allocate space and + * flush it. For unmapped buffer heads on the page we should + * allocate space if the page is uptodate. 
For any other dirty + * buffer heads on the page we should flush them. + * + * If we detect that a transaction would be required to flush + * the page, we have to check the process flags first, if we + * are already in a transaction or disk I/O during allocations + * is off, we need to fail the writepage and redirty the page. + */ + +STATIC int +linvfs_writepage( + struct page *page, + struct writeback_control *wbc) +{ + int error; + int need_trans; + int delalloc, unmapped, unwritten; + struct inode *inode = page->mapping->host; + + xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0); + + /* + * We need a transaction if: + * 1. There are delalloc buffers on the page + * 2. The page is uptodate and we have unmapped buffers + * 3. The page is uptodate and we have no buffers + * 4. There are unwritten buffers on the page + */ + + if (!page_has_buffers(page)) { + unmapped = 1; + need_trans = 1; + } else { + xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); + if (!PageUptodate(page)) + unmapped = 0; + need_trans = delalloc + unmapped + unwritten; + } + + /* + * If we need a transaction and the process flags say + * we are already in a transaction, or no IO is allowed + * then mark the page dirty again and leave the page + * as is. + */ + if (PFLAGS_TEST_FSTRANS() && need_trans) + goto out_fail; + + /* + * Delay hooking up buffer heads until we have + * made our go/no-go decision. + */ + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, 0); + + /* + * Convert delayed allocate, unwritten or unmapped space + * to real space and flush out to disk. + */ + error = xfs_page_state_convert(inode, page, wbc, 1, unmapped); + if (error == -EAGAIN) + goto out_fail; + if (unlikely(error < 0)) + goto out_unlock; + + return 0; + +out_fail: + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; +out_unlock: + unlock_page(page); + return error; +} + +/* + * Called to move a page into cleanable state - and from there + * to be released. Possibly the page is already clean. We always + * have buffer heads in this call. + * + * Returns 0 if the page is ok to release, 1 otherwise. + * + * Possible scenarios are: + * + * 1. We are being called to release a page which has been written + * to via regular I/O. buffer heads will be dirty and possibly + * delalloc. If no delalloc buffer heads in this case then we + * can just return zero. + * + * 2. We are called to release a page which has been written via + * mmap, all we need to do is ensure there is no delalloc + * state in the buffer heads, if not we can let the caller + * free them and we should come back later via writepage. + */ +STATIC int +linvfs_release_page( + struct page *page, + gfp_t gfp_mask) +{ + struct inode *inode = page->mapping->host; + int dirty, delalloc, unmapped, unwritten; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 1, + }; + + xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask); + + xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); + if (!delalloc && !unwritten) + goto free_buffers; + + if (!(gfp_mask & __GFP_FS)) + return 0; + + /* If we are already inside a transaction or the thread cannot + * do I/O, we cannot release this page. + */ + if (PFLAGS_TEST_FSTRANS()) + return 0; + + /* + * Convert delalloc space to real space, do not flush the + * data out to disk, that will be done by the caller. + * Never need to allocate space here - we will always + * come back to writepage in that case. 
+ */ + dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0); + if (dirty == 0 && !unwritten) + goto free_buffers; + return 0; + +free_buffers: + return try_to_free_buffers(page); +} + STATIC int __linvfs_get_block( struct inode *inode, @@ -1223,6 +1396,15 @@ linvfs_direct_IO( return ret; } +STATIC int +linvfs_prepare_write( + struct file *file, + struct page *page, + unsigned int from, + unsigned int to) +{ + return block_prepare_write(page, from, to, linvfs_get_block); +} STATIC sector_t linvfs_bmap( @@ -1259,118 +1441,6 @@ linvfs_readpages( return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block); } -STATIC void -xfs_count_page_state( - struct page *page, - int *delalloc, - int *unmapped, - int *unwritten) -{ - struct buffer_head *bh, *head; - - *delalloc = *unmapped = *unwritten = 0; - - bh = head = page_buffers(page); - do { - if (buffer_uptodate(bh) && !buffer_mapped(bh)) - (*unmapped) = 1; - else if (buffer_unwritten(bh) && !buffer_delay(bh)) - clear_buffer_unwritten(bh); - else if (buffer_unwritten(bh)) - (*unwritten) = 1; - else if (buffer_delay(bh)) - (*delalloc) = 1; - } while ((bh = bh->b_this_page) != head); -} - - -/* - * writepage: Called from one of two places: - * - * 1. we are flushing a delalloc buffer head. - * - * 2. we are writing out a dirty page. Typically the page dirty - * state is cleared before we get here. In this case is it - * conceivable we have no buffer heads. - * - * For delalloc space on the page we need to allocate space and - * flush it. For unmapped buffer heads on the page we should - * allocate space if the page is uptodate. For any other dirty - * buffer heads on the page we should flush them. - * - * If we detect that a transaction would be required to flush - * the page, we have to check the process flags first, if we - * are already in a transaction or disk I/O during allocations - * is off, we need to fail the writepage and redirty the page. - */ - -STATIC int -linvfs_writepage( - struct page *page, - struct writeback_control *wbc) -{ - int error; - int need_trans; - int delalloc, unmapped, unwritten; - struct inode *inode = page->mapping->host; - - xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0); - - /* - * We need a transaction if: - * 1. There are delalloc buffers on the page - * 2. The page is uptodate and we have unmapped buffers - * 3. The page is uptodate and we have no buffers - * 4. There are unwritten buffers on the page - */ - - if (!page_has_buffers(page)) { - unmapped = 1; - need_trans = 1; - } else { - xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); - if (!PageUptodate(page)) - unmapped = 0; - need_trans = delalloc + unmapped + unwritten; - } - - /* - * If we need a transaction and the process flags say - * we are already in a transaction, or no IO is allowed - * then mark the page dirty again and leave the page - * as is. - */ - if (PFLAGS_TEST_FSTRANS() && need_trans) - goto out_fail; - - /* - * Delay hooking up buffer heads until we have - * made our go/no-go decision. - */ - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << inode->i_blkbits, 0); - - /* - * Convert delayed allocate, unwritten or unmapped space - * to real space and flush out to disk. 
- */ - error = xfs_page_state_convert(inode, page, wbc, 1, unmapped); - if (error == -EAGAIN) - goto out_fail; - if (unlikely(error < 0)) - goto out_unlock; - - return 0; - -out_fail: - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; -out_unlock: - unlock_page(page); - return error; -} - STATIC int linvfs_invalidate_page( struct page *page, @@ -1381,77 +1451,6 @@ linvfs_invalidate_page( return block_invalidatepage(page, offset); } -/* - * Called to move a page into cleanable state - and from there - * to be released. Possibly the page is already clean. We always - * have buffer heads in this call. - * - * Returns 0 if the page is ok to release, 1 otherwise. - * - * Possible scenarios are: - * - * 1. We are being called to release a page which has been written - * to via regular I/O. buffer heads will be dirty and possibly - * delalloc. If no delalloc buffer heads in this case then we - * can just return zero. - * - * 2. We are called to release a page which has been written via - * mmap, all we need to do is ensure there is no delalloc - * state in the buffer heads, if not we can let the caller - * free them and we should come back later via writepage. - */ -STATIC int -linvfs_release_page( - struct page *page, - gfp_t gfp_mask) -{ - struct inode *inode = page->mapping->host; - int dirty, delalloc, unmapped, unwritten; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, - }; - - xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask); - - xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); - if (!delalloc && !unwritten) - goto free_buffers; - - if (!(gfp_mask & __GFP_FS)) - return 0; - - /* If we are already inside a transaction or the thread cannot - * do I/O, we cannot release this page. - */ - if (PFLAGS_TEST_FSTRANS()) - return 0; - - /* - * Convert delalloc space to real space, do not flush the - * data out to disk, that will be done by the caller. - * Never need to allocate space here - we will always - * come back to writepage in that case. - */ - dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0); - if (dirty == 0 && !unwritten) - goto free_buffers; - return 0; - -free_buffers: - return try_to_free_buffers(page); -} - -STATIC int -linvfs_prepare_write( - struct file *file, - struct page *page, - unsigned int from, - unsigned int to) -{ - return block_prepare_write(page, from, to, linvfs_get_block); -} - struct address_space_operations linvfs_aops = { .readpage = linvfs_readpage, .readpages = linvfs_readpages, -- cgit v1.2.3 From 87cbc49cd4b773a972bce56c5dd09c4717f3285b Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:26:43 +1100 Subject: [XFS] Add xfs_map_buffer helper, use it in a couple of places. 
SGI-PV: 950211 SGI-Modid: xfs-linux-melb:xfs-kern:25312a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 49 +++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 58fc7ade90b5..4b6bfdb8251c 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -486,6 +486,26 @@ xfs_add_to_ioend( ioend->io_size += bh->b_size; } +STATIC void +xfs_map_buffer( + struct buffer_head *bh, + xfs_iomap_t *mp, + xfs_off_t offset, + uint block_bits) +{ + sector_t bn; + + ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL); + + bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) + + ((offset - mp->iomap_offset) >> block_bits); + + ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME)); + + bh->b_blocknr = bn; + set_buffer_mapped(bh); +} + STATIC void xfs_map_at_offset( struct buffer_head *bh, @@ -493,22 +513,11 @@ xfs_map_at_offset( int block_bits, xfs_iomap_t *iomapp) { - xfs_daddr_t bn; - int sector_shift; - ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); - ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL); - - sector_shift = block_bits - BBSHIFT; - bn = (iomapp->iomap_bn >> sector_shift) + - ((offset - iomapp->iomap_offset) >> block_bits); - - ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME)); - ASSERT((bn << sector_shift) >= iomapp->iomap_bn); lock_buffer(bh); - bh->b_blocknr = bn; + xfs_map_buffer(bh, iomapp, offset, block_bits); bh->b_bdev = iomapp->iomap_target->bt_bdev; set_buffer_mapped(bh); clear_buffer_delay(bh); @@ -1246,21 +1255,13 @@ __linvfs_get_block( return 0; if (iomap.iomap_bn != IOMAP_DADDR_NULL) { - xfs_daddr_t bn; - xfs_off_t delta; - - /* For unwritten extents do not report a disk address on + /* + * For unwritten extents do not report a disk address on * the read case (treat as if we're reading into a hole). */ if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) { - delta = offset - iomap.iomap_offset; - delta >>= inode->i_blkbits; - - bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT); - bn += delta; - BUG_ON(!bn && !(iomap.iomap_flags & IOMAP_REALTIME)); - bh_result->b_blocknr = bn; - set_buffer_mapped(bh_result); + xfs_map_buffer(bh_result, &iomap, offset, + inode->i_blkbits); } if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) { if (direct) -- cgit v1.2.3 From 01e1b69cfcdcfdd5b405165eaba29428f8b18a7c Mon Sep 17 00:00:00 2001 From: David Chinner Date: Tue, 14 Mar 2006 13:29:16 +1100 Subject: [XFS] using a spinlock per cpu for superblock counter exclusion results in a preēmpt counter overflow at 256p and above. Change the exclusion mechanism to use atomic bit operations and busy wait loops to emulate the spin lock exclusion mechanism but without the preempt count issues. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SGI-PV: 950027 SGI-Modid: xfs-linux-melb:xfs-kern:25338a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_linux.h | 1 + fs/xfs/xfs_mount.c | 37 ++++++++++++++++++++++++------------- fs/xfs/xfs_mount.h | 4 +++- 3 files changed, 28 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 9fdc14cffb70..bd88ccb0cad0 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -75,6 +75,7 @@ #include #include #include +#include #include #include diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index a64110b9023b..d62aee027368 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1746,10 +1746,7 @@ xfs_icsb_cpu_notify( case CPU_UP_PREPARE: /* Easy Case - initialize the area and locks, and * then rebalance when online does everything else for us. */ - spin_lock_init(&cntp->icsb_lock); - cntp->icsb_icount = 0; - cntp->icsb_ifree = 0; - cntp->icsb_fdblocks = 0; + memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); break; case CPU_ONLINE: xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); @@ -1769,9 +1766,7 @@ xfs_icsb_cpu_notify( mp->m_sb.sb_ifree += cntp->icsb_ifree; mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks; - cntp->icsb_icount = 0; - cntp->icsb_ifree = 0; - cntp->icsb_fdblocks = 0; + memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED); xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED); @@ -1800,7 +1795,7 @@ xfs_icsb_init_counters( for_each_online_cpu(i) { cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); - spin_lock_init(&cntp->icsb_lock); + memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); } /* * start with all counters disabled so that the @@ -1820,6 +1815,22 @@ xfs_icsb_destroy_counters( } } +STATIC inline void +xfs_icsb_lock_cntr( + xfs_icsb_cnts_t *icsbp) +{ + while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) { + ndelay(1000); + } +} + +STATIC inline void +xfs_icsb_unlock_cntr( + xfs_icsb_cnts_t *icsbp) +{ + clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags); +} + STATIC inline void xfs_icsb_lock_all_counters( @@ -1830,7 +1841,7 @@ xfs_icsb_lock_all_counters( for_each_online_cpu(i) { cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); - spin_lock(&cntp->icsb_lock); + xfs_icsb_lock_cntr(cntp); } } @@ -1843,7 +1854,7 @@ xfs_icsb_unlock_all_counters( for_each_online_cpu(i) { cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); - spin_unlock(&cntp->icsb_lock); + xfs_icsb_unlock_cntr(cntp); } } @@ -2070,7 +2081,7 @@ xfs_icsb_modify_counters_int( again: cpu = get_cpu(); icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu), - spin_lock(&icsbp->icsb_lock); + xfs_icsb_lock_cntr(icsbp); if (unlikely(xfs_icsb_counter_disabled(mp, field))) goto slow_path; @@ -2104,7 +2115,7 @@ again: BUG(); break; } - spin_unlock(&icsbp->icsb_lock); + xfs_icsb_unlock_cntr(icsbp); put_cpu(); if (locked) XFS_SB_UNLOCK(mp, s); @@ -2120,7 +2131,7 @@ again: * manner. 
*/ slow_path: - spin_unlock(&icsbp->icsb_lock); + xfs_icsb_unlock_cntr(icsbp); put_cpu(); /* need to hold superblock incase we need diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 9d2ffbdc37a9..29cfcf0c11be 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -280,9 +280,11 @@ typedef struct xfs_icsb_cnts { uint64_t icsb_fdblocks; uint64_t icsb_ifree; uint64_t icsb_icount; - spinlock_t icsb_lock; + unsigned long icsb_flags; } xfs_icsb_cnts_t; +#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */ + #define XFS_ICSB_SB_LOCKED (1 << 0) /* sb already locked */ #define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */ -- cgit v1.2.3 From 9f989c9455aac417c34af9c505e6b169055251da Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:29:32 +1100 Subject: [XFS] Additional mount time superblock validation checks. SGI-PV: 950491 SGI-Modid: xfs-linux-melb:xfs-kern:25354a Signed-off-by: Nathan Scott --- fs/xfs/xfs_mount.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d62aee027368..20e8abc16d18 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -268,9 +268,12 @@ xfs_mount_validate_sb( sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || + sbp->sb_inodelog < XFS_DINODE_MIN_LOG || + sbp->sb_inodelog > XFS_DINODE_MAX_LOG || + (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || - sbp->sb_imax_pct > 100)) { + (sbp->sb_imax_pct > 100 || sbp->sb_imax_pct < 1))) { cmn_err(CE_WARN, "XFS: SB sanity check 1 failed"); XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(3)", XFS_ERRLEVEL_LOW, mp, sbp); -- cgit v1.2.3 From 4eea22f01bb4fdba1aab4430c33adbe88d9d4985 Mon Sep 17 00:00:00 2001 From: Mandy Kirkconnell Date: Tue, 14 Mar 2006 13:29:52 +1100 Subject: [XFS] 929045 567344 This mod re-organizes some of the in-core file extent code to prepare for an upcoming mod which will introduce multi-level in-core extent allocations. Although the in-core extent management is using a new code path in this mod, the functionality remains the same. Major changes include: - Introduce 10 new subroutines which re-orgainze the existing code but do NOT change functionality: xfs_iext_get_ext() xfs_iext_insert() xfs_iext_add() xfs_iext_remove() xfs_iext_remove_inline() xfs_iext_remove_direct() xfs_iext_realloc_direct() xfs_iext_direct_to_inline() xfs_iext_inline_to_direct() xfs_iext_destroy() - Remove 2 subroutines (functionality moved to new subroutines above): xfs_iext_realloc() -replaced by xfs_iext_add() and xfs_iext_remove() xfs_bmap_insert_exlist() - replaced by xfs_iext_insert() xfs_bmap_delete_exlist() - replaced by xfs_iext_remove() - Replace all hard-coded (indexed) extent assignments with a call to xfs_iext_get_ext() - Replace all extent record pointer arithmetic (ep++, ep--, base + lastx,..) 
with calls to xfs_iext_get_ext() - Update comments to remove the idea of a single "extent list" and introduce "extent record" terminology instead SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207390a Signed-off-by: Mandy Kirkconnell Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_qm.c | 9 +- fs/xfs/xfs_bmap.c | 496 +++++++++++++++++++++--------------------------- fs/xfs/xfs_bmap.h | 4 +- fs/xfs/xfs_bmap_btree.c | 10 +- fs/xfs/xfs_inode.c | 470 +++++++++++++++++++++++++++++++++------------ fs/xfs/xfs_inode.h | 18 +- 6 files changed, 597 insertions(+), 410 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index fd4abb8a32c5..1fb757ef3f41 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1704,9 +1704,9 @@ xfs_qm_get_rtblks( xfs_qcnt_t *O_rtblks) { xfs_filblks_t rtblks; /* total rt blks */ + xfs_extnum_t idx; /* extent record index */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_extnum_t nextents; /* number of extent entries */ - xfs_bmbt_rec_t *base; /* base of extent array */ xfs_bmbt_rec_t *ep; /* pointer to an extent entry */ int error; @@ -1717,10 +1717,11 @@ xfs_qm_get_rtblks( return error; } rtblks = 0; - nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); - base = &ifp->if_u1.if_extents[0]; - for (ep = base; ep < &base[nextents]; ep++) + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + for (idx = 0; idx < nextents; idx++) { + ep = xfs_iext_get_ext(ifp, idx); rtblks += xfs_bmbt_get_blockcount(ep); + } *O_rtblks = (xfs_qcnt_t)rtblks; return 0; } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 70625e577c70..53c47a181f87 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -89,7 +89,7 @@ xfs_bmap_add_attrfork_local( int *flags); /* inode logging flags */ /* - * Called by xfs_bmapi to update extent list structure and the btree + * Called by xfs_bmapi to update file extent records and the btree * after allocating space (or doing a delayed allocation). 
*/ STATIC int /* error */ @@ -97,7 +97,7 @@ xfs_bmap_add_extent( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ @@ -113,7 +113,7 @@ xfs_bmap_add_extent_delay_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ @@ -129,7 +129,7 @@ xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp,/* inode logging flags */ int rsvd); /* OK to allocate reserved blocks */ @@ -142,7 +142,7 @@ xfs_bmap_add_extent_hole_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ int whichfork); /* data or attr fork */ @@ -155,7 +155,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp); /* inode logging flags */ /* @@ -169,7 +169,7 @@ xfs_bmap_alloc( /* * Transform a btree format file with only one leaf node, where the * extents list will fit in the inode, into an extents format file. - * Since the extent list is already in-core, all we have to do is + * Since the file extents are already in-core, all we have to do is * give up the space for the btree root and pitch the leaf block. */ STATIC int /* error */ @@ -191,7 +191,7 @@ xfs_bmap_check_extents( #endif /* - * Called by xfs_bmapi to update extent list structure and the btree + * Called by xfs_bmapi to update file extent records and the btree * after removing space (or undoing a delayed allocation). 
*/ STATIC int /* error */ @@ -201,7 +201,7 @@ xfs_bmap_del_extent( xfs_extnum_t idx, /* extent number to update/insert */ xfs_bmap_free_t *flist, /* list of extents to be freed */ xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp,/* inode logging flags */ int whichfork, /* data or attr fork */ int rsvd); /* OK to allocate reserved blocks */ @@ -216,18 +216,6 @@ xfs_bmap_del_free( xfs_bmap_free_item_t *prev, /* previous item on list, if any */ xfs_bmap_free_item_t *free); /* list item to be freed */ -/* - * Remove count entries from the extents array for inode "ip", starting - * at index "idx". Copies the remaining items down over the deleted ones, - * and gives back the excess memory. - */ -STATIC void -xfs_bmap_delete_exlist( - xfs_inode_t *ip, /* incode inode pointer */ - xfs_extnum_t idx, /* starting delete index */ - xfs_extnum_t count, /* count of items to delete */ - int whichfork); /* data or attr fork */ - /* * Convert an extents-format file into a btree-format file. * The new file will have a root block (in the inode) and a single child block. @@ -243,18 +231,6 @@ xfs_bmap_extents_to_btree( int *logflagsp, /* inode logging flags */ int whichfork); /* data or attr fork */ -/* - * Insert new item(s) in the extent list for inode "ip". - * Count new items are inserted at offset idx. - */ -STATIC void -xfs_bmap_insert_exlist( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* starting index of new items */ - xfs_extnum_t count, /* number of inserted items */ - xfs_bmbt_irec_t *new, /* items to insert */ - int whichfork); /* data or attr fork */ - /* * Convert a local file to an extents file. * This code is sort of bogus, since the file data needs to get @@ -316,7 +292,7 @@ xfs_bmap_trace_addentry( int whichfork); /* data or attr fork */ /* - * Add bmap trace entry prior to a call to xfs_bmap_delete_exlist. + * Add bmap trace entry prior to a call to xfs_iext_remove. */ STATIC void xfs_bmap_trace_delete( @@ -328,7 +304,7 @@ xfs_bmap_trace_delete( int whichfork); /* data or attr fork */ /* - * Add bmap trace entry prior to a call to xfs_bmap_insert_exlist, or + * Add bmap trace entry prior to a call to xfs_iext_insert, or * reading in the extents list from the disk (in the btree). */ STATIC void @@ -343,7 +319,7 @@ xfs_bmap_trace_insert( int whichfork); /* data or attr fork */ /* - * Add bmap trace entry after updating an extent list entry in place. + * Add bmap trace entry after updating an extent record in place. */ STATIC void xfs_bmap_trace_post_update( @@ -354,7 +330,7 @@ xfs_bmap_trace_post_update( int whichfork); /* data or attr fork */ /* - * Add bmap trace entry prior to updating an extent list entry in place. + * Add bmap trace entry prior to updating an extent record in place. 
*/ STATIC void xfs_bmap_trace_pre_update( @@ -413,19 +389,24 @@ STATIC int xfs_bmap_count_tree( xfs_mount_t *mp, xfs_trans_t *tp, + xfs_ifork_t *ifp, xfs_fsblock_t blockno, int levelin, int *count); STATIC int xfs_bmap_count_leaves( - xfs_bmbt_rec_t *frp, + xfs_ifork_t *ifp, + xfs_extnum_t idx, int numrecs, int *count); STATIC int xfs_bmap_disk_count_leaves( - xfs_bmbt_rec_t *frp, + xfs_ifork_t *ifp, + xfs_mount_t *mp, + xfs_extnum_t idx, + xfs_bmbt_block_t *block, int numrecs, int *count); @@ -537,7 +518,7 @@ xfs_bmap_add_attrfork_local( } /* - * Called by xfs_bmapi to update extent list structure and the btree + * Called by xfs_bmapi to update file extent records and the btree * after allocating space (or doing a delayed allocation). */ STATIC int /* error */ @@ -545,7 +526,7 @@ xfs_bmap_add_extent( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ @@ -578,7 +559,7 @@ xfs_bmap_add_extent( if (nextents == 0) { xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new, NULL, whichfork); - xfs_bmap_insert_exlist(ip, 0, 1, new, whichfork); + xfs_iext_insert(ifp, 0, 1, new); ASSERT(cur == NULL); ifp->if_lastex = 0; if (!ISNULLSTARTBLOCK(new->br_startblock)) { @@ -614,7 +595,7 @@ xfs_bmap_add_extent( /* * Get the record referred to by idx. */ - xfs_bmbt_get_all(&ifp->if_u1.if_extents[idx], &prev); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &prev); /* * If it's a real allocation record, and the new allocation ends * after the start of the referred to record, then we're filling @@ -714,14 +695,13 @@ xfs_bmap_add_extent_delay_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ int rsvd) /* OK to use reserved data block allocation */ { - xfs_bmbt_rec_t *base; /* base of extent entry list */ xfs_btree_cur_t *cur; /* btree cursor */ int diff; /* temp value */ xfs_bmbt_rec_t *ep; /* extent entry for idx */ @@ -730,6 +710,7 @@ xfs_bmap_add_extent_delay_real( static char fname[] = "xfs_bmap_add_extent_delay_real"; #endif int i; /* temp state */ + xfs_ifork_t *ifp; /* inode fork pointer */ xfs_fileoff_t new_endoff; /* end offset of new entry */ xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ /* left is 0, right is 1, prev is 2 */ @@ -763,8 +744,8 @@ xfs_bmap_add_extent_delay_real( * Set up a bunch of variables to make the tests simpler. */ cur = *curp; - base = ip->i_df.if_u1.if_extents; - ep = &base[idx]; + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_get_all(ep, &PREV); new_endoff = new->br_startoff + new->br_blockcount; ASSERT(PREV.br_startoff <= new->br_startoff); @@ -781,7 +762,7 @@ xfs_bmap_add_extent_delay_real( * Don't set contiguous if the combined extent would be too large. 
*/ if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { - xfs_bmbt_get_all(ep - 1, &LEFT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock)); } STATE_SET(LEFT_CONTIG, @@ -798,7 +779,7 @@ xfs_bmap_add_extent_delay_real( if (STATE_SET_TEST(RIGHT_VALID, idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) { - xfs_bmbt_get_all(ep + 1, &RIGHT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock)); } STATE_SET(RIGHT_CONTIG, @@ -825,14 +806,14 @@ xfs_bmap_add_extent_delay_real( */ xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1, XFS_DATA_FORK); - xfs_bmbt_set_blockcount(ep - 1, + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1, XFS_DATA_FORK); xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK); - xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK); + xfs_iext_remove(ifp, idx, 2); ip->i_df.if_lastex = idx - 1; ip->i_d.di_nextents--; if (cur == NULL) @@ -867,14 +848,14 @@ xfs_bmap_add_extent_delay_real( */ xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1, XFS_DATA_FORK); - xfs_bmbt_set_blockcount(ep - 1, + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount); xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1, XFS_DATA_FORK); ip->i_df.if_lastex = idx - 1; xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1, XFS_DATA_FORK); - xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK); + xfs_iext_remove(ifp, idx, 1); if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -908,7 +889,7 @@ xfs_bmap_add_extent_delay_real( ip->i_df.if_lastex = idx; xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK); - xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK); + xfs_iext_remove(ifp, idx + 1, 1); if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -964,7 +945,7 @@ xfs_bmap_add_extent_delay_real( */ xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1, XFS_DATA_FORK); - xfs_bmbt_set_blockcount(ep - 1, + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); @@ -1010,7 +991,7 @@ xfs_bmap_add_extent_delay_real( xfs_bmbt_set_blockcount(ep, temp); xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL, XFS_DATA_FORK); - xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK); + xfs_iext_insert(ifp, idx, 1, new); ip->i_df.if_lastex = idx; ip->i_d.di_nextents++; if (cur == NULL) @@ -1039,8 +1020,7 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), STARTBLOCKVAL(PREV.br_startblock) - (cur ? 
cur->bc_private.b.allocated : 0)); - base = ip->i_df.if_u1.if_extents; - ep = &base[idx + 1]; + ep = xfs_iext_get_ext(ifp, idx + 1); xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1, XFS_DATA_FORK); @@ -1058,7 +1038,8 @@ xfs_bmap_add_extent_delay_real( xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(ep, temp); - xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock, + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), + new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, RIGHT.br_state); xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1, @@ -1098,7 +1079,7 @@ xfs_bmap_add_extent_delay_real( xfs_bmbt_set_blockcount(ep, temp); xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1, new, NULL, XFS_DATA_FORK); - xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK); + xfs_iext_insert(ifp, idx + 1, 1, new); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; if (cur == NULL) @@ -1127,8 +1108,7 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), STARTBLOCKVAL(PREV.br_startblock) - (cur ? cur->bc_private.b.allocated : 0)); - base = ip->i_df.if_u1.if_extents; - ep = &base[idx]; + ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); *dnew = temp; @@ -1149,7 +1129,7 @@ xfs_bmap_add_extent_delay_real( r[1].br_blockcount = temp2; xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1], XFS_DATA_FORK); - xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK); + xfs_iext_insert(ifp, idx + 1, 2, &r[0]); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; if (cur == NULL) @@ -1204,13 +1184,13 @@ xfs_bmap_add_extent_delay_real( } } } - base = ip->i_df.if_u1.if_extents; - ep = &base[idx]; + ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK); xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2, XFS_DATA_FORK); - xfs_bmbt_set_startblock(ep + 2, NULLSTARTBLOCK((int)temp2)); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), + NULLSTARTBLOCK((int)temp2)); xfs_bmap_trace_post_update(fname, "0", ip, idx + 2, XFS_DATA_FORK); *dnew = temp + temp2; @@ -1254,10 +1234,9 @@ xfs_bmap_add_extent_unwritten_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp) /* inode logging flags */ { - xfs_bmbt_rec_t *base; /* base of extent entry list */ xfs_btree_cur_t *cur; /* btree cursor */ xfs_bmbt_rec_t *ep; /* extent entry for idx */ int error; /* error return value */ @@ -1265,6 +1244,7 @@ xfs_bmap_add_extent_unwritten_real( static char fname[] = "xfs_bmap_add_extent_unwritten_real"; #endif int i; /* temp state */ + xfs_ifork_t *ifp; /* inode fork pointer */ xfs_fileoff_t new_endoff; /* end offset of new entry */ xfs_exntst_t newext; /* new extent state */ xfs_exntst_t oldext; /* old extent state */ @@ -1298,8 +1278,8 @@ xfs_bmap_add_extent_unwritten_real( */ error = 0; cur = *curp; - base = ip->i_df.if_u1.if_extents; - ep = &base[idx]; + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_get_all(ep, &PREV); newext = new->br_state; oldext = (newext 
== XFS_EXT_UNWRITTEN) ? @@ -1320,7 +1300,7 @@ xfs_bmap_add_extent_unwritten_real( * Don't set contiguous if the combined extent would be too large. */ if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { - xfs_bmbt_get_all(ep - 1, &LEFT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock)); } STATE_SET(LEFT_CONTIG, @@ -1337,7 +1317,7 @@ xfs_bmap_add_extent_unwritten_real( if (STATE_SET_TEST(RIGHT_VALID, idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) { - xfs_bmbt_get_all(ep + 1, &RIGHT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock)); } STATE_SET(RIGHT_CONTIG, @@ -1363,14 +1343,14 @@ xfs_bmap_add_extent_unwritten_real( */ xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1, XFS_DATA_FORK); - xfs_bmbt_set_blockcount(ep - 1, + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1, XFS_DATA_FORK); xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK); - xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK); + xfs_iext_remove(ifp, idx, 2); ip->i_df.if_lastex = idx - 1; ip->i_d.di_nextents -= 2; if (cur == NULL) @@ -1409,14 +1389,14 @@ xfs_bmap_add_extent_unwritten_real( */ xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1, XFS_DATA_FORK); - xfs_bmbt_set_blockcount(ep - 1, + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount); xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1, XFS_DATA_FORK); ip->i_df.if_lastex = idx - 1; xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1, XFS_DATA_FORK); - xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK); + xfs_iext_remove(ifp, idx, 1); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1456,7 +1436,7 @@ xfs_bmap_add_extent_unwritten_real( ip->i_df.if_lastex = idx; xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK); - xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK); + xfs_iext_remove(ifp, idx + 1, 1); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1516,7 +1496,7 @@ xfs_bmap_add_extent_unwritten_real( */ xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1, XFS_DATA_FORK); - xfs_bmbt_set_blockcount(ep - 1, + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); @@ -1571,7 +1551,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK); xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL, XFS_DATA_FORK); - xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK); + xfs_iext_insert(ifp, idx, 1, new); ip->i_df.if_lastex = idx; ip->i_d.di_nextents++; if (cur == NULL) @@ -1609,7 +1589,8 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_blockcount - new->br_blockcount); xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx, XFS_DATA_FORK); - xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock, + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), + new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, newext); xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1, XFS_DATA_FORK); @@ -1649,7 +1630,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); xfs_bmap_trace_insert(fname, "RF", ip, 
idx + 1, 1, new, NULL, XFS_DATA_FORK); - xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK); + xfs_iext_insert(ifp, idx + 1, 1, new); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; if (cur == NULL) @@ -1696,7 +1677,7 @@ xfs_bmap_add_extent_unwritten_real( r[1].br_state = oldext; xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1], XFS_DATA_FORK); - xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK); + xfs_iext_insert(ifp, idx + 1, 2, &r[0]); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents += 2; if (cur == NULL) @@ -1770,15 +1751,15 @@ xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ int rsvd) /* OK to allocate reserved blocks */ { - xfs_bmbt_rec_t *base; /* base of extent entry list */ - xfs_bmbt_rec_t *ep; /* extent list entry for idx */ + xfs_bmbt_rec_t *ep; /* extent record for idx */ #ifdef XFS_BMAP_TRACE static char fname[] = "xfs_bmap_add_extent_hole_delay"; #endif + xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_irec_t left; /* left neighbor extent entry */ xfs_filblks_t newlen=0; /* new indirect size */ xfs_filblks_t oldlen=0; /* old indirect size */ @@ -1799,15 +1780,15 @@ xfs_bmap_add_extent_hole_delay( ((state &= ~MASK(b)), 0)) #define SWITCH_STATE (state & MASK2(LEFT_CONTIG, RIGHT_CONTIG)) - base = ip->i_df.if_u1.if_extents; - ep = &base[idx]; + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + ep = xfs_iext_get_ext(ifp, idx); state = 0; ASSERT(ISNULLSTARTBLOCK(new->br_startblock)); /* * Check and set flags if this segment has a left neighbor */ if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { - xfs_bmbt_get_all(ep - 1, &left); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock)); } /* @@ -1844,23 +1825,24 @@ xfs_bmap_add_extent_hole_delay( /* * New allocation is contiguous with delayed allocations * on the left and on the right. - * Merge all three into a single extent list entry. + * Merge all three into a single extent record. 
*/ temp = left.br_blockcount + new->br_blockcount + right.br_blockcount; xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1, XFS_DATA_FORK); - xfs_bmbt_set_blockcount(ep - 1, temp); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); oldlen = STARTBLOCKVAL(left.br_startblock) + STARTBLOCKVAL(new->br_startblock) + STARTBLOCKVAL(right.br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen)); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), + NULLSTARTBLOCK((int)newlen)); xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1, XFS_DATA_FORK); xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1, XFS_DATA_FORK); - xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK); + xfs_iext_remove(ifp, idx, 1); ip->i_df.if_lastex = idx - 1; break; @@ -1873,11 +1855,12 @@ xfs_bmap_add_extent_hole_delay( temp = left.br_blockcount + new->br_blockcount; xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, XFS_DATA_FORK); - xfs_bmbt_set_blockcount(ep - 1, temp); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); oldlen = STARTBLOCKVAL(left.br_startblock) + STARTBLOCKVAL(new->br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen)); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), + NULLSTARTBLOCK((int)newlen)); xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, XFS_DATA_FORK); ip->i_df.if_lastex = idx - 1; @@ -1909,7 +1892,7 @@ xfs_bmap_add_extent_hole_delay( oldlen = newlen = 0; xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL, XFS_DATA_FORK); - xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK); + xfs_iext_insert(ifp, idx, 1, new); ip->i_df.if_lastex = idx; break; } @@ -1940,7 +1923,7 @@ xfs_bmap_add_extent_hole_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ int whichfork) /* data or attr fork */ { @@ -1970,13 +1953,13 @@ xfs_bmap_add_extent_hole_real( ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); - ep = &ifp->if_u1.if_extents[idx]; + ep = xfs_iext_get_ext(ifp, idx); state = 0; /* * Check and set flags if this segment has a left neighbor. */ if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { - xfs_bmbt_get_all(ep - 1, &left); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock)); } /* @@ -2019,18 +2002,18 @@ xfs_bmap_add_extent_hole_real( /* * New allocation is contiguous with real allocations on the * left and on the right. - * Merge all three into a single extent list entry. + * Merge all three into a single extent record. */ xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1, whichfork); - xfs_bmbt_set_blockcount(ep - 1, + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), left.br_blockcount + new->br_blockcount + right.br_blockcount); xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1, whichfork); xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1, whichfork); - xfs_bmap_delete_exlist(ip, idx, 1, whichfork); + xfs_iext_remove(ifp, idx, 1); ifp->if_lastex = idx - 1; XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); @@ -2062,7 +2045,7 @@ xfs_bmap_add_extent_hole_real( * Merge the new allocation with the left neighbor. 
*/ xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork); - xfs_bmbt_set_blockcount(ep - 1, + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), left.br_blockcount + new->br_blockcount); xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork); ifp->if_lastex = idx - 1; @@ -2116,7 +2099,7 @@ xfs_bmap_add_extent_hole_real( */ xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL, whichfork); - xfs_bmap_insert_exlist(ip, idx, 1, new, whichfork); + xfs_iext_insert(ifp, idx, 1, new); ifp->if_lastex = idx; XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); @@ -2811,7 +2794,7 @@ xfs_bmap_alloc( /* * Transform a btree format file with only one leaf node, where the * extents list will fit in the inode, into an extents format file. - * Since the extent list is already in-core, all we have to do is + * Since the file extents are already in-core, all we have to do is * give up the space for the btree root and pitch the leaf block. */ STATIC int /* error */ @@ -2868,7 +2851,7 @@ xfs_bmap_btree_to_extents( } /* - * Called by xfs_bmapi to update extent list structure and the btree + * Called by xfs_bmapi to update file extent records and the btree * after removing space (or undoing a delayed allocation). */ STATIC int /* error */ @@ -2878,7 +2861,7 @@ xfs_bmap_del_extent( xfs_extnum_t idx, /* extent number to update/delete */ xfs_bmap_free_t *flist, /* list of extents to be freed */ xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *del, /* data to remove from extent list */ + xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ int whichfork, /* data or attr fork */ int rsvd) /* OK to allocate reserved blocks */ @@ -2903,7 +2886,6 @@ xfs_bmap_del_extent( xfs_filblks_t nblks; /* quota/sb block count */ xfs_bmbt_irec_t new; /* new record to be inserted */ /* REFERENCED */ - xfs_extnum_t nextents; /* number of extents in list */ uint qfield; /* quota field to update */ xfs_filblks_t temp; /* for indirect length calculations */ xfs_filblks_t temp2; /* for indirect length calculations */ @@ -2911,10 +2893,10 @@ xfs_bmap_del_extent( XFS_STATS_INC(xs_del_exlist); mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - ASSERT(idx >= 0 && idx < nextents); + ASSERT((idx >= 0) && (idx < ifp->if_bytes / + (uint)sizeof(xfs_bmbt_rec_t))); ASSERT(del->br_blockcount > 0); - ep = &ifp->if_u1.if_extents[idx]; + ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_get_all(ep, &got); ASSERT(got.br_startoff <= del->br_startoff); del_endoff = del->br_startoff + del->br_blockcount; @@ -2990,7 +2972,7 @@ xfs_bmap_del_extent( * Matches the whole extent. Delete the entry. */ xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork); - xfs_bmap_delete_exlist(ip, idx, 1, whichfork); + xfs_iext_remove(ifp, idx, 1); ifp->if_lastex = idx; if (delay) break; @@ -3160,7 +3142,7 @@ xfs_bmap_del_extent( xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork); xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL, whichfork); - xfs_bmap_insert_exlist(ip, idx + 1, 1, &new, whichfork); + xfs_iext_insert(ifp, idx + 1, 1, &new); ifp->if_lastex = idx + 1; break; } @@ -3212,31 +3194,6 @@ xfs_bmap_del_free( kmem_zone_free(xfs_bmap_free_item_zone, free); } -/* - * Remove count entries from the extents array for inode "ip", starting - * at index "idx". Copies the remaining items down over the deleted ones, - * and gives back the excess memory. 
- */ -STATIC void -xfs_bmap_delete_exlist( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* starting delete index */ - xfs_extnum_t count, /* count of items to delete */ - int whichfork) /* data or attr fork */ -{ - xfs_bmbt_rec_t *base; /* base of extent list */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t nextents; /* number of extents in list after */ - - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(ifp->if_flags & XFS_IFEXTENTS); - base = ifp->if_u1.if_extents; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - count; - memmove(&base[idx], &base[idx + count], - (nextents - idx) * sizeof(*base)); - xfs_iext_realloc(ip, -count, whichfork); -} - /* * Convert an extents-format file into a btree-format file. * The new file will have a root block (in the inode) and a single child block. @@ -3258,13 +3215,13 @@ xfs_bmap_extents_to_btree( xfs_bmbt_rec_t *arp; /* child record pointer */ xfs_bmbt_block_t *block; /* btree root block */ xfs_btree_cur_t *cur; /* bmap btree cursor */ - xfs_bmbt_rec_t *ep; /* extent list pointer */ + xfs_bmbt_rec_t *ep; /* extent record pointer */ int error; /* error return value */ - xfs_extnum_t i, cnt; /* extent list index */ + xfs_extnum_t i, cnt; /* extent record index */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_key_t *kp; /* root block key pointer */ xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* extent list size */ + xfs_extnum_t nextents; /* number of file extents */ xfs_bmbt_ptr_t *pp; /* root block address pointer */ ifp = XFS_IFORK_PTR(ip, whichfork); @@ -3343,7 +3300,8 @@ xfs_bmap_extents_to_btree( ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO); arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - for (ep = ifp->if_u1.if_extents, cnt = i = 0; i < nextents; i++, ep++) { + for (cnt = i = 0; i < nextents; i++) { + ep = xfs_iext_get_ext(ifp, i); if (!ISNULLSTARTBLOCK(xfs_bmbt_get_startblock(ep))) { arp->l0 = INT_GET(ep->l0, ARCH_CONVERT); arp->l1 = INT_GET(ep->l1, ARCH_CONVERT); @@ -3372,34 +3330,6 @@ xfs_bmap_extents_to_btree( return 0; } -/* - * Insert new item(s) in the extent list for inode "ip". - * Count new items are inserted at offset idx. 
- */ -STATIC void -xfs_bmap_insert_exlist( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* starting index of new items */ - xfs_extnum_t count, /* number of inserted items */ - xfs_bmbt_irec_t *new, /* items to insert */ - int whichfork) /* data or attr fork */ -{ - xfs_bmbt_rec_t *base; /* extent list base */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t nextents; /* extent list size */ - xfs_extnum_t to; /* extent list index */ - - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(ifp->if_flags & XFS_IFEXTENTS); - xfs_iext_realloc(ip, count, whichfork); - base = ifp->if_u1.if_extents; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - memmove(&base[idx + count], &base[idx], - (nextents - (idx + count)) * sizeof(*base)); - for (to = idx; to < idx + count; to++, new++) - xfs_bmbt_set_all(&base[to], new); -} - /* * Helper routine to reset inode di_forkoff field when switching * attribute fork from local to extent format - we reset it where @@ -3457,8 +3387,8 @@ xfs_bmap_local_to_extents( error = 0; if (ifp->if_bytes) { xfs_alloc_arg_t args; /* allocation arguments */ - xfs_buf_t *bp; /* buffer for extent list block */ - xfs_bmbt_rec_t *ep; /* extent list pointer */ + xfs_buf_t *bp; /* buffer for extent block */ + xfs_bmbt_rec_t *ep; /* extent record pointer */ args.tp = tp; args.mp = ip->i_mount; @@ -3492,8 +3422,8 @@ xfs_bmap_local_to_extents( xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); xfs_bmap_forkoff_reset(args.mp, ip, whichfork); xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); - xfs_iext_realloc(ip, 1, whichfork); - ep = ifp->if_u1.if_extents; + xfs_iext_add(ifp, 0, 1); + ep = xfs_iext_get_ext(ifp, 0); xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork); XFS_IFORK_NEXT_SET(ip, whichfork, 1); @@ -3518,7 +3448,7 @@ xfs_bmbt_rec_t * /* pointer to found extent entry */ xfs_bmap_do_search_extents( xfs_bmbt_rec_t *base, /* base of extent list */ xfs_extnum_t lastx, /* last extent index used */ - xfs_extnum_t nextents, /* extent list size */ + xfs_extnum_t nextents, /* number of file extents */ xfs_fileoff_t bno, /* block number searched for */ int *eofp, /* out: end of file found */ xfs_extnum_t *lastxp, /* out: last extent index */ @@ -3569,9 +3499,9 @@ xfs_bmap_do_search_extents( got.br_blockcount = xfs_bmbt_get_blockcount(ep); *eofp = 0; } else { - /* binary search the extents array */ low = 0; high = nextents - 1; + /* binary search the extents array */ while (low <= high) { XFS_STATS_INC(xs_cmp_exlist); lastx = (low + high) >> 1; @@ -3641,8 +3571,8 @@ xfs_bmap_search_extents( xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_rec_t *base; /* base of extent list */ xfs_extnum_t lastx; /* last extent index used */ - xfs_extnum_t nextents; /* extent list size */ - xfs_bmbt_rec_t *ep; /* extent list entry pointer */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_bmbt_rec_t *ep; /* extent record pointer */ int rt; /* realtime flag */ XFS_STATS_INC(xs_look_exlist); @@ -3732,7 +3662,7 @@ xfs_bmap_trace_addentry( } /* - * Add bmap trace entry prior to a call to xfs_bmap_delete_exlist. + * Add bmap trace entry prior to a call to xfs_iext_remove. */ STATIC void xfs_bmap_trace_delete( @@ -3747,13 +3677,13 @@ xfs_bmap_trace_delete( ifp = XFS_IFORK_PTR(ip, whichfork); xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_DELETE, fname, desc, ip, idx, - cnt, &ifp->if_u1.if_extents[idx], - cnt == 2 ? &ifp->if_u1.if_extents[idx + 1] : NULL, + cnt, xfs_iext_get_ext(ifp, idx), + cnt == 2 ? 
xfs_iext_get_ext(ifp, idx + 1) : NULL, whichfork); } /* - * Add bmap trace entry prior to a call to xfs_bmap_insert_exlist, or + * Add bmap trace entry prior to a call to xfs_iext_insert, or * reading in the extents list from the disk (in the btree). */ STATIC void @@ -3783,7 +3713,7 @@ xfs_bmap_trace_insert( } /* - * Add bmap trace entry after updating an extent list entry in place. + * Add bmap trace entry after updating an extent record in place. */ STATIC void xfs_bmap_trace_post_update( @@ -3797,11 +3727,11 @@ xfs_bmap_trace_post_update( ifp = XFS_IFORK_PTR(ip, whichfork); xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_POST_UP, fname, desc, ip, idx, - 1, &ifp->if_u1.if_extents[idx], NULL, whichfork); + 1, xfs_iext_get_ext(ifp, idx), NULL, whichfork); } /* - * Add bmap trace entry prior to updating an extent list entry in place. + * Add bmap trace entry prior to updating an extent record in place. */ STATIC void xfs_bmap_trace_pre_update( @@ -3815,7 +3745,7 @@ xfs_bmap_trace_pre_update( ifp = XFS_IFORK_PTR(ip, whichfork); xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_PRE_UP, fname, desc, ip, idx, 1, - &ifp->if_u1.if_extents[idx], NULL, whichfork); + xfs_iext_get_ext(ifp, idx), NULL, whichfork); } #endif /* XFS_BMAP_TRACE */ @@ -3892,7 +3822,7 @@ xfs_bmap_add_attrfork( int rsvd) /* xact may use reserved blks */ { xfs_fsblock_t firstblock; /* 1st block/ag allocated */ - xfs_bmap_free_t flist; /* freed extent list */ + xfs_bmap_free_t flist; /* freed extent records */ xfs_mount_t *mp; /* mount structure */ xfs_trans_t *tp; /* transaction pointer */ unsigned long s; /* spinlock spl value */ @@ -4146,7 +4076,7 @@ xfs_bmap_finish( xfs_efd_log_item_t *efd; /* extent free data */ xfs_efi_log_item_t *efi; /* extent free intention */ int error; /* error return value */ - xfs_bmap_free_item_t *free; /* free extent list item */ + xfs_bmap_free_item_t *free; /* free extent item */ unsigned int logres; /* new log reservation */ unsigned int logcount; /* new log count */ xfs_mount_t *mp; /* filesystem mount structure */ @@ -4242,9 +4172,9 @@ xfs_bmap_first_unused( xfs_fileoff_t *first_unused, /* unused block */ int whichfork) /* data or attr fork */ { - xfs_bmbt_rec_t *base; /* base of extent array */ xfs_bmbt_rec_t *ep; /* pointer to an extent entry */ int error; /* error return value */ + int idx; /* extent record index */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_fileoff_t lastaddr; /* last block number seen */ xfs_fileoff_t lowest; /* lowest useful block */ @@ -4265,10 +4195,8 @@ xfs_bmap_first_unused( return error; lowest = *first_unused; nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - base = &ifp->if_u1.if_extents[0]; - for (lastaddr = 0, max = lowest, ep = base; - ep < &base[nextents]; - ep++) { + for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { + ep = xfs_iext_get_ext(ifp, idx); off = xfs_bmbt_get_startoff(ep); /* * See if the hole before this extent will work. @@ -4287,8 +4215,8 @@ xfs_bmap_first_unused( /* * Returns the file-relative block number of the last block + 1 before * last_block (input value) in the file. - * This is not based on i_size, it is based on the extent list. - * Returns 0 for local files, as they do not have an extent list. + * This is not based on i_size, it is based on the extent records. + * Returns 0 for local files, as they do not have extent records. */ int /* error */ xfs_bmap_last_before( @@ -4335,8 +4263,8 @@ xfs_bmap_last_before( /* * Returns the file-relative block number of the first block past eof in - * the file. 
This is not based on i_size, it is based on the extent list. - * Returns 0 for local files, as they do not have an extent list. + * the file. This is not based on i_size, it is based on the extent records. + * Returns 0 for local files, as they do not have extent records. */ int /* error */ xfs_bmap_last_offset( @@ -4345,7 +4273,6 @@ xfs_bmap_last_offset( xfs_fileoff_t *last_block, /* last block */ int whichfork) /* data or attr fork */ { - xfs_bmbt_rec_t *base; /* base of extent array */ xfs_bmbt_rec_t *ep; /* pointer to last extent */ int error; /* error return value */ xfs_ifork_t *ifp; /* inode fork pointer */ @@ -4368,9 +4295,7 @@ xfs_bmap_last_offset( *last_block = 0; return 0; } - base = &ifp->if_u1.if_extents[0]; - ASSERT(base != NULL); - ep = &base[nextents - 1]; + ep = xfs_iext_get_ext(ifp, nextents - 1); *last_block = xfs_bmbt_get_startoff(ep) + xfs_bmbt_get_blockcount(ep); return 0; } @@ -4400,7 +4325,7 @@ xfs_bmap_one_block( return 0; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(ifp->if_flags & XFS_IFEXTENTS); - ep = ifp->if_u1.if_extents; + ep = xfs_iext_get_ext(ifp, 0); xfs_bmbt_get_all(ep, &s); rval = s.br_startoff == 0 && s.br_blockcount == 1; if (rval && whichfork == XFS_DATA_FORK) @@ -4435,7 +4360,6 @@ xfs_bmap_read_extents( xfs_bmbt_ptr_t *pp; /* pointer to block address */ /* REFERENCED */ xfs_extnum_t room; /* number of entries there's room for */ - xfs_bmbt_rec_t *trp; /* target record pointer */ bno = NULLFSBLOCK; mp = ip->i_mount; @@ -4478,16 +4402,16 @@ xfs_bmap_read_extents( /* * Here with bp and block set to the leftmost leaf node in the tree. */ - room = ifp->if_bytes / (uint)sizeof(*trp); - trp = ifp->if_u1.if_extents; + room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); i = 0; /* - * Loop over all leaf nodes. Copy information to the extent list. + * Loop over all leaf nodes. Copy information to the extent records. */ for (;;) { - xfs_bmbt_rec_t *frp, *temp; + xfs_bmbt_rec_t *frp, *trp; xfs_fsblock_t nextbno; xfs_extnum_t num_recs; + xfs_extnum_t start; num_recs = be16_to_cpu(block->bb_numrecs); @@ -4511,12 +4435,13 @@ xfs_bmap_read_extents( if (nextbno != NULLFSBLOCK) xfs_btree_reada_bufl(mp, nextbno, 1); /* - * Copy records into the extent list. + * Copy records into the extent records. */ frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1, mp->m_bmap_dmxr[0]); - temp = trp; - for (j = 0; j < num_recs; j++, frp++, trp++) { + start = i; + for (j = 0; j < num_recs; j++, i++, frp++) { + trp = xfs_iext_get_ext(ifp, i); trp->l0 = INT_GET(frp->l0, ARCH_CONVERT); trp->l1 = INT_GET(frp->l1, ARCH_CONVERT); } @@ -4526,14 +4451,14 @@ xfs_bmap_read_extents( * any "older" data bmap btree records for a * set bit in the "extent flag" position. */ - if (unlikely(xfs_check_nostate_extents(temp, num_recs))) { + if (unlikely(xfs_check_nostate_extents(ifp, + start, num_recs))) { XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", XFS_ERRLEVEL_LOW, ip->i_mount); goto error0; } } - i += num_recs; xfs_trans_brelse(tp, bp); bno = nextbno; /* @@ -4546,7 +4471,7 @@ xfs_bmap_read_extents( return error; block = XFS_BUF_TO_BMBT_BLOCK(bp); } - ASSERT(i == ifp->if_bytes / (uint)sizeof(*trp)); + ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); xfs_bmap_trace_exlist(fname, ip, i, whichfork); return 0; @@ -4557,7 +4482,7 @@ error0: #ifdef XFS_BMAP_TRACE /* - * Add bmap trace insert entries for all the contents of the extent list. + * Add bmap trace insert entries for all the contents of the extent records. 
*/ void xfs_bmap_trace_exlist( @@ -4566,16 +4491,15 @@ xfs_bmap_trace_exlist( xfs_extnum_t cnt, /* count of entries in the list */ int whichfork) /* data or attr fork */ { - xfs_bmbt_rec_t *base; /* base of extent list */ - xfs_bmbt_rec_t *ep; /* current entry in extent list */ - xfs_extnum_t idx; /* extent list entry number */ + xfs_bmbt_rec_t *ep; /* current extent record */ + xfs_extnum_t idx; /* extent record index */ xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_irec_t s; /* extent list record */ + xfs_bmbt_irec_t s; /* file extent record */ ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(cnt == ifp->if_bytes / (uint)sizeof(*base)); - base = ifp->if_u1.if_extents; - for (idx = 0, ep = base; idx < cnt; idx++, ep++) { + ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + for (idx = 0; idx < cnt; idx++) { + ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_get_all(ep, &s); xfs_bmap_trace_insert(fname, "exlist", ip, idx, 1, &s, NULL, whichfork); @@ -4661,14 +4585,14 @@ xfs_bmapi( xfs_bmalloca_t bma; /* args for xfs_bmap_alloc */ xfs_btree_cur_t *cur; /* bmap btree cursor */ xfs_fileoff_t end; /* end of mapped file region */ - int eof; /* we've hit the end of extent list */ + int eof; /* we've hit the end of extents */ char contig; /* allocation must be one extent */ char delay; /* this request is for delayed alloc */ char exact; /* don't do all of wasdelayed extent */ char convert; /* unwritten extent I/O completion */ - xfs_bmbt_rec_t *ep; /* extent list entry pointer */ + xfs_bmbt_rec_t *ep; /* extent record pointer */ int error; /* error return */ - xfs_bmbt_irec_t got; /* current extent list record */ + xfs_bmbt_irec_t got; /* current file extent record */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_extlen_t indlen; /* indirect blocks length */ xfs_extnum_t lastx; /* last useful extent number */ @@ -4680,7 +4604,7 @@ xfs_bmapi( int nallocs; /* number of extents alloc\'d */ xfs_extnum_t nextents; /* number of extents in file */ xfs_fileoff_t obno; /* old block number (offset) */ - xfs_bmbt_irec_t prev; /* previous extent list record */ + xfs_bmbt_irec_t prev; /* previous file extent record */ int tmp_logflags; /* temp flags holder */ int whichfork; /* data or attr fork */ char inhole; /* current location is hole in file */ @@ -4805,7 +4729,7 @@ xfs_bmapi( alen = (xfs_extlen_t)got.br_blockcount; aoff = got.br_startoff; if (lastx != NULLEXTNUM && lastx) { - ep = &ifp->if_u1.if_extents[lastx - 1]; + ep = xfs_iext_get_ext(ifp, lastx - 1); xfs_bmbt_get_all(ep, &prev); } } else if (wasdelay) { @@ -5016,7 +4940,7 @@ xfs_bmapi( if (error) goto error0; lastx = ifp->if_lastex; - ep = &ifp->if_u1.if_extents[lastx]; + ep = xfs_iext_get_ext(ifp, lastx); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); xfs_bmbt_get_all(ep, &got); ASSERT(got.br_startoff <= aoff); @@ -5112,7 +5036,7 @@ xfs_bmapi( if (error) goto error0; lastx = ifp->if_lastex; - ep = &ifp->if_u1.if_extents[lastx]; + ep = xfs_iext_get_ext(ifp, lastx); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); xfs_bmbt_get_all(ep, &got); /* @@ -5168,8 +5092,7 @@ xfs_bmapi( /* * Else go on to the next record. */ - ep++; - lastx++; + ep = xfs_iext_get_ext(ifp, ++lastx); if (lastx >= nextents) { eof = 1; prev = got; @@ -5199,7 +5122,7 @@ xfs_bmapi( error0: /* * Log everything. Do this after conversion, there's no point in - * logging the extent list if we've converted to btree format. + * logging the extent records if we've converted to btree format. 
*/ if ((logflags & XFS_ILOG_FEXT(whichfork)) && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) @@ -5252,12 +5175,12 @@ xfs_bmapi_single( xfs_fsblock_t *fsb, /* output: mapped block */ xfs_fileoff_t bno) /* starting file offs. mapped */ { - int eof; /* we've hit the end of extent list */ + int eof; /* we've hit the end of extents */ int error; /* error return */ - xfs_bmbt_irec_t got; /* current extent list record */ + xfs_bmbt_irec_t got; /* current file extent record */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_extnum_t lastx; /* last useful extent number */ - xfs_bmbt_irec_t prev; /* previous extent list record */ + xfs_bmbt_irec_t prev; /* previous file extent record */ ifp = XFS_IFORK_PTR(ip, whichfork); if (unlikely( @@ -5312,18 +5235,18 @@ xfs_bunmapi( xfs_btree_cur_t *cur; /* bmap btree cursor */ xfs_bmbt_irec_t del; /* extent being deleted */ int eof; /* is deleting at eof */ - xfs_bmbt_rec_t *ep; /* extent list entry pointer */ + xfs_bmbt_rec_t *ep; /* extent record pointer */ int error; /* error return value */ xfs_extnum_t extno; /* extent number in list */ - xfs_bmbt_irec_t got; /* current extent list entry */ + xfs_bmbt_irec_t got; /* current extent record */ xfs_ifork_t *ifp; /* inode fork pointer */ int isrt; /* freeing in rt area */ xfs_extnum_t lastx; /* last extent index used */ int logflags; /* transaction logging flags */ xfs_extlen_t mod; /* rt extent offset */ xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* size of extent list */ - xfs_bmbt_irec_t prev; /* previous extent list entry */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_bmbt_irec_t prev; /* previous extent record */ xfs_fileoff_t start; /* first file offset deleted */ int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ @@ -5369,7 +5292,7 @@ xfs_bunmapi( * file, back up to the last block if so... */ if (eof) { - ep = &ifp->if_u1.if_extents[--lastx]; + ep = xfs_iext_get_ext(ifp, --lastx); xfs_bmbt_get_all(ep, &got); bno = got.br_startoff + got.br_blockcount - 1; } @@ -5393,7 +5316,7 @@ xfs_bunmapi( if (got.br_startoff > bno) { if (--lastx < 0) break; - ep--; + ep = xfs_iext_get_ext(ifp, lastx); xfs_bmbt_get_all(ep, &got); } /* @@ -5440,7 +5363,8 @@ xfs_bunmapi( del.br_blockcount : mod; if (bno < got.br_startoff) { if (--lastx >= 0) - xfs_bmbt_get_all(--ep, &got); + xfs_bmbt_get_all(xfs_iext_get_ext( + ifp, lastx), &got); } continue; } @@ -5500,7 +5424,8 @@ xfs_bunmapi( * try again. */ ASSERT(lastx > 0); - xfs_bmbt_get_all(ep - 1, &prev); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, + lastx - 1), &prev); ASSERT(prev.br_state == XFS_EXT_NORM); ASSERT(!ISNULLSTARTBLOCK(prev.br_startblock)); ASSERT(del.br_startblock == @@ -5587,12 +5512,12 @@ nodelete: * If not done go on to the next (previous) record. * Reset ep in case the extents array was re-alloced. */ - ep = &ifp->if_u1.if_extents[lastx]; + ep = xfs_iext_get_ext(ifp, lastx); if (bno != (xfs_fileoff_t)-1 && bno >= start) { if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) || xfs_bmbt_get_startoff(ep) > bno) { - lastx--; - ep--; + if (--lastx >= 0) + ep = xfs_iext_get_ext(ifp, lastx); } if (lastx >= 0) xfs_bmbt_get_all(ep, &got); @@ -5636,7 +5561,7 @@ nodelete: error0: /* * Log everything. Do this after conversion, there's no point in - * logging the extent list if we've converted to btree format. + * logging the extent records if we've converted to btree format. 
*/ if ((logflags & XFS_ILOG_FEXT(whichfork)) && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) @@ -5892,9 +5817,9 @@ xfs_bmap_isaeof( { int error; /* error return value */ xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_rec_t *lastrec; /* extent list entry pointer */ - xfs_extnum_t nextents; /* size of extent list */ - xfs_bmbt_irec_t s; /* expanded extent list entry */ + xfs_bmbt_rec_t *lastrec; /* extent record pointer */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_bmbt_irec_t s; /* expanded extent record */ ASSERT(whichfork == XFS_DATA_FORK); ifp = XFS_IFORK_PTR(ip, whichfork); @@ -5909,7 +5834,7 @@ xfs_bmap_isaeof( /* * Go to the last extent */ - lastrec = &ifp->if_u1.if_extents[nextents - 1]; + lastrec = xfs_iext_get_ext(ifp, nextents - 1); xfs_bmbt_get_all(lastrec, &s); /* * Check we are allocating in the last extent (for delayed allocations) @@ -5936,8 +5861,8 @@ xfs_bmap_eof( xfs_fsblock_t blockcount; /* extent block count */ int error; /* error return value */ xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_rec_t *lastrec; /* extent list entry pointer */ - xfs_extnum_t nextents; /* size of extent list */ + xfs_bmbt_rec_t *lastrec; /* extent record pointer */ + xfs_extnum_t nextents; /* number of file extents */ xfs_fileoff_t startoff; /* extent starting file offset */ ASSERT(whichfork == XFS_DATA_FORK); @@ -5953,7 +5878,7 @@ xfs_bmap_eof( /* * Go to the last extent */ - lastrec = &ifp->if_u1.if_extents[nextents - 1]; + lastrec = xfs_iext_get_ext(ifp, nextents - 1); startoff = xfs_bmbt_get_startoff(lastrec); blockcount = xfs_bmbt_get_blockcount(lastrec); *eof = endoff >= startoff + blockcount; @@ -5969,18 +5894,21 @@ xfs_bmap_check_extents( xfs_inode_t *ip, /* incore inode pointer */ int whichfork) /* data or attr fork */ { - xfs_bmbt_rec_t *base; /* base of extents list */ xfs_bmbt_rec_t *ep; /* current extent entry */ + xfs_extnum_t idx; /* extent record index */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_extnum_t nextents; /* number of extents in list */ + xfs_bmbt_rec_t *nextp; /* next extent entry */ ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(ifp->if_flags & XFS_IFEXTENTS); - base = ifp->if_u1.if_extents; nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - for (ep = base; ep < &base[nextents - 1]; ep++) { + ep = xfs_iext_get_ext(ifp, 0); + for (idx = 0; idx < nextents - 1; idx++) { + nextp = xfs_iext_get_ext(ifp, idx + 1); xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep, - (void *)(ep + 1)); + (void *)(nextp)); + ep = nextp; } } @@ -6119,12 +6047,14 @@ xfs_bmap_check_leaf_extents( xfs_fsblock_t bno; /* block # of "block" */ xfs_buf_t *bp; /* buffer for "block" */ int error; /* error return value */ - xfs_extnum_t i=0; /* index into the extents list */ + xfs_extnum_t i=0, j; /* index into the extents list */ xfs_ifork_t *ifp; /* fork structure */ int level; /* btree level, for checking */ xfs_mount_t *mp; /* file system mount structure */ xfs_bmbt_ptr_t *pp; /* pointer to block address */ - xfs_bmbt_rec_t *ep, *lastp; /* extent pointers in block entry */ + xfs_bmbt_rec_t *ep; /* pointer to current extent */ + xfs_bmbt_rec_t *lastp; /* pointer to previous extent */ + xfs_bmbt_rec_t *nextp; /* pointer to next extent */ int bp_release = 0; if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) { @@ -6194,7 +6124,6 @@ xfs_bmap_check_leaf_extents( */ lastp = NULL; for (;;) { - xfs_bmbt_rec_t *frp; xfs_fsblock_t nextbno; xfs_extnum_t num_recs; @@ -6213,18 +6142,20 @@ xfs_bmap_check_leaf_extents( * conform with the first entry in 
this one. */ - frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, + ep = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1, mp->m_bmap_dmxr[0]); - - for (ep = frp;ep < frp + (num_recs - 1); ep++) { + for (j = 1; j < num_recs; j++) { + nextp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, + block, j + 1, mp->m_bmap_dmxr[0]); if (lastp) { xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)lastp, (void *)ep); } xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep, - (void *)(ep + 1)); + (void *)(nextp)); + lastp = ep; + ep = nextp; } - lastp = frp + num_recs - 1; /* For the next iteration */ i += num_recs; if (bp_release) { @@ -6288,7 +6219,7 @@ xfs_bmap_count_blocks( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { - if (unlikely(xfs_bmap_count_leaves(ifp->if_u1.if_extents, + if (unlikely(xfs_bmap_count_leaves(ifp, 0, ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), count) < 0)) { XFS_ERROR_REPORT("xfs_bmap_count_blocks(1)", @@ -6310,7 +6241,7 @@ xfs_bmap_count_blocks( ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks); bno = INT_GET(*pp, ARCH_CONVERT); - if (unlikely(xfs_bmap_count_tree(mp, tp, bno, level, count) < 0)) { + if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, mp); return XFS_ERROR(EFSCORRUPTED); @@ -6327,6 +6258,7 @@ int /* error */ xfs_bmap_count_tree( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ + xfs_ifork_t *ifp, /* inode fork pointer */ xfs_fsblock_t blockno, /* file system block number */ int levelin, /* level in btree */ int *count) /* Count of blocks */ @@ -6339,7 +6271,6 @@ xfs_bmap_count_tree( xfs_fsblock_t nextbno; xfs_bmbt_block_t *block, *nextblock; int numrecs; - xfs_bmbt_rec_t *frp; if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) return error; @@ -6364,7 +6295,7 @@ xfs_bmap_count_tree( xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); bno = INT_GET(*pp, ARCH_CONVERT); if (unlikely((error = - xfs_bmap_count_tree(mp, tp, bno, level, count)) < 0)) { + xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { xfs_trans_brelse(tp, bp); XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", XFS_ERRLEVEL_LOW, mp); @@ -6376,9 +6307,8 @@ xfs_bmap_count_tree( for (;;) { nextbno = be64_to_cpu(block->bb_rightsib); numrecs = be16_to_cpu(block->bb_numrecs); - frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, - xfs_bmbt, block, 1, mp->m_bmap_dmxr[0]); - if (unlikely(xfs_bmap_disk_count_leaves(frp, numrecs, count) < 0)) { + if (unlikely(xfs_bmap_disk_count_leaves(ifp, mp, + 0, block, numrecs, count) < 0)) { xfs_trans_brelse(tp, bp); XFS_ERROR_REPORT("xfs_bmap_count_tree(2)", XFS_ERRLEVEL_LOW, mp); @@ -6399,33 +6329,45 @@ xfs_bmap_count_tree( } /* - * Count leaf blocks given a pointer to an extent list. + * Count leaf blocks given a range of extent records. */ int xfs_bmap_count_leaves( - xfs_bmbt_rec_t *frp, + xfs_ifork_t *ifp, + xfs_extnum_t idx, int numrecs, int *count) { int b; + xfs_bmbt_rec_t *frp; - for ( b = 1; b <= numrecs; b++, frp++) + for (b = 0; b < numrecs; b++) { + frp = xfs_iext_get_ext(ifp, idx + b); *count += xfs_bmbt_get_blockcount(frp); + } return 0; } /* - * Count leaf blocks given a pointer to an extent list originally in btree format. + * Count leaf blocks given a range of extent records originally + * in btree format. 
*/ int xfs_bmap_disk_count_leaves( - xfs_bmbt_rec_t *frp, + xfs_ifork_t *ifp, + xfs_mount_t *mp, + xfs_extnum_t idx, + xfs_bmbt_block_t *block, int numrecs, int *count) { int b; + xfs_bmbt_rec_t *frp; - for ( b = 1; b <= numrecs; b++, frp++) + for (b = 1; b <= numrecs; b++) { + frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, + xfs_bmbt, block, idx + b, mp->m_bmap_dmxr[0]); *count += xfs_bmbt_disk_get_blockcount(frp); + } return 0; } diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 12cc63dfc2c4..4c05f95452c2 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -20,6 +20,7 @@ struct getbmap; struct xfs_bmbt_irec; +struct xfs_ifork; struct xfs_inode; struct xfs_mount; struct xfs_trans; @@ -347,7 +348,8 @@ xfs_bmap_count_blocks( */ int xfs_check_nostate_extents( - xfs_bmbt_rec_t *ep, + struct xfs_ifork *ifp, + xfs_extnum_t idx, xfs_extnum_t num); #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 3f1383d160e8..bea44709afbe 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2754,7 +2754,7 @@ xfs_bmbt_update( } /* - * Check an extent list, which has just been read, for + * Check extent records, which have just been read, for * any bit in the extent flag field. ASSERT on debug * kernels, as this condition should not occur. * Return an error condition (1) if any flags found, @@ -2763,10 +2763,14 @@ xfs_bmbt_update( int xfs_check_nostate_extents( - xfs_bmbt_rec_t *ep, + xfs_ifork_t *ifp, + xfs_extnum_t idx, xfs_extnum_t num) { - for (; num > 0; num--, ep++) { + xfs_bmbt_rec_t *ep; + + for (; num > 0; num--, idx++) { + ep = xfs_iext_get_ext(ifp, idx); if ((ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN)) != 0) { ASSERT(0); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1d7f5a7e063e..6459395a0e40 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -76,16 +76,18 @@ STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); */ STATIC void xfs_validate_extents( - xfs_bmbt_rec_t *ep, + xfs_ifork_t *ifp, int nrecs, int disk, xfs_exntfmt_t fmt) { + xfs_bmbt_rec_t *ep; xfs_bmbt_irec_t irec; xfs_bmbt_rec_t rec; int i; for (i = 0; i < nrecs; i++) { + ep = xfs_iext_get_ext(ifp, i); rec.l0 = get_unaligned((__uint64_t*)&ep->l0); rec.l1 = get_unaligned((__uint64_t*)&ep->l1); if (disk) @@ -94,11 +96,10 @@ xfs_validate_extents( xfs_bmbt_get_all(&rec, &irec); if (fmt == XFS_EXTFMT_NOSTATE) ASSERT(irec.br_state == XFS_EXT_NORM); - ep++; } } #else /* DEBUG */ -#define xfs_validate_extents(ep, nrecs, disk, fmt) +#define xfs_validate_extents(ifp, nrecs, disk, fmt) #endif /* DEBUG */ /* @@ -597,7 +598,6 @@ xfs_iformat_extents( xfs_bmbt_rec_t *ep, *dp; xfs_ifork_t *ifp; int nex; - int real_size; int size; int i; @@ -619,23 +619,20 @@ xfs_iformat_extents( return XFS_ERROR(EFSCORRUPTED); } - real_size = 0; + ifp->if_real_bytes = 0; if (nex == 0) ifp->if_u1.if_extents = NULL; else if (nex <= XFS_INLINE_EXTS) ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - else { - ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); - ASSERT(ifp->if_u1.if_extents != NULL); - real_size = size; - } + else + xfs_iext_add(ifp, 0, nex); + ifp->if_bytes = size; - ifp->if_real_bytes = real_size; if (size) { dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); - xfs_validate_extents(dp, nex, 1, XFS_EXTFMT_INODE(ip)); - ep = ifp->if_u1.if_extents; - for (i = 0; i < nex; i++, ep++, dp++) { + xfs_validate_extents(ifp, nex, 1, XFS_EXTFMT_INODE(ip)); + for (i = 0; i < nex; i++, dp++) { + ep = xfs_iext_get_ext(ifp, i); ep->l0 = 
INT_GET(get_unaligned((__uint64_t*)&dp->l0), ARCH_CONVERT); ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1), @@ -646,7 +643,7 @@ xfs_iformat_extents( if (whichfork != XFS_DATA_FORK || XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) if (unlikely(xfs_check_nostate_extents( - ifp->if_u1.if_extents, nex))) { + ifp, 0, nex))) { XFS_ERROR_REPORT("xfs_iformat_extents(2)", XFS_ERRLEVEL_LOW, ip->i_mount); @@ -1015,6 +1012,7 @@ xfs_iread_extents( { int error; xfs_ifork_t *ifp; + xfs_extnum_t nextents; size_t size; if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { @@ -1022,26 +1020,24 @@ xfs_iread_extents( ip->i_mount); return XFS_ERROR(EFSCORRUPTED); } - size = XFS_IFORK_NEXTENTS(ip, whichfork) * (uint)sizeof(xfs_bmbt_rec_t); + nextents = XFS_IFORK_NEXTENTS(ip, whichfork); + size = nextents * sizeof(xfs_bmbt_rec_t); ifp = XFS_IFORK_PTR(ip, whichfork); + /* * We know that the size is valid (it's checked in iformat_btree) */ - ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); - ASSERT(ifp->if_u1.if_extents != NULL); ifp->if_lastex = NULLEXTNUM; - ifp->if_bytes = ifp->if_real_bytes = (int)size; + ifp->if_bytes = ifp->if_real_bytes = 0; ifp->if_flags |= XFS_IFEXTENTS; + xfs_iext_add(ifp, 0, nextents); error = xfs_bmap_read_extents(tp, ip, whichfork); if (error) { - kmem_free(ifp->if_u1.if_extents, size); - ifp->if_u1.if_extents = NULL; - ifp->if_bytes = ifp->if_real_bytes = 0; + xfs_iext_destroy(ifp); ifp->if_flags &= ~XFS_IFEXTENTS; return error; } - xfs_validate_extents((xfs_bmbt_rec_t *)ifp->if_u1.if_extents, - XFS_IFORK_NEXTENTS(ip, whichfork), 0, XFS_EXTFMT_INODE(ip)); + xfs_validate_extents(ifp, nextents, 0, XFS_EXTFMT_INODE(ip)); return 0; } @@ -2475,92 +2471,6 @@ xfs_iroot_realloc( } -/* - * This is called when the amount of space needed for if_extents - * is increased or decreased. The change in size is indicated by - * the number of extents that need to be added or deleted in the - * ext_diff parameter. - * - * If the amount of space needed has decreased below the size of the - * inline buffer, then switch to using the inline buffer. Otherwise, - * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer - * to what is needed. - * - * ip -- the inode whose if_extents area is changing - * ext_diff -- the change in the number of extents, positive or negative, - * requested for the if_extents array. - */ -void -xfs_iext_realloc( - xfs_inode_t *ip, - int ext_diff, - int whichfork) -{ - int byte_diff; - xfs_ifork_t *ifp; - int new_size; - uint rnew_size; - - if (ext_diff == 0) { - return; - } - - ifp = XFS_IFORK_PTR(ip, whichfork); - byte_diff = ext_diff * (uint)sizeof(xfs_bmbt_rec_t); - new_size = (int)ifp->if_bytes + byte_diff; - ASSERT(new_size >= 0); - - if (new_size == 0) { - if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) { - ASSERT(ifp->if_real_bytes != 0); - kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); - } - ifp->if_u1.if_extents = NULL; - rnew_size = 0; - } else if (new_size <= sizeof(ifp->if_u2.if_inline_ext)) { - /* - * If the valid extents can fit in if_inline_ext, - * copy them from the malloc'd vector and free it. - */ - if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) { - /* - * For now, empty files are format EXTENTS, - * so the if_extents pointer is null. 
- */ - if (ifp->if_u1.if_extents) { - memcpy(ifp->if_u2.if_inline_ext, - ifp->if_u1.if_extents, new_size); - kmem_free(ifp->if_u1.if_extents, - ifp->if_real_bytes); - } - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - } - rnew_size = 0; - } else { - rnew_size = new_size; - if ((rnew_size & (rnew_size - 1)) != 0) - rnew_size = xfs_iroundup(rnew_size); - /* - * Stuck with malloc/realloc. - */ - if (ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext) { - ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) - kmem_alloc(rnew_size, KM_SLEEP); - memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, - sizeof(ifp->if_u2.if_inline_ext)); - } else if (rnew_size != ifp->if_real_bytes) { - ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) - kmem_realloc(ifp->if_u1.if_extents, - rnew_size, - ifp->if_real_bytes, - KM_NOFS); - } - } - ifp->if_real_bytes = rnew_size; - ifp->if_bytes = new_size; -} - - /* * This is called when the amount of space needed for if_data * is increased or decreased. The change in size is indicated by @@ -2723,9 +2633,7 @@ xfs_idestroy_fork( (ifp->if_u1.if_extents != NULL) && (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) { ASSERT(ifp->if_real_bytes != 0); - kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); - ifp->if_u1.if_extents = NULL; - ifp->if_real_bytes = 0; + xfs_iext_destroy(ifp); } ASSERT(ifp->if_u1.if_extents == NULL || ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); @@ -2902,16 +2810,15 @@ xfs_iextents_copy( * the delayed ones. There must be at least one * non-delayed extent. */ - ep = ifp->if_u1.if_extents; dest_ep = buffer; copied = 0; for (i = 0; i < nrecs; i++) { + ep = xfs_iext_get_ext(ifp, i); start_block = xfs_bmbt_get_startblock(ep); if (ISNULLSTARTBLOCK(start_block)) { /* * It's a delayed allocation extent, so skip it. */ - ep++; continue; } @@ -2921,11 +2828,10 @@ xfs_iextents_copy( put_unaligned(INT_GET(ep->l1, ARCH_CONVERT), (__uint64_t*)&dest_ep->l1); dest_ep++; - ep++; copied++; } ASSERT(copied != 0); - xfs_validate_extents(buffer, copied, 1, XFS_EXTFMT_INODE(ip)); + xfs_validate_extents(ifp, copied, 1, XFS_EXTFMT_INODE(ip)); return (copied * (uint)sizeof(xfs_bmbt_rec_t)); } @@ -2995,8 +2901,10 @@ xfs_iflush_fork( case XFS_DINODE_FMT_EXTENTS: ASSERT((ifp->if_flags & XFS_IFEXTENTS) || !(iip->ili_format.ilf_fields & extflag[whichfork])); - ASSERT((ifp->if_u1.if_extents != NULL) || (ifp->if_bytes == 0)); - ASSERT((ifp->if_u1.if_extents == NULL) || (ifp->if_bytes > 0)); + ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || + (ifp->if_bytes == 0)); + ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || + (ifp->if_bytes > 0)); if ((iip->ili_format.ilf_fields & extflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); @@ -3704,3 +3612,327 @@ xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); } #endif + +/* + * Return a pointer to the extent record at file index idx. + */ +xfs_bmbt_rec_t * +xfs_iext_get_ext( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx) /* index of target extent */ +{ + ASSERT(idx >= 0); + if (ifp->if_bytes) { + return &ifp->if_u1.if_extents[idx]; + } else { + return NULL; + } +} + +/* + * Insert new item(s) into the extent records for incore inode + * fork 'ifp'. 'count' new items are inserted at index 'idx'. 
+ */ +void +xfs_iext_insert( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* starting index of new items */ + xfs_extnum_t count, /* number of inserted items */ + xfs_bmbt_irec_t *new) /* items to insert */ +{ + xfs_bmbt_rec_t *ep; /* extent record pointer */ + xfs_extnum_t i; /* extent record index */ + + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + xfs_iext_add(ifp, idx, count); + for (i = idx; i < idx + count; i++, new++) { + ep = xfs_iext_get_ext(ifp, i); + xfs_bmbt_set_all(ep, new); + } +} + +/* + * This is called when the amount of space required for incore file + * extents needs to be increased. The ext_diff parameter stores the + * number of new extents being added and the idx parameter contains + * the extent index where the new extents will be added. If the new + * extents are being appended, then we just need to (re)allocate and + * initialize the space. Otherwise, if the new extents are being + * inserted into the middle of the existing entries, a bit more work + * is required to make room for the new extents to be inserted. The + * caller is responsible for filling in the new extent entries upon + * return. + */ +void +xfs_iext_add( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin adding exts */ + int ext_diff) /* nubmer of extents to add */ +{ + int byte_diff; /* new bytes being added */ + int new_size; /* size of extents after adding */ + xfs_extnum_t nextents; /* number of extents in file */ + + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT((idx >= 0) && (idx <= nextents)); + byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); + new_size = ifp->if_bytes + byte_diff; + /* + * If the new number of extents (nextents + ext_diff) + * fits inside the inode, then continue to use the inline + * extent buffer. + */ + if (nextents + ext_diff <= XFS_INLINE_EXTS) { + if (idx < nextents) { + memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], + &ifp->if_u2.if_inline_ext[idx], + (nextents - idx) * sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); + } + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + ifp->if_real_bytes = 0; + } + /* + * Otherwise use a linear (direct) extent list. + * If the extents are currently inside the inode, + * xfs_iext_realloc_direct will switch us from + * inline to direct extent allocation mode. + */ + else { + xfs_iext_realloc_direct(ifp, new_size); + if (idx < nextents) { + memmove(&ifp->if_u1.if_extents[idx + ext_diff], + &ifp->if_u1.if_extents[idx], + (nextents - idx) * sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); + } + } + ifp->if_bytes = new_size; +} + +/* + * This is called when the amount of space required for incore file + * extents needs to be decreased. The ext_diff parameter stores the + * number of extents to be removed and the idx parameter contains + * the extent index where the extents will be removed from. 
+ */ +void +xfs_iext_remove( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int new_size; /* size of extents after removal */ + + ASSERT(ext_diff > 0); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); + + if (new_size == 0) { + xfs_iext_destroy(ifp); + } else if (ifp->if_real_bytes) { + xfs_iext_remove_direct(ifp, idx, ext_diff); + } else { + xfs_iext_remove_inline(ifp, idx, ext_diff); + } + ifp->if_bytes = new_size; +} + +/* + * This removes ext_diff extents from the inline buffer, beginning + * at extent index idx. + */ +void +xfs_iext_remove_inline( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + int nextents; /* number of extents in file */ + + ASSERT(idx < XFS_INLINE_EXTS); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(((nextents - ext_diff) > 0) && + (nextents - ext_diff) < XFS_INLINE_EXTS); + + if (idx + ext_diff < nextents) { + memmove(&ifp->if_u2.if_inline_ext[idx], + &ifp->if_u2.if_inline_ext[idx + ext_diff], + (nextents - (idx + ext_diff)) * + sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], + 0, ext_diff * sizeof(xfs_bmbt_rec_t)); + } else { + memset(&ifp->if_u2.if_inline_ext[idx], 0, + ext_diff * sizeof(xfs_bmbt_rec_t)); + } +} + +/* + * This removes ext_diff extents from a linear (direct) extent list, + * beginning at extent index idx. If the extents are being removed + * from the end of the list (ie. truncate) then we just need to re- + * allocate the list to remove the extra space. Otherwise, if the + * extents are being removed from the middle of the existing extent + * entries, then we first need to move the extent records beginning + * at idx + ext_diff up in the list to overwrite the records being + * removed, then remove the extra space via kmem_realloc. + */ +void +xfs_iext_remove_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int new_size; /* size of extents after removal */ + + new_size = ifp->if_bytes - + (ext_diff * sizeof(xfs_bmbt_rec_t)); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + + if (new_size == 0) { + xfs_iext_destroy(ifp); + return; + } + /* Move extents up in the list (if needed) */ + if (idx + ext_diff < nextents) { + memmove(&ifp->if_u1.if_extents[idx], + &ifp->if_u1.if_extents[idx + ext_diff], + (nextents - (idx + ext_diff)) * + sizeof(xfs_bmbt_rec_t)); + } + memset(&ifp->if_u1.if_extents[nextents - ext_diff], + 0, ext_diff * sizeof(xfs_bmbt_rec_t)); + /* + * Reallocate the direct extent list. If the extents + * will fit inside the inode then xfs_iext_realloc_direct + * will switch from direct to inline extent allocation + * mode for us. + */ + xfs_iext_realloc_direct(ifp, new_size); + ifp->if_bytes = new_size; +} + +/* + * Create, destroy, or resize a linear (direct) block of extents. 
+ */ +void +xfs_iext_realloc_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* new size of extents */ +{ + int rnew_size; /* real new size of extents */ + + rnew_size = new_size; + + /* Free extent records */ + if (new_size == 0) { + xfs_iext_destroy(ifp); + } + /* Resize direct extent list and zero any new bytes */ + else if (ifp->if_real_bytes) { + /* Check if extents will fit inside the inode */ + if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { + xfs_iext_direct_to_inline(ifp, new_size / + (uint)sizeof(xfs_bmbt_rec_t)); + ifp->if_bytes = new_size; + return; + } + if ((new_size & (new_size - 1)) != 0) { + rnew_size = xfs_iroundup(new_size); + } + if (rnew_size != ifp->if_real_bytes) { + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_realloc(ifp->if_u1.if_extents, + rnew_size, + ifp->if_real_bytes, + KM_SLEEP); + } + if (rnew_size > ifp->if_real_bytes) { + memset(&ifp->if_u1.if_extents[ifp->if_bytes / + (uint)sizeof(xfs_bmbt_rec_t)], 0, + rnew_size - ifp->if_real_bytes); + } + } + /* + * Switch from the inline extent buffer to a direct + * extent list. Be sure to include the inline extent + * bytes in new_size. + */ + else { + new_size += ifp->if_bytes; + if ((new_size & (new_size - 1)) != 0) { + rnew_size = xfs_iroundup(new_size); + } + xfs_iext_inline_to_direct(ifp, rnew_size); + } + ifp->if_real_bytes = rnew_size; + ifp->if_bytes = new_size; +} + +/* + * Switch from linear (direct) extent records to inline buffer. + */ +void +xfs_iext_direct_to_inline( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t nextents) /* number of extents in file */ +{ + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + ASSERT(nextents <= XFS_INLINE_EXTS); + /* + * The inline buffer was zeroed when we switched + * from inline to direct extent allocation mode, + * so we don't need to clear it here. + */ + memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, + nextents * sizeof(xfs_bmbt_rec_t)); + kmem_free(ifp->if_u1.if_extents, KM_SLEEP); + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + ifp->if_real_bytes = 0; +} + +/* + * Switch from inline buffer to linear (direct) extent records. + * new_size should already be rounded up to the next power of 2 + * by the caller (when appropriate), so use new_size as it is. + * However, since new_size may be rounded up, we can't update + * if_bytes here. It is the caller's responsibility to update + * if_bytes upon return. + */ +void +xfs_iext_inline_to_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* number of extents in file */ +{ + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_alloc(new_size, KM_SLEEP); + memset(ifp->if_u1.if_extents, 0, new_size); + if (ifp->if_bytes) { + memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, + ifp->if_bytes); + memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * + sizeof(xfs_bmbt_rec_t)); + } + ifp->if_real_bytes = new_size; +} + +/* + * Free incore file extents. 
+ */ +void +xfs_iext_destroy( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + if (ifp->if_real_bytes) { + kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); + } else if (ifp->if_bytes) { + memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * + sizeof(xfs_bmbt_rec_t)); + } + ifp->if_u1.if_extents = NULL; + ifp->if_real_bytes = 0; + ifp->if_bytes = 0; +} diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1cfbcf18ce86..740b73fabd2f 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -70,12 +70,6 @@ typedef struct xfs_ifork { */ #define XFS_IMAP_LOOKUP 0x1 -/* - * Maximum number of extent pointers in if_u1.if_extents. - */ -#define XFS_MAX_INCORE_EXTENTS 32768 - - #ifdef __KERNEL__ struct bhv_desc; struct cred; @@ -440,6 +434,18 @@ xfs_inode_t *xfs_vtoi(struct vnode *vp); void xfs_synchronize_atime(xfs_inode_t *); +xfs_bmbt_rec_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); +void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, + xfs_bmbt_irec_t *); +void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); +void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); +void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); +void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); +void xfs_iext_realloc_direct(xfs_ifork_t *, int); +void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); +void xfs_iext_inline_to_direct(xfs_ifork_t *, int); +void xfs_iext_destroy(xfs_ifork_t *); + #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) #ifdef DEBUG -- cgit v1.2.3 From 0293ce3a9fd1b34c933a96577a8ba737b681cf75 Mon Sep 17 00:00:00 2001 From: Mandy Kirkconnell Date: Tue, 14 Mar 2006 13:30:23 +1100 Subject: [XFS] 929045 567344 This mod introduces multi-level in-core file extent functionality, building upon the new layout introduced in mod xfs-linux:xfs-kern:207390a. The new multi-level extent allocations are only required for heavily fragmented files, so the old-style linear extent list is used on files until the extents reach a pre-determined size of 4k. 4k buffers are used because this is the system page size on Linux i386 and systems with larger page sizes don't seem to gain much, if anything, by using their native page size as the extent buffer size. Also, using 4k extent buffers everywhere provides a consistent interface for CXFS across different platforms. The 4k extent buffers are managed by an indirection array (xfs_ext_irec_t) which is basically just a pointer array with a bit of extra information to keep track of the number of extents in each buffer as well as the extent offset of each buffer. 
Major changes include: - Add multi-level in-core file extent functionality to the xfs_iext_ subroutines introduced in mod: xfs-linux:xfs-kern:207390a - Introduce 13 new subroutines which add functionality for multi-level in-core file extents: xfs_iext_add_indirect_multi() xfs_iext_remove_indirect() xfs_iext_realloc_indirect() xfs_iext_indirect_to_direct() xfs_iext_bno_to_irec() xfs_iext_idx_to_irec() xfs_iext_irec_init() xfs_iext_irec_new() xfs_iext_irec_remove() xfs_iext_irec_compact() xfs_iext_irec_compact_pages() xfs_iext_irec_compact_full() xfs_iext_irec_update_extoffs() SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207393a Signed-off-by: Mandy Kirkconnell Signed-off-by: Nathan Scott --- fs/xfs/xfs_bmap.c | 59 +++- fs/xfs/xfs_bmap.h | 18 ++ fs/xfs/xfs_bmap_btree.h | 8 - fs/xfs/xfs_inode.c | 715 +++++++++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_inode.h | 52 +++- 5 files changed, 826 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 53c47a181f87..81a95b684b6b 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3423,6 +3423,7 @@ xfs_bmap_local_to_extents( xfs_bmap_forkoff_reset(args.mp, ip, whichfork); xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); xfs_iext_add(ifp, 0, 1); + ASSERT((ifp->if_flags & (XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFEXTENTS); ep = xfs_iext_get_ext(ifp, 0); xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork); @@ -3551,6 +3552,54 @@ xfs_bmap_do_search_extents( return ep; } +/* + * Call xfs_bmap_do_search_extents() to search for the extent + * record containing block bno. If in multi-level in-core extent + * allocation mode, find and extract the target extent buffer, + * otherwise just use the direct extent list. + */ +xfs_bmbt_rec_t * /* pointer to found extent entry */ +xfs_bmap_search_multi_extents( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number searched for */ + int *eofp, /* out: end of file found */ + xfs_extnum_t *lastxp, /* out: last extent index */ + xfs_bmbt_irec_t *gotp, /* out: extent entry found */ + xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ +{ + xfs_bmbt_rec_t *base; /* base of extent records */ + xfs_bmbt_rec_t *ep; /* extent record pointer */ + xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ + xfs_extnum_t lastx; /* last extent index */ + xfs_extnum_t nextents; /* number of file extents */ + + /* + * For multi-level extent allocation mode, find the + * target extent list and pass only the contiguous + * list to xfs_bmap_do_search_extents. Convert lastx + * from a file extent index to an index within the + * target extent list. + */ + if (ifp->if_flags & XFS_IFEXTIREC) { + int erp_idx = 0; + erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); + base = erp->er_extbuf; + nextents = erp->er_extcount; + lastx = ifp->if_lastex - erp->er_extoff; + } else { + base = &ifp->if_u1.if_extents[0]; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + lastx = ifp->if_lastex; + } + ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, + eofp, lastxp, gotp, prevp); + /* Convert lastx back to file-based index */ + if (ifp->if_flags & XFS_IFEXTIREC) { + *lastxp += erp->er_extoff; + } + return ep; +} + /* * Search the extents list for the inode, for the extent containing bno. * If bno lies in a hole, point to the next entry. 
If bno lies past eof, @@ -3569,20 +3618,14 @@ xfs_bmap_search_extents( xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ { xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_rec_t *base; /* base of extent list */ - xfs_extnum_t lastx; /* last extent index used */ - xfs_extnum_t nextents; /* number of file extents */ xfs_bmbt_rec_t *ep; /* extent record pointer */ int rt; /* realtime flag */ XFS_STATS_INC(xs_look_exlist); ifp = XFS_IFORK_PTR(ip, whichfork); - lastx = ifp->if_lastex; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - base = &ifp->if_u1.if_extents[0]; - ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp, - lastxp, gotp, prevp); + ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); + rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) { cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld " diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 4c05f95452c2..011ccaa9a1c0 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -352,6 +352,24 @@ xfs_check_nostate_extents( xfs_extnum_t idx, xfs_extnum_t num); +/* + * Call xfs_bmap_do_search_extents() to search for the extent + * record containing block bno. If in multi-level in-core extent + * allocation mode, find and extract the target extent buffer, + * otherwise just use the direct extent list. + */ +xfs_bmbt_rec_t * +xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *, + xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); + +/* + * Search an extent list for the extent which includes block + * bno. + */ +xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *, + xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *, + xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); + #endif /* __KERNEL__ */ #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index e095a2d344ae..6478cfa0e539 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -372,14 +372,6 @@ extern int xfs_bmbt_get_rec(struct xfs_btree_cur *, xfs_fileoff_t *, xfs_exntst_t *, int *); #endif -/* - * Search an extent list for the extent which includes block - * bno. 
- */ -xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *, - xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *, - xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); - #endif /* __KERNEL__ */ #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6459395a0e40..580fa0758039 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2630,8 +2630,9 @@ xfs_idestroy_fork( ifp->if_real_bytes = 0; } } else if ((ifp->if_flags & XFS_IFEXTENTS) && - (ifp->if_u1.if_extents != NULL) && - (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) { + ((ifp->if_flags & XFS_IFEXTIREC) || + ((ifp->if_u1.if_extents != NULL) && + (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { ASSERT(ifp->if_real_bytes != 0); xfs_iext_destroy(ifp); } @@ -3622,7 +3623,16 @@ xfs_iext_get_ext( xfs_extnum_t idx) /* index of target extent */ { ASSERT(idx >= 0); - if (ifp->if_bytes) { + if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { + return ifp->if_u1.if_ext_irec->er_extbuf; + } else if (ifp->if_flags & XFS_IFEXTIREC) { + xfs_ext_irec_t *erp; /* irec pointer */ + int erp_idx = 0; /* irec index */ + xfs_extnum_t page_idx = idx; /* ext index in target list */ + + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); + return &erp->er_extbuf[page_idx]; + } else if (ifp->if_bytes) { return &ifp->if_u1.if_extents[idx]; } else { return NULL; @@ -3691,6 +3701,7 @@ xfs_iext_add( } ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; ifp->if_real_bytes = 0; + ifp->if_lastex = nextents + ext_diff; } /* * Otherwise use a linear (direct) extent list. @@ -3698,7 +3709,7 @@ xfs_iext_add( * xfs_iext_realloc_direct will switch us from * inline to direct extent allocation mode. */ - else { + else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { xfs_iext_realloc_direct(ifp, new_size); if (idx < nextents) { memmove(&ifp->if_u1.if_extents[idx + ext_diff], @@ -3707,14 +3718,182 @@ xfs_iext_add( memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); } } + /* Indirection array */ + else { + xfs_ext_irec_t *erp; + int erp_idx = 0; + int page_idx = idx; + + ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); + if (ifp->if_flags & XFS_IFEXTIREC) { + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); + } else { + xfs_iext_irec_init(ifp); + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = ifp->if_u1.if_ext_irec; + } + /* Extents fit in target extent page */ + if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { + if (page_idx < erp->er_extcount) { + memmove(&erp->er_extbuf[page_idx + ext_diff], + &erp->er_extbuf[page_idx], + (erp->er_extcount - page_idx) * + sizeof(xfs_bmbt_rec_t)); + memset(&erp->er_extbuf[page_idx], 0, byte_diff); + } + erp->er_extcount += ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + } + /* Insert a new extent page */ + else if (erp) { + xfs_iext_add_indirect_multi(ifp, + erp_idx, page_idx, ext_diff); + } + /* + * If extent(s) are being appended to the last page in + * the indirection array and the new extent(s) don't fit + * in the page, then erp is NULL and erp_idx is set to + * the next index needed in the indirection array. + */ + else { + int count = ext_diff; + + while (count) { + erp = xfs_iext_irec_new(ifp, erp_idx); + erp->er_extcount = count; + count -= MIN(count, (int)XFS_LINEAR_EXTS); + if (count) { + erp_idx++; + } + } + } + } ifp->if_bytes = new_size; } +/* + * This is called when incore extents are being added to the indirection + * array and the new extents do not fit in the target extent list. 
The + * erp_idx parameter contains the irec index for the target extent list + * in the indirection array, and the idx parameter contains the extent + * index within the list. The number of extents being added is stored + * in the count parameter. + * + * |-------| |-------| + * | | | | idx - number of extents before idx + * | idx | | count | + * | | | | count - number of extents being inserted at idx + * |-------| |-------| + * | count | | nex2 | nex2 - number of extents after idx + count + * |-------| |-------| + */ +void +xfs_iext_add_indirect_multi( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx, /* target extent irec index */ + xfs_extnum_t idx, /* index within target list */ + int count) /* new extents being added */ +{ + int byte_diff; /* new bytes being added */ + xfs_ext_irec_t *erp; /* pointer to irec entry */ + xfs_extnum_t ext_diff; /* number of extents to add */ + xfs_extnum_t ext_cnt; /* new extents still needed */ + xfs_extnum_t nex2; /* extents after idx + count */ + xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ + int nlists; /* number of irec's (lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nex2 = erp->er_extcount - idx; + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + + /* + * Save second part of target extent list + * (all extents past */ + if (nex2) { + byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); + nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP); + memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); + erp->er_extcount -= nex2; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); + memset(&erp->er_extbuf[idx], 0, byte_diff); + } + + /* + * Add the new extents to the end of the target + * list, then allocate new irec record(s) and + * extent buffer(s) as needed to store the rest + * of the new extents. + */ + ext_cnt = count; + ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); + if (ext_diff) { + erp->er_extcount += ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + ext_cnt -= ext_diff; + } + while (ext_cnt) { + erp_idx++; + erp = xfs_iext_irec_new(ifp, erp_idx); + ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); + erp->er_extcount = ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + ext_cnt -= ext_diff; + } + + /* Add nex2 extents back to indirection array */ + if (nex2) { + xfs_extnum_t ext_avail; + int i; + + byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); + ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; + i = 0; + /* + * If nex2 extents fit in the current page, append + * nex2_ep after the new extents. + */ + if (nex2 <= ext_avail) { + i = erp->er_extcount; + } + /* + * Otherwise, check if space is available in the + * next page. + */ + else if ((erp_idx < nlists - 1) && + (nex2 <= (ext_avail = XFS_LINEAR_EXTS - + ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { + erp_idx++; + erp++; + /* Create a hole for nex2 extents */ + memmove(&erp->er_extbuf[nex2], erp->er_extbuf, + erp->er_extcount * sizeof(xfs_bmbt_rec_t)); + } + /* + * Final choice, create a new extent page for + * nex2 extents. + */ + else { + erp_idx++; + erp = xfs_iext_irec_new(ifp, erp_idx); + } + memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); + kmem_free(nex2_ep, byte_diff); + erp->er_extcount += nex2; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); + } +} + /* * This is called when the amount of space required for incore file * extents needs to be decreased. 
The ext_diff parameter stores the * number of extents to be removed and the idx parameter contains * the extent index where the extents will be removed from. + * + * If the amount of space needed has decreased below the linear + * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous + * extent array. Otherwise, use kmem_realloc() to adjust the + * size to what is needed. */ void xfs_iext_remove( @@ -3731,6 +3910,8 @@ xfs_iext_remove( if (new_size == 0) { xfs_iext_destroy(ifp); + } else if (ifp->if_flags & XFS_IFEXTIREC) { + xfs_iext_remove_indirect(ifp, idx, ext_diff); } else if (ifp->if_real_bytes) { xfs_iext_remove_direct(ifp, idx, ext_diff); } else { @@ -3751,6 +3932,7 @@ xfs_iext_remove_inline( { int nextents; /* number of extents in file */ + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); ASSERT(idx < XFS_INLINE_EXTS); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); ASSERT(((nextents - ext_diff) > 0) && @@ -3788,6 +3970,7 @@ xfs_iext_remove_direct( xfs_extnum_t nextents; /* number of extents in file */ int new_size; /* size of extents after removal */ + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); new_size = ifp->if_bytes - (ext_diff * sizeof(xfs_bmbt_rec_t)); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); @@ -3815,6 +3998,84 @@ xfs_iext_remove_direct( ifp->if_bytes = new_size; } +/* + * This is called when incore extents are being removed from the + * indirection array and the extents being removed span multiple extent + * buffers. The idx parameter contains the file extent index where we + * want to begin removing extents, and the count parameter contains + * how many extents need to be removed. + * + * |-------| |-------| + * | nex1 | | | nex1 - number of extents before idx + * |-------| | count | + * | | | | count - number of extents being removed at idx + * | count | |-------| + * | | | nex2 | nex2 - number of extents after idx + count + * |-------| |-------| + */ +void +xfs_iext_remove_indirect( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing extents */ + int count) /* number of extents to remove */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int erp_idx = 0; /* indirection array index */ + xfs_extnum_t ext_cnt; /* extents left to remove */ + xfs_extnum_t ext_diff; /* extents to remove in current list */ + xfs_extnum_t nex1; /* number of extents before idx */ + xfs_extnum_t nex2; /* extents after idx + count */ + int nlists; /* entries in indirecton array */ + int page_idx = idx; /* index in target extent list */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); + ASSERT(erp != NULL); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + nex1 = page_idx; + ext_cnt = count; + while (ext_cnt) { + nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); + ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); + /* + * Check for deletion of entire list; + * xfs_iext_irec_remove() updates extent offsets. 
+ */ + if (ext_diff == erp->er_extcount) { + xfs_iext_irec_remove(ifp, erp_idx); + ext_cnt -= ext_diff; + nex1 = 0; + if (ext_cnt) { + ASSERT(erp_idx < ifp->if_real_bytes / + XFS_IEXT_BUFSZ); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nex1 = 0; + continue; + } else { + break; + } + } + /* Move extents up (if needed) */ + if (nex2) { + memmove(&erp->er_extbuf[nex1], + &erp->er_extbuf[nex1 + ext_diff], + nex2 * sizeof(xfs_bmbt_rec_t)); + } + /* Zero out rest of page */ + memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - + ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); + /* Update remaining counters */ + erp->er_extcount -= ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); + ext_cnt -= ext_diff; + nex1 = 0; + erp_idx++; + erp++; + } + ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); + xfs_iext_irec_compact(ifp); +} + /* * Create, destroy, or resize a linear (direct) block of extents. */ @@ -3827,6 +4088,10 @@ xfs_iext_realloc_direct( rnew_size = new_size; + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || + ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && + (new_size != ifp->if_real_bytes))); + /* Free extent records */ if (new_size == 0) { xfs_iext_destroy(ifp); @@ -3919,6 +4184,60 @@ xfs_iext_inline_to_direct( ifp->if_real_bytes = new_size; } +/* + * Resize an extent indirection array to new_size bytes. + */ +void +xfs_iext_realloc_indirect( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* new indirection array size */ +{ + int nlists; /* number of irec's (ex lists) */ + int size; /* current indirection array size */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + size = nlists * sizeof(xfs_ext_irec_t); + ASSERT(ifp->if_real_bytes); + ASSERT((new_size >= 0) && (new_size != size)); + if (new_size == 0) { + xfs_iext_destroy(ifp); + } else { + ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) + kmem_realloc(ifp->if_u1.if_ext_irec, + new_size, size, KM_SLEEP); + } +} + +/* + * Switch from indirection array to linear (direct) extent allocations. + */ +void +xfs_iext_indirect_to_direct( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_bmbt_rec_t *ep; /* extent record pointer */ + xfs_extnum_t nextents; /* number of extents in file */ + int size; /* size of file extents */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(nextents <= XFS_LINEAR_EXTS); + size = nextents * sizeof(xfs_bmbt_rec_t); + + xfs_iext_irec_compact_full(ifp); + ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); + + ep = ifp->if_u1.if_ext_irec->er_extbuf; + kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t)); + ifp->if_flags &= ~XFS_IFEXTIREC; + ifp->if_u1.if_extents = ep; + ifp->if_bytes = size; + if (nextents < XFS_LINEAR_EXTS) { + xfs_iext_realloc_direct(ifp, size); + } +} + /* * Free incore file extents. 
*/ @@ -3926,7 +4245,16 @@ void xfs_iext_destroy( xfs_ifork_t *ifp) /* inode fork pointer */ { - if (ifp->if_real_bytes) { + if (ifp->if_flags & XFS_IFEXTIREC) { + int erp_idx; + int nlists; + + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { + xfs_iext_irec_remove(ifp, erp_idx); + } + ifp->if_flags &= ~XFS_IFEXTIREC; + } else if (ifp->if_real_bytes) { kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); } else if (ifp->if_bytes) { memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * @@ -3936,3 +4264,380 @@ xfs_iext_destroy( ifp->if_real_bytes = 0; ifp->if_bytes = 0; } + +/* + * Return a pointer to the indirection array entry containing the + * extent record for filesystem block bno. Store the index of the + * target irec in *erp_idxp. + */ +xfs_ext_irec_t * +xfs_iext_bno_to_irec( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number to search for */ + int *erp_idxp) /* irec index of target ext list */ +{ + xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ + xfs_ext_irec_t *erp_next; /* next indirection array entry */ + xfs_extnum_t erp_idx; /* indirection array index */ + int nlists; /* number of extent irec's (lists) */ + int high; /* binary search upper limit */ + int low; /* binary search lower limit */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp_idx = 0; + low = 0; + high = nlists - 1; + while (low <= high) { + erp_idx = (low + high) >> 1; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; + if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { + high = erp_idx - 1; + } else if (erp_next && bno >= + xfs_bmbt_get_startoff(erp_next->er_extbuf)) { + low = erp_idx + 1; + } else { + break; + } + } + *erp_idxp = erp_idx; + return erp; +} + +/* + * Return a pointer to the indirection array entry containing the + * extent record at file extent index *idxp. Store the index of the + * target irec in *erp_idxp and store the page index of the target + * extent record in *idxp. + */ +xfs_ext_irec_t * +xfs_iext_idx_to_irec( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t *idxp, /* extent index (file -> page) */ + int *erp_idxp, /* pointer to target irec */ + int realloc) /* new bytes were just added */ +{ + xfs_ext_irec_t *prev; /* pointer to previous irec */ + xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ + int erp_idx; /* indirection array index */ + int nlists; /* number of irec's (ex lists) */ + int high; /* binary search upper limit */ + int low; /* binary search lower limit */ + xfs_extnum_t page_idx = *idxp; /* extent index in target list */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + ASSERT(page_idx >= 0 && page_idx <= + ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp_idx = 0; + low = 0; + high = nlists - 1; + + /* Binary search extent irec's */ + while (low <= high) { + erp_idx = (low + high) >> 1; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + prev = erp_idx > 0 ? 
erp - 1 : NULL; + if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && + realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { + high = erp_idx - 1; + } else if (page_idx > erp->er_extoff + erp->er_extcount || + (page_idx == erp->er_extoff + erp->er_extcount && + !realloc)) { + low = erp_idx + 1; + } else if (page_idx == erp->er_extoff + erp->er_extcount && + erp->er_extcount == XFS_LINEAR_EXTS) { + ASSERT(realloc); + page_idx = 0; + erp_idx++; + erp = erp_idx < nlists ? erp + 1 : NULL; + break; + } else { + page_idx -= erp->er_extoff; + break; + } + } + *idxp = page_idx; + *erp_idxp = erp_idx; + return(erp); +} + +/* + * Allocate and initialize an indirection array once the space needed + * for incore extents increases above XFS_IEXT_BUFSZ. + */ +void +xfs_iext_irec_init( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + xfs_extnum_t nextents; /* number of extents in file */ + + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(nextents <= XFS_LINEAR_EXTS); + + erp = (xfs_ext_irec_t *) + kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP); + + if (nextents == 0) { + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); + } else if (!ifp->if_real_bytes) { + xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); + } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { + xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); + } + erp->er_extbuf = ifp->if_u1.if_extents; + erp->er_extcount = nextents; + erp->er_extoff = 0; + + ifp->if_flags |= XFS_IFEXTIREC; + ifp->if_real_bytes = XFS_IEXT_BUFSZ; + ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); + ifp->if_u1.if_ext_irec = erp; + + return; +} + +/* + * Allocate and initialize a new entry in the indirection array. + */ +xfs_ext_irec_t * +xfs_iext_irec_new( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx) /* index for new irec */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + + /* Resize indirection array */ + xfs_iext_realloc_indirect(ifp, ++nlists * + sizeof(xfs_ext_irec_t)); + /* + * Move records down in the array so the + * new page can use erp_idx. + */ + erp = ifp->if_u1.if_ext_irec; + for (i = nlists - 1; i > erp_idx; i--) { + memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); + } + ASSERT(i == erp_idx); + + /* Initialize new extent record */ + erp = ifp->if_u1.if_ext_irec; + erp[erp_idx].er_extbuf = (xfs_bmbt_rec_t *) + kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); + ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; + memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); + erp[erp_idx].er_extcount = 0; + erp[erp_idx].er_extoff = erp_idx > 0 ? + erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; + return (&erp[erp_idx]); +} + +/* + * Remove a record from the indirection array. 
+ */ +void +xfs_iext_irec_remove( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx) /* irec index to remove */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + if (erp->er_extbuf) { + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, + -erp->er_extcount); + kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ); + } + /* Compact extent records */ + erp = ifp->if_u1.if_ext_irec; + for (i = erp_idx; i < nlists - 1; i++) { + memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); + } + /* + * Manually free the last extent record from the indirection + * array. A call to xfs_iext_realloc_indirect() with a size + * of zero would result in a call to xfs_iext_destroy() which + * would in turn call this function again, creating a nasty + * infinite loop. + */ + if (--nlists) { + xfs_iext_realloc_indirect(ifp, + nlists * sizeof(xfs_ext_irec_t)); + } else { + kmem_free(ifp->if_u1.if_ext_irec, + sizeof(xfs_ext_irec_t)); + } + ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; +} + +/* + * This is called to clean up large amounts of unused memory allocated + * by the indirection array. Before compacting anything though, verify + * that the indirection array is still needed and switch back to the + * linear extent list (or even the inline buffer) if possible. The + * compaction policy is as follows: + * + * Full Compaction: Extents fit into a single page (or inline buffer) + * Full Compaction: Extents occupy less than 10% of allocated space + * Partial Compaction: Extents occupy > 10% and < 50% of allocated space + * No Compaction: Extents occupy at least 50% of allocated space + */ +void +xfs_iext_irec_compact( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + + if (nextents == 0) { + xfs_iext_destroy(ifp); + } else if (nextents <= XFS_INLINE_EXTS) { + xfs_iext_indirect_to_direct(ifp); + xfs_iext_direct_to_inline(ifp, nextents); + } else if (nextents <= XFS_LINEAR_EXTS) { + xfs_iext_indirect_to_direct(ifp); + } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { + xfs_iext_irec_compact_full(ifp); + } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { + xfs_iext_irec_compact_pages(ifp); + } +} + +/* + * Combine extents from neighboring extent pages. + */ +void +xfs_iext_irec_compact_pages( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ + int erp_idx = 0; /* indirection array index */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + while (erp_idx < nlists - 1) { + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + erp_next = erp + 1; + if (erp_next->er_extcount <= + (XFS_LINEAR_EXTS - erp->er_extcount)) { + memmove(&erp->er_extbuf[erp->er_extcount], + erp_next->er_extbuf, erp_next->er_extcount * + sizeof(xfs_bmbt_rec_t)); + erp->er_extcount += erp_next->er_extcount; + /* + * Free page before removing extent record + * so er_extoffs don't get modified in + * xfs_iext_irec_remove. 
+ */ + kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ); + erp_next->er_extbuf = NULL; + xfs_iext_irec_remove(ifp, erp_idx + 1); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + } else { + erp_idx++; + } + } +} + +/* + * Fully compact the extent records managed by the indirection array. + */ +void +xfs_iext_irec_compact_full( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_bmbt_rec_t *ep, *ep_next; /* extent record pointers */ + xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ + int erp_idx = 0; /* extent irec index */ + int ext_avail; /* empty entries in ex list */ + int ext_diff; /* number of exts to add */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp = ifp->if_u1.if_ext_irec; + ep = &erp->er_extbuf[erp->er_extcount]; + erp_next = erp + 1; + ep_next = erp_next->er_extbuf; + while (erp_idx < nlists - 1) { + ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; + ext_diff = MIN(ext_avail, erp_next->er_extcount); + memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); + erp->er_extcount += ext_diff; + erp_next->er_extcount -= ext_diff; + /* Remove next page */ + if (erp_next->er_extcount == 0) { + /* + * Free page before removing extent record + * so er_extoffs don't get modified in + * xfs_iext_irec_remove. + */ + kmem_free(erp_next->er_extbuf, + erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); + erp_next->er_extbuf = NULL; + xfs_iext_irec_remove(ifp, erp_idx + 1); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + /* Update next page */ + } else { + /* Move rest of page up to become next new page */ + memmove(erp_next->er_extbuf, ep_next, + erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); + ep_next = erp_next->er_extbuf; + memset(&ep_next[erp_next->er_extcount], 0, + (XFS_LINEAR_EXTS - erp_next->er_extcount) * + sizeof(xfs_bmbt_rec_t)); + } + if (erp->er_extcount == XFS_LINEAR_EXTS) { + erp_idx++; + if (erp_idx < nlists) + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + else + break; + } + ep = &erp->er_extbuf[erp->er_extcount]; + erp_next = erp + 1; + ep_next = erp_next->er_extbuf; + } +} + +/* + * This is called to update the er_extoff field in the indirection + * array when extents have been added or removed from one of the + * extent lists. erp_idx contains the irec index to begin updating + * at and ext_diff contains the number of extents that were added + * or removed. + */ +void +xfs_iext_irec_update_extoffs( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx, /* irec index to update */ + int ext_diff) /* number of new extents */ +{ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + for (i = erp_idx; i < nlists; i++) { + ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; + } +} diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 740b73fabd2f..3c1df1d642fa 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -24,11 +24,38 @@ #define XFS_DATA_FORK 0 #define XFS_ATTR_FORK 1 +/* + * The following xfs_ext_irec_t struct introduces a second (top) level + * to the in-core extent allocation scheme. These structs are allocated + * in a contiguous block, creating an indirection array where each entry + * (irec) contains a pointer to a buffer of in-core extent records which + * it manages. 
Each extent buffer is 4k in size, since 4k is the system + * page size on Linux i386 and systems with larger page sizes don't seem + * to gain much, if anything, by using their native page size as the + * extent buffer size. Also, using 4k extent buffers everywhere provides + * a consistent interface for CXFS across different platforms. + * + * There is currently no limit on the number of irec's (extent lists) + * allowed, so heavily fragmented files may require an indirection array + * which spans multiple system pages of memory. The number of extents + * which would require this amount of contiguous memory is very large + * and should not cause problems in the foreseeable future. However, + * if the memory needed for the contiguous array ever becomes a problem, + * it is possible that a third level of indirection may be required. + */ +typedef struct xfs_ext_irec { + xfs_bmbt_rec_t *er_extbuf; /* block of extent records */ + xfs_extnum_t er_extoff; /* extent offset in file */ + xfs_extnum_t er_extcount; /* number of extents in page/block */ +} xfs_ext_irec_t; + /* * File incore extent information, present for each of data & attr forks. */ -#define XFS_INLINE_EXTS 2 -#define XFS_INLINE_DATA 32 +#define XFS_IEXT_BUFSZ 4096 +#define XFS_LINEAR_EXTS (XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t)) +#define XFS_INLINE_EXTS 2 +#define XFS_INLINE_DATA 32 typedef struct xfs_ifork { int if_bytes; /* bytes in if_u1 */ int if_real_bytes; /* bytes allocated in if_u1 */ @@ -39,6 +66,7 @@ typedef struct xfs_ifork { xfs_extnum_t if_lastex; /* last if_extents used */ union { xfs_bmbt_rec_t *if_extents; /* linear map file exts */ + xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ char *if_data; /* inline file data */ } if_u1; union { @@ -61,9 +89,10 @@ typedef struct xfs_ifork { /* * Per-fork incore inode flags. */ -#define XFS_IFINLINE 0x0001 /* Inline data is read in */ -#define XFS_IFEXTENTS 0x0002 /* All extent pointers are read in */ -#define XFS_IFBROOT 0x0004 /* i_broot points to the bmap b-tree root */ +#define XFS_IFINLINE 0x01 /* Inline data is read in */ +#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ +#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ +#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ /* * Flags for xfs_imap() and xfs_dilocate(). 
@@ -438,13 +467,26 @@ xfs_bmbt_rec_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, xfs_bmbt_irec_t *); void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); +void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int); void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); +void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_realloc_direct(xfs_ifork_t *, int); +void xfs_iext_realloc_indirect(xfs_ifork_t *, int); +void xfs_iext_indirect_to_direct(xfs_ifork_t *); void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); void xfs_iext_inline_to_direct(xfs_ifork_t *, int); void xfs_iext_destroy(xfs_ifork_t *); +xfs_ext_irec_t *xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *); +xfs_ext_irec_t *xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int); +void xfs_iext_irec_init(xfs_ifork_t *); +xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int); +void xfs_iext_irec_remove(xfs_ifork_t *, int); +void xfs_iext_irec_compact(xfs_ifork_t *); +void xfs_iext_irec_compact_pages(xfs_ifork_t *); +void xfs_iext_irec_compact_full(xfs_ifork_t *); +void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) -- cgit v1.2.3 From 1f6553f9f9b6e41375c605769a75bd1646685a1b Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:30:48 +1100 Subject: [XFS] Dynamically allocate local kiocb structures in readv/writev routines to reduce stack footprint. SGI-PV: 947312 SGI-Modid: xfs-linux-melb:xfs-kern:25358a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_file.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ced4404339c7..269995ddfbdf 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -145,17 +145,22 @@ __linvfs_readv( { struct inode *inode = file->f_mapping->host; vnode_t *vp = LINVFS_GET_VP(inode); - struct kiocb kiocb; + struct kiocb *kiocb; ssize_t rval; - init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *ppos; + kiocb = kmalloc(sizeof(*kiocb), GFP_KERNEL); + if (unlikely(!kiocb)) + return -ENOMEM; + + init_sync_kiocb(kiocb, file); + kiocb->ki_pos = *ppos; if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); + VOP_READ(vp, kiocb, iov, nr_segs, &kiocb->ki_pos, ioflags, NULL, rval); - *ppos = kiocb.ki_pos; + *ppos = kiocb->ki_pos; + kfree(kiocb); return rval; } @@ -190,17 +195,22 @@ __linvfs_writev( { struct inode *inode = file->f_mapping->host; vnode_t *vp = LINVFS_GET_VP(inode); - struct kiocb kiocb; + struct kiocb *kiocb; ssize_t rval; - init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *ppos; + kiocb = kmalloc(sizeof(*kiocb), GFP_KERNEL); + if (unlikely(!kiocb)) + return -ENOMEM; + + init_sync_kiocb(kiocb, file); + kiocb->ki_pos = *ppos; if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); + VOP_WRITE(vp, kiocb, iov, nr_segs, &kiocb->ki_pos, ioflags, NULL, rval); - *ppos = kiocb.ki_pos; + *ppos = kiocb->ki_pos; + kfree(kiocb); return rval; } @@ -435,7 +445,7 @@ linvfs_ioctl( unsigned long arg) { int error; - struct inode *inode = 
filp->f_dentry->d_inode; + struct inode *inode = filp->f_dentry->d_inode; vnode_t *vp = LINVFS_GET_VP(inode); VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error); @@ -457,7 +467,7 @@ linvfs_ioctl_invis( unsigned long arg) { int error; - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = filp->f_dentry->d_inode; vnode_t *vp = LINVFS_GET_VP(inode); ASSERT(vp); -- cgit v1.2.3 From f6d75cbed997dffb41e3d473bd4c0f899abc3776 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:32:24 +1100 Subject: [XFS] Dynamically allocate xfs_dir2_put_args_t structure to reduce stack pressure in xfs_dir2_leaf_getdents routine. SGI-PV: 947312 SGI-Modid: xfs-linux-melb:xfs-kern:25359a Signed-off-by: Nathan Scott --- fs/xfs/xfs_dir2_leaf.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index d342b6b55239..e4e07c8f7a76 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -766,7 +766,7 @@ xfs_dir2_leaf_getdents( xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ int eof; /* reached end of directory */ - int error=0; /* error return value */ + int error = 0; /* error return value */ int i; /* temporary loop index */ int j; /* temporary loop index */ int length; /* temporary length value */ @@ -778,8 +778,8 @@ xfs_dir2_leaf_getdents( xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_off_t newoff; /* new curoff after new blk */ int nmap; /* mappings to ask xfs_bmapi */ - xfs_dir2_put_args_t p; /* formatting arg bundle */ - char *ptr=NULL; /* pointer to current data */ + xfs_dir2_put_args_t *p; /* formatting arg bundle */ + char *ptr = NULL; /* pointer to current data */ int ra_current; /* number of read-ahead blks */ int ra_index; /* *map index for read-ahead */ int ra_offset; /* map entry offset for ra */ @@ -797,9 +797,10 @@ xfs_dir2_leaf_getdents( /* * Setup formatting arguments. */ - p.dbp = dbp; - p.put = put; - p.uio = uio; + p = kmem_alloc(sizeof(*p), KM_SLEEP); + p->dbp = dbp; + p->put = put; + p->uio = uio; /* * Set up to bmap a number of blocks based on the caller's * buffer size, the directory block size, and the filesystem @@ -1092,24 +1093,24 @@ xfs_dir2_leaf_getdents( */ dep = (xfs_dir2_data_entry_t *)ptr; - p.namelen = dep->namelen; + p->namelen = dep->namelen; - length = XFS_DIR2_DATA_ENTSIZE(p.namelen); + length = XFS_DIR2_DATA_ENTSIZE(p->namelen); - p.cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length); + p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length); - p.ino = INT_GET(dep->inumber, ARCH_CONVERT); + p->ino = INT_GET(dep->inumber, ARCH_CONVERT); #if XFS_BIG_INUMS - p.ino += mp->m_inoadd; + p->ino += mp->m_inoadd; #endif - p.name = (char *)dep->name; + p->name = (char *)dep->name; - error = p.put(&p); + error = p->put(p); /* * Won't fit. Return to caller. */ - if (!p.done) { + if (!p->done) { eof = 0; break; } @@ -1129,6 +1130,7 @@ xfs_dir2_leaf_getdents( else uio->uio_offset = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff); kmem_free(map, map_size * sizeof(*map)); + kmem_free(p, sizeof(*p)); if (bp) xfs_da_brelse(tp, bp); return error; -- cgit v1.2.3 From 8f79405527b50fe27cffcb7081890b5c68439b4f Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:32:41 +1100 Subject: [XFS] Reduce complexity in xfs_trans_init by pushing complex macros out into functions and hence reduce the stack footprint there. 
SGI-PV: 947312 SGI-Modid: xfs-linux-melb:xfs-kern:25360a Signed-off-by: Nathan Scott --- fs/xfs/xfs_trans.c | 187 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 153 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index d3d714e6b32a..6a2a1e07505b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -55,9 +55,140 @@ STATIC void xfs_trans_committed(xfs_trans_t *, int); STATIC void xfs_trans_chunk_committed(xfs_log_item_chunk_t *, xfs_lsn_t, int); STATIC void xfs_trans_free(xfs_trans_t *); -kmem_zone_t *xfs_trans_zone; +kmem_zone_t *xfs_trans_zone; +/* + * Reservation functions here avoid a huge stack in xfs_trans_init + * due to register overflow from temporaries in the calculations. + */ + +STATIC uint +xfs_calc_write_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_itruncate_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_rename_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_link_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_LINK_LOG_RES(mp); +} + +STATIC uint +xfs_calc_remove_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_symlink_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_create_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_mkdir_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_ifree_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_ichange_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_growdata_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_GROWDATA_LOG_RES(mp); +} + +STATIC uint +xfs_calc_growrtalloc_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_GROWRTALLOC_LOG_RES(mp); +} + +STATIC uint +xfs_calc_growrtzero_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_GROWRTZERO_LOG_RES(mp); +} + +STATIC uint +xfs_calc_growrtfree_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_GROWRTFREE_LOG_RES(mp); +} + +STATIC uint +xfs_calc_swrite_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_SWRITE_LOG_RES(mp); +} + +STATIC uint +xfs_calc_writeid_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_WRITEID_LOG_RES(mp); +} + +STATIC uint +xfs_calc_addafork_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_attrinval_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_ATTRINVAL_LOG_RES(mp); +} + +STATIC uint +xfs_calc_attrset_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_attrrm_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); +} + +STATIC uint +xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp) +{ + return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp); +} + /* * Initialize the precomputed transaction reservation values * in the mount structure. 
@@ -69,39 +200,27 @@ xfs_trans_init( xfs_trans_reservations_t *resp; resp = &(mp->m_reservations); - resp->tr_write = - (uint)(XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_itruncate = - (uint)(XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_rename = - (uint)(XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_link = (uint)XFS_CALC_LINK_LOG_RES(mp); - resp->tr_remove = - (uint)(XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_symlink = - (uint)(XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_create = - (uint)(XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_mkdir = - (uint)(XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_ifree = - (uint)(XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_ichange = - (uint)(XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_growdata = (uint)XFS_CALC_GROWDATA_LOG_RES(mp); - resp->tr_swrite = (uint)XFS_CALC_SWRITE_LOG_RES(mp); - resp->tr_writeid = (uint)XFS_CALC_WRITEID_LOG_RES(mp); - resp->tr_addafork = - (uint)(XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_attrinval = (uint)XFS_CALC_ATTRINVAL_LOG_RES(mp); - resp->tr_attrset = - (uint)(XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_attrrm = - (uint)(XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); - resp->tr_clearagi = (uint)XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp); - resp->tr_growrtalloc = (uint)XFS_CALC_GROWRTALLOC_LOG_RES(mp); - resp->tr_growrtzero = (uint)XFS_CALC_GROWRTZERO_LOG_RES(mp); - resp->tr_growrtfree = (uint)XFS_CALC_GROWRTFREE_LOG_RES(mp); + resp->tr_write = xfs_calc_write_reservation(mp); + resp->tr_itruncate = xfs_calc_itruncate_reservation(mp); + resp->tr_rename = xfs_calc_rename_reservation(mp); + resp->tr_link = xfs_calc_link_reservation(mp); + resp->tr_remove = xfs_calc_remove_reservation(mp); + resp->tr_symlink = xfs_calc_symlink_reservation(mp); + resp->tr_create = xfs_calc_create_reservation(mp); + resp->tr_mkdir = xfs_calc_mkdir_reservation(mp); + resp->tr_ifree = xfs_calc_ifree_reservation(mp); + resp->tr_ichange = xfs_calc_ichange_reservation(mp); + resp->tr_growdata = xfs_calc_growdata_reservation(mp); + resp->tr_swrite = xfs_calc_swrite_reservation(mp); + resp->tr_writeid = xfs_calc_writeid_reservation(mp); + resp->tr_addafork = xfs_calc_addafork_reservation(mp); + resp->tr_attrinval = xfs_calc_attrinval_reservation(mp); + resp->tr_attrset = xfs_calc_attrset_reservation(mp); + resp->tr_attrrm = xfs_calc_attrrm_reservation(mp); + resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp); + resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp); + resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp); + resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); } /* -- cgit v1.2.3 From 9b94c2eddf407ad8faa5672ffa691e2076167564 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:32:54 +1100 Subject: [XFS] Take a dentry structure off the stack into the data segment. 
SGI-PV: 947312 SGI-Modid: xfs-linux-melb:xfs-kern:25361a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_export.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 821bd12dd858..53ed99112365 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -25,6 +25,8 @@ #include "xfs_mount.h" #include "xfs_export.h" +STATIC struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; + /* * XFS encodes and decodes the fileid portion of NFS filehandles * itself instead of letting the generic NFS code do it. This @@ -160,11 +162,6 @@ linvfs_get_parent( int error; vnode_t *vp, *cvp; struct dentry *parent; - struct dentry dotdot; - - dotdot.d_name.name = ".."; - dotdot.d_name.len = 2; - dotdot.d_inode = NULL; cvp = NULL; vp = LINVFS_GET_VP(child->d_inode); -- cgit v1.2.3 From 220b5284139be6ecbc39b353fd76f0923eccc3d6 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:33:36 +1100 Subject: [XFS] Dynamically allocate vattr in places it makes sense to do so, to reduce stack use. Also re-use vattr in some places so that multiple copies are not held on-stack. SGI-PV: 947312 SGI-Modid: xfs-linux-melb:xfs-kern:25369a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_file.c | 13 +++- fs/xfs/linux-2.6/xfs_ioctl.c | 128 +++++++++++++++++------------- fs/xfs/linux-2.6/xfs_iops.c | 180 +++++++++++++++++++++++++------------------ fs/xfs/linux-2.6/xfs_vnode.c | 29 ++++--- fs/xfs/linux-2.6/xfs_vnode.h | 1 + 5 files changed, 209 insertions(+), 142 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 269995ddfbdf..ce8fe40e1628 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -420,7 +420,7 @@ linvfs_file_mmap( { struct inode *ip = filp->f_dentry->d_inode; vnode_t *vp = LINVFS_GET_VP(ip); - vattr_t va = { .va_mask = XFS_AT_UPDATIME }; + vattr_t *vattr; int error; vma->vm_ops = &linvfs_file_vm_ops; @@ -431,9 +431,14 @@ linvfs_file_mmap( } #endif /* CONFIG_XFS_DMAPI */ - VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); - if (!error) - vn_revalidate(vp); /* update Linux inode flags */ + vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); + if (unlikely(!vattr)) + return -ENOMEM; + vattr->va_mask = XFS_AT_UPDATIME; + VOP_SETATTR(vp, vattr, XFS_AT_UPDATIME, NULL, error); + if (likely(!error)) + __vn_revalidate(vp, vattr); /* update flags */ + kfree(vattr); return 0; } diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 4db47790415c..f182721ec9a2 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1160,105 +1160,129 @@ xfs_ioc_xattr( void __user *arg) { struct fsxattr fa; - vattr_t va; - int error; + struct vattr *vattr; + int error = 0; int attr_flags; unsigned int flags; + vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); + if (unlikely(!vattr)) + return -ENOMEM; + switch (cmd) { case XFS_IOC_FSGETXATTR: { - va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ - XFS_AT_NEXTENTS | XFS_AT_PROJID; - VOP_GETATTR(vp, &va, 0, NULL, error); - if (error) - return -error; + vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ + XFS_AT_NEXTENTS | XFS_AT_PROJID; + VOP_GETATTR(vp, vattr, 0, NULL, error); + if (unlikely(error)) { + error = -error; + break; + } - fa.fsx_xflags = va.va_xflags; - fa.fsx_extsize = va.va_extsize; - fa.fsx_nextents = va.va_nextents; - fa.fsx_projid = va.va_projid; + fa.fsx_xflags = vattr->va_xflags; + fa.fsx_extsize = 
vattr->va_extsize; + fa.fsx_nextents = vattr->va_nextents; + fa.fsx_projid = vattr->va_projid; - if (copy_to_user(arg, &fa, sizeof(fa))) - return -XFS_ERROR(EFAULT); - return 0; + if (copy_to_user(arg, &fa, sizeof(fa))) { + error = -EFAULT; + break; + } + break; } case XFS_IOC_FSSETXATTR: { - if (copy_from_user(&fa, arg, sizeof(fa))) - return -XFS_ERROR(EFAULT); + if (copy_from_user(&fa, arg, sizeof(fa))) { + error = -EFAULT; + break; + } attr_flags = 0; if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) attr_flags |= ATTR_NONBLOCK; - va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; - va.va_xflags = fa.fsx_xflags; - va.va_extsize = fa.fsx_extsize; - va.va_projid = fa.fsx_projid; + vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; + vattr->va_xflags = fa.fsx_xflags; + vattr->va_extsize = fa.fsx_extsize; + vattr->va_projid = fa.fsx_projid; - VOP_SETATTR(vp, &va, attr_flags, NULL, error); - if (!error) - vn_revalidate(vp); /* update Linux inode flags */ - return -error; + VOP_SETATTR(vp, vattr, attr_flags, NULL, error); + if (likely(!error)) + __vn_revalidate(vp, vattr); /* update flags */ + error = -error; + break; } case XFS_IOC_FSGETXATTRA: { - va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ - XFS_AT_ANEXTENTS | XFS_AT_PROJID; - VOP_GETATTR(vp, &va, 0, NULL, error); - if (error) - return -error; + vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ + XFS_AT_ANEXTENTS | XFS_AT_PROJID; + VOP_GETATTR(vp, vattr, 0, NULL, error); + if (unlikely(error)) { + error = -error; + break; + } - fa.fsx_xflags = va.va_xflags; - fa.fsx_extsize = va.va_extsize; - fa.fsx_nextents = va.va_anextents; - fa.fsx_projid = va.va_projid; + fa.fsx_xflags = vattr->va_xflags; + fa.fsx_extsize = vattr->va_extsize; + fa.fsx_nextents = vattr->va_anextents; + fa.fsx_projid = vattr->va_projid; - if (copy_to_user(arg, &fa, sizeof(fa))) - return -XFS_ERROR(EFAULT); - return 0; + if (copy_to_user(arg, &fa, sizeof(fa))) { + error = -EFAULT; + break; + } + break; } case XFS_IOC_GETXFLAGS: { flags = xfs_di2lxflags(ip->i_d.di_flags); if (copy_to_user(arg, &flags, sizeof(flags))) - return -XFS_ERROR(EFAULT); - return 0; + error = -EFAULT; + break; } case XFS_IOC_SETXFLAGS: { - if (copy_from_user(&flags, arg, sizeof(flags))) - return -XFS_ERROR(EFAULT); + if (copy_from_user(&flags, arg, sizeof(flags))) { + error = -EFAULT; + break; + } if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \ LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \ - LINUX_XFLAG_SYNC)) - return -XFS_ERROR(EOPNOTSUPP); + LINUX_XFLAG_SYNC)) { + error = -EOPNOTSUPP; + break; + } attr_flags = 0; if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) attr_flags |= ATTR_NONBLOCK; - va.va_mask = XFS_AT_XFLAGS; - va.va_xflags = xfs_merge_ioc_xflags(flags, - xfs_ip2xflags(ip)); + vattr->va_mask = XFS_AT_XFLAGS; + vattr->va_xflags = xfs_merge_ioc_xflags(flags, + xfs_ip2xflags(ip)); - VOP_SETATTR(vp, &va, attr_flags, NULL, error); - if (!error) - vn_revalidate(vp); /* update Linux inode flags */ - return -error; + VOP_SETATTR(vp, vattr, attr_flags, NULL, error); + if (likely(!error)) + __vn_revalidate(vp, vattr); /* update flags */ + error = -error; + break; } case XFS_IOC_GETVERSION: { flags = LINVFS_GET_IP(vp)->i_generation; if (copy_to_user(arg, &flags, sizeof(flags))) - return -XFS_ERROR(EFAULT); - return 0; + error = -EFAULT; + break; } default: - return -ENOTTY; + error = -ENOTTY; + break; } + + kfree(vattr); + return error; } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index d7f6f2d8ac8e..b1219195c6e2 100644 --- 
a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -198,22 +198,22 @@ xfs_ichgtime_fast( * Pull the link count and size up from the xfs inode to the linux inode */ STATIC void -validate_fields( - struct inode *ip) +__linvfs_validate_fields( + struct inode *ip, + struct vattr *vattr) { vnode_t *vp = LINVFS_GET_VP(ip); - vattr_t va; int error; - va.va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS; - VOP_GETATTR(vp, &va, ATTR_LAZY, NULL, error); - if (likely(!error)) { - ip->i_nlink = va.va_nlink; - ip->i_blocks = va.va_nblocks; + vattr->va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS; + VOP_GETATTR(vp, vattr, ATTR_LAZY, NULL, error); + if (likely(!error)) { + ip->i_nlink = vattr->va_nlink; + ip->i_blocks = vattr->va_nblocks; - /* we're under i_mutex so i_size can't change under us */ - if (i_size_read(ip) != va.va_size) - i_size_write(ip, va.va_size); + /* we're under i_sem so i_size can't change under us */ + if (i_size_read(ip) != vattr->va_size) + i_size_write(ip, vattr->va_size); } } @@ -224,7 +224,7 @@ validate_fields( * inode, of course, such that log replay can't cause these to be lost). */ STATIC int -linvfs_init_security( +__linvfs_init_security( struct vnode *vp, struct inode *dir) { @@ -257,23 +257,23 @@ linvfs_init_security( * XXX(hch): nfsd is broken, better fix it instead. */ STATIC inline int -has_fs_struct(struct task_struct *task) +__linvfs_has_fs_struct(struct task_struct *task) { return (task->fs != init_task.fs); } STATIC inline void -cleanup_inode( +__linvfs_cleanup_inode( vnode_t *dvp, vnode_t *vp, - struct dentry *dentry, + struct dentry *dentry, int mode) { struct dentry teardown = {}; - int err2; + int error; /* Oh, the horror. - * If we can't add the ACL or we fail in + * If we can't add the ACL or we fail in * linvfs_init_security we must back out. * ENOSPC can hit here, among other things. */ @@ -281,9 +281,9 @@ cleanup_inode( teardown.d_name = dentry->d_name; if (S_ISDIR(mode)) - VOP_RMDIR(dvp, &teardown, NULL, err2); + VOP_RMDIR(dvp, &teardown, NULL, error); else - VOP_REMOVE(dvp, &teardown, NULL, err2); + VOP_REMOVE(dvp, &teardown, NULL, error); VN_RELE(vp); } @@ -295,7 +295,7 @@ linvfs_mknod( dev_t rdev) { struct inode *ip; - vattr_t va; + vattr_t *vattr; vnode_t *vp = NULL, *dvp = LINVFS_GET_VP(dir); xfs_acl_t *default_acl = NULL; attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; @@ -305,70 +305,76 @@ linvfs_mknod( * Irix uses Missed'em'V split, but doesn't want to see * the upper 5 bits of (14bit) major. 
*/ - if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff) + if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) return -EINVAL; - if (test_default_acl && test_default_acl(dvp)) { - if (!_ACL_ALLOC(default_acl)) + vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); + if (unlikely(!vattr)) + return -ENOMEM; + + if (unlikely(test_default_acl && test_default_acl(dvp))) { + if (!_ACL_ALLOC(default_acl)) { + kfree(vattr); return -ENOMEM; + } if (!_ACL_GET_DEFAULT(dvp, default_acl)) { _ACL_FREE(default_acl); default_acl = NULL; } } - if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current)) + if (IS_POSIXACL(dir) && !default_acl && __linvfs_has_fs_struct(current)) mode &= ~current->fs->umask; - memset(&va, 0, sizeof(va)); - va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; - va.va_mode = mode; + memset(vattr, 0, sizeof(*vattr)); + vattr->va_mask = XFS_AT_TYPE|XFS_AT_MODE; + vattr->va_mode = mode; switch (mode & S_IFMT) { case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - va.va_rdev = sysv_encode_dev(rdev); - va.va_mask |= XFS_AT_RDEV; + vattr->va_rdev = sysv_encode_dev(rdev); + vattr->va_mask |= XFS_AT_RDEV; /*FALLTHROUGH*/ case S_IFREG: - VOP_CREATE(dvp, dentry, &va, &vp, NULL, error); + VOP_CREATE(dvp, dentry, vattr, &vp, NULL, error); break; case S_IFDIR: - VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error); + VOP_MKDIR(dvp, dentry, vattr, &vp, NULL, error); break; default: error = EINVAL; break; } - if (!error) - { - error = linvfs_init_security(vp, dir); + if (unlikely(!error)) { + error = __linvfs_init_security(vp, dir); if (error) - cleanup_inode(dvp, vp, dentry, mode); + __linvfs_cleanup_inode(dvp, vp, dentry, mode); } - if (default_acl) { + if (unlikely(default_acl)) { if (!error) { - error = _ACL_INHERIT(vp, &va, default_acl); - if (!error) + error = _ACL_INHERIT(vp, vattr, default_acl); + if (!error) VMODIFY(vp); else - cleanup_inode(dvp, vp, dentry, mode); + __linvfs_cleanup_inode(dvp, vp, dentry, mode); } _ACL_FREE(default_acl); } - if (!error) { + if (likely(!error)) { ASSERT(vp); ip = LINVFS_GET_IP(vp); if (S_ISCHR(mode) || S_ISBLK(mode)) ip->i_rdev = rdev; else if (S_ISDIR(mode)) - validate_fields(ip); + __linvfs_validate_fields(ip, vattr); d_instantiate(dentry, ip); - validate_fields(dir); + __linvfs_validate_fields(dir, vattr); } + kfree(vattr); return -error; } @@ -423,22 +429,28 @@ linvfs_link( struct inode *ip; /* inode of guy being linked to */ vnode_t *tdvp; /* target directory for new name/link */ vnode_t *vp; /* vp of name being linked */ + vattr_t *vattr; int error; ip = old_dentry->d_inode; /* inode being linked to */ if (S_ISDIR(ip->i_mode)) return -EPERM; + vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); + if (unlikely(!vattr)) + return -ENOMEM; + tdvp = LINVFS_GET_VP(dir); vp = LINVFS_GET_VP(ip); VOP_LINK(tdvp, vp, dentry, NULL, error); - if (!error) { + if (likely(!error)) { VMODIFY(tdvp); VN_HOLD(vp); - validate_fields(ip); + __linvfs_validate_fields(ip, vattr); d_instantiate(dentry, ip); } + kfree(vattr); return -error; } @@ -449,17 +461,22 @@ linvfs_unlink( { struct inode *inode; vnode_t *dvp; /* directory containing name to remove */ + vattr_t *vattr; int error; + vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); + if (unlikely(!vattr)) + return -ENOMEM; + inode = dentry->d_inode; dvp = LINVFS_GET_VP(dir); VOP_REMOVE(dvp, dentry, NULL, error); - if (!error) { - validate_fields(dir); /* For size only */ - validate_fields(inode); + if (likely(!error)) { + __linvfs_validate_fields(dir, vattr); /* size needs update */ + __linvfs_validate_fields(inode, vattr); } - + 
kfree(vattr); return -error; } @@ -470,7 +487,7 @@ linvfs_symlink( const char *symname) { struct inode *ip; - vattr_t va; + vattr_t *vattr; vnode_t *dvp; /* directory containing name of symlink */ vnode_t *cvp; /* used to lookup symlink to put in dentry */ int error; @@ -478,22 +495,27 @@ linvfs_symlink( dvp = LINVFS_GET_VP(dir); cvp = NULL; - memset(&va, 0, sizeof(va)); - va.va_mode = S_IFLNK | + vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); + if (unlikely(!vattr)) + return -ENOMEM; + + memset(vattr, 0, sizeof(*vattr)); + vattr->va_mode = S_IFLNK | (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); - va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; + vattr->va_mask = XFS_AT_TYPE|XFS_AT_MODE; error = 0; - VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); + VOP_SYMLINK(dvp, dentry, vattr, (char *)symname, &cvp, NULL, error); if (likely(!error && cvp)) { - error = linvfs_init_security(cvp, dir); + error = __linvfs_init_security(cvp, dir); if (likely(!error)) { ip = LINVFS_GET_IP(cvp); d_instantiate(dentry, ip); - validate_fields(dir); - validate_fields(ip); + __linvfs_validate_fields(dir, vattr); + __linvfs_validate_fields(ip, vattr); } } + kfree(vattr); return -error; } @@ -504,13 +526,19 @@ linvfs_rmdir( { struct inode *inode = dentry->d_inode; vnode_t *dvp = LINVFS_GET_VP(dir); + vattr_t *vattr; int error; + vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); + if (unlikely(!vattr)) + return -ENOMEM; + VOP_RMDIR(dvp, dentry, NULL, error); - if (!error) { - validate_fields(inode); - validate_fields(dir); + if (likely(!error)) { + __linvfs_validate_fields(inode, vattr); + __linvfs_validate_fields(dir, vattr); } + kfree(vattr); return -error; } @@ -524,22 +552,26 @@ linvfs_rename( struct inode *new_inode = ndentry->d_inode; vnode_t *fvp; /* from directory */ vnode_t *tvp; /* target directory */ + vattr_t *vattr; int error; + vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); + if (unlikely(!vattr)) + return -ENOMEM; + fvp = LINVFS_GET_VP(odir); tvp = LINVFS_GET_VP(ndir); VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error); - if (error) - return -error; - - if (new_inode) - validate_fields(new_inode); - - validate_fields(odir); - if (ndir != odir) - validate_fields(ndir); - return 0; + if (likely(!error)) { + if (new_inode) + __linvfs_validate_fields(new_inode, vattr); + __linvfs_validate_fields(odir, vattr); + if (ndir != odir) + __linvfs_validate_fields(ndir, vattr); + } + kfree(vattr); + return -error; } /* @@ -653,11 +685,10 @@ linvfs_setattr( struct inode *inode = dentry->d_inode; unsigned int ia_valid = attr->ia_valid; vnode_t *vp = LINVFS_GET_VP(inode); - vattr_t vattr; + vattr_t vattr = { 0 }; int flags = 0; int error; - memset(&vattr, 0, sizeof(vattr_t)); if (ia_valid & ATTR_UID) { vattr.va_mask |= XFS_AT_UID; vattr.va_uid = attr->ia_uid; @@ -699,10 +730,9 @@ linvfs_setattr( #endif VOP_SETATTR(vp, &vattr, flags, NULL, error); - if (error) - return -error; - vn_revalidate(vp); - return error; + if (likely(!error)) + __vn_revalidate(vp, &vattr); + return -error; } STATIC void diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 260dd8415dd7..225e7dd8b21d 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -83,7 +83,7 @@ vn_initialize( vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); #endif /* XFS_VNODE_TRACE */ - vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address); + vn_trace_exit(vp, __FUNCTION__, (inst_t *)__return_address); return vp; } @@ -129,24 +129,31 @@ vn_revalidate_core( * Revalidate the Linux inode 
from the vnode. */ int -vn_revalidate( - struct vnode *vp) +__vn_revalidate( + struct vnode *vp, + struct vattr *vattr) { - vattr_t va; int error; - vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address); - ASSERT(vp->v_fbhv != NULL); - - va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS; - VOP_GETATTR(vp, &va, 0, NULL, error); - if (!error) { - vn_revalidate_core(vp, &va); + vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + vattr->va_mask = XFS_AT_STAT | XFS_AT_XFLAGS; + VOP_GETATTR(vp, vattr, 0, NULL, error); + if (likely(!error)) { + vn_revalidate_core(vp, vattr); VUNMODIFY(vp); } return -error; } +int +vn_revalidate( + struct vnode *vp) +{ + vattr_t vattr; + + return __vn_revalidate(vp, &vattr); +} + /* * Add a reference to a referenced vnode. */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 0fe2419461d6..0cf92ca80ce7 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -490,6 +490,7 @@ typedef struct vnode_map { (vmap).v_ino = (vp)->v_inode.i_ino; } extern int vn_revalidate(struct vnode *); +extern int __vn_revalidate(struct vnode *, vattr_t *); extern void vn_revalidate_core(struct vnode *, vattr_t *); extern void vn_iowait(struct vnode *vp); -- cgit v1.2.3 From 39269e29d4aad04252e0debec4c9b01bac16a257 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:33:50 +1100 Subject: [XFS] Reduce xfs_bmapi stack use by removing some local state variables, and directly testing flags instead. SGI-PV: 947312 SGI-Modid: xfs-linux-melb:xfs-kern:25370a Signed-off-by: Nathan Scott --- fs/xfs/xfs_bmap.c | 79 ++++++++++++++++++++++++++----------------------------- 1 file changed, 37 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 81a95b684b6b..da8fa0cd79c1 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4629,10 +4629,6 @@ xfs_bmapi( xfs_btree_cur_t *cur; /* bmap btree cursor */ xfs_fileoff_t end; /* end of mapped file region */ int eof; /* we've hit the end of extents */ - char contig; /* allocation must be one extent */ - char delay; /* this request is for delayed alloc */ - char exact; /* don't do all of wasdelayed extent */ - char convert; /* unwritten extent I/O completion */ xfs_bmbt_rec_t *ep; /* extent record pointer */ int error; /* error return */ xfs_bmbt_irec_t got; /* current file extent record */ @@ -4651,13 +4647,9 @@ xfs_bmapi( int tmp_logflags; /* temp flags holder */ int whichfork; /* data or attr fork */ char inhole; /* current location is hole in file */ - char stateless; /* ignore state flag set */ - char trim; /* output trimmed to match range */ - char userdata; /* allocating non-metadata */ char wasdelay; /* old extent was delayed */ char wr; /* this is a write request */ char rt; /* this is a realtime file */ - char rsvd; /* OK to allocate reserved blocks */ #ifdef DEBUG xfs_fileoff_t orig_bno; /* original block number value */ int orig_flags; /* original flags arg value */ @@ -4694,15 +4686,8 @@ xfs_bmapi( XFS_STATS_INC(xs_blk_mapw); else XFS_STATS_INC(xs_blk_mapr); - delay = (flags & XFS_BMAPI_DELAY) != 0; - trim = (flags & XFS_BMAPI_ENTIRE) == 0; - userdata = (flags & XFS_BMAPI_METADATA) == 0; - convert = (flags & XFS_BMAPI_CONVERT) != 0; - exact = (flags & XFS_BMAPI_EXACT) != 0; - rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0; - contig = (flags & XFS_BMAPI_CONTIG) != 0; /* - * stateless is used to combine extents which + * IGSTATE flag is used to combine extents which * differ only due to the state of the extents. 
* This technique is used from xfs_getbmap() * when the caller does not wish to see the @@ -4718,10 +4703,9 @@ xfs_bmapi( * xfs_strat_comp(), where the xfs_bmapi() call * is transactioned, and the extents combined. */ - stateless = (flags & XFS_BMAPI_IGSTATE) != 0; - if (stateless && wr) /* if writing unwritten space, no */ - wr = 0; /* allocations are allowed */ - ASSERT(wr || !delay); + if ((flags & XFS_BMAPI_IGSTATE) && wr) /* if writing unwritten space */ + wr = 0; /* no allocations are allowed */ + ASSERT(wr || !(flags & XFS_BMAPI_DELAY)); logflags = 0; nallocs = 0; cur = NULL; @@ -4756,7 +4740,7 @@ xfs_bmapi( if (eof && !wr) got.br_startoff = end; inhole = eof || got.br_startoff > bno; - wasdelay = wr && !inhole && !delay && + wasdelay = wr && !inhole && !(flags & XFS_BMAPI_DELAY) && ISNULLSTARTBLOCK(got.br_startblock); /* * First, deal with the hole before the allocated space @@ -4768,7 +4752,7 @@ xfs_bmapi( * allocate the stuff asked for in this bmap call * but that wouldn't be as good. */ - if (wasdelay && !exact) { + if (wasdelay && !(flags & XFS_BMAPI_EXACT)) { alen = (xfs_extlen_t)got.br_blockcount; aoff = got.br_startoff; if (lastx != NULLEXTNUM && lastx) { @@ -4790,8 +4774,8 @@ xfs_bmapi( got.br_startoff - bno); aoff = bno; } - minlen = contig ? alen : 1; - if (delay) { + minlen = (flags & XFS_BMAPI_CONTIG) ? alen : 1; + if (flags & XFS_BMAPI_DELAY) { xfs_extlen_t extsz; /* Figure out the extent size, adjust alen */ @@ -4804,7 +4788,9 @@ xfs_bmapi( if (extsz) { error = xfs_bmap_extsize_align(mp, &got, &prev, extsz, - rt, eof, delay, convert, + rt, eof, + flags&XFS_BMAPI_DELAY, + flags&XFS_BMAPI_CONVERT, &aoff, &alen); ASSERT(!error); } @@ -4842,24 +4828,29 @@ xfs_bmapi( if (rt) { error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - -(extsz), rsvd); + -(extsz), (flags & + XFS_BMAPI_RSVBLOCKS)); } else { error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, - -(alen), rsvd); + -(alen), (flags & + XFS_BMAPI_RSVBLOCKS)); } if (!error) { error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, - -(indlen), rsvd); + -(indlen), (flags & + XFS_BMAPI_RSVBLOCKS)); if (error && rt) xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - extsz, rsvd); + extsz, (flags & + XFS_BMAPI_RSVBLOCKS)); else if (error) xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, - alen, rsvd); + alen, (flags & + XFS_BMAPI_RSVBLOCKS)); } if (error) { @@ -4892,7 +4883,7 @@ xfs_bmapi( /* Indicate if this is the first user data * in the file, or just any user data. */ - if (userdata) { + if (!(flags & XFS_BMAPI_METADATA)) { bma.userdata = (aoff == 0) ? XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; @@ -4904,7 +4895,7 @@ xfs_bmapi( bma.firstblock = *firstblock; bma.alen = alen; bma.off = aoff; - bma.conv = convert; + bma.conv = (flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; bma.minlen = minlen; bma.low = flist->xbf_low; @@ -4915,7 +4906,8 @@ xfs_bmapi( * is larger than a stripe unit. 
*/ if (mp->m_dalign && alen >= mp->m_dalign && - userdata && whichfork == XFS_DATA_FORK) { + (!(flags & XFS_BMAPI_METADATA)) && + (whichfork == XFS_DATA_FORK)) { if ((error = xfs_bmap_isaeof(ip, aoff, whichfork, &bma.aeof))) goto error0; @@ -4978,7 +4970,7 @@ xfs_bmapi( } error = xfs_bmap_add_extent(ip, lastx, &cur, &got, firstblock, flist, &tmp_logflags, whichfork, - rsvd); + (flags & XFS_BMAPI_RSVBLOCKS)); logflags |= tmp_logflags; if (error) goto error0; @@ -4990,7 +4982,7 @@ xfs_bmapi( ASSERT(got.br_startoff + got.br_blockcount >= aoff + alen); #ifdef DEBUG - if (delay) { + if (flags & XFS_BMAPI_DELAY) { ASSERT(ISNULLSTARTBLOCK(got.br_startblock)); ASSERT(STARTBLOCKVAL(got.br_startblock) > 0); } @@ -5019,14 +5011,15 @@ xfs_bmapi( * Then deal with the allocated space we found. */ ASSERT(ep != NULL); - if (trim && (got.br_startoff + got.br_blockcount > obno)) { + if (!(flags & XFS_BMAPI_ENTIRE) && + (got.br_startoff + got.br_blockcount > obno)) { if (obno > bno) bno = obno; ASSERT((bno >= obno) || (n == 0)); ASSERT(bno < end); mval->br_startoff = bno; if (ISNULLSTARTBLOCK(got.br_startblock)) { - ASSERT(!wr || delay); + ASSERT(!wr || (flags & XFS_BMAPI_DELAY)); mval->br_startblock = DELAYSTARTBLOCK; } else mval->br_startblock = @@ -5048,7 +5041,7 @@ xfs_bmapi( } else { *mval = got; if (ISNULLSTARTBLOCK(mval->br_startblock)) { - ASSERT(!wr || delay); + ASSERT(!wr || (flags & XFS_BMAPI_DELAY)); mval->br_startblock = DELAYSTARTBLOCK; } } @@ -5074,7 +5067,7 @@ xfs_bmapi( mval->br_state = XFS_EXT_NORM; error = xfs_bmap_add_extent(ip, lastx, &cur, mval, firstblock, flist, &tmp_logflags, whichfork, - rsvd); + (flags & XFS_BMAPI_RSVBLOCKS)); logflags |= tmp_logflags; if (error) goto error0; @@ -5091,9 +5084,10 @@ xfs_bmapi( continue; } - ASSERT(!trim || + ASSERT((flags & XFS_BMAPI_ENTIRE) || ((mval->br_startoff + mval->br_blockcount) <= end)); - ASSERT(!trim || (mval->br_blockcount <= len) || + ASSERT((flags & XFS_BMAPI_ENTIRE) || + (mval->br_blockcount <= len) || (mval->br_startoff < obno)); bno = mval->br_startoff + mval->br_blockcount; len = end - bno; @@ -5108,7 +5102,8 @@ xfs_bmapi( mval[-1].br_startblock != HOLESTARTBLOCK && mval->br_startblock == mval[-1].br_startblock + mval[-1].br_blockcount && - (stateless || mval[-1].br_state == mval->br_state)) { + ((flags & XFS_BMAPI_IGSTATE) || + mval[-1].br_state == mval->br_state)) { ASSERT(mval->br_startoff == mval[-1].br_startoff + mval[-1].br_blockcount); mval[-1].br_blockcount += mval->br_blockcount; -- cgit v1.2.3 From a365bdd5e8fae9c592b9e4851d931016f9fdd868 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:34:16 +1100 Subject: [XFS] Reduce stack usage within xfs_bmapi by rearranging some code, splitting realtime/btree allocators apart. Based on Glens original patches. SGI-PV: 947312 SGI-Modid: xfs-linux-melb:xfs-kern:25372a Signed-off-by: Nathan Scott --- fs/xfs/xfs_bmap.c | 668 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 345 insertions(+), 323 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index da8fa0cd79c1..9f0ed4869d47 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -2294,25 +2294,15 @@ xfs_bmap_extsize_align( #define XFS_ALLOC_GAP_UNITS 4 -/* - * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. - * It figures out where to ask the underlying allocator to put the new extent. 
- */ STATIC int -xfs_bmap_alloc( +xfs_bmap_adjacent( xfs_bmalloca_t *ap) /* bmap alloc argument struct */ { xfs_fsblock_t adjust; /* adjustment to block numbers */ - xfs_alloctype_t atype=0; /* type for allocation routines */ - int error; /* error return value */ xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ xfs_mount_t *mp; /* mount point structure */ int nullfb; /* true if ap->firstblock isn't set */ int rt; /* true if inode is realtime */ - xfs_extlen_t prod = 0; /* product factor for allocators */ - xfs_extlen_t ralen = 0; /* realtime allocation length */ - xfs_extlen_t align; /* minimum allocation alignment */ - xfs_rtblock_t rtx; #define ISVALID(x,y) \ (rt ? \ @@ -2321,75 +2311,10 @@ xfs_bmap_alloc( XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) - /* - * Set up variables. - */ mp = ap->ip->i_mount; nullfb = ap->firstblock == NULLFSBLOCK; rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); - if (rt) { - align = ap->ip->i_d.di_extsize ? - ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize; - /* Set prod to match the extent size */ - prod = align / mp->m_sb.sb_rextsize; - - error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, - align, rt, ap->eof, 0, - ap->conv, &ap->off, &ap->alen); - if (error) - return error; - ASSERT(ap->alen); - ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0); - - /* - * If the offset & length are not perfectly aligned - * then kill prod, it will just get us in trouble. - */ - if (do_mod(ap->off, align) || ap->alen % align) - prod = 1; - /* - * Set ralen to be the actual requested length in rtextents. - */ - ralen = ap->alen / mp->m_sb.sb_rextsize; - /* - * If the old value was close enough to MAXEXTLEN that - * we rounded up to it, cut it back so it's valid again. - * Note that if it's a really large request (bigger than - * MAXEXTLEN), we don't hear about that number, and can't - * adjust the starting point to match it. - */ - if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) - ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; - /* - * If it's an allocation to an empty file at offset 0, - * pick an extent that will space things out in the rt area. - */ - if (ap->eof && ap->off == 0) { - error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); - if (error) - return error; - ap->rval = rtx * mp->m_sb.sb_rextsize; - } else - ap->rval = 0; - } else { - align = (ap->userdata && ap->ip->i_d.di_extsize && - (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ? - ap->ip->i_d.di_extsize : 0; - if (unlikely(align)) { - error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, - align, rt, - ap->eof, 0, ap->conv, - &ap->off, &ap->alen); - ASSERT(!error); - ASSERT(ap->alen); - } - if (nullfb) - ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); - else - ap->rval = ap->firstblock; - } - /* * If allocating at eof, and there's a previous real block, * try to use it's last block as our starting point. 
@@ -2514,281 +2439,378 @@ xfs_bmap_alloc( else if (gotbno != NULLFSBLOCK) ap->rval = gotbno; } +#undef ISVALID + return 0; +} + +STATIC int +xfs_bmap_rtalloc( + xfs_bmalloca_t *ap) /* bmap alloc argument struct */ +{ + xfs_alloctype_t atype = 0; /* type for allocation routines */ + int error; /* error return value */ + xfs_mount_t *mp; /* mount point structure */ + xfs_extlen_t prod = 0; /* product factor for allocators */ + xfs_extlen_t ralen = 0; /* realtime allocation length */ + xfs_extlen_t align; /* minimum allocation alignment */ + xfs_rtblock_t rtx; /* realtime extent number */ + xfs_rtblock_t rtb; + + mp = ap->ip->i_mount; + align = ap->ip->i_d.di_extsize ? + ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize; + prod = align / mp->m_sb.sb_rextsize; + error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, + align, 1, ap->eof, 0, + ap->conv, &ap->off, &ap->alen); + if (error) + return error; + ASSERT(ap->alen); + ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0); + + /* + * If the offset & length are not perfectly aligned + * then kill prod, it will just get us in trouble. + */ + if (do_mod(ap->off, align) || ap->alen % align) + prod = 1; + /* + * Set ralen to be the actual requested length in rtextents. + */ + ralen = ap->alen / mp->m_sb.sb_rextsize; + /* + * If the old value was close enough to MAXEXTLEN that + * we rounded up to it, cut it back so it's valid again. + * Note that if it's a really large request (bigger than + * MAXEXTLEN), we don't hear about that number, and can't + * adjust the starting point to match it. + */ + if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) + ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; + /* + * If it's an allocation to an empty file at offset 0, + * pick an extent that will space things out in the rt area. + */ + if (ap->eof && ap->off == 0) { + error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); + if (error) + return error; + ap->rval = rtx * mp->m_sb.sb_rextsize; + } else { + ap->rval = 0; + } + + xfs_bmap_adjacent(ap); + + /* + * Realtime allocation, done through xfs_rtallocate_extent. + */ + atype = ap->rval == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; + do_div(ap->rval, mp->m_sb.sb_rextsize); + rtb = ap->rval; + ap->alen = ralen; + if ((error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen, + &ralen, atype, ap->wasdel, prod, &rtb))) + return error; + if (rtb == NULLFSBLOCK && prod > 1 && + (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, + ap->alen, &ralen, atype, + ap->wasdel, 1, &rtb))) + return error; + ap->rval = rtb; + if (ap->rval != NULLFSBLOCK) { + ap->rval *= mp->m_sb.sb_rextsize; + ralen *= mp->m_sb.sb_rextsize; + ap->alen = ralen; + ap->ip->i_d.di_nblocks += ralen; + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); + if (ap->wasdel) + ap->ip->i_delayed_blks -= ralen; + /* + * Adjust the disk quota also. This was reserved + * earlier. + */ + XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, + ap->wasdel ? 
XFS_TRANS_DQ_DELRTBCOUNT : + XFS_TRANS_DQ_RTBCOUNT, (long) ralen); + } else { + ap->alen = 0; + } + return 0; +} + +STATIC int +xfs_bmap_btalloc( + xfs_bmalloca_t *ap) /* bmap alloc argument struct */ +{ + xfs_mount_t *mp; /* mount point structure */ + xfs_alloctype_t atype = 0; /* type for allocation routines */ + xfs_extlen_t align; /* minimum allocation alignment */ + xfs_agnumber_t ag; + xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ + xfs_agnumber_t startag; + xfs_alloc_arg_t args; + xfs_extlen_t blen; + xfs_extlen_t delta; + xfs_extlen_t longest; + xfs_extlen_t need; + xfs_extlen_t nextminlen = 0; + xfs_perag_t *pag; + int nullfb; /* true if ap->firstblock isn't set */ + int isaligned; + int notinit; + int tryagain; + int error; + + mp = ap->ip->i_mount; + align = (ap->userdata && ap->ip->i_d.di_extsize && + (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ? + ap->ip->i_d.di_extsize : 0; + if (unlikely(align)) { + error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, + align, 0, ap->eof, 0, ap->conv, + &ap->off, &ap->alen); + ASSERT(!error); + ASSERT(ap->alen); + } + nullfb = ap->firstblock == NULLFSBLOCK; + fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); + if (nullfb) + ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); + else + ap->rval = ap->firstblock; + + xfs_bmap_adjacent(ap); + /* * If allowed, use ap->rval; otherwise must use firstblock since * it's in the right allocation group. */ - if (nullfb || rt || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno) + if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno) ; else ap->rval = ap->firstblock; /* - * Realtime allocation, done through xfs_rtallocate_extent. + * Normal allocation, done through xfs_alloc_vextent. */ - if (rt) { -#ifndef __KERNEL__ - ASSERT(0); -#else - xfs_rtblock_t rtb; - - atype = ap->rval == 0 ? - XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; - do_div(ap->rval, mp->m_sb.sb_rextsize); - rtb = ap->rval; - ap->alen = ralen; - if ((error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen, - &ralen, atype, ap->wasdel, prod, &rtb))) - return error; - if (rtb == NULLFSBLOCK && prod > 1 && - (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, - ap->alen, &ralen, atype, - ap->wasdel, 1, &rtb))) - return error; - ap->rval = rtb; - if (ap->rval != NULLFSBLOCK) { - ap->rval *= mp->m_sb.sb_rextsize; - ralen *= mp->m_sb.sb_rextsize; - ap->alen = ralen; - ap->ip->i_d.di_nblocks += ralen; - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - if (ap->wasdel) - ap->ip->i_delayed_blks -= ralen; + tryagain = isaligned = 0; + args.tp = ap->tp; + args.mp = mp; + args.fsbno = ap->rval; + args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); + blen = 0; + if (nullfb) { + args.type = XFS_ALLOCTYPE_START_BNO; + args.total = ap->total; + /* + * Find the longest available space. + * We're going to try for the whole allocation at once. + */ + startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno); + notinit = 0; + down_read(&mp->m_peraglock); + while (blen < ap->alen) { + pag = &mp->m_perag[ag]; + if (!pag->pagf_init && + (error = xfs_alloc_pagf_init(mp, args.tp, + ag, XFS_ALLOC_FLAG_TRYLOCK))) { + up_read(&mp->m_peraglock); + return error; + } /* - * Adjust the disk quota also. This was reserved - * earlier. + * See xfs_alloc_fix_freelist... */ - XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, - ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : - XFS_TRANS_DQ_RTBCOUNT, - (long) ralen); - } else - ap->alen = 0; -#endif /* __KERNEL__ */ + if (pag->pagf_init) { + need = XFS_MIN_FREELIST_PAG(pag, mp); + delta = need > pag->pagf_flcount ? 
+ need - pag->pagf_flcount : 0; + longest = (pag->pagf_longest > delta) ? + (pag->pagf_longest - delta) : + (pag->pagf_flcount > 0 || + pag->pagf_longest > 0); + if (blen < longest) + blen = longest; + } else + notinit = 1; + if (++ag == mp->m_sb.sb_agcount) + ag = 0; + if (ag == startag) + break; + } + up_read(&mp->m_peraglock); + /* + * Since the above loop did a BUF_TRYLOCK, it is + * possible that there is space for this request. + */ + if (notinit || blen < ap->minlen) + args.minlen = ap->minlen; + /* + * If the best seen length is less than the request + * length, use the best as the minimum. + */ + else if (blen < ap->alen) + args.minlen = blen; + /* + * Otherwise we've seen an extent as big as alen, + * use that as the minimum. + */ + else + args.minlen = ap->alen; + } else if (ap->low) { + args.type = XFS_ALLOCTYPE_FIRST_AG; + args.total = args.minlen = ap->minlen; + } else { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.total = ap->total; + args.minlen = ap->minlen; + } + if (unlikely(ap->userdata && ap->ip->i_d.di_extsize && + (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) { + args.prod = ap->ip->i_d.di_extsize; + if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) + args.mod = (xfs_extlen_t)(args.prod - args.mod); + } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) { + args.prod = 1; + args.mod = 0; + } else { + args.prod = NBPP >> mp->m_sb.sb_blocklog; + if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod)))) + args.mod = (xfs_extlen_t)(args.prod - args.mod); } /* - * Normal allocation, done through xfs_alloc_vextent. + * If we are not low on available data blocks, and the + * underlying logical volume manager is a stripe, and + * the file offset is zero then try to allocate data + * blocks on stripe unit boundary. + * NOTE: ap->aeof is only set if the allocation length + * is >= the stripe unit and the allocation offset is + * at the end of file. */ - else { - xfs_agnumber_t ag; - xfs_alloc_arg_t args; - xfs_extlen_t blen; - xfs_extlen_t delta; - int isaligned; - xfs_extlen_t longest; - xfs_extlen_t need; - xfs_extlen_t nextminlen=0; - int notinit; - xfs_perag_t *pag; - xfs_agnumber_t startag; - int tryagain; - - tryagain = isaligned = 0; - args.tp = ap->tp; - args.mp = mp; - args.fsbno = ap->rval; - args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); - blen = 0; - if (nullfb) { - args.type = XFS_ALLOCTYPE_START_BNO; - args.total = ap->total; - /* - * Find the longest available space. - * We're going to try for the whole allocation at once. - */ - startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno); - notinit = 0; - down_read(&mp->m_peraglock); - while (blen < ap->alen) { - pag = &mp->m_perag[ag]; - if (!pag->pagf_init && - (error = xfs_alloc_pagf_init(mp, args.tp, - ag, XFS_ALLOC_FLAG_TRYLOCK))) { - up_read(&mp->m_peraglock); - return error; - } - /* - * See xfs_alloc_fix_freelist... - */ - if (pag->pagf_init) { - need = XFS_MIN_FREELIST_PAG(pag, mp); - delta = need > pag->pagf_flcount ? - need - pag->pagf_flcount : 0; - longest = (pag->pagf_longest > delta) ? - (pag->pagf_longest - delta) : - (pag->pagf_flcount > 0 || - pag->pagf_longest > 0); - if (blen < longest) - blen = longest; - } else - notinit = 1; - if (++ag == mp->m_sb.sb_agcount) - ag = 0; - if (ag == startag) - break; - } - up_read(&mp->m_peraglock); + if (!ap->low && ap->aeof) { + if (!ap->off) { + args.alignment = mp->m_dalign; + atype = args.type; + isaligned = 1; /* - * Since the above loop did a BUF_TRYLOCK, it is - * possible that there is space for this request. 
+ * Adjust for alignment */ - if (notinit || blen < ap->minlen) - args.minlen = ap->minlen; + if (blen > args.alignment && blen <= ap->alen) + args.minlen = blen - args.alignment; + args.minalignslop = 0; + } else { /* - * If the best seen length is less than the request - * length, use the best as the minimum. + * First try an exact bno allocation. + * If it fails then do a near or start bno + * allocation with alignment turned on. */ - else if (blen < ap->alen) - args.minlen = blen; + atype = args.type; + tryagain = 1; + args.type = XFS_ALLOCTYPE_THIS_BNO; + args.alignment = 1; /* - * Otherwise we've seen an extent as big as alen, - * use that as the minimum. + * Compute the minlen+alignment for the + * next case. Set slop so that the value + * of minlen+alignment+slop doesn't go up + * between the calls. */ + if (blen > mp->m_dalign && blen <= ap->alen) + nextminlen = blen - mp->m_dalign; else - args.minlen = ap->alen; - } else if (ap->low) { - args.type = XFS_ALLOCTYPE_FIRST_AG; - args.total = args.minlen = ap->minlen; - } else { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.total = ap->total; - args.minlen = ap->minlen; - } - if (unlikely(ap->userdata && ap->ip->i_d.di_extsize && - (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) { - args.prod = ap->ip->i_d.di_extsize; - if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) - args.mod = (xfs_extlen_t)(args.prod - args.mod); - } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) { - args.prod = 1; - args.mod = 0; - } else { - args.prod = NBPP >> mp->m_sb.sb_blocklog; - if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod)))) - args.mod = (xfs_extlen_t)(args.prod - args.mod); + nextminlen = args.minlen; + if (nextminlen + mp->m_dalign > args.minlen + 1) + args.minalignslop = + nextminlen + mp->m_dalign - + args.minlen - 1; + else + args.minalignslop = 0; } + } else { + args.alignment = 1; + args.minalignslop = 0; + } + args.minleft = ap->minleft; + args.wasdel = ap->wasdel; + args.isfl = 0; + args.userdata = ap->userdata; + if ((error = xfs_alloc_vextent(&args))) + return error; + if (tryagain && args.fsbno == NULLFSBLOCK) { /* - * If we are not low on available data blocks, and the - * underlying logical volume manager is a stripe, and - * the file offset is zero then try to allocate data - * blocks on stripe unit boundary. - * NOTE: ap->aeof is only set if the allocation length - * is >= the stripe unit and the allocation offset is - * at the end of file. + * Exact allocation failed. Now try with alignment + * turned on. */ - if (!ap->low && ap->aeof) { - if (!ap->off) { - args.alignment = mp->m_dalign; - atype = args.type; - isaligned = 1; - /* - * Adjust for alignment - */ - if (blen > args.alignment && blen <= ap->alen) - args.minlen = blen - args.alignment; - args.minalignslop = 0; - } else { - /* - * First try an exact bno allocation. - * If it fails then do a near or start bno - * allocation with alignment turned on. - */ - atype = args.type; - tryagain = 1; - args.type = XFS_ALLOCTYPE_THIS_BNO; - args.alignment = 1; - /* - * Compute the minlen+alignment for the - * next case. Set slop so that the value - * of minlen+alignment+slop doesn't go up - * between the calls. 
- */ - if (blen > mp->m_dalign && blen <= ap->alen) - nextminlen = blen - mp->m_dalign; - else - nextminlen = args.minlen; - if (nextminlen + mp->m_dalign > args.minlen + 1) - args.minalignslop = - nextminlen + mp->m_dalign - - args.minlen - 1; - else - args.minalignslop = 0; - } - } else { - args.alignment = 1; - args.minalignslop = 0; - } - args.minleft = ap->minleft; - args.wasdel = ap->wasdel; - args.isfl = 0; - args.userdata = ap->userdata; + args.type = atype; + args.fsbno = ap->rval; + args.alignment = mp->m_dalign; + args.minlen = nextminlen; + args.minalignslop = 0; + isaligned = 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + if (isaligned && args.fsbno == NULLFSBLOCK) { + /* + * allocation failed, so turn off alignment and + * try again. + */ + args.type = atype; + args.fsbno = ap->rval; + args.alignment = 0; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + if (args.fsbno == NULLFSBLOCK && nullfb && + args.minlen > ap->minlen) { + args.minlen = ap->minlen; + args.type = XFS_ALLOCTYPE_START_BNO; + args.fsbno = ap->rval; if ((error = xfs_alloc_vextent(&args))) return error; - if (tryagain && args.fsbno == NULLFSBLOCK) { - /* - * Exact allocation failed. Now try with alignment - * turned on. - */ - args.type = atype; - args.fsbno = ap->rval; - args.alignment = mp->m_dalign; - args.minlen = nextminlen; - args.minalignslop = 0; - isaligned = 1; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - if (isaligned && args.fsbno == NULLFSBLOCK) { - /* - * allocation failed, so turn off alignment and - * try again. - */ - args.type = atype; - args.fsbno = ap->rval; - args.alignment = 0; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - if (args.fsbno == NULLFSBLOCK && nullfb && - args.minlen > ap->minlen) { - args.minlen = ap->minlen; - args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = ap->rval; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - if (args.fsbno == NULLFSBLOCK && nullfb) { - args.fsbno = 0; - args.type = XFS_ALLOCTYPE_FIRST_AG; - args.total = ap->minlen; - args.minleft = 0; - if ((error = xfs_alloc_vextent(&args))) - return error; - ap->low = 1; - } - if (args.fsbno != NULLFSBLOCK) { - ap->firstblock = ap->rval = args.fsbno; - ASSERT(nullfb || fb_agno == args.agno || - (ap->low && fb_agno < args.agno)); - ap->alen = args.len; - ap->ip->i_d.di_nblocks += args.len; - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - if (ap->wasdel) - ap->ip->i_delayed_blks -= args.len; - /* - * Adjust the disk quota also. This was reserved - * earlier. - */ - XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, - ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : - XFS_TRANS_DQ_BCOUNT, - (long) args.len); - } else { - ap->rval = NULLFSBLOCK; - ap->alen = 0; - } + } + if (args.fsbno == NULLFSBLOCK && nullfb) { + args.fsbno = 0; + args.type = XFS_ALLOCTYPE_FIRST_AG; + args.total = ap->minlen; + args.minleft = 0; + if ((error = xfs_alloc_vextent(&args))) + return error; + ap->low = 1; + } + if (args.fsbno != NULLFSBLOCK) { + ap->firstblock = ap->rval = args.fsbno; + ASSERT(nullfb || fb_agno == args.agno || + (ap->low && fb_agno < args.agno)); + ap->alen = args.len; + ap->ip->i_d.di_nblocks += args.len; + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); + if (ap->wasdel) + ap->ip->i_delayed_blks -= args.len; + /* + * Adjust the disk quota also. This was reserved + * earlier. + */ + XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, + ap->wasdel ? 
XFS_TRANS_DQ_DELBCOUNT : + XFS_TRANS_DQ_BCOUNT, + (long) args.len); + } else { + ap->rval = NULLFSBLOCK; + ap->alen = 0; } return 0; -#undef ISVALID +} + +/* + * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. + * It figures out where to ask the underlying allocator to put the new extent. + */ +STATIC int +xfs_bmap_alloc( + xfs_bmalloca_t *ap) /* bmap alloc argument struct */ +{ + if ((ap->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && ap->userdata) + return xfs_bmap_rtalloc(ap); + return xfs_bmap_btalloc(ap); } /* -- cgit v1.2.3 From b8b0f546569871b365a5e3b3cc3f667af658dd49 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:47:32 +1100 Subject: [XFS] Remove a couple of no-longer-used macros/types from XFS. SGI-PV: 950556 SGI-Modid: xfs-linux-melb:xfs-kern:25377a Signed-off-by: Nathan Scott --- fs/xfs/xfs_mount.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 29cfcf0c11be..ebd73960e9db 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -505,11 +505,6 @@ xfs_preferred_iosize(xfs_mount_t *mp) #define XFS_CORRUPT_INCORE 0x8 /* Corrupt in-memory data structures */ #define XFS_SHUTDOWN_REMOTE_REQ 0x10 /* Shutdown came from remote cell */ -/* - * xflags for xfs_syncsub - */ -#define XFS_XSYNC_RELOC 0x01 - /* * Flags for xfs_mountfs */ -- cgit v1.2.3 From e4c573bb6a8477a26b3d5471fd116d258760a13a Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 13:54:26 +1100 Subject: [XFS] Switch over from linvfs names for address space ops for consistent naming. SGI-PV: 950556 SGI-Modid: xfs-linux-melb:xfs-kern:25378a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 63 +++++++++++++++++++++----------------------- fs/xfs/linux-2.6/xfs_aops.h | 4 +-- fs/xfs/linux-2.6/xfs_iops.c | 2 +- fs/xfs/linux-2.6/xfs_super.c | 4 +-- 4 files changed, 35 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 4b6bfdb8251c..2b610c7ba322 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -43,7 +43,6 @@ #include #include - STATIC void xfs_count_page_state( struct page *page, @@ -68,8 +67,6 @@ xfs_count_page_state( } while ((bh = bh->b_this_page) != head); } - - #if defined(XFS_RW_TRACE) void xfs_page_trace( @@ -1095,7 +1092,7 @@ error: */ STATIC int -linvfs_writepage( +xfs_vm_writepage( struct page *page, struct writeback_control *wbc) { @@ -1181,7 +1178,7 @@ out_unlock: * free them and we should come back later via writepage. 
*/ STATIC int -linvfs_release_page( +xfs_vm_release_page( struct page *page, gfp_t gfp_mask) { @@ -1223,7 +1220,7 @@ free_buffers: } STATIC int -__linvfs_get_block( +__xfs_get_block( struct inode *inode, sector_t iblock, unsigned long blocks, @@ -1304,30 +1301,30 @@ __linvfs_get_block( } int -linvfs_get_block( +xfs_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - return __linvfs_get_block(inode, iblock, 0, bh_result, + return __xfs_get_block(inode, iblock, 0, bh_result, create, 0, BMAPI_WRITE); } STATIC int -linvfs_get_blocks_direct( +xfs_get_blocks_direct( struct inode *inode, sector_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, int create) { - return __linvfs_get_block(inode, iblock, max_blocks, bh_result, + return __xfs_get_block(inode, iblock, max_blocks, bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT); } STATIC void -linvfs_end_io_direct( +xfs_end_io_direct( struct kiocb *iocb, loff_t offset, ssize_t size, @@ -1365,7 +1362,7 @@ linvfs_end_io_direct( } STATIC ssize_t -linvfs_direct_IO( +xfs_vm_direct_IO( int rw, struct kiocb *iocb, const struct iovec *iov, @@ -1389,8 +1386,8 @@ linvfs_direct_IO( ret = blockdev_direct_IO_own_locking(rw, iocb, inode, iomap.iomap_target->bt_bdev, iov, offset, nr_segs, - linvfs_get_blocks_direct, - linvfs_end_io_direct); + xfs_get_blocks_direct, + xfs_end_io_direct); if (unlikely(ret <= 0 && iocb->private)) xfs_destroy_ioend(iocb->private); @@ -1398,17 +1395,17 @@ linvfs_direct_IO( } STATIC int -linvfs_prepare_write( +xfs_vm_prepare_write( struct file *file, struct page *page, unsigned int from, unsigned int to) { - return block_prepare_write(page, from, to, linvfs_get_block); + return block_prepare_write(page, from, to, xfs_get_block); } STATIC sector_t -linvfs_bmap( +xfs_vm_bmap( struct address_space *mapping, sector_t block) { @@ -1416,34 +1413,34 @@ linvfs_bmap( vnode_t *vp = LINVFS_GET_VP(inode); int error; - vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address); + vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); VOP_RWLOCK(vp, VRWLOCK_READ); VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); VOP_RWUNLOCK(vp, VRWLOCK_READ); - return generic_block_bmap(mapping, block, linvfs_get_block); + return generic_block_bmap(mapping, block, xfs_get_block); } STATIC int -linvfs_readpage( +xfs_vm_readpage( struct file *unused, struct page *page) { - return mpage_readpage(page, linvfs_get_block); + return mpage_readpage(page, xfs_get_block); } STATIC int -linvfs_readpages( +xfs_vm_readpages( struct file *unused, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block); + return mpage_readpages(mapping, pages, nr_pages, xfs_get_block); } STATIC int -linvfs_invalidate_page( +xfs_vm_invalidate_page( struct page *page, unsigned long offset) { @@ -1452,16 +1449,16 @@ linvfs_invalidate_page( return block_invalidatepage(page, offset); } -struct address_space_operations linvfs_aops = { - .readpage = linvfs_readpage, - .readpages = linvfs_readpages, - .writepage = linvfs_writepage, +struct address_space_operations xfs_address_space_operations = { + .readpage = xfs_vm_readpage, + .readpages = xfs_vm_readpages, + .writepage = xfs_vm_writepage, .sync_page = block_sync_page, - .releasepage = linvfs_release_page, - .invalidatepage = linvfs_invalidate_page, - .prepare_write = linvfs_prepare_write, + .releasepage = xfs_vm_release_page, + .invalidatepage = xfs_vm_invalidate_page, + .prepare_write = 
xfs_vm_prepare_write, .commit_write = generic_commit_write, - .bmap = linvfs_bmap, - .direct_IO = linvfs_direct_IO, + .bmap = xfs_vm_bmap, + .direct_IO = xfs_vm_direct_IO, .migratepage = buffer_migrate_page, }; diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 55339dd5a30d..795699f121d2 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -40,7 +40,7 @@ typedef struct xfs_ioend { struct work_struct io_work; /* xfsdatad work queue */ } xfs_ioend_t; -extern struct address_space_operations linvfs_aops; -extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int); +extern struct address_space_operations xfs_address_space_operations; +extern int xfs_get_block(struct inode *, sector_t, struct buffer_head *, int); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index b1219195c6e2..52b02bd5c283 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -739,7 +739,7 @@ STATIC void linvfs_truncate( struct inode *inode) { - block_truncate_page(inode->i_mapping, inode->i_size, linvfs_get_block); + block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_block); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 4d8613f65c2c..fb76c53f9c43 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -129,7 +129,7 @@ xfs_set_inodeops( case S_IFREG: inode->i_op = &linvfs_file_inode_operations; inode->i_fop = &linvfs_file_operations; - inode->i_mapping->a_ops = &linvfs_aops; + inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: inode->i_op = &linvfs_dir_inode_operations; @@ -138,7 +138,7 @@ xfs_set_inodeops( case S_IFLNK: inode->i_op = &linvfs_symlink_inode_operations; if (inode->i_blocks) - inode->i_mapping->a_ops = &linvfs_aops; + inode->i_mapping->a_ops = &xfs_address_space_operations; break; default: inode->i_op = &linvfs_file_inode_operations; -- cgit v1.2.3 From 3562fd45658fbb696f4546479332d5249c3ad90f Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 14:00:35 +1100 Subject: [XFS] Switch over from linvfs names for file operations for consistent naming. 
SGI-PV: 950556 SGI-Modid: xfs-linux-melb:xfs-kern:25379a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_file.c | 140 +++++++++++++++++++++---------------------- fs/xfs/linux-2.6/xfs_ioctl.c | 2 +- fs/xfs/linux-2.6/xfs_iops.h | 6 +- fs/xfs/linux-2.6/xfs_super.c | 4 +- 4 files changed, 76 insertions(+), 76 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ce8fe40e1628..b050e4079427 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -43,13 +43,13 @@ #include #include -static struct vm_operations_struct linvfs_file_vm_ops; +static struct vm_operations_struct xfs_file_vm_ops; #ifdef CONFIG_XFS_DMAPI -static struct vm_operations_struct linvfs_dmapi_file_vm_ops; +static struct vm_operations_struct xfs_dmapi_file_vm_ops; #endif STATIC inline ssize_t -__linvfs_read( +__xfs_file_read( struct kiocb *iocb, char __user *buf, int ioflags, @@ -71,28 +71,28 @@ __linvfs_read( STATIC ssize_t -linvfs_aio_read( +xfs_file_aio_read( struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { - return __linvfs_read(iocb, buf, IO_ISAIO, count, pos); + return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos); } STATIC ssize_t -linvfs_aio_read_invis( +xfs_file_aio_read_invis( struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { - return __linvfs_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); + return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); } STATIC inline ssize_t -__linvfs_write( +__xfs_file_write( struct kiocb *iocb, const char __user *buf, int ioflags, @@ -115,28 +115,28 @@ __linvfs_write( STATIC ssize_t -linvfs_aio_write( +xfs_file_aio_write( struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { - return __linvfs_write(iocb, buf, IO_ISAIO, count, pos); + return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos); } STATIC ssize_t -linvfs_aio_write_invis( +xfs_file_aio_write_invis( struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { - return __linvfs_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); + return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); } STATIC inline ssize_t -__linvfs_readv( +__xfs_file_readv( struct file *file, const struct iovec *iov, int ioflags, @@ -165,28 +165,28 @@ __linvfs_readv( } STATIC ssize_t -linvfs_readv( +xfs_file_readv( struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { - return __linvfs_readv(file, iov, 0, nr_segs, ppos); + return __xfs_file_readv(file, iov, 0, nr_segs, ppos); } STATIC ssize_t -linvfs_readv_invis( +xfs_file_readv_invis( struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { - return __linvfs_readv(file, iov, IO_INVIS, nr_segs, ppos); + return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos); } STATIC inline ssize_t -__linvfs_writev( +__xfs_file_writev( struct file *file, const struct iovec *iov, int ioflags, @@ -216,27 +216,27 @@ __linvfs_writev( STATIC ssize_t -linvfs_writev( +xfs_file_writev( struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { - return __linvfs_writev(file, iov, 0, nr_segs, ppos); + return __xfs_file_writev(file, iov, 0, nr_segs, ppos); } STATIC ssize_t -linvfs_writev_invis( +xfs_file_writev_invis( struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { - return __linvfs_writev(file, iov, IO_INVIS, nr_segs, ppos); + return __xfs_file_writev(file, iov, IO_INVIS, nr_segs, ppos); } STATIC ssize_t -linvfs_sendfile( +xfs_file_sendfile( struct file *filp, 
loff_t *ppos, size_t count, @@ -252,7 +252,7 @@ linvfs_sendfile( STATIC int -linvfs_open( +xfs_file_open( struct inode *inode, struct file *filp) { @@ -269,7 +269,7 @@ linvfs_open( STATIC int -linvfs_release( +xfs_file_release( struct inode *inode, struct file *filp) { @@ -283,7 +283,7 @@ linvfs_release( STATIC int -linvfs_fsync( +xfs_file_fsync( struct file *filp, struct dentry *dentry, int datasync) @@ -302,7 +302,7 @@ linvfs_fsync( } /* - * linvfs_readdir maps to VOP_READDIR(). + * xfs_file_readdir maps to VOP_READDIR(). * We need to build a uio, cred, ... */ @@ -311,7 +311,7 @@ linvfs_fsync( #ifdef CONFIG_XFS_DMAPI STATIC struct page * -linvfs_filemap_nopage( +xfs_vm_nopage( struct vm_area_struct *area, unsigned long address, int *type) @@ -334,7 +334,7 @@ linvfs_filemap_nopage( STATIC int -linvfs_readdir( +xfs_file_readdir( struct file *filp, void *dirent, filldir_t filldir) @@ -414,7 +414,7 @@ done: STATIC int -linvfs_file_mmap( +xfs_file_mmap( struct file *filp, struct vm_area_struct *vma) { @@ -423,11 +423,11 @@ linvfs_file_mmap( vattr_t *vattr; int error; - vma->vm_ops = &linvfs_file_vm_ops; + vma->vm_ops = &xfs_file_vm_ops; #ifdef CONFIG_XFS_DMAPI if (vp->v_vfsp->vfs_flag & VFS_DMI) { - vma->vm_ops = &linvfs_dmapi_file_vm_ops; + vma->vm_ops = &xfs_dmapi_file_vm_ops; } #endif /* CONFIG_XFS_DMAPI */ @@ -444,7 +444,7 @@ linvfs_file_mmap( STATIC long -linvfs_ioctl( +xfs_file_ioctl( struct file *filp, unsigned int cmd, unsigned long arg) @@ -466,7 +466,7 @@ linvfs_ioctl( } STATIC long -linvfs_ioctl_invis( +xfs_file_ioctl_invis( struct file *filp, unsigned int cmd, unsigned long arg) @@ -491,7 +491,7 @@ linvfs_ioctl_invis( #ifdef CONFIG_XFS_DMAPI #ifdef HAVE_VMOP_MPROTECT STATIC int -linvfs_mprotect( +xfs_vm_mprotect( struct vm_area_struct *vma, unsigned int newflags) { @@ -518,7 +518,7 @@ linvfs_mprotect( * it back online. 
*/ STATIC int -linvfs_open_exec( +xfs_file_open_exec( struct inode *inode) { vnode_t *vp = LINVFS_GET_VP(inode); @@ -542,69 +542,69 @@ open_exec_out: } #endif /* HAVE_FOP_OPEN_EXEC */ -struct file_operations linvfs_file_operations = { +struct file_operations xfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, - .readv = linvfs_readv, - .writev = linvfs_writev, - .aio_read = linvfs_aio_read, - .aio_write = linvfs_aio_write, - .sendfile = linvfs_sendfile, - .unlocked_ioctl = linvfs_ioctl, + .readv = xfs_file_readv, + .writev = xfs_file_writev, + .aio_read = xfs_file_aio_read, + .aio_write = xfs_file_aio_write, + .sendfile = xfs_file_sendfile, + .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = linvfs_compat_ioctl, + .compat_ioctl = xfs_file_compat_ioctl, #endif - .mmap = linvfs_file_mmap, - .open = linvfs_open, - .release = linvfs_release, - .fsync = linvfs_fsync, + .mmap = xfs_file_mmap, + .open = xfs_file_open, + .release = xfs_file_release, + .fsync = xfs_file_fsync, #ifdef HAVE_FOP_OPEN_EXEC - .open_exec = linvfs_open_exec, + .open_exec = xfs_file_open_exec, #endif }; -struct file_operations linvfs_invis_file_operations = { +struct file_operations xfs_invis_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, - .readv = linvfs_readv_invis, - .writev = linvfs_writev_invis, - .aio_read = linvfs_aio_read_invis, - .aio_write = linvfs_aio_write_invis, - .sendfile = linvfs_sendfile, - .unlocked_ioctl = linvfs_ioctl_invis, + .readv = xfs_file_readv_invis, + .writev = xfs_file_writev_invis, + .aio_read = xfs_file_aio_read_invis, + .aio_write = xfs_file_aio_write_invis, + .sendfile = xfs_file_sendfile, + .unlocked_ioctl = xfs_file_ioctl_invis, #ifdef CONFIG_COMPAT - .compat_ioctl = linvfs_compat_invis_ioctl, + .compat_ioctl = xfs_file_compat_invis_ioctl, #endif - .mmap = linvfs_file_mmap, - .open = linvfs_open, - .release = linvfs_release, - .fsync = linvfs_fsync, + .mmap = xfs_file_mmap, + .open = xfs_file_open, + .release = xfs_file_release, + .fsync = xfs_file_fsync, }; -struct file_operations linvfs_dir_operations = { +struct file_operations xfs_dir_file_operations = { .read = generic_read_dir, - .readdir = linvfs_readdir, - .unlocked_ioctl = linvfs_ioctl, + .readdir = xfs_file_readdir, + .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = linvfs_compat_ioctl, + .compat_ioctl = xfs_file_compat_ioctl, #endif - .fsync = linvfs_fsync, + .fsync = xfs_file_fsync, }; -static struct vm_operations_struct linvfs_file_vm_ops = { +static struct vm_operations_struct xfs_file_vm_ops = { .nopage = filemap_nopage, .populate = filemap_populate, }; #ifdef CONFIG_XFS_DMAPI -static struct vm_operations_struct linvfs_dmapi_file_vm_ops = { - .nopage = linvfs_filemap_nopage, +static struct vm_operations_struct xfs_dmapi_file_vm_ops = { + .nopage = xfs_vm_nopage, .populate = filemap_populate, #ifdef HAVE_VMOP_MPROTECT - .mprotect = linvfs_mprotect, + .mprotect = xfs_vm_mprotect, #endif }; #endif /* CONFIG_XFS_DMAPI */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index f182721ec9a2..e435ad17419d 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -344,7 +344,7 @@ xfs_open_by_handle( return -XFS_ERROR(-PTR_ERR(filp)); } if (inode->i_mode & S_IFREG) - filp->f_op = &linvfs_invis_file_operations; + filp->f_op = &xfs_invis_file_operations; fd_install(new_fd, filp); return new_fd; diff --git a/fs/xfs/linux-2.6/xfs_iops.h 
b/fs/xfs/linux-2.6/xfs_iops.h index 6899a6b4a50a..8b5275e4b83e 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h @@ -22,9 +22,9 @@ extern struct inode_operations linvfs_file_inode_operations; extern struct inode_operations linvfs_dir_inode_operations; extern struct inode_operations linvfs_symlink_inode_operations; -extern struct file_operations linvfs_file_operations; -extern struct file_operations linvfs_invis_file_operations; -extern struct file_operations linvfs_dir_operations; +extern struct file_operations xfs_file_operations; +extern struct file_operations xfs_dir_file_operations; +extern struct file_operations xfs_invis_file_operations; extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, int, unsigned int, void __user *); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index fb76c53f9c43..9eac4b49a199 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -128,12 +128,12 @@ xfs_set_inodeops( switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &linvfs_file_inode_operations; - inode->i_fop = &linvfs_file_operations; + inode->i_fop = &xfs_file_operations; inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: inode->i_op = &linvfs_dir_inode_operations; - inode->i_fop = &linvfs_dir_operations; + inode->i_fop = &xfs_dir_file_operations; break; case S_IFLNK: inode->i_op = &linvfs_symlink_inode_operations; -- cgit v1.2.3 From 416c6d5bcfe8ac2c65a955be62bc42d8b8d5b014 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 14:00:51 +1100 Subject: [XFS] Switch over from linvfs names for inode operations for consistent naming. SGI-PV: 950556 SGI-Modid: xfs-linux-melb:xfs-kern:25381a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_iops.c | 160 +++++++++++++++++++++---------------------- fs/xfs/linux-2.6/xfs_iops.h | 6 +- fs/xfs/linux-2.6/xfs_super.c | 8 +-- 3 files changed, 87 insertions(+), 87 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 52b02bd5c283..93b9e6e43f20 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -198,7 +198,7 @@ xfs_ichgtime_fast( * Pull the link count and size up from the xfs inode to the linux inode */ STATIC void -__linvfs_validate_fields( +xfs_validate_fields( struct inode *ip, struct vattr *vattr) { @@ -224,7 +224,7 @@ __linvfs_validate_fields( * inode, of course, such that log replay can't cause these to be lost). */ STATIC int -__linvfs_init_security( +xfs_init_security( struct vnode *vp, struct inode *dir) { @@ -257,13 +257,13 @@ __linvfs_init_security( * XXX(hch): nfsd is broken, better fix it instead. */ STATIC inline int -__linvfs_has_fs_struct(struct task_struct *task) +xfs_has_fs_struct(struct task_struct *task) { return (task->fs != init_task.fs); } STATIC inline void -__linvfs_cleanup_inode( +xfs_cleanup_inode( vnode_t *dvp, vnode_t *vp, struct dentry *dentry, @@ -274,7 +274,7 @@ __linvfs_cleanup_inode( /* Oh, the horror. * If we can't add the ACL or we fail in - * linvfs_init_security we must back out. + * xfs_init_security we must back out. * ENOSPC can hit here, among other things. 
*/ teardown.d_inode = LINVFS_GET_IP(vp); @@ -288,7 +288,7 @@ __linvfs_cleanup_inode( } STATIC int -linvfs_mknod( +xfs_vn_mknod( struct inode *dir, struct dentry *dentry, int mode, @@ -323,7 +323,7 @@ linvfs_mknod( } } - if (IS_POSIXACL(dir) && !default_acl && __linvfs_has_fs_struct(current)) + if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current)) mode &= ~current->fs->umask; memset(vattr, 0, sizeof(*vattr)); @@ -347,9 +347,9 @@ linvfs_mknod( } if (unlikely(!error)) { - error = __linvfs_init_security(vp, dir); + error = xfs_init_security(vp, dir); if (error) - __linvfs_cleanup_inode(dvp, vp, dentry, mode); + xfs_cleanup_inode(dvp, vp, dentry, mode); } if (unlikely(default_acl)) { @@ -358,7 +358,7 @@ linvfs_mknod( if (!error) VMODIFY(vp); else - __linvfs_cleanup_inode(dvp, vp, dentry, mode); + xfs_cleanup_inode(dvp, vp, dentry, mode); } _ACL_FREE(default_acl); } @@ -370,35 +370,35 @@ linvfs_mknod( if (S_ISCHR(mode) || S_ISBLK(mode)) ip->i_rdev = rdev; else if (S_ISDIR(mode)) - __linvfs_validate_fields(ip, vattr); + xfs_validate_fields(ip, vattr); d_instantiate(dentry, ip); - __linvfs_validate_fields(dir, vattr); + xfs_validate_fields(dir, vattr); } kfree(vattr); return -error; } STATIC int -linvfs_create( +xfs_vn_create( struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { - return linvfs_mknod(dir, dentry, mode, 0); + return xfs_vn_mknod(dir, dentry, mode, 0); } STATIC int -linvfs_mkdir( +xfs_vn_mkdir( struct inode *dir, struct dentry *dentry, int mode) { - return linvfs_mknod(dir, dentry, mode|S_IFDIR, 0); + return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); } STATIC struct dentry * -linvfs_lookup( +xfs_vn_lookup( struct inode *dir, struct dentry *dentry, struct nameidata *nd) @@ -421,7 +421,7 @@ linvfs_lookup( } STATIC int -linvfs_link( +xfs_vn_link( struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -447,7 +447,7 @@ linvfs_link( if (likely(!error)) { VMODIFY(tdvp); VN_HOLD(vp); - __linvfs_validate_fields(ip, vattr); + xfs_validate_fields(ip, vattr); d_instantiate(dentry, ip); } kfree(vattr); @@ -455,7 +455,7 @@ linvfs_link( } STATIC int -linvfs_unlink( +xfs_vn_unlink( struct inode *dir, struct dentry *dentry) { @@ -473,15 +473,15 @@ linvfs_unlink( VOP_REMOVE(dvp, dentry, NULL, error); if (likely(!error)) { - __linvfs_validate_fields(dir, vattr); /* size needs update */ - __linvfs_validate_fields(inode, vattr); + xfs_validate_fields(dir, vattr); /* size needs update */ + xfs_validate_fields(inode, vattr); } kfree(vattr); return -error; } STATIC int -linvfs_symlink( +xfs_vn_symlink( struct inode *dir, struct dentry *dentry, const char *symname) @@ -507,12 +507,12 @@ linvfs_symlink( error = 0; VOP_SYMLINK(dvp, dentry, vattr, (char *)symname, &cvp, NULL, error); if (likely(!error && cvp)) { - error = __linvfs_init_security(cvp, dir); + error = xfs_init_security(cvp, dir); if (likely(!error)) { ip = LINVFS_GET_IP(cvp); d_instantiate(dentry, ip); - __linvfs_validate_fields(dir, vattr); - __linvfs_validate_fields(ip, vattr); + xfs_validate_fields(dir, vattr); + xfs_validate_fields(ip, vattr); } } kfree(vattr); @@ -520,7 +520,7 @@ linvfs_symlink( } STATIC int -linvfs_rmdir( +xfs_vn_rmdir( struct inode *dir, struct dentry *dentry) { @@ -535,15 +535,15 @@ linvfs_rmdir( VOP_RMDIR(dvp, dentry, NULL, error); if (likely(!error)) { - __linvfs_validate_fields(inode, vattr); - __linvfs_validate_fields(dir, vattr); + xfs_validate_fields(inode, vattr); + xfs_validate_fields(dir, vattr); } kfree(vattr); return -error; } STATIC int -linvfs_rename( 
+xfs_vn_rename( struct inode *odir, struct dentry *odentry, struct inode *ndir, @@ -565,10 +565,10 @@ linvfs_rename( VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error); if (likely(!error)) { if (new_inode) - __linvfs_validate_fields(new_inode, vattr); - __linvfs_validate_fields(odir, vattr); + xfs_validate_fields(new_inode, vattr); + xfs_validate_fields(odir, vattr); if (ndir != odir) - __linvfs_validate_fields(ndir, vattr); + xfs_validate_fields(ndir, vattr); } kfree(vattr); return -error; @@ -580,7 +580,7 @@ linvfs_rename( * uio is kmalloced for this reason... */ STATIC void * -linvfs_follow_link( +xfs_vn_follow_link( struct dentry *dentry, struct nameidata *nd) { @@ -631,7 +631,7 @@ linvfs_follow_link( } STATIC void -linvfs_put_link( +xfs_vn_put_link( struct dentry *dentry, struct nameidata *nd, void *p) @@ -644,7 +644,7 @@ linvfs_put_link( #ifdef CONFIG_XFS_POSIX_ACL STATIC int -linvfs_permission( +xfs_vn_permission( struct inode *inode, int mode, struct nameidata *nd) @@ -657,11 +657,11 @@ linvfs_permission( return -error; } #else -#define linvfs_permission NULL +#define xfs_vn_permission NULL #endif STATIC int -linvfs_getattr( +xfs_vn_getattr( struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) @@ -678,7 +678,7 @@ linvfs_getattr( } STATIC int -linvfs_setattr( +xfs_vn_setattr( struct dentry *dentry, struct iattr *attr) { @@ -736,14 +736,14 @@ linvfs_setattr( } STATIC void -linvfs_truncate( +xfs_vn_truncate( struct inode *inode) { block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_block); } STATIC int -linvfs_setxattr( +xfs_vn_setxattr( struct dentry *dentry, const char *name, const void *data, @@ -774,7 +774,7 @@ linvfs_setxattr( } STATIC ssize_t -linvfs_getxattr( +xfs_vn_getxattr( struct dentry *dentry, const char *name, void *data, @@ -804,7 +804,7 @@ linvfs_getxattr( } STATIC ssize_t -linvfs_listxattr( +xfs_vn_listxattr( struct dentry *dentry, char *data, size_t size) @@ -824,7 +824,7 @@ linvfs_listxattr( } STATIC int -linvfs_removexattr( +xfs_vn_removexattr( struct dentry *dentry, const char *name) { @@ -846,45 +846,45 @@ linvfs_removexattr( } -struct inode_operations linvfs_file_inode_operations = { - .permission = linvfs_permission, - .truncate = linvfs_truncate, - .getattr = linvfs_getattr, - .setattr = linvfs_setattr, - .setxattr = linvfs_setxattr, - .getxattr = linvfs_getxattr, - .listxattr = linvfs_listxattr, - .removexattr = linvfs_removexattr, +struct inode_operations xfs_inode_operations = { + .permission = xfs_vn_permission, + .truncate = xfs_vn_truncate, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = xfs_vn_setxattr, + .getxattr = xfs_vn_getxattr, + .listxattr = xfs_vn_listxattr, + .removexattr = xfs_vn_removexattr, }; -struct inode_operations linvfs_dir_inode_operations = { - .create = linvfs_create, - .lookup = linvfs_lookup, - .link = linvfs_link, - .unlink = linvfs_unlink, - .symlink = linvfs_symlink, - .mkdir = linvfs_mkdir, - .rmdir = linvfs_rmdir, - .mknod = linvfs_mknod, - .rename = linvfs_rename, - .permission = linvfs_permission, - .getattr = linvfs_getattr, - .setattr = linvfs_setattr, - .setxattr = linvfs_setxattr, - .getxattr = linvfs_getxattr, - .listxattr = linvfs_listxattr, - .removexattr = linvfs_removexattr, +struct inode_operations xfs_dir_inode_operations = { + .create = xfs_vn_create, + .lookup = xfs_vn_lookup, + .link = xfs_vn_link, + .unlink = xfs_vn_unlink, + .symlink = xfs_vn_symlink, + .mkdir = xfs_vn_mkdir, + .rmdir = xfs_vn_rmdir, + .mknod = xfs_vn_mknod, + .rename = xfs_vn_rename, + 
.permission = xfs_vn_permission, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = xfs_vn_setxattr, + .getxattr = xfs_vn_getxattr, + .listxattr = xfs_vn_listxattr, + .removexattr = xfs_vn_removexattr, }; -struct inode_operations linvfs_symlink_inode_operations = { +struct inode_operations xfs_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = linvfs_follow_link, - .put_link = linvfs_put_link, - .permission = linvfs_permission, - .getattr = linvfs_getattr, - .setattr = linvfs_setattr, - .setxattr = linvfs_setxattr, - .getxattr = linvfs_getxattr, - .listxattr = linvfs_listxattr, - .removexattr = linvfs_removexattr, + .follow_link = xfs_vn_follow_link, + .put_link = xfs_vn_put_link, + .permission = xfs_vn_permission, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = xfs_vn_setxattr, + .getxattr = xfs_vn_getxattr, + .listxattr = xfs_vn_listxattr, + .removexattr = xfs_vn_removexattr, }; diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index 8b5275e4b83e..a8417d7af5f9 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h @@ -18,9 +18,9 @@ #ifndef __XFS_IOPS_H__ #define __XFS_IOPS_H__ -extern struct inode_operations linvfs_file_inode_operations; -extern struct inode_operations linvfs_dir_inode_operations; -extern struct inode_operations linvfs_symlink_inode_operations; +extern struct inode_operations xfs_inode_operations; +extern struct inode_operations xfs_dir_inode_operations; +extern struct inode_operations xfs_symlink_inode_operations; extern struct file_operations xfs_file_operations; extern struct file_operations xfs_dir_file_operations; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9eac4b49a199..cdbaea9e8573 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -127,21 +127,21 @@ xfs_set_inodeops( { switch (inode->i_mode & S_IFMT) { case S_IFREG: - inode->i_op = &linvfs_file_inode_operations; + inode->i_op = &xfs_inode_operations; inode->i_fop = &xfs_file_operations; inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: - inode->i_op = &linvfs_dir_inode_operations; + inode->i_op = &xfs_dir_inode_operations; inode->i_fop = &xfs_dir_file_operations; break; case S_IFLNK: - inode->i_op = &linvfs_symlink_inode_operations; + inode->i_op = &xfs_symlink_inode_operations; if (inode->i_blocks) inode->i_mapping->a_ops = &xfs_address_space_operations; break; default: - inode->i_op = &linvfs_file_inode_operations; + inode->i_op = &xfs_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); break; } -- cgit v1.2.3 From a50cd2692617cfb796140a62c0082bce0a7306c7 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 14:06:18 +1100 Subject: [XFS] Switch over from linvfs names for sb/quotactl operations for consistent naming. 
SGI-PV: 950556 SGI-Modid: xfs-linux-melb:xfs-kern:25382a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_export.c | 18 +++---- fs/xfs/linux-2.6/xfs_ioctl32.c | 10 ++-- fs/xfs/linux-2.6/xfs_ioctl32.h | 4 +- fs/xfs/linux-2.6/xfs_super.c | 104 ++++++++++++++++++++--------------------- fs/xfs/linux-2.6/xfs_super.h | 2 +- 5 files changed, 69 insertions(+), 69 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 53ed99112365..391c1353cd42 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -39,7 +39,7 @@ STATIC struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; */ STATIC struct dentry * -linvfs_decode_fh( +xfs_fs_decode_fh( struct super_block *sb, __u32 *fh, int fh_len, @@ -85,7 +85,7 @@ linvfs_decode_fh( STATIC int -linvfs_encode_fh( +xfs_fs_encode_fh( struct dentry *dentry, __u32 *fh, int *max_len, @@ -132,7 +132,7 @@ linvfs_encode_fh( } STATIC struct dentry * -linvfs_get_dentry( +xfs_fs_get_dentry( struct super_block *sb, void *data) { @@ -156,7 +156,7 @@ linvfs_get_dentry( } STATIC struct dentry * -linvfs_get_parent( +xfs_fs_get_parent( struct dentry *child) { int error; @@ -177,9 +177,9 @@ linvfs_get_parent( return parent; } -struct export_operations linvfs_export_ops = { - .decode_fh = linvfs_decode_fh, - .encode_fh = linvfs_encode_fh, - .get_parent = linvfs_get_parent, - .get_dentry = linvfs_get_dentry, +struct export_operations xfs_export_operations = { + .decode_fh = xfs_fs_decode_fh, + .encode_fh = xfs_fs_encode_fh, + .get_parent = xfs_fs_get_parent, + .get_dentry = xfs_fs_get_dentry, }; diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index a7c9ba1a9f7b..e9da0bde36a6 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -107,7 +107,7 @@ xfs_ioctl32_bulkstat( #endif STATIC long -__linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) +xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) { int error; struct inode *inode = f->f_dentry->d_inode; @@ -196,19 +196,19 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) } long -linvfs_compat_ioctl( +xfs_file_compat_ioctl( struct file *f, unsigned cmd, unsigned long arg) { - return __linvfs_compat_ioctl(0, f, cmd, arg); + return xfs_compat_ioctl(0, f, cmd, arg); } long -linvfs_compat_invis_ioctl( +xfs_file_compat_invis_ioctl( struct file *f, unsigned cmd, unsigned long arg) { - return __linvfs_compat_ioctl(IO_INVIS, f, cmd, arg); + return xfs_compat_ioctl(IO_INVIS, f, cmd, arg); } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 011c273bec50..8bdb33ffc032 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -18,7 +18,7 @@ #ifndef __XFS_IOCTL32_H__ #define __XFS_IOCTL32_H__ -extern long linvfs_compat_ioctl(struct file *, unsigned, unsigned long); -extern long linvfs_compat_invis_ioctl(struct file *f, unsigned, unsigned long); +extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long); +extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned); #endif /* __XFS_IOCTL32_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index cdbaea9e8573..352aa3d40c1f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -59,8 +59,8 @@ #include #include -STATIC struct quotactl_ops linvfs_qops; -STATIC struct super_operations linvfs_sops; +STATIC struct quotactl_ops 
xfs_quotactl_operations; +STATIC struct super_operations xfs_super_operations; STATIC kmem_zone_t *xfs_vnode_zone; STATIC kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; @@ -332,7 +332,7 @@ xfs_blkdev_issue_flush( } STATIC struct inode * -linvfs_alloc_inode( +xfs_fs_alloc_inode( struct super_block *sb) { vnode_t *vp; @@ -344,14 +344,14 @@ linvfs_alloc_inode( } STATIC void -linvfs_destroy_inode( +xfs_fs_destroy_inode( struct inode *inode) { kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode)); } STATIC void -linvfs_inode_init_once( +xfs_fs_inode_init_once( void *vnode, kmem_zone_t *zonep, unsigned long flags) @@ -367,7 +367,7 @@ xfs_init_zones(void) xfs_vnode_zone = kmem_zone_init_flags(sizeof(vnode_t), "xfs_vnode_t", KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, - linvfs_inode_init_once); + xfs_fs_inode_init_once); if (!xfs_vnode_zone) goto out; @@ -405,7 +405,7 @@ xfs_destroy_zones(void) * since this is when the inode itself becomes flushable. */ STATIC int -linvfs_write_inode( +xfs_fs_write_inode( struct inode *inode, int sync) { @@ -429,13 +429,13 @@ linvfs_write_inode( } STATIC void -linvfs_clear_inode( +xfs_fs_clear_inode( struct inode *inode) { vnode_t *vp = LINVFS_GET_VP(inode); int error, cache; - vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address); + vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); @@ -608,7 +608,7 @@ xfssyncd( } STATIC int -linvfs_start_syncd( +xfs_fs_start_syncd( vfs_t *vfsp) { vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; @@ -620,20 +620,20 @@ linvfs_start_syncd( } STATIC void -linvfs_stop_syncd( +xfs_fs_stop_syncd( vfs_t *vfsp) { kthread_stop(vfsp->vfs_sync_task); } STATIC void -linvfs_put_super( +xfs_fs_put_super( struct super_block *sb) { vfs_t *vfsp = LINVFS_GET_VFS(sb); int error; - linvfs_stop_syncd(vfsp); + xfs_fs_stop_syncd(vfsp); VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error); if (!error) VFS_UNMOUNT(vfsp, 0, NULL, error); @@ -647,7 +647,7 @@ linvfs_put_super( } STATIC void -linvfs_write_super( +xfs_fs_write_super( struct super_block *sb) { vfs_t *vfsp = LINVFS_GET_VFS(sb); @@ -663,7 +663,7 @@ linvfs_write_super( } STATIC int -linvfs_sync_super( +xfs_fs_sync_super( struct super_block *sb, int wait) { @@ -702,7 +702,7 @@ linvfs_sync_super( } STATIC int -linvfs_statfs( +xfs_fs_statfs( struct super_block *sb, struct kstatfs *statp) { @@ -714,7 +714,7 @@ linvfs_statfs( } STATIC int -linvfs_remount( +xfs_fs_remount( struct super_block *sb, int *flags, char *options) @@ -731,14 +731,14 @@ linvfs_remount( } STATIC void -linvfs_freeze_fs( +xfs_fs_lockfs( struct super_block *sb) { VFS_FREEZE(LINVFS_GET_VFS(sb)); } STATIC int -linvfs_show_options( +xfs_fs_show_options( struct seq_file *m, struct vfsmount *mnt) { @@ -750,7 +750,7 @@ linvfs_show_options( } STATIC int -linvfs_quotasync( +xfs_fs_quotasync( struct super_block *sb, int type) { @@ -762,7 +762,7 @@ linvfs_quotasync( } STATIC int -linvfs_getxstate( +xfs_fs_getxstate( struct super_block *sb, struct fs_quota_stat *fqs) { @@ -774,7 +774,7 @@ linvfs_getxstate( } STATIC int -linvfs_setxstate( +xfs_fs_setxstate( struct super_block *sb, unsigned int flags, int op) @@ -787,7 +787,7 @@ linvfs_setxstate( } STATIC int -linvfs_getxquota( +xfs_fs_getxquota( struct super_block *sb, int type, qid_t id, @@ -803,7 +803,7 @@ linvfs_getxquota( } STATIC int -linvfs_setxquota( +xfs_fs_setxquota( struct super_block *sb, int type, qid_t id, @@ -819,7 +819,7 @@ linvfs_setxquota( } STATIC int -linvfs_fill_super( +xfs_fs_fill_super( 
struct super_block *sb, void *data, int silent) @@ -844,10 +844,10 @@ linvfs_fill_super( sb_min_blocksize(sb, BBSIZE); #ifdef CONFIG_XFS_EXPORT - sb->s_export_op = &linvfs_export_ops; + sb->s_export_op = &xfs_export_operations; #endif - sb->s_qcop = &linvfs_qops; - sb->s_op = &linvfs_sops; + sb->s_qcop = &xfs_quotactl_operations; + sb->s_op = &xfs_super_operations; VFS_MOUNT(vfsp, args, NULL, error); if (error) { @@ -880,7 +880,7 @@ linvfs_fill_super( error = EINVAL; goto fail_vnrele; } - if ((error = linvfs_start_syncd(vfsp))) + if ((error = xfs_fs_start_syncd(vfsp))) goto fail_vnrele; vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address); @@ -905,41 +905,41 @@ fail_vfsop: } STATIC struct super_block * -linvfs_get_sb( +xfs_fs_get_sb( struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super); -} - -STATIC struct super_operations linvfs_sops = { - .alloc_inode = linvfs_alloc_inode, - .destroy_inode = linvfs_destroy_inode, - .write_inode = linvfs_write_inode, - .clear_inode = linvfs_clear_inode, - .put_super = linvfs_put_super, - .write_super = linvfs_write_super, - .sync_fs = linvfs_sync_super, - .write_super_lockfs = linvfs_freeze_fs, - .statfs = linvfs_statfs, - .remount_fs = linvfs_remount, - .show_options = linvfs_show_options, + return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); +} + +STATIC struct super_operations xfs_super_operations = { + .alloc_inode = xfs_fs_alloc_inode, + .destroy_inode = xfs_fs_destroy_inode, + .write_inode = xfs_fs_write_inode, + .clear_inode = xfs_fs_clear_inode, + .put_super = xfs_fs_put_super, + .write_super = xfs_fs_write_super, + .sync_fs = xfs_fs_sync_super, + .write_super_lockfs = xfs_fs_lockfs, + .statfs = xfs_fs_statfs, + .remount_fs = xfs_fs_remount, + .show_options = xfs_fs_show_options, }; -STATIC struct quotactl_ops linvfs_qops = { - .quota_sync = linvfs_quotasync, - .get_xstate = linvfs_getxstate, - .set_xstate = linvfs_setxstate, - .get_xquota = linvfs_getxquota, - .set_xquota = linvfs_setxquota, +STATIC struct quotactl_ops xfs_quotactl_operations = { + .quota_sync = xfs_fs_quotasync, + .get_xstate = xfs_fs_getxstate, + .set_xstate = xfs_fs_setxstate, + .get_xquota = xfs_fs_getxquota, + .set_xquota = xfs_fs_setxquota, }; STATIC struct file_system_type xfs_fs_type = { .owner = THIS_MODULE, .name = "xfs", - .get_sb = linvfs_get_sb, + .get_sb = xfs_fs_get_sb, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index df59408dca06..5e1b525cb1eb 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -120,6 +120,6 @@ extern int xfs_blkdev_get(struct xfs_mount *, const char *, extern void xfs_blkdev_put(struct block_device *); extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); -extern struct export_operations linvfs_export_ops; +extern struct export_operations xfs_export_operations; #endif /* __XFS_SUPER_H__ */ -- cgit v1.2.3 From f020b67f3c0c071ed5b2281bfa717125c8a39ad6 Mon Sep 17 00:00:00 2001 From: Mandy Kirkconnell Date: Tue, 14 Mar 2006 14:07:24 +1100 Subject: [XFS] Fix assert to check that in-core extents are inline only. 
SGI-PV: 950678 SGI-Modid: xfs-linux-melb:xfs-kern:207634a Signed-off-by: Mandy Kirkconnell Signed-off-by: Nathan Scott --- fs/xfs/xfs_bmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 9f0ed4869d47..64a02eaf1dfe 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3414,7 +3414,8 @@ xfs_bmap_local_to_extents( args.tp = tp; args.mp = ip->i_mount; - ASSERT(ifp->if_flags & XFS_IFINLINE); + ASSERT((ifp->if_flags & + (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); /* * Allocate a block. We know we need only one, since the * file currently fits in an inode. @@ -3445,7 +3446,6 @@ xfs_bmap_local_to_extents( xfs_bmap_forkoff_reset(args.mp, ip, whichfork); xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); xfs_iext_add(ifp, 0, 1); - ASSERT((ifp->if_flags & (XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFEXTENTS); ep = xfs_iext_get_ext(ifp, 0); xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork); -- cgit v1.2.3 From f30a1211119741d2c1063ad613bec8434fb9d099 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 14:07:36 +1100 Subject: [XFS] Dynamically allocate the xfs_dinode_core_t structure to reduce our stack footprint in xfs_ialloc_ag_alloc. SGI-PV: 947312 SGI-Modid: xfs-linux-melb:xfs-kern:25420a Signed-off-by: Nathan Scott --- fs/xfs/xfs_ialloc.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 8f3fae1aa98a..0024892841a3 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -138,8 +138,6 @@ xfs_ialloc_ag_alloc( int version; /* inode version number to use */ int isaligned; /* inode allocation at stripe unit */ /* boundary */ - xfs_dinode_core_t dic; /* a dinode_core to copy to new */ - /* inodes */ args.tp = tp; args.mp = tp->t_mountp; @@ -250,10 +248,6 @@ xfs_ialloc_ag_alloc( else version = XFS_DINODE_VERSION_1; - memset(&dic, 0, sizeof(xfs_dinode_core_t)); - INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC); - INT_SET(dic.di_version, ARCH_CONVERT, version); - for (j = 0; j < nbufs; j++) { /* * Get the block. @@ -266,12 +260,13 @@ xfs_ialloc_ag_alloc( ASSERT(fbuf); ASSERT(!XFS_BUF_GETERROR(fbuf)); /* - * Loop over the inodes in this buffer. + * Set initial values for the inodes in this buffer. */ - + xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); for (i = 0; i < ninodes; i++) { free = XFS_MAKE_IPTR(args.mp, fbuf, i); - memcpy(&(free->di_core), &dic, sizeof(xfs_dinode_core_t)); + INT_SET(free->di_core.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC); + INT_SET(free->di_core.di_version, ARCH_CONVERT, version); INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO); xfs_ialloc_log_di(tp, fbuf, i, XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED); -- cgit v1.2.3 From 524fbf5dd1b25acffe6f8a4ed5f3cce1023cfdb8 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Tue, 14 Mar 2006 14:07:53 +1100 Subject: [XFS] Revert kiocb and vattr stack changes, theory is the AIO rework will help here and vattr may be small enough. 
SGI-PV: 947312 SGI-Modid: xfs-linux-melb:xfs-kern:25423a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_file.c | 42 +++++++-------------- fs/xfs/linux-2.6/xfs_iops.c | 89 ++++++++++++++------------------------------- 2 files changed, 42 insertions(+), 89 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index b050e4079427..c271c9936498 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -145,22 +145,17 @@ __xfs_file_readv( { struct inode *inode = file->f_mapping->host; vnode_t *vp = LINVFS_GET_VP(inode); - struct kiocb *kiocb; + struct kiocb kiocb; ssize_t rval; - kiocb = kmalloc(sizeof(*kiocb), GFP_KERNEL); - if (unlikely(!kiocb)) - return -ENOMEM; - - init_sync_kiocb(kiocb, file); - kiocb->ki_pos = *ppos; + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *ppos; if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - VOP_READ(vp, kiocb, iov, nr_segs, &kiocb->ki_pos, ioflags, NULL, rval); + VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); - *ppos = kiocb->ki_pos; - kfree(kiocb); + *ppos = kiocb.ki_pos; return rval; } @@ -195,22 +190,17 @@ __xfs_file_writev( { struct inode *inode = file->f_mapping->host; vnode_t *vp = LINVFS_GET_VP(inode); - struct kiocb *kiocb; + struct kiocb kiocb; ssize_t rval; - kiocb = kmalloc(sizeof(*kiocb), GFP_KERNEL); - if (unlikely(!kiocb)) - return -ENOMEM; - - init_sync_kiocb(kiocb, file); - kiocb->ki_pos = *ppos; + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *ppos; if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - VOP_WRITE(vp, kiocb, iov, nr_segs, &kiocb->ki_pos, ioflags, NULL, rval); + VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); - *ppos = kiocb->ki_pos; - kfree(kiocb); + *ppos = kiocb.ki_pos; return rval; } @@ -420,7 +410,7 @@ xfs_file_mmap( { struct inode *ip = filp->f_dentry->d_inode; vnode_t *vp = LINVFS_GET_VP(ip); - vattr_t *vattr; + vattr_t vattr; int error; vma->vm_ops = &xfs_file_vm_ops; @@ -431,14 +421,10 @@ xfs_file_mmap( } #endif /* CONFIG_XFS_DMAPI */ - vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); - if (unlikely(!vattr)) - return -ENOMEM; - vattr->va_mask = XFS_AT_UPDATIME; - VOP_SETATTR(vp, vattr, XFS_AT_UPDATIME, NULL, error); + vattr.va_mask = XFS_AT_UPDATIME; + VOP_SETATTR(vp, &vattr, XFS_AT_UPDATIME, NULL, error); if (likely(!error)) - __vn_revalidate(vp, vattr); /* update flags */ - kfree(vattr); + __vn_revalidate(vp, &vattr); /* update flags */ return 0; } diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 93b9e6e43f20..0a508580e572 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -295,7 +295,7 @@ xfs_vn_mknod( dev_t rdev) { struct inode *ip; - vattr_t *vattr; + vattr_t vattr = { 0 }; vnode_t *vp = NULL, *dvp = LINVFS_GET_VP(dir); xfs_acl_t *default_acl = NULL; attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; @@ -308,13 +308,8 @@ xfs_vn_mknod( if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) return -EINVAL; - vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); - if (unlikely(!vattr)) - return -ENOMEM; - if (unlikely(test_default_acl && test_default_acl(dvp))) { if (!_ACL_ALLOC(default_acl)) { - kfree(vattr); return -ENOMEM; } if (!_ACL_GET_DEFAULT(dvp, default_acl)) { @@ -326,20 +321,19 @@ xfs_vn_mknod( if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current)) mode &= ~current->fs->umask; - memset(vattr, 0, sizeof(*vattr)); - vattr->va_mask = XFS_AT_TYPE|XFS_AT_MODE; - vattr->va_mode = mode; + 
vattr.va_mask = XFS_AT_TYPE|XFS_AT_MODE; + vattr.va_mode = mode; switch (mode & S_IFMT) { case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - vattr->va_rdev = sysv_encode_dev(rdev); - vattr->va_mask |= XFS_AT_RDEV; + vattr.va_rdev = sysv_encode_dev(rdev); + vattr.va_mask |= XFS_AT_RDEV; /*FALLTHROUGH*/ case S_IFREG: - VOP_CREATE(dvp, dentry, vattr, &vp, NULL, error); + VOP_CREATE(dvp, dentry, &vattr, &vp, NULL, error); break; case S_IFDIR: - VOP_MKDIR(dvp, dentry, vattr, &vp, NULL, error); + VOP_MKDIR(dvp, dentry, &vattr, &vp, NULL, error); break; default: error = EINVAL; @@ -354,7 +348,7 @@ xfs_vn_mknod( if (unlikely(default_acl)) { if (!error) { - error = _ACL_INHERIT(vp, vattr, default_acl); + error = _ACL_INHERIT(vp, &vattr, default_acl); if (!error) VMODIFY(vp); else @@ -370,11 +364,10 @@ xfs_vn_mknod( if (S_ISCHR(mode) || S_ISBLK(mode)) ip->i_rdev = rdev; else if (S_ISDIR(mode)) - xfs_validate_fields(ip, vattr); + xfs_validate_fields(ip, &vattr); d_instantiate(dentry, ip); - xfs_validate_fields(dir, vattr); + xfs_validate_fields(dir, &vattr); } - kfree(vattr); return -error; } @@ -429,17 +422,13 @@ xfs_vn_link( struct inode *ip; /* inode of guy being linked to */ vnode_t *tdvp; /* target directory for new name/link */ vnode_t *vp; /* vp of name being linked */ - vattr_t *vattr; + vattr_t vattr; int error; ip = old_dentry->d_inode; /* inode being linked to */ if (S_ISDIR(ip->i_mode)) return -EPERM; - vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); - if (unlikely(!vattr)) - return -ENOMEM; - tdvp = LINVFS_GET_VP(dir); vp = LINVFS_GET_VP(ip); @@ -447,10 +436,9 @@ xfs_vn_link( if (likely(!error)) { VMODIFY(tdvp); VN_HOLD(vp); - xfs_validate_fields(ip, vattr); + xfs_validate_fields(ip, &vattr); d_instantiate(dentry, ip); } - kfree(vattr); return -error; } @@ -461,22 +449,17 @@ xfs_vn_unlink( { struct inode *inode; vnode_t *dvp; /* directory containing name to remove */ - vattr_t *vattr; + vattr_t vattr; int error; - vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); - if (unlikely(!vattr)) - return -ENOMEM; - inode = dentry->d_inode; dvp = LINVFS_GET_VP(dir); VOP_REMOVE(dvp, dentry, NULL, error); if (likely(!error)) { - xfs_validate_fields(dir, vattr); /* size needs update */ - xfs_validate_fields(inode, vattr); + xfs_validate_fields(dir, &vattr); /* size needs update */ + xfs_validate_fields(inode, &vattr); } - kfree(vattr); return -error; } @@ -487,7 +470,7 @@ xfs_vn_symlink( const char *symname) { struct inode *ip; - vattr_t *vattr; + vattr_t vattr = { 0 }; vnode_t *dvp; /* directory containing name of symlink */ vnode_t *cvp; /* used to lookup symlink to put in dentry */ int error; @@ -495,27 +478,21 @@ xfs_vn_symlink( dvp = LINVFS_GET_VP(dir); cvp = NULL; - vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); - if (unlikely(!vattr)) - return -ENOMEM; - - memset(vattr, 0, sizeof(*vattr)); - vattr->va_mode = S_IFLNK | + vattr.va_mode = S_IFLNK | (irix_symlink_mode ? 
0777 & ~current->fs->umask : S_IRWXUGO); - vattr->va_mask = XFS_AT_TYPE|XFS_AT_MODE; + vattr.va_mask = XFS_AT_TYPE|XFS_AT_MODE; error = 0; - VOP_SYMLINK(dvp, dentry, vattr, (char *)symname, &cvp, NULL, error); + VOP_SYMLINK(dvp, dentry, &vattr, (char *)symname, &cvp, NULL, error); if (likely(!error && cvp)) { error = xfs_init_security(cvp, dir); if (likely(!error)) { ip = LINVFS_GET_IP(cvp); d_instantiate(dentry, ip); - xfs_validate_fields(dir, vattr); - xfs_validate_fields(ip, vattr); + xfs_validate_fields(dir, &vattr); + xfs_validate_fields(ip, &vattr); } } - kfree(vattr); return -error; } @@ -526,19 +503,14 @@ xfs_vn_rmdir( { struct inode *inode = dentry->d_inode; vnode_t *dvp = LINVFS_GET_VP(dir); - vattr_t *vattr; + vattr_t vattr; int error; - vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); - if (unlikely(!vattr)) - return -ENOMEM; - VOP_RMDIR(dvp, dentry, NULL, error); if (likely(!error)) { - xfs_validate_fields(inode, vattr); - xfs_validate_fields(dir, vattr); + xfs_validate_fields(inode, &vattr); + xfs_validate_fields(dir, &vattr); } - kfree(vattr); return -error; } @@ -552,25 +524,20 @@ xfs_vn_rename( struct inode *new_inode = ndentry->d_inode; vnode_t *fvp; /* from directory */ vnode_t *tvp; /* target directory */ - vattr_t *vattr; + vattr_t vattr; int error; - vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); - if (unlikely(!vattr)) - return -ENOMEM; - fvp = LINVFS_GET_VP(odir); tvp = LINVFS_GET_VP(ndir); VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error); if (likely(!error)) { if (new_inode) - xfs_validate_fields(new_inode, vattr); - xfs_validate_fields(odir, vattr); + xfs_validate_fields(new_inode, &vattr); + xfs_validate_fields(odir, &vattr); if (ndir != odir) - xfs_validate_fields(ndir, vattr); + xfs_validate_fields(ndir, &vattr); } - kfree(vattr); return -error; } -- cgit v1.2.3 From 8867bc9bf0aed7181aa72c7c938c6ce830b75166 Mon Sep 17 00:00:00 2001 From: Mandy Kirkconnell Date: Fri, 17 Mar 2006 17:25:04 +1100 Subject: [XFS] There are a few problems with the new xfs_bmap_search_multi_extents() wrapper function that I introduced in mod xfs-linux:xfs-kern:207393a. The function was added as a wrapper around xfs_bmap_do_search_extents() to avoid breaking the top-of-tree CXFS interface. The idea of the function was basically to extract the target extent buffer (if muli- level extent allocation mode), then call xfs_bmap_do_search_extents() with either a pointer to the first extent in the target buffer or a pointer to the first extent in the file, depending on which extent mode was being used. However, in addition to locating the target extent record for block bno, xfs_bmap_do_search_extents() also sets four parameters needed by the caller: *lastx, *eofp, *gotp, *prevp. Passing only the target extent buffer to xfs_bmap_do_search_extents() causes *eofp to be set incorrectly if the extent is at the end of the target list but there are actually more extents in the next er_extbuf. Likewise, if the extent is the first one in the buffer but NOT the first in the file, *prevp is incorrectly set to NULL. Adding the needed functionality to xfs_bmap_search_multi_extents() to re-set any incorrectly set fields is redundant and makes the call to xfs_bmap_do_search_extents() not make much sense when multi-level extent allocation mode is being used. This mod basically extracts the two functional components from xfs_bmap_do_search_extents(), with the intent of obsoleting/removing xfs_bmap_do_search_extents() after the CXFS mult-level in-core extent changes are checked in. 
The two components are: 1) The binary search to locate the target extent record, and 2) Setting the four parameters needed by the caller (*lastx, *eofp, *gotp, *prevp). Component 1: I created a new function in xfs_inode.c called xfs_iext_bno_to_ext(), which executes the binary search to find the target extent record. xfs_bmap_search_multi_extents() has been modified to call xfs_iext_bno_to_ext() rather than xfs_bmap_do_search_extents(). Component 2: The parameter setting functionality has been added to xfs_bmap_search_multi_extents(), eliminating the need for xfs_bmap_do_search_extents(). These changes make the removal of xfs_bmap_do_search_extents() trival once the CXFS changes are in place. They also allow us to maintain the current XFS interface, using the new search function introduced in mod xfs-linux:xfs-kern:207393a. SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207866a Signed-off-by: Mandy Kirkconnell Signed-off-by: Nathan Scott --- fs/xfs/xfs_bmap.c | 57 ++++++++++++++++++++++-------------------- fs/xfs/xfs_inode.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/xfs/xfs_inode.h | 1 + 3 files changed, 102 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 64a02eaf1dfe..2d702e4a74a3 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3575,10 +3575,11 @@ xfs_bmap_do_search_extents( } /* - * Call xfs_bmap_do_search_extents() to search for the extent - * record containing block bno. If in multi-level in-core extent - * allocation mode, find and extract the target extent buffer, - * otherwise just use the direct extent list. + * Search the extent records for the entry containing block bno. + * If bno lies in a hole, point to the next entry. If bno lies + * past eof, *eofp will be set, and *prevp will contain the last + * entry (null if none). Else, *lastxp will be set to the index + * of the found entry; *gotp will contain the entry. */ xfs_bmbt_rec_t * /* pointer to found extent entry */ xfs_bmap_search_multi_extents( @@ -3589,36 +3590,38 @@ xfs_bmap_search_multi_extents( xfs_bmbt_irec_t *gotp, /* out: extent entry found */ xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ { - xfs_bmbt_rec_t *base; /* base of extent records */ xfs_bmbt_rec_t *ep; /* extent record pointer */ - xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ xfs_extnum_t lastx; /* last extent index */ - xfs_extnum_t nextents; /* number of file extents */ /* - * For multi-level extent allocation mode, find the - * target extent list and pass only the contiguous - * list to xfs_bmap_do_search_extents. Convert lastx - * from a file extent index to an index within the - * target extent list. + * Initialize the extent entry structure to catch access to + * uninitialized br_startblock field. 
*/ - if (ifp->if_flags & XFS_IFEXTIREC) { - int erp_idx = 0; - erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); - base = erp->er_extbuf; - nextents = erp->er_extcount; - lastx = ifp->if_lastex - erp->er_extoff; - } else { - base = &ifp->if_u1.if_extents[0]; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - lastx = ifp->if_lastex; + gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; + gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; + gotp->br_state = XFS_EXT_INVALID; +#if XFS_BIG_BLKNOS + gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; +#else + gotp->br_startblock = 0xffffa5a5; +#endif + prevp->br_startoff = NULLFILEOFF; + + ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); + if (lastx > 0) { + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); } - ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, - eofp, lastxp, gotp, prevp); - /* Convert lastx back to file-based index */ - if (ifp->if_flags & XFS_IFEXTIREC) { - *lastxp += erp->er_extoff; + if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { + xfs_bmbt_get_all(ep, gotp); + *eofp = 0; + } else { + if (lastx > 0) { + *gotp = *prevp; + } + *eofp = 1; + ep = NULL; } + *lastxp = lastx; return ep; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 580fa0758039..a16df2d435fe 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -4265,12 +4265,81 @@ xfs_iext_destroy( ifp->if_bytes = 0; } +/* + * Return a pointer to the extent record for file system block bno. + */ +xfs_bmbt_rec_t * /* pointer to found extent record */ +xfs_iext_bno_to_ext( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number to search for */ + xfs_extnum_t *idxp) /* index of target extent */ +{ + xfs_bmbt_rec_t *base; /* pointer to first extent */ + xfs_filblks_t blockcount = 0; /* number of blocks in extent */ + xfs_bmbt_rec_t *ep = NULL; /* pointer to target extent */ + xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ + int high; /* upper boundry in search */ + xfs_extnum_t idx = 0; /* index of target extent */ + int low; /* lower boundry in search */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_fileoff_t startoff = 0; /* start offset of extent */ + + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents == 0) { + *idxp = 0; + return NULL; + } + low = 0; + if (ifp->if_flags & XFS_IFEXTIREC) { + /* Find target extent list */ + int erp_idx = 0; + erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); + base = erp->er_extbuf; + high = erp->er_extcount - 1; + } else { + base = ifp->if_u1.if_extents; + high = nextents - 1; + } + /* Binary search extent records */ + while (low <= high) { + idx = (low + high) >> 1; + ep = base + idx; + startoff = xfs_bmbt_get_startoff(ep); + blockcount = xfs_bmbt_get_blockcount(ep); + if (bno < startoff) { + high = idx - 1; + } else if (bno >= startoff + blockcount) { + low = idx + 1; + } else { + /* Convert back to file-based extent index */ + if (ifp->if_flags & XFS_IFEXTIREC) { + idx += erp->er_extoff; + } + *idxp = idx; + return ep; + } + } + /* Convert back to file-based extent index */ + if (ifp->if_flags & XFS_IFEXTIREC) { + idx += erp->er_extoff; + } + if (bno >= startoff + blockcount) { + if (++idx == nextents) { + ep = NULL; + } else { + ep = xfs_iext_get_ext(ifp, idx); + } + } + *idxp = idx; + return ep; +} + /* * Return a pointer to the indirection array entry containing the * extent record for filesystem block bno. Store the index of the * target irec in *erp_idxp. 
*/ -xfs_ext_irec_t * +xfs_ext_irec_t * /* pointer to found extent record */ xfs_iext_bno_to_irec( xfs_ifork_t *ifp, /* inode fork pointer */ xfs_fileoff_t bno, /* block number to search for */ @@ -4278,7 +4347,7 @@ xfs_iext_bno_to_irec( { xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ xfs_ext_irec_t *erp_next; /* next indirection array entry */ - xfs_extnum_t erp_idx; /* indirection array index */ + int erp_idx; /* indirection array index */ int nlists; /* number of extent irec's (lists) */ int high; /* binary search upper limit */ int low; /* binary search lower limit */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3c1df1d642fa..006396764cbc 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -478,6 +478,7 @@ void xfs_iext_indirect_to_direct(xfs_ifork_t *); void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); void xfs_iext_inline_to_direct(xfs_ifork_t *, int); void xfs_iext_destroy(xfs_ifork_t *); +xfs_bmbt_rec_t *xfs_iext_bno_to_ext(xfs_ifork_t *, xfs_fileoff_t, int *); xfs_ext_irec_t *xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *); xfs_ext_irec_t *xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int); void xfs_iext_irec_init(xfs_ifork_t *); -- cgit v1.2.3 From ec86dc02fdc062d0d298814b1e78b482ab38caf7 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:25:36 +1100 Subject: [XFS] Complete transition away from linvfs naming convention, finally. SGI-PV: 947038 SGI-Modid: xfs-linux-melb:xfs-kern:25474a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 12 +++++----- fs/xfs/linux-2.6/xfs_export.c | 10 ++++---- fs/xfs/linux-2.6/xfs_file.c | 30 ++++++++++++------------ fs/xfs/linux-2.6/xfs_fs_subr.c | 6 ++--- fs/xfs/linux-2.6/xfs_ioctl.c | 8 +++---- fs/xfs/linux-2.6/xfs_ioctl32.c | 2 +- fs/xfs/linux-2.6/xfs_iops.c | 48 +++++++++++++++++++------------------- fs/xfs/linux-2.6/xfs_linux.h | 2 +- fs/xfs/linux-2.6/xfs_lrw.c | 2 +- fs/xfs/linux-2.6/xfs_super.c | 52 +++++++++++++++++++----------------------- fs/xfs/linux-2.6/xfs_super.h | 5 ---- fs/xfs/linux-2.6/xfs_vfs.c | 17 +++++++++++++- fs/xfs/linux-2.6/xfs_vfs.h | 3 ++- fs/xfs/linux-2.6/xfs_vnode.c | 6 ++--- fs/xfs/linux-2.6/xfs_vnode.h | 32 +++++++++++++++----------- fs/xfs/xfs_attr.c | 4 ++-- fs/xfs/xfs_dfrag.c | 4 ++-- fs/xfs/xfs_iget.c | 6 ++--- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_vnodeops.c | 4 ++-- 20 files changed, 135 insertions(+), 122 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 2b610c7ba322..448912f0e753 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -76,7 +76,7 @@ xfs_page_trace( int mask) { xfs_inode_t *ip; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); loff_t isize = i_size_read(inode); loff_t offset = page_offset(page); int delalloc = -1, unmapped = -1, unwritten = -1; @@ -214,7 +214,7 @@ xfs_alloc_ioend( ioend->io_uptodate = 1; /* cleared if any I/O fails */ ioend->io_list = NULL; ioend->io_type = type; - ioend->io_vnode = LINVFS_GET_VP(inode); + ioend->io_vnode = vn_from_inode(inode); ioend->io_buffer_head = NULL; ioend->io_buffer_tail = NULL; atomic_inc(&ioend->io_vnode->v_iocount); @@ -239,7 +239,7 @@ xfs_map_blocks( xfs_iomap_t *mapp, int flags) { - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); int error, nmaps = 1; VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error); @@ -1229,7 +1229,7 @@ __xfs_get_block( int direct, bmapi_flags_t flags) { - vnode_t *vp = LINVFS_GET_VP(inode); + 
vnode_t *vp = vn_from_inode(inode); xfs_iomap_t iomap; xfs_off_t offset; ssize_t size; @@ -1371,7 +1371,7 @@ xfs_vm_direct_IO( { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); xfs_iomap_t iomap; int maps = 1; int error; @@ -1410,7 +1410,7 @@ xfs_vm_bmap( sector_t block) { struct inode *inode = (struct inode *)mapping->host; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); int error; vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 391c1353cd42..b768ea910bbe 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -97,7 +97,7 @@ xfs_fs_encode_fh( int len; int is64 = 0; #if XFS_BIG_INUMS - vfs_t *vfs = LINVFS_GET_VFS(inode->i_sb); + vfs_t *vfs = vfs_from_sb(inode->i_sb); if (!(vfs->vfs_flag & VFS_32BITINODES)) { /* filesystem may contain 64bit inode numbers */ @@ -139,14 +139,14 @@ xfs_fs_get_dentry( vnode_t *vp; struct inode *inode; struct dentry *result; - vfs_t *vfsp = LINVFS_GET_VFS(sb); + vfs_t *vfsp = vfs_from_sb(sb); int error; VFS_VGET(vfsp, &vp, (fid_t *)data, error); if (error || vp == NULL) return ERR_PTR(-ESTALE) ; - inode = LINVFS_GET_IP(vp); + inode = vn_to_inode(vp); result = d_alloc_anon(inode); if (!result) { iput(inode); @@ -164,12 +164,12 @@ xfs_fs_get_parent( struct dentry *parent; cvp = NULL; - vp = LINVFS_GET_VP(child->d_inode); + vp = vn_from_inode(child->d_inode); VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error); if (unlikely(error)) return ERR_PTR(-error); - parent = d_alloc_anon(LINVFS_GET_IP(cvp)); + parent = d_alloc_anon(vn_to_inode(cvp)); if (unlikely(!parent)) { VN_RELE(cvp); return ERR_PTR(-ENOMEM); diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index c271c9936498..185567a6a561 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -58,7 +58,7 @@ __xfs_file_read( { struct iovec iov = {buf, count}; struct file *file = iocb->ki_filp; - vnode_t *vp = LINVFS_GET_VP(file->f_dentry->d_inode); + vnode_t *vp = vn_from_inode(file->f_dentry->d_inode); ssize_t rval; BUG_ON(iocb->ki_pos != pos); @@ -102,7 +102,7 @@ __xfs_file_write( struct iovec iov = {(void __user *)buf, count}; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); ssize_t rval; BUG_ON(iocb->ki_pos != pos); @@ -144,7 +144,7 @@ __xfs_file_readv( loff_t *ppos) { struct inode *inode = file->f_mapping->host; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); struct kiocb kiocb; ssize_t rval; @@ -189,7 +189,7 @@ __xfs_file_writev( loff_t *ppos) { struct inode *inode = file->f_mapping->host; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); struct kiocb kiocb; ssize_t rval; @@ -233,7 +233,7 @@ xfs_file_sendfile( read_actor_t actor, void *target) { - vnode_t *vp = LINVFS_GET_VP(filp->f_dentry->d_inode); + vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); ssize_t rval; VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval); @@ -246,7 +246,7 @@ xfs_file_open( struct inode *inode, struct file *filp) { - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); int error; if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) @@ -263,7 +263,7 @@ xfs_file_release( struct inode *inode, struct file *filp) { - vnode_t *vp = 
LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); int error = 0; if (vp) @@ -279,7 +279,7 @@ xfs_file_fsync( int datasync) { struct inode *inode = dentry->d_inode; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); int error; int flags = FSYNC_WAIT; @@ -307,7 +307,7 @@ xfs_vm_nopage( int *type) { struct inode *inode = area->vm_file->f_dentry->d_inode; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); int error; @@ -340,7 +340,7 @@ xfs_file_readdir( xfs_off_t start_offset, curr_offset; xfs_dirent_t *dbp = NULL; - vp = LINVFS_GET_VP(filp->f_dentry->d_inode); + vp = vn_from_inode(filp->f_dentry->d_inode); ASSERT(vp); /* Try fairly hard to get memory */ @@ -409,7 +409,7 @@ xfs_file_mmap( struct vm_area_struct *vma) { struct inode *ip = filp->f_dentry->d_inode; - vnode_t *vp = LINVFS_GET_VP(ip); + vnode_t *vp = vn_from_inode(ip); vattr_t vattr; int error; @@ -437,7 +437,7 @@ xfs_file_ioctl( { int error; struct inode *inode = filp->f_dentry->d_inode; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error); VMODIFY(vp); @@ -459,7 +459,7 @@ xfs_file_ioctl_invis( { int error; struct inode *inode = filp->f_dentry->d_inode; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); ASSERT(vp); VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error); @@ -481,7 +481,7 @@ xfs_vm_mprotect( struct vm_area_struct *vma, unsigned int newflags) { - vnode_t *vp = LINVFS_GET_VP(vma->vm_file->f_dentry->d_inode); + vnode_t *vp = vn_from_inode(vma->vm_file->f_dentry->d_inode); int error = 0; if (vp->v_vfsp->vfs_flag & VFS_DMI) { @@ -507,7 +507,7 @@ STATIC int xfs_file_open_exec( struct inode *inode) { - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); int error = 0; xfs_inode_t *ip; diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 4fa4b1a5187e..575f2a790f31 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -57,7 +57,7 @@ fs_tosspages( int fiopt) { vnode_t *vp = BHV_TO_VNODE(bdp); - struct inode *ip = LINVFS_GET_IP(vp); + struct inode *ip = vn_to_inode(vp); if (VN_CACHED(vp)) truncate_inode_pages(ip->i_mapping, first); @@ -76,7 +76,7 @@ fs_flushinval_pages( int fiopt) { vnode_t *vp = BHV_TO_VNODE(bdp); - struct inode *ip = LINVFS_GET_IP(vp); + struct inode *ip = vn_to_inode(vp); if (VN_CACHED(vp)) { filemap_write_and_wait(ip->i_mapping); @@ -98,7 +98,7 @@ fs_flush_pages( int fiopt) { vnode_t *vp = BHV_TO_VNODE(bdp); - struct inode *ip = LINVFS_GET_IP(vp); + struct inode *ip = vn_to_inode(vp); if (VN_CACHED(vp)) { filemap_fdatawrite(ip->i_mapping); diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index e435ad17419d..84478491609b 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -138,7 +138,7 @@ xfs_find_handle( } /* we need the vnode */ - vp = LINVFS_GET_VP(inode); + vp = vn_from_inode(inode); /* now we can grab the fsid */ memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); @@ -256,7 +256,7 @@ xfs_vget_fsop_handlereq( } vpp = XFS_ITOV(ip); - inodep = LINVFS_GET_IP(vpp); + inodep = vn_to_inode(vpp); xfs_iunlock(ip, XFS_ILOCK_SHARED); *vp = vpp; @@ -715,7 +715,7 @@ xfs_ioctl( xfs_inode_t *ip; xfs_mount_t *mp; - vp = LINVFS_GET_VP(inode); + vp = vn_from_inode(inode); vn_trace_entry(vp, "xfs_ioctl", 
(inst_t *)__return_address); @@ -1270,7 +1270,7 @@ xfs_ioc_xattr( } case XFS_IOC_GETVERSION: { - flags = LINVFS_GET_IP(vp)->i_generation; + flags = vn_to_inode(vp)->i_generation; if (copy_to_user(arg, &flags, sizeof(flags))) error = -EFAULT; break; diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index e9da0bde36a6..b6321abd9a81 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -111,7 +111,7 @@ xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) { int error; struct inode *inode = f->f_dentry->d_inode; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_to_inode(inode); switch (cmd) { case XFS_IOC_DIOINFO: diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 0a508580e572..64c3395074de 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -106,7 +106,7 @@ xfs_ichgtime( xfs_inode_t *ip, int flags) { - struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); + struct inode *inode = vn_to_inode(XFS_ITOV(ip)); timespec_t tv; nanotime(&tv); @@ -202,7 +202,7 @@ xfs_validate_fields( struct inode *ip, struct vattr *vattr) { - vnode_t *vp = LINVFS_GET_VP(ip); + vnode_t *vp = vn_from_inode(ip); int error; vattr->va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS; @@ -228,7 +228,7 @@ xfs_init_security( struct vnode *vp, struct inode *dir) { - struct inode *ip = LINVFS_GET_IP(vp); + struct inode *ip = vn_to_inode(vp); size_t length; void *value; char *name; @@ -277,7 +277,7 @@ xfs_cleanup_inode( * xfs_init_security we must back out. * ENOSPC can hit here, among other things. */ - teardown.d_inode = LINVFS_GET_IP(vp); + teardown.d_inode = vn_to_inode(vp); teardown.d_name = dentry->d_name; if (S_ISDIR(mode)) @@ -296,7 +296,7 @@ xfs_vn_mknod( { struct inode *ip; vattr_t vattr = { 0 }; - vnode_t *vp = NULL, *dvp = LINVFS_GET_VP(dir); + vnode_t *vp = NULL, *dvp = vn_from_inode(dir); xfs_acl_t *default_acl = NULL; attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; int error; @@ -359,7 +359,7 @@ xfs_vn_mknod( if (likely(!error)) { ASSERT(vp); - ip = LINVFS_GET_IP(vp); + ip = vn_to_inode(vp); if (S_ISCHR(mode) || S_ISBLK(mode)) ip->i_rdev = rdev; @@ -396,7 +396,7 @@ xfs_vn_lookup( struct dentry *dentry, struct nameidata *nd) { - struct vnode *vp = LINVFS_GET_VP(dir), *cvp; + struct vnode *vp = vn_from_inode(dir), *cvp; int error; if (dentry->d_name.len >= MAXNAMELEN) @@ -410,7 +410,7 @@ xfs_vn_lookup( return NULL; } - return d_splice_alias(LINVFS_GET_IP(cvp), dentry); + return d_splice_alias(vn_to_inode(cvp), dentry); } STATIC int @@ -429,8 +429,8 @@ xfs_vn_link( if (S_ISDIR(ip->i_mode)) return -EPERM; - tdvp = LINVFS_GET_VP(dir); - vp = LINVFS_GET_VP(ip); + tdvp = vn_from_inode(dir); + vp = vn_from_inode(ip); VOP_LINK(tdvp, vp, dentry, NULL, error); if (likely(!error)) { @@ -453,7 +453,7 @@ xfs_vn_unlink( int error; inode = dentry->d_inode; - dvp = LINVFS_GET_VP(dir); + dvp = vn_from_inode(dir); VOP_REMOVE(dvp, dentry, NULL, error); if (likely(!error)) { @@ -475,7 +475,7 @@ xfs_vn_symlink( vnode_t *cvp; /* used to lookup symlink to put in dentry */ int error; - dvp = LINVFS_GET_VP(dir); + dvp = vn_from_inode(dir); cvp = NULL; vattr.va_mode = S_IFLNK | @@ -487,7 +487,7 @@ xfs_vn_symlink( if (likely(!error && cvp)) { error = xfs_init_security(cvp, dir); if (likely(!error)) { - ip = LINVFS_GET_IP(cvp); + ip = vn_to_inode(cvp); d_instantiate(dentry, ip); xfs_validate_fields(dir, &vattr); xfs_validate_fields(ip, &vattr); @@ -502,7 +502,7 @@ xfs_vn_rmdir( struct dentry *dentry) { 
struct inode *inode = dentry->d_inode; - vnode_t *dvp = LINVFS_GET_VP(dir); + vnode_t *dvp = vn_from_inode(dir); vattr_t vattr; int error; @@ -527,8 +527,8 @@ xfs_vn_rename( vattr_t vattr; int error; - fvp = LINVFS_GET_VP(odir); - tvp = LINVFS_GET_VP(ndir); + fvp = vn_from_inode(odir); + tvp = vn_from_inode(ndir); VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error); if (likely(!error)) { @@ -573,7 +573,7 @@ xfs_vn_follow_link( return NULL; } - vp = LINVFS_GET_VP(dentry->d_inode); + vp = vn_from_inode(dentry->d_inode); iov.iov_base = link; iov.iov_len = MAXPATHLEN; @@ -616,7 +616,7 @@ xfs_vn_permission( int mode, struct nameidata *nd) { - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); int error; mode <<= 6; /* convert from linux to vnode access bits */ @@ -634,7 +634,7 @@ xfs_vn_getattr( struct kstat *stat) { struct inode *inode = dentry->d_inode; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); int error = 0; if (unlikely(vp->v_flag & VMODIFIED)) @@ -651,7 +651,7 @@ xfs_vn_setattr( { struct inode *inode = dentry->d_inode; unsigned int ia_valid = attr->ia_valid; - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); vattr_t vattr = { 0 }; int flags = 0; int error; @@ -717,7 +717,7 @@ xfs_vn_setxattr( size_t size, int flags) { - vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + vnode_t *vp = vn_from_inode(dentry->d_inode); char *attr = (char *)name; attrnames_t *namesp; int xflags = 0; @@ -747,7 +747,7 @@ xfs_vn_getxattr( void *data, size_t size) { - vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + vnode_t *vp = vn_from_inode(dentry->d_inode); char *attr = (char *)name; attrnames_t *namesp; int xflags = 0; @@ -776,7 +776,7 @@ xfs_vn_listxattr( char *data, size_t size) { - vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + vnode_t *vp = vn_from_inode(dentry->d_inode); int error, xflags = ATTR_KERNAMELS; ssize_t result; @@ -795,7 +795,7 @@ xfs_vn_removexattr( struct dentry *dentry, const char *name) { - vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + vnode_t *vp = vn_from_inode(dentry->d_inode); char *attr = (char *)name; attrnames_t *namesp; int xflags = 0; diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index bd88ccb0cad0..1fe09f2d6519 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -234,7 +234,7 @@ BUFFER_FNS(PrivateStart, unwritten); #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) #define xfs_stack_trace() dump_stack() #define xfs_itruncate_data(ip, off) \ - (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) + (-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off))) #define xfs_statvfs_fsid(statp, mp) \ ({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \ __kernel_fsid_t *fsid = &(statp)->f_fsid; \ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index e0ab45fbfebd..2ede4bb7ecd0 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -432,7 +432,7 @@ xfs_zero_eof( xfs_fsize_t isize, /* current inode size */ xfs_fsize_t end_size) /* terminal inode size */ { - struct inode *ip = LINVFS_GET_IP(vp); + struct inode *ip = vn_to_inode(vp); xfs_fileoff_t start_zero_fsb; xfs_fileoff_t end_zero_fsb; xfs_fileoff_t zero_count_fsb; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 352aa3d40c1f..d9d28a965bac 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -153,7 +153,7 @@ xfs_revalidate_inode( vnode_t *vp, xfs_inode_t *ip) { - struct inode *inode = LINVFS_GET_IP(vp); + struct inode 
*inode = vn_to_inode(vp); inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; @@ -210,7 +210,7 @@ xfs_initialize_vnode( int unlock) { xfs_inode_t *ip = XFS_BHVTOI(inode_bhv); - struct inode *inode = LINVFS_GET_IP(vp); + struct inode *inode = vn_to_inode(vp); if (!inode_bhv->bd_vobj) { vp->v_vfsp = bhvtovfs(bdp); @@ -228,7 +228,7 @@ xfs_initialize_vnode( if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); xfs_set_inodeops(inode); - + ip->i_flags &= ~XFS_INEW; barrier(); @@ -340,14 +340,14 @@ xfs_fs_alloc_inode( vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); if (unlikely(!vp)) return NULL; - return LINVFS_GET_IP(vp); + return vn_to_inode(vp); } STATIC void xfs_fs_destroy_inode( struct inode *inode) { - kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode)); + kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode)); } STATIC void @@ -358,7 +358,7 @@ xfs_fs_inode_init_once( { if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) - inode_init_once(LINVFS_GET_IP((vnode_t *)vnode)); + inode_init_once(vn_to_inode((vnode_t *)vnode)); } STATIC int @@ -409,7 +409,7 @@ xfs_fs_write_inode( struct inode *inode, int sync) { - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); int error = 0, flags = FLUSH_INODE; if (vp) { @@ -432,7 +432,7 @@ STATIC void xfs_fs_clear_inode( struct inode *inode) { - vnode_t *vp = LINVFS_GET_VP(inode); + vnode_t *vp = vn_from_inode(inode); int error, cache; vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -511,7 +511,7 @@ void xfs_flush_inode( xfs_inode_t *ip) { - struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); + struct inode *inode = vn_to_inode(XFS_ITOV(ip)); struct vfs *vfs = XFS_MTOVFS(ip->i_mount); igrab(inode); @@ -536,7 +536,7 @@ void xfs_flush_device( xfs_inode_t *ip) { - struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); + struct inode *inode = vn_to_inode(XFS_ITOV(ip)); struct vfs *vfs = XFS_MTOVFS(ip->i_mount); igrab(inode); @@ -630,7 +630,7 @@ STATIC void xfs_fs_put_super( struct super_block *sb) { - vfs_t *vfsp = LINVFS_GET_VFS(sb); + vfs_t *vfsp = vfs_from_sb(sb); int error; xfs_fs_stop_syncd(vfsp); @@ -650,7 +650,7 @@ STATIC void xfs_fs_write_super( struct super_block *sb) { - vfs_t *vfsp = LINVFS_GET_VFS(sb); + vfs_t *vfsp = vfs_from_sb(sb); int error; if (sb->s_flags & MS_RDONLY) { @@ -667,7 +667,7 @@ xfs_fs_sync_super( struct super_block *sb, int wait) { - vfs_t *vfsp = LINVFS_GET_VFS(sb); + vfs_t *vfsp = vfs_from_sb(sb); int error; int flags = SYNC_FSDATA; @@ -706,7 +706,7 @@ xfs_fs_statfs( struct super_block *sb, struct kstatfs *statp) { - vfs_t *vfsp = LINVFS_GET_VFS(sb); + vfs_t *vfsp = vfs_from_sb(sb); int error; VFS_STATVFS(vfsp, statp, NULL, error); @@ -719,7 +719,7 @@ xfs_fs_remount( int *flags, char *options) { - vfs_t *vfsp = LINVFS_GET_VFS(sb); + vfs_t *vfsp = vfs_from_sb(sb); struct xfs_mount_args *args = xfs_args_allocate(sb); int error; @@ -734,7 +734,7 @@ STATIC void xfs_fs_lockfs( struct super_block *sb) { - VFS_FREEZE(LINVFS_GET_VFS(sb)); + VFS_FREEZE(vfs_from_sb(sb)); } STATIC int @@ -742,7 +742,7 @@ xfs_fs_show_options( struct seq_file *m, struct vfsmount *mnt) { - struct vfs *vfsp = LINVFS_GET_VFS(mnt->mnt_sb); + struct vfs *vfsp = vfs_from_sb(mnt->mnt_sb); int error; VFS_SHOWARGS(vfsp, m, error); @@ -754,7 +754,7 @@ xfs_fs_quotasync( struct super_block *sb, int type) { - struct vfs *vfsp = LINVFS_GET_VFS(sb); + struct vfs *vfsp = vfs_from_sb(sb); int error; VFS_QUOTACTL(vfsp, Q_XQUOTASYNC, 0, 
(caddr_t)NULL, error); @@ -766,7 +766,7 @@ xfs_fs_getxstate( struct super_block *sb, struct fs_quota_stat *fqs) { - struct vfs *vfsp = LINVFS_GET_VFS(sb); + struct vfs *vfsp = vfs_from_sb(sb); int error; VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error); @@ -779,7 +779,7 @@ xfs_fs_setxstate( unsigned int flags, int op) { - struct vfs *vfsp = LINVFS_GET_VFS(sb); + struct vfs *vfsp = vfs_from_sb(sb); int error; VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error); @@ -793,7 +793,7 @@ xfs_fs_getxquota( qid_t id, struct fs_disk_quota *fdq) { - struct vfs *vfsp = LINVFS_GET_VFS(sb); + struct vfs *vfsp = vfs_from_sb(sb); int error, getmode; getmode = (type == USRQUOTA) ? Q_XGETQUOTA : @@ -809,7 +809,7 @@ xfs_fs_setxquota( qid_t id, struct fs_disk_quota *fdq) { - struct vfs *vfsp = LINVFS_GET_VFS(sb); + struct vfs *vfsp = vfs_from_sb(sb); int error, setmode; setmode = (type == USRQUOTA) ? Q_XSETQLIM : @@ -825,15 +825,11 @@ xfs_fs_fill_super( int silent) { vnode_t *rootvp; - struct vfs *vfsp = vfs_allocate(); + struct vfs *vfsp = vfs_allocate(sb); struct xfs_mount_args *args = xfs_args_allocate(sb); struct kstatfs statvfs; int error, error2; - vfsp->vfs_super = sb; - LINVFS_SET_VFS(sb, vfsp); - if (sb->s_flags & MS_RDONLY) - vfsp->vfs_flag |= VFS_RDONLY; bhv_insert_all_vfsops(vfsp); VFS_PARSEARGS(vfsp, (char *)data, args, 0, error); @@ -871,7 +867,7 @@ xfs_fs_fill_super( if (error) goto fail_unmount; - sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp)); + sb->s_root = d_alloc_root(vn_to_inode(rootvp)); if (!sb->s_root) { error = ENOMEM; goto fail_vnrele; diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 5e1b525cb1eb..376b96cb513a 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -98,11 +98,6 @@ extern void xfs_qm_exit(void); XFS_DMAPI_STRING \ XFS_DBG_STRING /* DBG must be last */ -#define LINVFS_GET_VFS(s) \ - (vfs_t *)((s)->s_fs_info) -#define LINVFS_SET_VFS(s, vfsp) \ - ((s)->s_fs_info = vfsp) - struct xfs_inode; struct xfs_mount; struct xfs_buftarg; diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c index a6b4084bda88..6f7c9f7a8624 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.c +++ b/fs/xfs/linux-2.6/xfs_vfs.c @@ -227,7 +227,8 @@ vfs_freeze( } vfs_t * -vfs_allocate( void ) +vfs_allocate( + struct super_block *sb) { struct vfs *vfsp; @@ -236,9 +237,23 @@ vfs_allocate( void ) INIT_LIST_HEAD(&vfsp->vfs_sync_list); spin_lock_init(&vfsp->vfs_sync_lock); init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); + + vfsp->vfs_super = sb; + sb->s_fs_info = vfsp; + + if (sb->s_flags & MS_RDONLY) + vfsp->vfs_flag |= VFS_RDONLY; + return vfsp; } +vfs_t * +vfs_from_sb( + struct super_block *sb) +{ + return (vfs_t *)sb->s_fs_info; +} + void vfs_deallocate( struct vfs *vfsp) diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 57caf9eddee0..8fed356db055 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -193,7 +193,8 @@ typedef struct bhv_vfsops { #define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o)) #define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL ) -extern vfs_t *vfs_allocate(void); +extern vfs_t *vfs_allocate(struct super_block *); +extern vfs_t *vfs_from_sb(struct super_block *); extern void vfs_deallocate(vfs_t *); extern void vfs_insertops(vfs_t *, bhv_vfsops_t *); extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *); diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 225e7dd8b21d..d27c25b27ccd 100644 --- 
a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -58,7 +58,7 @@ struct vnode * vn_initialize( struct inode *inode) { - struct vnode *vp = LINVFS_GET_VP(inode); + struct vnode *vp = vn_from_inode(inode); XFS_STATS_INC(vn_active); XFS_STATS_INC(vn_alloc); @@ -97,7 +97,7 @@ vn_revalidate_core( struct vnode *vp, vattr_t *vap) { - struct inode *inode = LINVFS_GET_IP(vp); + struct inode *inode = vn_to_inode(vp); inode->i_mode = vap->va_mode; inode->i_nlink = vap->va_nlink; @@ -166,7 +166,7 @@ vn_hold( XFS_STATS_INC(vn_hold); VN_LOCK(vp); - inode = igrab(LINVFS_GET_IP(vp)); + inode = igrab(vn_to_inode(vp)); ASSERT(inode); VN_UNLOCK(vp, 0); diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 0cf92ca80ce7..06f5845e9568 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -116,8 +116,14 @@ typedef enum { /* * Vnode to Linux inode mapping. */ -#define LINVFS_GET_VP(inode) ((vnode_t *)list_entry(inode, vnode_t, v_inode)) -#define LINVFS_GET_IP(vp) (&(vp)->v_inode) +static inline struct vnode *vn_from_inode(struct inode *inode) +{ + return (vnode_t *)list_entry(inode, vnode_t, v_inode); +} +static inline struct inode *vn_to_inode(struct vnode *vnode) +{ + return &vnode->v_inode; +} /* * Vnode flags. @@ -498,7 +504,7 @@ extern void vn_iowake(struct vnode *vp); static inline int vn_count(struct vnode *vp) { - return atomic_read(&LINVFS_GET_IP(vp)->i_count); + return atomic_read(&vn_to_inode(vp)->i_count); } /* @@ -512,16 +518,16 @@ extern vnode_t *vn_hold(struct vnode *); vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address)) #define VN_RELE(vp) \ (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \ - iput(LINVFS_GET_IP(vp))) + iput(vn_to_inode(vp))) #else #define VN_HOLD(vp) ((void)vn_hold(vp)) -#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) +#define VN_RELE(vp) (iput(vn_to_inode(vp))) #endif static inline struct vnode *vn_grab(struct vnode *vp) { - struct inode *inode = igrab(LINVFS_GET_IP(vp)); - return inode ? LINVFS_GET_VP(inode) : NULL; + struct inode *inode = igrab(vn_to_inode(vp)); + return inode ? vn_from_inode(inode) : NULL; } /* @@ -529,7 +535,7 @@ static inline struct vnode *vn_grab(struct vnode *vp) */ #define VNAME(dentry) ((char *) (dentry)->d_name.name) #define VNAMELEN(dentry) ((dentry)->d_name.len) -#define VNAME_TO_VNODE(dentry) (LINVFS_GET_VP((dentry)->d_inode)) +#define VNAME_TO_VNODE(dentry) (vn_from_inode((dentry)->d_inode)) /* * Vnode spinlock manipulation. @@ -558,12 +564,12 @@ static __inline__ void vn_flagclr(struct vnode *vp, uint flag) */ static inline void vn_mark_bad(struct vnode *vp) { - make_bad_inode(LINVFS_GET_IP(vp)); + make_bad_inode(vn_to_inode(vp)); } static inline int VN_BAD(struct vnode *vp) { - return is_bad_inode(LINVFS_GET_IP(vp)); + return is_bad_inode(vn_to_inode(vp)); } /* @@ -588,9 +594,9 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt) /* * Some useful predicates. 
*/ -#define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping) -#define VN_CACHED(vp) (LINVFS_GET_IP(vp)->i_mapping->nrpages) -#define VN_DIRTY(vp) mapping_tagged(LINVFS_GET_IP(vp)->i_mapping, \ +#define VN_MAPPED(vp) mapping_mapped(vn_to_inode(vp)->i_mapping) +#define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages) +#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ PAGECACHE_TAG_DIRTY) #define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED) #define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED) diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index e5e91e9c7e89..acf1b7c9f1db 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -2522,7 +2522,7 @@ attr_user_capable( struct vnode *vp, cred_t *cred) { - struct inode *inode = LINVFS_GET_IP(vp); + struct inode *inode = vn_to_inode(vp); if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; @@ -2540,7 +2540,7 @@ attr_trusted_capable( struct vnode *vp, cred_t *cred) { - struct inode *inode = LINVFS_GET_IP(vp); + struct inode *inode = vn_to_inode(vp); if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index c6191d00ad27..4968a6358e61 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -83,7 +83,7 @@ xfs_swapext( /* Pull information for the target fd */ if (((fp = fget((int)sxp->sx_fdtarget)) == NULL) || - ((vp = LINVFS_GET_VP(fp->f_dentry->d_inode)) == NULL)) { + ((vp = vn_from_inode(fp->f_dentry->d_inode)) == NULL)) { error = XFS_ERROR(EINVAL); goto error0; } @@ -95,7 +95,7 @@ xfs_swapext( } if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) || - ((tvp = LINVFS_GET_VP(tfp->f_dentry->d_inode)) == NULL)) { + ((tvp = vn_from_inode(tfp->f_dentry->d_inode)) == NULL)) { error = XFS_ERROR(EINVAL); goto error0; } diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 8e380a1fb79b..3ce35a6f700b 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -258,7 +258,7 @@ again: goto finish_inode; } else if (vp != inode_vp) { - struct inode *inode = LINVFS_GET_IP(inode_vp); + struct inode *inode = vn_to_inode(inode_vp); /* The inode is being torn down, pause and * try again. 
@@ -495,7 +495,7 @@ retry: if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { xfs_inode_t *ip; - vp = LINVFS_GET_VP(inode); + vp = vn_from_inode(inode); if (inode->i_state & I_NEW) { vn_initialize(inode); error = xfs_iget_core(vp, mp, tp, ino, flags, @@ -617,7 +617,7 @@ xfs_iput_new(xfs_inode_t *ip, uint lock_flags) { vnode_t *vp = XFS_ITOV(ip); - struct inode *inode = LINVFS_GET_IP(vp); + struct inode *inode = vn_to_inode(vp); vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a16df2d435fe..86a8451d10fc 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2723,7 +2723,7 @@ xfs_iunpin( /* make sync come back and flush this inode */ if (vp) { - struct inode *inode = LINVFS_GET_IP(vp); + struct inode *inode = vn_to_inode(vp); if (!(inode->i_state & I_NEW)) mark_inode_dirty_sync(inode); @@ -3519,7 +3519,7 @@ xfs_iaccess( { int error; mode_t orgmode = mode; - struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); + struct inode *inode = vn_to_inode(XFS_ITOV(ip)); if (mode & S_IWUSR) { umode_t imode = inode->i_mode; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index eaab355f5a89..8b5a44fe2865 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1556,7 +1556,7 @@ xfs_release( if ((error = xfs_inactive_free_eofblocks(mp, ip))) return error; /* Update linux inode block count after free above */ - LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp, + vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); } } @@ -1637,7 +1637,7 @@ xfs_inactive( if ((error = xfs_inactive_free_eofblocks(mp, ip))) return VN_INACTIVE_CACHE; /* Update linux inode block count after free above */ - LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp, + vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); } goto out; -- cgit v1.2.3 From 2ddd5928d01ca8eb49f55166411b64a5844a8959 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:25:46 +1100 Subject: [XFS] Correct the dquot reservation component for the link transation. SGI-PV: 904196 SGI-Modid: xfs-linux-melb:xfs-kern:25476a Signed-off-by: Nathan Scott --- fs/xfs/xfs_trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 6a2a1e07505b..2918956553a5 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -84,7 +84,7 @@ xfs_calc_rename_reservation(xfs_mount_t *mp) STATIC uint xfs_calc_link_reservation(xfs_mount_t *mp) { - return XFS_CALC_LINK_LOG_RES(mp); + return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); } STATIC uint -- cgit v1.2.3 From b12dd34298cf0cff9f337f667045335140873039 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:26:04 +1100 Subject: [XFS] Fix an infinite loop issue in bulkstat when a corrupt inode is detected. Thanks to Roger Willcocks. 
SGI-PV: 951054 SGI-Modid: xfs-linux-melb:xfs-kern:25477a Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 28 ++++++++++++++-------------- fs/xfs/xfs_inode.h | 7 ++++--- fs/xfs/xfs_itable.c | 5 ++++- fs/xfs/xfs_log_recover.c | 2 +- fs/xfs/xfs_vfsops.c | 2 +- 5 files changed, 24 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 86a8451d10fc..b1e95707e7cf 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -253,7 +253,8 @@ xfs_itobp( xfs_inode_t *ip, xfs_dinode_t **dipp, xfs_buf_t **bpp, - xfs_daddr_t bno) + xfs_daddr_t bno, + uint imap_flags) { xfs_buf_t *bp; int error; @@ -269,10 +270,9 @@ xfs_itobp( * inode on disk. */ imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &imap, XFS_IMAP_LOOKUP); - if (error != 0) { + if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, + XFS_IMAP_LOOKUP | imap_flags))) return error; - } /* * If the inode number maps to a block outside the bounds @@ -336,9 +336,10 @@ xfs_itobp( * (if DEBUG kernel) or the first inode in the buffer, otherwise. */ #ifdef DEBUG - ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; + ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : + (BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog); #else - ni = 1; + ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1; #endif for (i = 0; i < ni; i++) { int di_ok; @@ -868,9 +869,8 @@ xfs_iread( * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will * know that this is a new incore inode. */ - error = xfs_itobp(mp, tp, ip, &dip, &bp, bno); - - if (error != 0) { + error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, 0); + if (error) { kmem_zone_free(xfs_inode_zone, ip); return error; } @@ -1895,7 +1895,7 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); if (error) { return error; } @@ -2004,7 +2004,7 @@ xfs_iunlink_remove( * of dealing with the buffer when there is no need to * change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2066,7 +2066,7 @@ xfs_iunlink_remove( * Now last_ibp points to the buffer previous to us on * the unlinked list. Pull us from the list. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -3023,8 +3023,8 @@ xfs_iflush( /* * Get the buffer containing the on-disk inode. */ - error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0); - if (error != 0) { + error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0); + if (error) { xfs_ifunlock(ip); return error; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 006396764cbc..39ef9c36ea55 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -95,9 +95,10 @@ typedef struct xfs_ifork { #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ /* - * Flags for xfs_imap() and xfs_dilocate(). + * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate(). 
*/ -#define XFS_IMAP_LOOKUP 0x1 +#define XFS_IMAP_LOOKUP 0x1 +#define XFS_IMAP_BULKSTAT 0x2 #ifdef __KERNEL__ struct bhv_desc; @@ -421,7 +422,7 @@ int xfs_finish_reclaim_all(struct xfs_mount *, int); */ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **, - xfs_daddr_t); + xfs_daddr_t, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, xfs_inode_t **, xfs_daddr_t); int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c59450e1be40..32247b6bfee7 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -562,7 +562,8 @@ xfs_bulkstat( if (bp) xfs_buf_relse(bp); error = xfs_itobp(mp, NULL, ip, - &dip, &bp, bno); + &dip, &bp, bno, + XFS_IMAP_BULKSTAT); if (!error) clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; kmem_zone_free(xfs_inode_zone, ip); @@ -570,6 +571,8 @@ xfs_bulkstat( mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, XFS_RANDOM_BULKSTAT_READ_CHUNK)) { bp = NULL; + ubleft = 0; + rval = error; break; } } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7d46cbd6a07a..add13f507ed2 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3249,7 +3249,7 @@ xlog_recover_process_iunlinks( * next inode in the bucket. */ error = xfs_itobp(mp, NULL, ip, &dip, - &ibp, 0); + &ibp, 0, 0); ASSERT(error || (dip != NULL)); } diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 5dd84fe609cc..811a4261fa2c 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -1229,7 +1229,7 @@ xfs_sync_inodes( xfs_iunlock(ip, XFS_ILOCK_SHARED); error = xfs_itobp(mp, NULL, ip, - &dip, &bp, 0); + &dip, &bp, 0, 0); if (!error) { xfs_buf_relse(bp); } else { -- cgit v1.2.3 From a13828b167532a2145c9e3f563a99f810500c7b4 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:26:14 +1100 Subject: [XFS] Cleanup references to i_sem. 
SGI-PV: 904196 SGI-Modid: xfs-linux-melb:xfs-kern:25480a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_lrw.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 2ede4bb7ecd0..ba07b83f988d 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -249,9 +249,8 @@ xfs_read( if (n < size) size = n; - if (XFS_FORCED_SHUTDOWN(mp)) { + if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - } if (unlikely(ioflags & IO_ISDIRECT)) mutex_lock(&inode->i_mutex); @@ -267,7 +266,7 @@ xfs_read( dmflags, &locktype); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); - goto unlock_isem; + goto unlock_mutex; } } @@ -281,7 +280,7 @@ xfs_read( xfs_iunlock(ip, XFS_IOLOCK_SHARED); -unlock_isem: +unlock_mutex: if (unlikely(ioflags & IO_ISDIRECT)) mutex_unlock(&inode->i_mutex); return ret; @@ -573,7 +572,7 @@ xfs_write( vrwlock_t locktype; size_t ocount = 0, count; loff_t pos; - int need_isem = 1, need_flush = 0; + int need_i_mutex = 1, need_flush = 0; XFS_STATS_INC(xs_write_calls); @@ -622,14 +621,14 @@ xfs_write( return XFS_ERROR(-EINVAL); if (!VN_CACHED(vp) && pos < i_size_read(inode)) - need_isem = 0; + need_i_mutex = 0; if (VN_CACHED(vp)) need_flush = 1; } relock: - if (need_isem) { + if (need_i_mutex) { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; @@ -651,7 +650,7 @@ start: S_ISBLK(inode->i_mode)); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); - goto out_unlock_isem; + goto out_unlock_mutex; } new_size = pos + count; @@ -663,7 +662,7 @@ start: loff_t savedsize = pos; int dmflags = FILP_DELAY_FLAG(file); - if (need_isem) + if (need_i_mutex) dmflags |= DM_FLAGS_IMUX; xfs_iunlock(xip, XFS_ILOCK_EXCL); @@ -672,7 +671,7 @@ start: dmflags, &locktype); if (error) { xfs_iunlock(xip, iolock); - goto out_unlock_isem; + goto out_unlock_mutex; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; @@ -710,7 +709,7 @@ start: isize, pos + count); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); - goto out_unlock_isem; + goto out_unlock_mutex; } } xfs_iunlock(xip, XFS_ILOCK_EXCL); @@ -731,7 +730,7 @@ start: error = -remove_suid(file->f_dentry); if (unlikely(error)) { xfs_iunlock(xip, iolock); - goto out_unlock_isem; + goto out_unlock_mutex; } } @@ -747,14 +746,14 @@ retry: -1, FI_REMAPF_LOCKED); } - if (need_isem) { + if (need_i_mutex) { /* demote the lock now the cached pages are gone */ XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); mutex_unlock(&inode->i_mutex); iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; - need_isem = 0; + need_i_mutex = 0; } xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs, @@ -772,7 +771,7 @@ retry: pos += ret; count -= ret; - need_isem = 1; + need_i_mutex = 1; ioflags &= ~IO_ISDIRECT; xfs_iunlock(xip, iolock); goto relock; @@ -794,14 +793,14 @@ retry: !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); - if (need_isem) + if (need_i_mutex) mutex_unlock(&inode->i_mutex); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) goto out_nounlocks; - if (need_isem) + if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_rwlock(bdp, locktype); pos = xip->i_d.di_size; @@ -905,9 +904,9 @@ retry: if (error) goto out_unlock_internal; } - + xfs_rwunlock(bdp, locktype); - if (need_isem) + if (need_i_mutex) mutex_unlock(&inode->i_mutex); error = sync_page_range(inode, mapping, pos, ret); @@ -918,8 +917,8 @@ retry: 
out_unlock_internal: xfs_rwunlock(bdp, locktype); - out_unlock_isem: - if (need_isem) + out_unlock_mutex: + if (need_i_mutex) mutex_unlock(&inode->i_mutex); out_nounlocks: return -error; -- cgit v1.2.3 From 238f4c5468656e3e8b1d39d75c1e4fd73592c1ea Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:26:25 +1100 Subject: [XFS] Make couple names consitent, be more defensive on releasepage (and prep for nobh, someday, maybe). SGI-PV: 904196 SGI-Modid: xfs-linux-melb:xfs-kern:25481a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 448912f0e753..d9bf130c63ba 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1178,7 +1178,7 @@ out_unlock: * free them and we should come back later via writepage. */ STATIC int -xfs_vm_release_page( +xfs_vm_releasepage( struct page *page, gfp_t gfp_mask) { @@ -1191,6 +1191,9 @@ xfs_vm_release_page( xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask); + if (!page_has_buffers(page)) + return 0; + xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); if (!delalloc && !unwritten) goto free_buffers; @@ -1440,7 +1443,7 @@ xfs_vm_readpages( } STATIC int -xfs_vm_invalidate_page( +xfs_vm_invalidatepage( struct page *page, unsigned long offset) { @@ -1454,8 +1457,8 @@ struct address_space_operations xfs_address_space_operations = { .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .sync_page = block_sync_page, - .releasepage = xfs_vm_release_page, - .invalidatepage = xfs_vm_invalidate_page, + .releasepage = xfs_vm_releasepage, + .invalidatepage = xfs_vm_invalidatepage, .prepare_write = xfs_vm_prepare_write, .commit_write = generic_commit_write, .bmap = xfs_vm_bmap, -- cgit v1.2.3 From ce9d37c257ceba5b4d089c544e4673546f647565 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:26:34 +1100 Subject: [XFS] Merge Yingpings fix for a vn_count assert failure during QA - another ENOSPC condition. SGI-PV: 950784 SGI-Modid: xfs-linux-melb:xfs-kern:25482a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_iops.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 64c3395074de..af487437bd7e 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -491,6 +491,8 @@ xfs_vn_symlink( d_instantiate(dentry, ip); xfs_validate_fields(dir, &vattr); xfs_validate_fields(ip, &vattr); + } else { + xfs_cleanup_inode(dvp, cvp, dentry, 0); } } return -error; -- cgit v1.2.3 From 9cea236492ebabb9545564eb039aa0f477a05c96 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:26:41 +1100 Subject: [XFS] Flush and invalidate dirty pages at the start of a direct read also, else we can hit a delalloc-extents-via-direct-io BUG. 
SGI-PV: 949916 SGI-Modid: xfs-linux-melb:xfs-kern:25483a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_lrw.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index ba07b83f988d..2a936bd38ce7 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -270,6 +270,10 @@ xfs_read( } } + if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp))) + VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(*offset)), + -1, FI_REMAPF_LOCKED); + xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, (void *)iovp, segs, *offset, ioflags); ret = __generic_file_aio_read(iocb, iovp, segs, offset); -- cgit v1.2.3 From 70e73f59755867383edf563d5a5cbea614c0fd49 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:26:52 +1100 Subject: [XFS] endianess annotations for xfs_dir2_data_hdr structure. SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25484a Signed-off-by: Nathan Scott --- fs/xfs/xfs_da_btree.c | 2 +- fs/xfs/xfs_dir2_block.c | 19 ++++---- fs/xfs/xfs_dir2_data.c | 123 +++++++++++++++++++++++++----------------------- fs/xfs/xfs_dir2_data.h | 6 +-- fs/xfs/xfs_dir2_leaf.c | 24 +++++----- fs/xfs/xfs_dir2_node.c | 18 +++---- 6 files changed, 99 insertions(+), 93 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 473671fa5c13..96517d29e22c 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -2204,7 +2204,7 @@ xfs_da_do_buf( data = rbp->data; free = rbp->data; magic = INT_GET(info->magic, ARCH_CONVERT); - magic1 = INT_GET(data->hdr.magic, ARCH_CONVERT); + magic1 = be32_to_cpu(data->hdr.magic); if (unlikely( XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && (magic != XFS_DIR_LEAF_MAGIC) && diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 31bc99faa704..35a03f29b2fb 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -100,8 +100,7 @@ xfs_dir2_block_addname( /* * Check the magic number, corrupted if wrong. */ - if (unlikely(INT_GET(block->hdr.magic, ARCH_CONVERT) - != XFS_DIR2_BLOCK_MAGIC)) { + if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_dir2_block_addname", XFS_ERRLEVEL_LOW, mp, block); xfs_da_brelse(tp, bp); @@ -138,7 +137,7 @@ xfs_dir2_block_addname( */ else { dup = (xfs_dir2_data_unused_t *) - ((char *)block + INT_GET(bf[0].offset, ARCH_CONVERT)); + ((char *)block + be16_to_cpu(bf[0].offset)); if (dup == enddup) { /* * It is the biggest freespace, is it too small @@ -149,10 +148,10 @@ xfs_dir2_block_addname( * Yes, we use the second-largest * entry instead if it works. */ - if (INT_GET(bf[1].length, ARCH_CONVERT) >= len) + if (be16_to_cpu(bf[1].length) >= len) dup = (xfs_dir2_data_unused_t *) ((char *)block + - INT_GET(bf[1].offset, ARCH_CONVERT)); + be16_to_cpu(bf[1].offset)); else dup = NULL; } @@ -172,9 +171,9 @@ xfs_dir2_block_addname( * If there are stale entries we'll use one for the leaf. * Is the biggest entry enough to avoid compaction? */ - else if (INT_GET(bf[0].length, ARCH_CONVERT) >= len) { + else if (be16_to_cpu(bf[0].length) >= len) { dup = (xfs_dir2_data_unused_t *) - ((char *)block + INT_GET(bf[0].offset, ARCH_CONVERT)); + ((char *)block + be16_to_cpu(bf[0].offset)); compact = 0; } /* @@ -935,7 +934,7 @@ xfs_dir2_leaf_to_block( goto out; } block = dbp->data; - ASSERT(INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC); + ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC); /* * Size of the "leaf" area in the block. 
*/ @@ -956,7 +955,7 @@ xfs_dir2_leaf_to_block( /* * Start converting it to block form. */ - INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_BLOCK_MAGIC); + block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); needlog = 1; needscan = 0; /* @@ -1095,7 +1094,7 @@ xfs_dir2_sf_to_block( return error; } block = bp->data; - INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_BLOCK_MAGIC); + block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); /* * Compute size of block "tail" area. */ diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 5b7c47e2f14a..fd6b7c03018a 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -70,11 +70,11 @@ xfs_dir2_data_check( mp = dp->i_mount; d = bp->data; - ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || - INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); bf = d->hdr.bestfree; p = (char *)d->u; - if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) { + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); lep = XFS_DIR2_BLOCK_LEAF_P(btp); endp = (char *)lep; @@ -96,8 +96,8 @@ xfs_dir2_data_check( ASSERT(!bf[2].offset); freeseen |= 1 << 2; } - ASSERT(INT_GET(bf[0].length, ARCH_CONVERT) >= INT_GET(bf[1].length, ARCH_CONVERT)); - ASSERT(INT_GET(bf[1].length, ARCH_CONVERT) >= INT_GET(bf[2].length, ARCH_CONVERT)); + ASSERT(be16_to_cpu(bf[0].length) >= be16_to_cpu(bf[1].length)); + ASSERT(be16_to_cpu(bf[1].length) >= be16_to_cpu(bf[2].length)); /* * Loop over the data/unused entries. */ @@ -117,8 +117,10 @@ xfs_dir2_data_check( i = (int)(dfp - bf); ASSERT((freeseen & (1 << i)) == 0); freeseen |= 1 << i; - } else - ASSERT(INT_GET(dup->length, ARCH_CONVERT) <= INT_GET(bf[2].length, ARCH_CONVERT)); + } else { + ASSERT(INT_GET(dup->length, ARCH_CONVERT) <= + be16_to_cpu(bf[2].length)); + } p += INT_GET(dup->length, ARCH_CONVERT); lastfree = 1; continue; @@ -136,7 +138,7 @@ xfs_dir2_data_check( (char *)dep - (char *)d); count++; lastfree = 0; - if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) { + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, (xfs_dir2_data_aoff_t) ((char *)dep - (char *)d)); @@ -154,7 +156,7 @@ xfs_dir2_data_check( * Need to have seen all the entries and all the bestfree slots. */ ASSERT(freeseen == 7); - if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) { + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { for (i = stale = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) { if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) stale++; @@ -190,8 +192,8 @@ xfs_dir2_data_freefind( * Check order, non-overlapping entries, and if we find the * one we're looking for it has to be exact. 
*/ - ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || - INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0; dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; dfp++) { @@ -201,23 +203,24 @@ xfs_dir2_data_freefind( continue; } ASSERT(seenzero == 0); - if (INT_GET(dfp->offset, ARCH_CONVERT) == off) { + if (be16_to_cpu(dfp->offset) == off) { matched = 1; - ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(dup->length, ARCH_CONVERT)); - } else if (off < INT_GET(dfp->offset, ARCH_CONVERT)) - ASSERT(off + INT_GET(dup->length, ARCH_CONVERT) <= INT_GET(dfp->offset, ARCH_CONVERT)); + ASSERT(be16_to_cpu(dfp->length) == INT_GET(dup->length, ARCH_CONVERT)); + } else if (off < be16_to_cpu(dfp->offset)) + ASSERT(off + INT_GET(dup->length, ARCH_CONVERT) <= be16_to_cpu(dfp->offset)); else - ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) + INT_GET(dfp->length, ARCH_CONVERT) <= off); - ASSERT(matched || INT_GET(dfp->length, ARCH_CONVERT) >= INT_GET(dup->length, ARCH_CONVERT)); + ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); + ASSERT(matched || be16_to_cpu(dfp->length) >= INT_GET(dup->length, ARCH_CONVERT)); if (dfp > &d->hdr.bestfree[0]) - ASSERT(INT_GET(dfp[-1].length, ARCH_CONVERT) >= INT_GET(dfp[0].length, ARCH_CONVERT)); + ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); } #endif /* * If this is smaller than the smallest bestfree entry, * it can't be there since they're sorted. */ - if (INT_GET(dup->length, ARCH_CONVERT) < INT_GET(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length, ARCH_CONVERT)) + if (INT_GET(dup->length, ARCH_CONVERT) < + be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) return NULL; /* * Look at the three bestfree entries for our guy. @@ -227,7 +230,7 @@ xfs_dir2_data_freefind( dfp++) { if (!dfp->offset) return NULL; - if (INT_GET(dfp->offset, ARCH_CONVERT) == off) + if (be16_to_cpu(dfp->offset) == off) return dfp; } /* @@ -249,29 +252,29 @@ xfs_dir2_data_freeinsert( xfs_dir2_data_free_t new; /* new bestfree entry */ #ifdef __KERNEL__ - ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || - INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); #endif dfp = d->hdr.bestfree; - INT_COPY(new.length, dup->length, ARCH_CONVERT); - INT_SET(new.offset, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dup - (char *)d)); + new.length = dup->length; + new.offset = cpu_to_be16((char *)dup - (char *)d); /* * Insert at position 0, 1, or 2; or not at all. 
*/ - if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[0].length, ARCH_CONVERT)) { + if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) { dfp[2] = dfp[1]; dfp[1] = dfp[0]; dfp[0] = new; *loghead = 1; return &dfp[0]; } - if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[1].length, ARCH_CONVERT)) { + if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) { dfp[2] = dfp[1]; dfp[1] = new; *loghead = 1; return &dfp[1]; } - if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[2].length, ARCH_CONVERT)) { + if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) { dfp[2] = new; *loghead = 1; return &dfp[2]; @@ -289,8 +292,8 @@ xfs_dir2_data_freeremove( int *loghead) /* out: log data header */ { #ifdef __KERNEL__ - ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || - INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); #endif /* * It's the first entry, slide the next 2 up. @@ -334,8 +337,8 @@ xfs_dir2_data_freescan( char *p; /* current entry pointer */ #ifdef __KERNEL__ - ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || - INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); #endif /* * Start by clearing the table. @@ -348,7 +351,7 @@ xfs_dir2_data_freescan( p = (char *)d->u; if (aendp) endp = aendp; - else if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) { + else if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); } else @@ -415,8 +418,8 @@ xfs_dir2_data_init( * Initialize the header. 
*/ d = bp->data; - INT_SET(d->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC); - INT_SET(d->hdr.bestfree[0].offset, ARCH_CONVERT, (xfs_dir2_data_off_t)sizeof(d->hdr)); + d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr)); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { d->hdr.bestfree[i].length = 0; d->hdr.bestfree[i].offset = 0; @@ -428,7 +431,7 @@ xfs_dir2_data_init( INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); t=mp->m_dirblksize - (uint)sizeof(d->hdr); - INT_SET(d->hdr.bestfree[0].length, ARCH_CONVERT, t); + d->hdr.bestfree[0].length = cpu_to_be16(t); INT_SET(dup->length, ARCH_CONVERT, t); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dup - (char *)d)); @@ -453,8 +456,8 @@ xfs_dir2_data_log_entry( xfs_dir2_data_t *d; /* data block pointer */ d = bp->data; - ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || - INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d), (uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) - (char *)d - 1)); @@ -471,8 +474,8 @@ xfs_dir2_data_log_header( xfs_dir2_data_t *d; /* data block pointer */ d = bp->data; - ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || - INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d), (uint)(sizeof(d->hdr) - 1)); } @@ -489,8 +492,8 @@ xfs_dir2_data_log_unused( xfs_dir2_data_t *d; /* data block pointer */ d = bp->data; - ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || - INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); /* * Log the first part of the unused entry. */ @@ -533,12 +536,12 @@ xfs_dir2_data_make_free( /* * Figure out where the end of the data area is. */ - if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC) + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC) endptr = (char *)d + mp->m_dirblksize; else { xfs_dir2_block_tail_t *btp; /* block tail */ - ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); } @@ -586,7 +589,7 @@ xfs_dir2_data_make_free( * since the third bestfree is there, there might be more * entries. */ - needscan = d->hdr.bestfree[2].length; + needscan = (d->hdr.bestfree[2].length != 0); /* * Fix up the new big freespace. */ @@ -614,7 +617,7 @@ xfs_dir2_data_make_free( */ dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp); ASSERT(dfp == &d->hdr.bestfree[0]); - ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(prevdup->length, ARCH_CONVERT)); + ASSERT(be16_to_cpu(dfp->length) == INT_GET(prevdup->length, ARCH_CONVERT)); ASSERT(!dfp[1].length); ASSERT(!dfp[2].length); } @@ -640,8 +643,10 @@ xfs_dir2_data_make_free( /* * Otherwise we need a scan if the new entry is big enough. 
*/ - else - needscan = INT_GET(prevdup->length, ARCH_CONVERT) > INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT); + else { + needscan = INT_GET(prevdup->length, ARCH_CONVERT) > + be16_to_cpu(d->hdr.bestfree[2].length); + } } /* * The following entry is free, merge with it. @@ -666,8 +671,10 @@ xfs_dir2_data_make_free( /* * Otherwise we need a scan if the new entry is big enough. */ - else - needscan = INT_GET(newdup->length, ARCH_CONVERT) > INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT); + else { + needscan = INT_GET(newdup->length, ARCH_CONVERT) > + be16_to_cpu(d->hdr.bestfree[2].length); + } } /* * Neither neighbor is free. Make a new entry. @@ -707,8 +714,8 @@ xfs_dir2_data_use_free( int oldlen; /* old unused entry's length */ d = bp->data; - ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || - INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || + be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); ASSERT(INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG); ASSERT(offset >= (char *)dup - (char *)d); ASSERT(offset + len <= (char *)dup + INT_GET(dup->length, ARCH_CONVERT) - (char *)d); @@ -718,7 +725,7 @@ xfs_dir2_data_use_free( */ dfp = xfs_dir2_data_freefind(d, dup); oldlen = INT_GET(dup->length, ARCH_CONVERT); - ASSERT(dfp || oldlen <= INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT)); + ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length)); /* * Check for alignment with front and back of the entry. */ @@ -732,7 +739,7 @@ xfs_dir2_data_use_free( */ if (matchfront && matchback) { if (dfp) { - needscan = d->hdr.bestfree[2].offset; + needscan = (d->hdr.bestfree[2].offset != 0); if (!needscan) xfs_dir2_data_freeremove(d, dfp, needlogp); } @@ -755,8 +762,8 @@ xfs_dir2_data_use_free( xfs_dir2_data_freeremove(d, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); ASSERT(dfp != NULL); - ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(newdup->length, ARCH_CONVERT)); - ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) == (char *)newdup - (char *)d); + ASSERT(be16_to_cpu(dfp->length) == INT_GET(newdup->length, ARCH_CONVERT)); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); /* * If we got inserted at the last slot, * that means we don't know if there was a better @@ -783,8 +790,8 @@ xfs_dir2_data_use_free( xfs_dir2_data_freeremove(d, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); ASSERT(dfp != NULL); - ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(newdup->length, ARCH_CONVERT)); - ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) == (char *)newdup - (char *)d); + ASSERT(be16_to_cpu(dfp->length) == INT_GET(newdup->length, ARCH_CONVERT)); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); /* * If we got inserted at the last slot, * that means we don't know if there was a better @@ -819,7 +826,7 @@ xfs_dir2_data_use_free( * the 2 new will work. */ if (dfp) { - needscan = d->hdr.bestfree[2].length; + needscan = (d->hdr.bestfree[2].length != 0); if (!needscan) { xfs_dir2_data_freeremove(d, dfp, needlogp); (void)xfs_dir2_data_freeinsert(d, newdup, diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h index 5e3a7f9ec735..dd8b86523635 100644 --- a/fs/xfs/xfs_dir2_data.h +++ b/fs/xfs/xfs_dir2_data.h @@ -65,8 +65,8 @@ struct xfs_trans; * The freespace will be formatted as a xfs_dir2_data_unused_t. 
*/ typedef struct xfs_dir2_data_free { - xfs_dir2_data_off_t offset; /* start of freespace */ - xfs_dir2_data_off_t length; /* length of freespace */ + __be16 offset; /* start of freespace */ + __be16 length; /* length of freespace */ } xfs_dir2_data_free_t; /* @@ -75,7 +75,7 @@ typedef struct xfs_dir2_data_free { * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. */ typedef struct xfs_dir2_data_hdr { - __uint32_t magic; /* XFS_DIR2_DATA_MAGIC */ + __be32 magic; /* XFS_DIR2_DATA_MAGIC */ /* or XFS_DIR2_BLOCK_MAGIC */ xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; } xfs_dir2_data_hdr_t; diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index e4e07c8f7a76..8d7f154d1900 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -133,7 +133,7 @@ xfs_dir2_block_to_leaf( /* * Fix up the block header, make it a data block. */ - INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC); + block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); if (needscan) xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, NULL); @@ -143,7 +143,7 @@ xfs_dir2_block_to_leaf( ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); INT_SET(ltp->bestcount, ARCH_CONVERT, 1); bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); - INT_COPY(bestsp[0], block->hdr.bestfree[0].length, ARCH_CONVERT); + bestsp[0] = block->hdr.bestfree[0].length; /* * Log the data header and leaf bests table. */ @@ -372,7 +372,7 @@ xfs_dir2_leaf_addname( else xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); data = dbp->data; - INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT); + bestsp[use_block] = data->hdr.bestfree[0].length; grown = 1; } /* @@ -394,7 +394,7 @@ xfs_dir2_leaf_addname( * Point to the biggest freespace in our data block. */ dup = (xfs_dir2_data_unused_t *) - ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT)); + ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); ASSERT(INT_GET(dup->length, ARCH_CONVERT) >= length); needscan = needlog = 0; /* @@ -427,8 +427,8 @@ xfs_dir2_leaf_addname( * If the bests table needs to be changed, do it. * Log the change unless we've already done that. */ - if (INT_GET(bestsp[use_block], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) { - INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT); + if (INT_GET(bestsp[use_block], ARCH_CONVERT) != be16_to_cpu(data->hdr.bestfree[0].length)) { + bestsp[use_block] = data->hdr.bestfree[0].length; if (!grown) xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); } @@ -1477,7 +1477,7 @@ xfs_dir2_leaf_removename( dep = (xfs_dir2_data_entry_t *) ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT))); needscan = needlog = 0; - oldbest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT); + oldbest = be16_to_cpu(data->hdr.bestfree[0].length); ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); ASSERT(INT_GET(bestsp[db], ARCH_CONVERT) == oldbest); @@ -1506,15 +1506,15 @@ xfs_dir2_leaf_removename( * If the longest freespace in the data block has changed, * put the new value in the bests table and log that. */ - if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) != oldbest) { - INT_COPY(bestsp[db], data->hdr.bestfree[0].length, ARCH_CONVERT); + if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) { + bestsp[db] = data->hdr.bestfree[0].length; xfs_dir2_leaf_log_bests(tp, lbp, db, db); } xfs_dir2_data_check(dp, dbp); /* * If the data block is now empty then get rid of the data block. 
*/ - if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) == + if (be16_to_cpu(data->hdr.bestfree[0].length) == mp->m_dirblksize - (uint)sizeof(data->hdr)) { ASSERT(db != mp->m_dirdatablk); if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { @@ -1708,7 +1708,7 @@ xfs_dir2_leaf_trim_data( } #ifdef DEBUG data = dbp->data; - ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC); + ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); #endif /* this seems to be an error * data is only valid if DEBUG is defined? @@ -1717,7 +1717,7 @@ xfs_dir2_leaf_trim_data( leaf = lbp->data; ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); - ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) == + ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == mp->m_dirblksize - (uint)sizeof(data->hdr)); ASSERT(db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1); /* diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 641f8633d254..e47bde74eecf 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -894,7 +894,7 @@ xfs_dir2_leafn_remove( dbp = dblk->bp; data = dbp->data; dep = (xfs_dir2_data_entry_t *)((char *)data + off); - longest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT); + longest = be16_to_cpu(data->hdr.bestfree[0].length); needlog = needscan = 0; xfs_dir2_data_make_free(tp, dbp, off, XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); @@ -911,7 +911,7 @@ xfs_dir2_leafn_remove( * If the longest data block freespace changes, need to update * the corresponding freeblock entry. */ - if (longest < INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) { + if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) { int error; /* error return value */ xfs_dabuf_t *fbp; /* freeblock buffer */ xfs_dir2_db_t fdb; /* freeblock block number */ @@ -937,7 +937,7 @@ xfs_dir2_leafn_remove( * Calculate which entry we need to fix. */ findex = XFS_DIR2_DB_TO_FDINDEX(mp, db); - longest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT); + longest = be16_to_cpu(data->hdr.bestfree[0].length); /* * If the data block is now empty we can get rid of it * (usually). @@ -1649,7 +1649,7 @@ xfs_dir2_node_addname_int( * change again. */ data = dbp->data; - INT_COPY(free->bests[findex], data->hdr.bestfree[0].length, ARCH_CONVERT); + free->bests[findex] = data->hdr.bestfree[0].length; logfree = 1; } /* @@ -1677,12 +1677,12 @@ xfs_dir2_node_addname_int( data = dbp->data; logfree = 0; } - ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) >= length); + ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length); /* * Point to the existing unused space. */ dup = (xfs_dir2_data_unused_t *) - ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT)); + ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); needscan = needlog = 0; /* * Mark the first part of the unused space, inuse for us. @@ -1713,8 +1713,8 @@ xfs_dir2_node_addname_int( /* * If the freespace entry is now wrong, update it. */ - if (INT_GET(free->bests[findex], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) { - INT_COPY(free->bests[findex], data->hdr.bestfree[0].length, ARCH_CONVERT); + if (INT_GET(free->bests[findex], ARCH_CONVERT) != be16_to_cpu(data->hdr.bestfree[0].length)) { + free->bests[findex] = data->hdr.bestfree[0].length; logfree = 1; } /* @@ -1900,7 +1900,7 @@ xfs_dir2_node_replace( * Point to the data entry. 
*/ data = state->extrablk.bp->data; - ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC); + ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); dep = (xfs_dir2_data_entry_t *) ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(state->mp, INT_GET(lep->address, ARCH_CONVERT))); -- cgit v1.2.3 From 0ba962ef7128d9276b8f95196382d5b9e2ad841d Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:27:07 +1100 Subject: [XFS] endianess annotations for xfs_dir2_free_hdr_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25485a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_da_btree.c | 2 +- fs/xfs/xfs_dir2_leaf.c | 6 ++-- fs/xfs/xfs_dir2_node.c | 90 +++++++++++++++++++++++++------------------------- fs/xfs/xfs_dir2_node.h | 10 +++--- 4 files changed, 54 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 96517d29e22c..2f4acb9d9b9c 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -2213,7 +2213,7 @@ xfs_da_do_buf( (magic != XFS_DIR2_LEAFN_MAGIC) && (magic1 != XFS_DIR2_BLOCK_MAGIC) && (magic1 != XFS_DIR2_DATA_MAGIC) && - (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC), + (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), mp, XFS_ERRTAG_DA_READ_BUF, XFS_RANDOM_DA_READ_BUF))) { xfs_buftrace("DA READ ERROR", rbp->bps[0]); diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 8d7f154d1900..bd425925ee24 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -1816,7 +1816,7 @@ xfs_dir2_node_to_leaf( return error; } free = fbp->data; - ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); ASSERT(!free->hdr.firstdb); /* * Now see if the leafn and free data will fit in a leaf1. @@ -1824,7 +1824,7 @@ xfs_dir2_node_to_leaf( */ if ((uint)sizeof(leaf->hdr) + (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)) * (uint)sizeof(leaf->ents[0]) + - INT_GET(free->hdr.nvalid, ARCH_CONVERT) * (uint)sizeof(leaf->bests[0]) + + be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) + (uint)sizeof(leaf->tail) > mp->m_dirblksize) { xfs_da_brelse(tp, fbp); @@ -1843,7 +1843,7 @@ xfs_dir2_node_to_leaf( * Set up the leaf tail from the freespace block. */ ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); - INT_COPY(ltp->bestcount, free->hdr.nvalid, ARCH_CONVERT); + ltp->bestcount = free->hdr.nvalid; /* * Set up the leaf bests table. 
*/ diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index e47bde74eecf..b051e2a09f31 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -76,7 +76,7 @@ xfs_dir2_free_log_bests( xfs_dir2_free_t *free; /* freespace structure */ free = bp->data; - ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)&free->bests[first] - (char *)free), (uint)((char *)&free->bests[last] - (char *)free + @@ -94,7 +94,7 @@ xfs_dir2_free_log_header( xfs_dir2_free_t *free; /* freespace structure */ free = bp->data; - ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); } @@ -121,7 +121,7 @@ xfs_dir2_leaf_to_node( xfs_mount_t *mp; /* filesystem mount point */ int n; /* count of live freespc ents */ xfs_dir2_data_off_t off; /* freespace entry value */ - xfs_dir2_data_off_t *to; /* pointer to freespace entry */ + __be16 *to; /* pointer to freespace entry */ xfs_trans_t *tp; /* transaction pointer */ xfs_dir2_trace_args_b("leaf_to_node", args, lbp); @@ -149,10 +149,10 @@ xfs_dir2_leaf_to_node( /* * Initialize the freespace block header. */ - INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC); + free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); free->hdr.firstdb = 0; ASSERT(INT_GET(ltp->bestcount, ARCH_CONVERT) <= (uint)dp->i_d.di_size / mp->m_dirblksize); - INT_COPY(free->hdr.nvalid, ltp->bestcount, ARCH_CONVERT); + free->hdr.nvalid = ltp->bestcount; /* * Copy freespace entries from the leaf block to the new block. * Count active entries. @@ -161,16 +161,16 @@ xfs_dir2_leaf_to_node( i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++, from++, to++) { if ((off = INT_GET(*from, ARCH_CONVERT)) != NULLDATAOFF) n++; - INT_SET(*to, ARCH_CONVERT, off); + *to = cpu_to_be16(off); } - INT_SET(free->hdr.nused, ARCH_CONVERT, n); + free->hdr.nused = cpu_to_be32(n); INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAFN_MAGIC); /* * Log everything. */ xfs_dir2_leaf_log_header(tp, lbp); xfs_dir2_free_log_header(tp, fbp); - xfs_dir2_free_log_bests(tp, fbp, 0, INT_GET(free->hdr.nvalid, ARCH_CONVERT) - 1); + xfs_dir2_free_log_bests(tp, fbp, 0, be32_to_cpu(free->hdr.nvalid) - 1); xfs_da_buf_done(fbp); xfs_dir2_leafn_check(dp, lbp); return 0; @@ -443,7 +443,7 @@ xfs_dir2_leafn_lookup_int( curdb = -1; length = XFS_DIR2_DATA_ENTSIZE(args->namelen); if ((free = (curbp ? curbp->data : NULL))) - ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); } /* * For others, it's a data block buffer, get the block number. @@ -506,15 +506,15 @@ xfs_dir2_leafn_lookup_int( } curfdb = newfdb; free = curbp->data; - ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); - ASSERT((INT_GET(free->hdr.firstdb, ARCH_CONVERT) % + ASSERT((be32_to_cpu(free->hdr.firstdb) % XFS_DIR2_MAX_FREE_BESTS(mp)) == 0); - ASSERT(INT_GET(free->hdr.firstdb, ARCH_CONVERT) <= curdb); + ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb); ASSERT(curdb < - INT_GET(free->hdr.firstdb, ARCH_CONVERT) + - INT_GET(free->hdr.nvalid, ARCH_CONVERT)); + be32_to_cpu(free->hdr.firstdb) + + be32_to_cpu(free->hdr.nvalid)); } /* * Get the index for our entry. 
@@ -523,12 +523,12 @@ xfs_dir2_leafn_lookup_int( /* * If it has room, return it. */ - if (unlikely(INT_GET(free->bests[fi], ARCH_CONVERT) == NULLDATAOFF)) { + if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", XFS_ERRLEVEL_LOW, mp); return XFS_ERROR(EFSCORRUPTED); } - if (INT_GET(free->bests[fi], ARCH_CONVERT) >= length) { + if (be16_to_cpu(free->bests[fi]) >= length) { *indexp = index; state->extravalid = 1; state->extrablk.bp = curbp; @@ -929,8 +929,8 @@ xfs_dir2_leafn_remove( return error; } free = fbp->data; - ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); - ASSERT(INT_GET(free->hdr.firstdb, ARCH_CONVERT) == + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + ASSERT(be32_to_cpu(free->hdr.firstdb) == XFS_DIR2_MAX_FREE_BESTS(mp) * (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); /* @@ -969,7 +969,7 @@ xfs_dir2_leafn_remove( /* * One less used entry in the free table. */ - INT_MOD(free->hdr.nused, ARCH_CONVERT, -1); + free->hdr.nused = cpu_to_be32(-1); xfs_dir2_free_log_header(tp, fbp); /* * If this was the last entry in the table, we can @@ -977,21 +977,21 @@ xfs_dir2_leafn_remove( * entries at the end referring to non-existent * data blocks, get those too. */ - if (findex == INT_GET(free->hdr.nvalid, ARCH_CONVERT) - 1) { + if (findex == be32_to_cpu(free->hdr.nvalid) - 1) { int i; /* free entry index */ for (i = findex - 1; - i >= 0 && INT_GET(free->bests[i], ARCH_CONVERT) == NULLDATAOFF; + i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF; i--) continue; - INT_SET(free->hdr.nvalid, ARCH_CONVERT, i + 1); + free->hdr.nvalid = cpu_to_be32(i + 1); logfree = 0; } /* * Not the last entry, just punch it out. */ else { - INT_SET(free->bests[findex], ARCH_CONVERT, NULLDATAOFF); + free->bests[findex] = cpu_to_be16(NULLDATAOFF); logfree = 1; } /* @@ -1017,7 +1017,7 @@ xfs_dir2_leafn_remove( * the new value. */ else { - INT_SET(free->bests[findex], ARCH_CONVERT, longest); + free->bests[findex] = cpu_to_be16(longest); logfree = 1; } /* @@ -1397,7 +1397,7 @@ xfs_dir2_node_addname_int( */ ifbno = fblk->blkno; free = fbp->data; - ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); findex = fblk->index; /* * This means the free entry showed that the data block had @@ -1405,10 +1405,10 @@ xfs_dir2_node_addname_int( * Use that data block. */ if (findex >= 0) { - ASSERT(findex < INT_GET(free->hdr.nvalid, ARCH_CONVERT)); - ASSERT(INT_GET(free->bests[findex], ARCH_CONVERT) != NULLDATAOFF); - ASSERT(INT_GET(free->bests[findex], ARCH_CONVERT) >= length); - dbno = INT_GET(free->hdr.firstdb, ARCH_CONVERT) + findex; + ASSERT(findex < be32_to_cpu(free->hdr.nvalid)); + ASSERT(be16_to_cpu(free->bests[findex]) != NULLDATAOFF); + ASSERT(be16_to_cpu(free->bests[findex]) >= length); + dbno = be32_to_cpu(free->hdr.firstdb) + findex; } /* * The data block looked at didn't have enough room. @@ -1481,20 +1481,20 @@ xfs_dir2_node_addname_int( continue; } free = fbp->data; - ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); findex = 0; } /* * Look at the current free entry. Is it good enough? 
*/ - if (INT_GET(free->bests[findex], ARCH_CONVERT) != NULLDATAOFF && - INT_GET(free->bests[findex], ARCH_CONVERT) >= length) - dbno = INT_GET(free->hdr.firstdb, ARCH_CONVERT) + findex; + if (be16_to_cpu(free->bests[findex]) != NULLDATAOFF && + be16_to_cpu(free->bests[findex]) >= length) + dbno = be32_to_cpu(free->hdr.firstdb) + findex; else { /* * Are we done with the freeblock? */ - if (++findex == INT_GET(free->hdr.nvalid, ARCH_CONVERT)) { + if (++findex == be32_to_cpu(free->hdr.nvalid)) { /* * Drop the block. */ @@ -1608,15 +1608,15 @@ xfs_dir2_node_addname_int( * its first slot as our empty slot. */ free = fbp->data; - INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC); - INT_SET(free->hdr.firstdb, ARCH_CONVERT, + free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); + free->hdr.firstdb = cpu_to_be32( (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * XFS_DIR2_MAX_FREE_BESTS(mp)); free->hdr.nvalid = 0; free->hdr.nused = 0; } else { free = fbp->data; - ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); } /* @@ -1627,20 +1627,20 @@ xfs_dir2_node_addname_int( * If it's after the end of the current entries in the * freespace block, extend that table. */ - if (findex >= INT_GET(free->hdr.nvalid, ARCH_CONVERT)) { + if (findex >= be32_to_cpu(free->hdr.nvalid)) { ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp)); - INT_SET(free->hdr.nvalid, ARCH_CONVERT, findex + 1); + free->hdr.nvalid = cpu_to_be32(findex + 1); /* * Tag new entry so nused will go up. */ - INT_SET(free->bests[findex], ARCH_CONVERT, NULLDATAOFF); + free->bests[findex] = cpu_to_be16(NULLDATAOFF); } /* * If this entry was for an empty data block * (this should always be true) then update the header. */ - if (INT_GET(free->bests[findex], ARCH_CONVERT) == NULLDATAOFF) { - INT_MOD(free->hdr.nused, ARCH_CONVERT, +1); + if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) { + be32_add(&free->hdr.nused, 1); xfs_dir2_free_log_header(tp, fbp); } /* @@ -1713,7 +1713,7 @@ xfs_dir2_node_addname_int( /* * If the freespace entry is now wrong, update it. */ - if (INT_GET(free->bests[findex], ARCH_CONVERT) != be16_to_cpu(data->hdr.bestfree[0].length)) { + if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) { free->bests[findex] = data->hdr.bestfree[0].length; logfree = 1; } @@ -1966,11 +1966,11 @@ xfs_dir2_node_trim_free( return 0; } free = bp->data; - ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); /* * If there are used entries, there's nothing to do. 
*/ - if (INT_GET(free->hdr.nused, ARCH_CONVERT) > 0) { + if (be32_to_cpu(free->hdr.nused) > 0) { xfs_da_brelse(tp, bp); *rvalp = 0; return 0; diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h index 0ab8fbd59512..c7c870ee7857 100644 --- a/fs/xfs/xfs_dir2_node.h +++ b/fs/xfs/xfs_dir2_node.h @@ -41,15 +41,15 @@ struct xfs_trans; #define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */ typedef struct xfs_dir2_free_hdr { - __uint32_t magic; /* XFS_DIR2_FREE_MAGIC */ - __int32_t firstdb; /* db of first entry */ - __int32_t nvalid; /* count of valid entries */ - __int32_t nused; /* count of used entries */ + __be32 magic; /* XFS_DIR2_FREE_MAGIC */ + __be32 firstdb; /* db of first entry */ + __be32 nvalid; /* count of valid entries */ + __be32 nused; /* count of used entries */ } xfs_dir2_free_hdr_t; typedef struct xfs_dir2_free { xfs_dir2_free_hdr_t hdr; /* block header */ - xfs_dir2_data_off_t bests[1]; /* best free counts */ + __be16 bests[1]; /* best free counts */ /* unused entries are -1 */ } xfs_dir2_free_t; -- cgit v1.2.3 From 68b3a1024a7cda4afaacb3d25e6ac234ddfc0834 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:27:19 +1100 Subject: [XFS] endianess annotations for XFS_DIR2_LEAF_BESTS_P SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25486a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_dir2_block.c | 4 ++-- fs/xfs/xfs_dir2_leaf.c | 30 +++++++++++++++--------------- fs/xfs/xfs_dir2_leaf.h | 5 ++--- fs/xfs/xfs_dir2_node.c | 4 ++-- 4 files changed, 21 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 35a03f29b2fb..e9d298f06ca2 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -880,7 +880,7 @@ xfs_dir2_leaf_to_block( xfs_dabuf_t *lbp, /* leaf buffer */ xfs_dabuf_t *dbp) /* data buffer */ { - xfs_dir2_data_off_t *bestsp; /* leaf bests table */ + __be16 *bestsp; /* leaf bests table */ xfs_dir2_block_t *block; /* block structure */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_inode_t *dp; /* incore directory inode */ @@ -914,7 +914,7 @@ xfs_dir2_leaf_to_block( */ while (dp->i_d.di_size > mp->m_dirblksize) { bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); - if (INT_GET(bestsp[INT_GET(ltp->bestcount, ARCH_CONVERT) - 1], ARCH_CONVERT) == + if (be16_to_cpu(bestsp[INT_GET(ltp->bestcount, ARCH_CONVERT) - 1]) == mp->m_dirblksize - (uint)sizeof(block->hdr)) { if ((error = xfs_dir2_leaf_trim_data(args, lbp, diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index bd425925ee24..568e0feca034 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -66,7 +66,7 @@ xfs_dir2_block_to_leaf( xfs_da_args_t *args, /* operation arguments */ xfs_dabuf_t *dbp) /* input block's buffer */ { - xfs_dir2_data_off_t *bestsp; /* leaf's bestsp entries */ + __be16 *bestsp; /* leaf's bestsp entries */ xfs_dablk_t blkno; /* leaf block's bno */ xfs_dir2_block_t *block; /* block structure */ xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */ @@ -163,7 +163,7 @@ int /* error */ xfs_dir2_leaf_addname( xfs_da_args_t *args) /* operation arguments */ { - xfs_dir2_data_off_t *bestsp; /* freespace table in leaf */ + __be16 *bestsp; /* freespace table in leaf */ int compact; /* need to compact leaves */ xfs_dir2_data_t *data; /* data block structure */ xfs_dabuf_t *dbp; /* data block buffer */ @@ -228,8 +228,8 @@ xfs_dir2_leaf_addname( continue; i = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); ASSERT(i < INT_GET(ltp->bestcount, ARCH_CONVERT)); - 
ASSERT(INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF); - if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) { + ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); + if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; break; } @@ -242,9 +242,9 @@ xfs_dir2_leaf_addname( /* * Remember a block we see that's missing. */ - if (INT_GET(bestsp[i], ARCH_CONVERT) == NULLDATAOFF && use_block == -1) + if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1) use_block = i; - else if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) { + else if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; break; } @@ -260,7 +260,7 @@ xfs_dir2_leaf_addname( * Now kill use_block if it refers to a missing block, so we * can use it as an indication of allocation needed. */ - if (use_block != -1 && INT_GET(bestsp[use_block], ARCH_CONVERT) == NULLDATAOFF) + if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF) use_block = -1; /* * If we don't have enough free bytes but we can make enough @@ -427,7 +427,7 @@ xfs_dir2_leaf_addname( * If the bests table needs to be changed, do it. * Log the change unless we've already done that. */ - if (INT_GET(bestsp[use_block], ARCH_CONVERT) != be16_to_cpu(data->hdr.bestfree[0].length)) { + if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) { bestsp[use_block] = data->hdr.bestfree[0].length; if (!grown) xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); @@ -1203,8 +1203,8 @@ xfs_dir2_leaf_log_bests( int first, /* first entry to log */ int last) /* last entry to log */ { - xfs_dir2_data_off_t *firstb; /* pointer to first entry */ - xfs_dir2_data_off_t *lastb; /* pointer to last entry */ + __be16 *firstb; /* pointer to first entry */ + __be16 *lastb; /* pointer to last entry */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ @@ -1437,7 +1437,7 @@ int /* error */ xfs_dir2_leaf_removename( xfs_da_args_t *args) /* operation arguments */ { - xfs_dir2_data_off_t *bestsp; /* leaf block best freespace */ + __be16 *bestsp; /* leaf block best freespace */ xfs_dir2_data_t *data; /* data block structure */ xfs_dir2_db_t db; /* data block number */ xfs_dabuf_t *dbp; /* data block buffer */ @@ -1480,7 +1480,7 @@ xfs_dir2_leaf_removename( oldbest = be16_to_cpu(data->hdr.bestfree[0].length); ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); - ASSERT(INT_GET(bestsp[db], ARCH_CONVERT) == oldbest); + ASSERT(be16_to_cpu(bestsp[db]) == oldbest); /* * Mark the former data entry unused. */ @@ -1542,7 +1542,7 @@ xfs_dir2_leaf_removename( * Look for the last active entry (i). */ for (i = db - 1; i > 0; i--) { - if (INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF) + if (be16_to_cpu(bestsp[i]) != NULLDATAOFF) break; } /* @@ -1555,7 +1555,7 @@ xfs_dir2_leaf_removename( xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1); } else - INT_SET(bestsp[db], ARCH_CONVERT, NULLDATAOFF); + bestsp[db] = cpu_to_be16(NULLDATAOFF); } /* * If the data block was not the first one, drop it. 
@@ -1684,7 +1684,7 @@ xfs_dir2_leaf_trim_data( xfs_dabuf_t *lbp, /* leaf buffer */ xfs_dir2_db_t db) /* data block number */ { - xfs_dir2_data_off_t *bestsp; /* leaf bests table */ + __be16 *bestsp; /* leaf bests table */ #ifdef DEBUG xfs_dir2_data_t *data; /* data block structure */ #endif diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h index 1393993d61e9..7fb32e954ce3 100644 --- a/fs/xfs/xfs_dir2_leaf.h +++ b/fs/xfs/xfs_dir2_leaf.h @@ -105,11 +105,10 @@ xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp) * Get address of the bests array in the single-leaf block. */ #define XFS_DIR2_LEAF_BESTS_P(ltp) xfs_dir2_leaf_bests_p(ltp) -static inline xfs_dir2_data_off_t * +static inline __be16 * xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp) { - return (xfs_dir2_data_off_t *) - (ltp) - INT_GET((ltp)->bestcount, ARCH_CONVERT); + return (__be16 *)(ltp) - INT_GET((ltp)->bestcount, ARCH_CONVERT); } /* diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index b051e2a09f31..ddbe453fade2 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -114,7 +114,7 @@ xfs_dir2_leaf_to_node( xfs_dabuf_t *fbp; /* freespace buffer */ xfs_dir2_db_t fdb; /* freespace block number */ xfs_dir2_free_t *free; /* freespace structure */ - xfs_dir2_data_off_t *from; /* pointer to freespace entry */ + __be16 *from; /* pointer to freespace entry */ int i; /* leaf freespace index */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ @@ -159,7 +159,7 @@ xfs_dir2_leaf_to_node( */ for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P(ltp), to = free->bests; i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++, from++, to++) { - if ((off = INT_GET(*from, ARCH_CONVERT)) != NULLDATAOFF) + if ((off = be16_to_cpu(*from)) != NULLDATAOFF) n++; *to = cpu_to_be16(off); } -- cgit v1.2.3 From afbcb3f91903bcd34d470efe64b3738257178667 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:27:28 +1100 Subject: [XFS] endianess annotations for xfs_dir2_leaf_tail_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25487a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_dir2_block.c | 4 ++-- fs/xfs/xfs_dir2_leaf.c | 34 +++++++++++++++++----------------- fs/xfs/xfs_dir2_leaf.h | 4 ++-- fs/xfs/xfs_dir2_node.c | 4 ++-- 4 files changed, 23 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index e9d298f06ca2..f70640c6b703 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -914,11 +914,11 @@ xfs_dir2_leaf_to_block( */ while (dp->i_d.di_size > mp->m_dirblksize) { bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); - if (be16_to_cpu(bestsp[INT_GET(ltp->bestcount, ARCH_CONVERT) - 1]) == + if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == mp->m_dirblksize - (uint)sizeof(block->hdr)) { if ((error = xfs_dir2_leaf_trim_data(args, lbp, - (xfs_dir2_db_t)(INT_GET(ltp->bestcount, ARCH_CONVERT) - 1)))) + (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) goto out; } else { error = 0; diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 568e0feca034..9abecd207c4e 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -141,7 +141,7 @@ xfs_dir2_block_to_leaf( * Set up leaf tail and bests table. 
*/ ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); - INT_SET(ltp->bestcount, ARCH_CONVERT, 1); + ltp->bestcount = cpu_to_be32(1); bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); bestsp[0] = block->hdr.bestfree[0].length; /* @@ -227,7 +227,7 @@ xfs_dir2_leaf_addname( if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) continue; i = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); - ASSERT(i < INT_GET(ltp->bestcount, ARCH_CONVERT)); + ASSERT(i < be32_to_cpu(ltp->bestcount)); ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; @@ -238,7 +238,7 @@ xfs_dir2_leaf_addname( * Didn't find a block yet, linear search all the data blocks. */ if (use_block == -1) { - for (i = 0; i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++) { + for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) { /* * Remember a block we see that's missing. */ @@ -358,13 +358,13 @@ xfs_dir2_leaf_addname( * If we're adding a new data block on the end we need to * extend the bests table. Copy it up one entry. */ - if (use_block >= INT_GET(ltp->bestcount, ARCH_CONVERT)) { + if (use_block >= be32_to_cpu(ltp->bestcount)) { bestsp--; memmove(&bestsp[0], &bestsp[1], - INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(bestsp[0])); - INT_MOD(ltp->bestcount, ARCH_CONVERT, +1); + be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0])); + be32_add(<p->bestcount, 1); xfs_dir2_leaf_log_tail(tp, lbp); - xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1); + xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); } /* * If we're filling in a previously empty block just log it. @@ -1537,7 +1537,7 @@ xfs_dir2_leaf_removename( * If this is the last data block then compact the * bests table by getting rid of entries. */ - if (db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1) { + if (db == be32_to_cpu(ltp->bestcount) - 1) { /* * Look for the last active entry (i). */ @@ -1550,10 +1550,10 @@ xfs_dir2_leaf_removename( * end are removed. */ memmove(&bestsp[db - i], bestsp, - (INT_GET(ltp->bestcount, ARCH_CONVERT) - (db - i)) * sizeof(*bestsp)); - INT_MOD(ltp->bestcount, ARCH_CONVERT, -(db - i)); + (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp)); + be32_add(<p->bestcount, -(db - i)); xfs_dir2_leaf_log_tail(tp, lbp); - xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1); + xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); } else bestsp[db] = cpu_to_be16(NULLDATAOFF); } @@ -1719,7 +1719,7 @@ xfs_dir2_leaf_trim_data( ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == mp->m_dirblksize - (uint)sizeof(data->hdr)); - ASSERT(db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1); + ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); /* * Get rid of the data block. */ @@ -1732,10 +1732,10 @@ xfs_dir2_leaf_trim_data( * Eliminate the last bests entry from the table. */ bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); - INT_MOD(ltp->bestcount, ARCH_CONVERT, -1); - memmove(&bestsp[1], &bestsp[0], INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(*bestsp)); + be32_add(<p->bestcount, -1); + memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); xfs_dir2_leaf_log_tail(tp, lbp); - xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1); + xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); return 0; } @@ -1848,8 +1848,8 @@ xfs_dir2_node_to_leaf( * Set up the leaf bests table. 
*/ memcpy(XFS_DIR2_LEAF_BESTS_P(ltp), free->bests, - INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(leaf->bests[0])); - xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1); + be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0])); + xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_check(dp, lbp); /* diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h index 7fb32e954ce3..fcd3b7dea0f6 100644 --- a/fs/xfs/xfs_dir2_leaf.h +++ b/fs/xfs/xfs_dir2_leaf.h @@ -62,7 +62,7 @@ typedef struct xfs_dir2_leaf_entry { * Leaf block tail. */ typedef struct xfs_dir2_leaf_tail { - __uint32_t bestcount; + __be32 bestcount; } xfs_dir2_leaf_tail_t; /* @@ -108,7 +108,7 @@ xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp) static inline __be16 * xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp) { - return (__be16 *)(ltp) - INT_GET((ltp)->bestcount, ARCH_CONVERT); + return (__be16 *)ltp - be32_to_cpu(ltp->bestcount); } /* diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index ddbe453fade2..c32894cb6223 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -151,14 +151,14 @@ xfs_dir2_leaf_to_node( */ free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); free->hdr.firstdb = 0; - ASSERT(INT_GET(ltp->bestcount, ARCH_CONVERT) <= (uint)dp->i_d.di_size / mp->m_dirblksize); + ASSERT(be32_to_cpu(ltp->bestcount) <= (uint)dp->i_d.di_size / mp->m_dirblksize); free->hdr.nvalid = ltp->bestcount; /* * Copy freespace entries from the leaf block to the new block. * Count active entries. */ for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P(ltp), to = free->bests; - i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++, from++, to++) { + i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { if ((off = be16_to_cpu(*from)) != NULLDATAOFF) n++; *to = cpu_to_be16(off); -- cgit v1.2.3 From ad354eb34eb354eedc483d1e89e17710165bd2db Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:27:37 +1100 Subject: [XFS] endianess annotations for xfs_dir2_data_unused_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25489a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_dir2_block.c | 27 ++++++++++---------- fs/xfs/xfs_dir2_data.c | 68 ++++++++++++++++++++++++------------------------- fs/xfs/xfs_dir2_data.h | 8 +++--- fs/xfs/xfs_dir2_leaf.c | 12 ++++----- fs/xfs/xfs_dir2_sf.c | 4 +-- 5 files changed, 58 insertions(+), 61 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index f70640c6b703..8b8aed77acd1 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -130,7 +130,7 @@ xfs_dir2_block_addname( * the space before the first leaf entry needs to be free so it * can be expanded to hold the pointer to the new entry. */ - if (INT_GET(enddup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) + if (be16_to_cpu(enddup->freetag) != XFS_DIR2_DATA_FREE_TAG) dup = enddup = NULL; /* * Check out the biggest freespace and see if it's the same one. @@ -143,7 +143,7 @@ xfs_dir2_block_addname( * It is the biggest freespace, is it too small * to hold the new leaf too? */ - if (INT_GET(dup->length, ARCH_CONVERT) < len + (uint)sizeof(*blp)) { + if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) { /* * Yes, we use the second-largest * entry instead if it works. @@ -160,7 +160,7 @@ xfs_dir2_block_addname( * Not the same free entry, * just check its length. 
*/ - if (INT_GET(dup->length, ARCH_CONVERT) < len) { + if (be16_to_cpu(dup->length) < len) { dup = NULL; } } @@ -192,8 +192,8 @@ xfs_dir2_block_addname( * If it's not free then the data will go where the * leaf data starts now, if it works at all. */ - if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { - if (INT_GET(dup->length, ARCH_CONVERT) + (INT_GET(btp->stale, ARCH_CONVERT) - 1) * + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + if (be16_to_cpu(dup->length) + (INT_GET(btp->stale, ARCH_CONVERT) - 1) * (uint)sizeof(*blp) < len) dup = NULL; } else if ((INT_GET(btp->stale, ARCH_CONVERT) - 1) * (uint)sizeof(*blp) < len) @@ -310,7 +310,7 @@ xfs_dir2_block_addname( */ xfs_dir2_data_use_free(tp, bp, enddup, (xfs_dir2_data_aoff_t) - ((char *)enddup - (char *)block + INT_GET(enddup->length, ARCH_CONVERT) - + ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) - sizeof(*blp)), (xfs_dir2_data_aoff_t)sizeof(*blp), &needlog, &needscan); @@ -484,8 +484,8 @@ xfs_dir2_block_getdents( /* * Unused, skip it. */ - if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { - ptr += INT_GET(dup->length, ARCH_CONVERT); + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + ptr += be16_to_cpu(dup->length); continue; } @@ -948,7 +948,8 @@ xfs_dir2_leaf_to_block( /* * If it's not free or is too short we can't do it. */ - if (INT_GET(dup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG || INT_GET(dup->length, ARCH_CONVERT) < size) { + if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG || + be16_to_cpu(dup->length) < size) { error = 0; goto out; } @@ -1122,7 +1123,7 @@ xfs_dir2_sf_to_block( */ xfs_dir2_data_use_free(tp, bp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), - INT_GET(dup->length, ARCH_CONVERT), &needlog, &needscan); + be16_to_cpu(dup->length), &needlog, &needscan); /* * Create entry for . */ @@ -1175,15 +1176,15 @@ xfs_dir2_sf_to_block( if (offset < newoffset) { dup = (xfs_dir2_data_unused_t *) ((char *)block + offset); - INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); - INT_SET(dup->length, ARCH_CONVERT, newoffset - offset); + dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + dup->length = cpu_to_be16(newoffset - offset); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT, (xfs_dir2_data_off_t) ((char *)dup - (char *)block)); xfs_dir2_data_log_unused(tp, bp, dup); (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, dup, &dummy); - offset += INT_GET(dup->length, ARCH_CONVERT); + offset += be16_to_cpu(dup->length); continue; } /* diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index fd6b7c03018a..48b4906c8e80 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -108,7 +108,7 @@ xfs_dir2_data_check( * If we find it, account for that, else make sure it * doesn't need to be there. 
*/ - if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT(lastfree == 0); ASSERT(INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT) == (char *)dup - (char *)d); @@ -118,10 +118,10 @@ xfs_dir2_data_check( ASSERT((freeseen & (1 << i)) == 0); freeseen |= 1 << i; } else { - ASSERT(INT_GET(dup->length, ARCH_CONVERT) <= + ASSERT(be16_to_cpu(dup->length) <= be16_to_cpu(bf[2].length)); } - p += INT_GET(dup->length, ARCH_CONVERT); + p += be16_to_cpu(dup->length); lastfree = 1; continue; } @@ -205,12 +205,12 @@ xfs_dir2_data_freefind( ASSERT(seenzero == 0); if (be16_to_cpu(dfp->offset) == off) { matched = 1; - ASSERT(be16_to_cpu(dfp->length) == INT_GET(dup->length, ARCH_CONVERT)); + ASSERT(dfp->length == dup->length); } else if (off < be16_to_cpu(dfp->offset)) - ASSERT(off + INT_GET(dup->length, ARCH_CONVERT) <= be16_to_cpu(dfp->offset)); + ASSERT(off + be16_to_cpu(dup->length) <= be16_to_cpu(dfp->offset)); else ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); - ASSERT(matched || be16_to_cpu(dfp->length) >= INT_GET(dup->length, ARCH_CONVERT)); + ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); if (dfp > &d->hdr.bestfree[0]) ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); } @@ -219,7 +219,7 @@ xfs_dir2_data_freefind( * If this is smaller than the smallest bestfree entry, * it can't be there since they're sorted. */ - if (INT_GET(dup->length, ARCH_CONVERT) < + if (be16_to_cpu(dup->length) < be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) return NULL; /* @@ -364,11 +364,11 @@ xfs_dir2_data_freescan( /* * If it's a free entry, insert it. */ - if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT((char *)dup - (char *)d == INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT)); xfs_dir2_data_freeinsert(d, dup, loghead); - p += INT_GET(dup->length, ARCH_CONVERT); + p += be16_to_cpu(dup->length); } /* * For active entries, check their tags and skip them. @@ -428,11 +428,11 @@ xfs_dir2_data_init( * Set up an unused entry for the block's body. */ dup = &d->u[0].unused; - INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); + dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); t=mp->m_dirblksize - (uint)sizeof(d->hdr); d->hdr.bestfree[0].length = cpu_to_be16(t); - INT_SET(dup->length, ARCH_CONVERT, t); + dup->length = cpu_to_be16(t); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dup - (char *)d)); /* @@ -554,7 +554,7 @@ xfs_dir2_data_make_free( tagp = (xfs_dir2_data_off_t *)((char *)d + offset) - 1; prevdup = (xfs_dir2_data_unused_t *)((char *)d + INT_GET(*tagp, ARCH_CONVERT)); - if (INT_GET(prevdup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) + if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) prevdup = NULL; } else prevdup = NULL; @@ -565,7 +565,7 @@ xfs_dir2_data_make_free( if ((char *)d + offset + len < endptr) { postdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); - if (INT_GET(postdup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) + if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) postdup = NULL; } else postdup = NULL; @@ -593,7 +593,7 @@ xfs_dir2_data_make_free( /* * Fix up the new big freespace. 
*/ - INT_MOD(prevdup->length, ARCH_CONVERT, len + INT_GET(postdup->length, ARCH_CONVERT)); + be16_add(&prevdup->length, len + be16_to_cpu(postdup->length)); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup), ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)prevdup - (char *)d)); xfs_dir2_data_log_unused(tp, bp, prevdup); @@ -617,7 +617,7 @@ xfs_dir2_data_make_free( */ dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp); ASSERT(dfp == &d->hdr.bestfree[0]); - ASSERT(be16_to_cpu(dfp->length) == INT_GET(prevdup->length, ARCH_CONVERT)); + ASSERT(dfp->length == prevdup->length); ASSERT(!dfp[1].length); ASSERT(!dfp[2].length); } @@ -627,7 +627,7 @@ xfs_dir2_data_make_free( */ else if (prevdup) { dfp = xfs_dir2_data_freefind(d, prevdup); - INT_MOD(prevdup->length, ARCH_CONVERT, len); + be16_add(&prevdup->length, len); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup), ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)prevdup - (char *)d)); xfs_dir2_data_log_unused(tp, bp, prevdup); @@ -644,7 +644,7 @@ xfs_dir2_data_make_free( * Otherwise we need a scan if the new entry is big enough. */ else { - needscan = INT_GET(prevdup->length, ARCH_CONVERT) > + needscan = be16_to_cpu(prevdup->length) > be16_to_cpu(d->hdr.bestfree[2].length); } } @@ -654,8 +654,8 @@ xfs_dir2_data_make_free( else if (postdup) { dfp = xfs_dir2_data_freefind(d, postdup); newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); - INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); - INT_SET(newdup->length, ARCH_CONVERT, len + INT_GET(postdup->length, ARCH_CONVERT)); + newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); xfs_dir2_data_log_unused(tp, bp, newdup); @@ -672,7 +672,7 @@ xfs_dir2_data_make_free( * Otherwise we need a scan if the new entry is big enough. */ else { - needscan = INT_GET(newdup->length, ARCH_CONVERT) > + needscan = be16_to_cpu(newdup->length) > be16_to_cpu(d->hdr.bestfree[2].length); } } @@ -681,8 +681,8 @@ xfs_dir2_data_make_free( */ else { newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); - INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); - INT_SET(newdup->length, ARCH_CONVERT, len); + newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + newdup->length = cpu_to_be16(len); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); xfs_dir2_data_log_unused(tp, bp, newdup); @@ -716,15 +716,15 @@ xfs_dir2_data_use_free( d = bp->data; ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - ASSERT(INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG); + ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); ASSERT(offset >= (char *)dup - (char *)d); - ASSERT(offset + len <= (char *)dup + INT_GET(dup->length, ARCH_CONVERT) - (char *)d); + ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); ASSERT((char *)dup - (char *)d == INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT)); /* * Look up the entry in the bestfree table. */ dfp = xfs_dir2_data_freefind(d, dup); - oldlen = INT_GET(dup->length, ARCH_CONVERT); + oldlen = be16_to_cpu(dup->length); ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length)); /* * Check for alignment with front and back of the entry. 
@@ -750,8 +750,8 @@ xfs_dir2_data_use_free( */ else if (matchfront) { newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); - INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); - INT_SET(newdup->length, ARCH_CONVERT, oldlen - len); + newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + newdup->length = cpu_to_be16(oldlen - len); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); xfs_dir2_data_log_unused(tp, bp, newdup); @@ -762,7 +762,7 @@ xfs_dir2_data_use_free( xfs_dir2_data_freeremove(d, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); ASSERT(dfp != NULL); - ASSERT(be16_to_cpu(dfp->length) == INT_GET(newdup->length, ARCH_CONVERT)); + ASSERT(dfp->length == newdup->length); ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); /* * If we got inserted at the last slot, @@ -778,8 +778,7 @@ xfs_dir2_data_use_free( */ else if (matchback) { newdup = dup; - INT_SET(newdup->length, ARCH_CONVERT, (xfs_dir2_data_off_t) - (((char *)d + offset) - (char *)newdup)); + newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); xfs_dir2_data_log_unused(tp, bp, newdup); @@ -790,7 +789,7 @@ xfs_dir2_data_use_free( xfs_dir2_data_freeremove(d, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); ASSERT(dfp != NULL); - ASSERT(be16_to_cpu(dfp->length) == INT_GET(newdup->length, ARCH_CONVERT)); + ASSERT(dfp->length == newdup->length); ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); /* * If we got inserted at the last slot, @@ -806,14 +805,13 @@ xfs_dir2_data_use_free( */ else { newdup = dup; - INT_SET(newdup->length, ARCH_CONVERT, (xfs_dir2_data_off_t) - (((char *)d + offset) - (char *)newdup)); + newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); xfs_dir2_data_log_unused(tp, bp, newdup); newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); - INT_SET(newdup2->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); - INT_SET(newdup2->length, ARCH_CONVERT, oldlen - len - INT_GET(newdup->length, ARCH_CONVERT)); + newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); + newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup2), ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)newdup2 - (char *)d)); xfs_dir2_data_log_unused(tp, bp, newdup2); diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h index dd8b86523635..28adddb48e88 100644 --- a/fs/xfs/xfs_dir2_data.h +++ b/fs/xfs/xfs_dir2_data.h @@ -97,10 +97,10 @@ typedef struct xfs_dir2_data_entry { * Tag appears as the last 2 bytes. 
*/ typedef struct xfs_dir2_data_unused { - __uint16_t freetag; /* XFS_DIR2_DATA_FREE_TAG */ - xfs_dir2_data_off_t length; /* total free length */ + __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */ + __be16 length; /* total free length */ /* variable offset */ - xfs_dir2_data_off_t tag; /* starting offset of us */ + __be16 tag; /* starting offset of us */ } xfs_dir2_data_unused_t; typedef union { @@ -151,7 +151,7 @@ static inline xfs_dir2_data_off_t * xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup) { return (xfs_dir2_data_off_t *) \ - ((char *)(dup) + INT_GET((dup)->length, ARCH_CONVERT) \ + ((char *)(dup) + be16_to_cpu((dup)->length) \ - (uint)sizeof(xfs_dir2_data_off_t)); } diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 9abecd207c4e..cbd371d9e98d 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -395,7 +395,7 @@ xfs_dir2_leaf_addname( */ dup = (xfs_dir2_data_unused_t *) ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); - ASSERT(INT_GET(dup->length, ARCH_CONVERT) >= length); + ASSERT(be16_to_cpu(dup->length) >= length); needscan = needlog = 0; /* * Mark the initial part of our freespace in use for the new entry. @@ -1047,11 +1047,10 @@ xfs_dir2_leaf_getdents( while ((char *)ptr - (char *)data < byteoff) { dup = (xfs_dir2_data_unused_t *)ptr; - if (INT_GET(dup->freetag, ARCH_CONVERT) + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - length = INT_GET(dup->length, - ARCH_CONVERT); + length = be16_to_cpu(dup->length); ptr += length; continue; } @@ -1080,9 +1079,8 @@ xfs_dir2_leaf_getdents( /* * No, it's unused, skip over it. */ - if (INT_GET(dup->freetag, ARCH_CONVERT) - == XFS_DIR2_DATA_FREE_TAG) { - length = INT_GET(dup->length, ARCH_CONVERT); + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + length = be16_to_cpu(dup->length); ptr += length; curoff += length; continue; diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index ec8e7476c8b7..3a571d8cf1f3 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -220,8 +220,8 @@ xfs_dir2_block_to_sf( * If it's unused, just skip over it. 
*/ dup = (xfs_dir2_data_unused_t *)ptr; - if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { - ptr += INT_GET(dup->length, ARCH_CONVERT); + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + ptr += be16_to_cpu(dup->length); continue; } dep = (xfs_dir2_data_entry_t *)ptr; -- cgit v1.2.3 From 1fba9f7fe2164553557e26583e6feb5299cf9f76 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:27:47 +1100 Subject: [XFS] endianess annotations for XFS_DIR2_DATA_UNUSED_TAG_P SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25490a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_dir2_block.c | 3 +-- fs/xfs/xfs_dir2_data.c | 41 ++++++++++++++++++++--------------------- fs/xfs/xfs_dir2_data.h | 7 +++---- 3 files changed, 24 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 8b8aed77acd1..2c1fcdc7a762 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -1178,8 +1178,7 @@ xfs_dir2_sf_to_block( ((char *)block + offset); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); dup->length = cpu_to_be16(newoffset - offset); - INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT, - (xfs_dir2_data_off_t) + *XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16( ((char *)dup - (char *)block)); xfs_dir2_data_log_unused(tp, bp, dup); (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 48b4906c8e80..acb61132dec4 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -110,7 +110,7 @@ xfs_dir2_data_check( */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT(lastfree == 0); - ASSERT(INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT) == + ASSERT(be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)) == (char *)dup - (char *)d); dfp = xfs_dir2_data_freefind(d, dup); if (dfp) { @@ -366,7 +366,7 @@ xfs_dir2_data_freescan( */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT((char *)dup - (char *)d == - INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT)); + be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup))); xfs_dir2_data_freeinsert(d, dup, loghead); p += be16_to_cpu(dup->length); } @@ -433,8 +433,7 @@ xfs_dir2_data_init( t=mp->m_dirblksize - (uint)sizeof(d->hdr); d->hdr.bestfree[0].length = cpu_to_be16(t); dup->length = cpu_to_be16(t); - INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT, - (xfs_dir2_data_off_t)((char *)dup - (char *)d)); + *XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16((char *)dup - (char *)d); /* * Log it and return it. */ @@ -594,8 +593,8 @@ xfs_dir2_data_make_free( * Fix up the new big freespace. 
*/ be16_add(&prevdup->length, len + be16_to_cpu(postdup->length)); - INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup), ARCH_CONVERT, - (xfs_dir2_data_off_t)((char *)prevdup - (char *)d)); + *XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) = + cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); if (!needscan) { /* @@ -628,8 +627,8 @@ xfs_dir2_data_make_free( else if (prevdup) { dfp = xfs_dir2_data_freefind(d, prevdup); be16_add(&prevdup->length, len); - INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup), ARCH_CONVERT, - (xfs_dir2_data_off_t)((char *)prevdup - (char *)d)); + *XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) = + cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); /* * If the previous entry was in the table, the new entry @@ -656,8 +655,8 @@ xfs_dir2_data_make_free( newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); - INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT, - (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); + *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If the following entry was in the table, the new entry @@ -683,8 +682,8 @@ xfs_dir2_data_make_free( newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len); - INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT, - (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); + *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); } @@ -719,7 +718,7 @@ xfs_dir2_data_use_free( ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); ASSERT(offset >= (char *)dup - (char *)d); ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); - ASSERT((char *)dup - (char *)d == INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT)); + ASSERT((char *)dup - (char *)d == be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup))); /* * Look up the entry in the bestfree table. */ @@ -752,8 +751,8 @@ xfs_dir2_data_use_free( newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(oldlen - len); - INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT, - (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); + *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If it was in the table, remove it and add the new one. @@ -779,8 +778,8 @@ xfs_dir2_data_use_free( else if (matchback) { newdup = dup; newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); - INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT, - (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); + *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If it was in the table, remove it and add the new one. 
@@ -806,14 +805,14 @@ xfs_dir2_data_use_free( else { newdup = dup; newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); - INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT, - (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); + *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = + cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); - INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup2), ARCH_CONVERT, - (xfs_dir2_data_off_t)((char *)newdup2 - (char *)d)); + *XFS_DIR2_DATA_UNUSED_TAG_P(newdup2) = + cpu_to_be16((char *)newdup2 - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup2); /* * If the old entry was in the table, we need to scan diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h index 28adddb48e88..479b59f50dbc 100644 --- a/fs/xfs/xfs_dir2_data.h +++ b/fs/xfs/xfs_dir2_data.h @@ -147,12 +147,11 @@ xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep) */ #define XFS_DIR2_DATA_UNUSED_TAG_P(dup) \ xfs_dir2_data_unused_tag_p(dup) -static inline xfs_dir2_data_off_t * +static inline __be16 * xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup) { - return (xfs_dir2_data_off_t *) \ - ((char *)(dup) + be16_to_cpu((dup)->length) \ - - (uint)sizeof(xfs_dir2_data_off_t)); + return (__be16 *)((char *)dup + + be16_to_cpu(dup->length) - sizeof(__be16)); } /* -- cgit v1.2.3 From e922fffa4188ef6207cd3afef7f4d33bf4a9ca64 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:27:56 +1100 Subject: [XFS] endianess annotations for xfs_dir2_block_tail_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25491a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_dir2_block.c | 52 ++++++++++++++++++++++++------------------------- fs/xfs/xfs_dir2_block.h | 7 +++---- fs/xfs/xfs_dir2_data.c | 10 +++++----- fs/xfs/xfs_dir2_leaf.c | 6 +++--- fs/xfs/xfs_dir2_sf.c | 2 +- 5 files changed, 38 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 2c1fcdc7a762..6f3b99dfe55c 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -193,10 +193,10 @@ xfs_dir2_block_addname( * leaf data starts now, if it works at all. 
*/ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - if (be16_to_cpu(dup->length) + (INT_GET(btp->stale, ARCH_CONVERT) - 1) * + if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) dup = NULL; - } else if ((INT_GET(btp->stale, ARCH_CONVERT) - 1) * (uint)sizeof(*blp) < len) + } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) dup = NULL; else dup = (xfs_dir2_data_unused_t *)blp; @@ -242,7 +242,7 @@ xfs_dir2_block_addname( int fromidx; /* source leaf index */ int toidx; /* target leaf index */ - for (fromidx = toidx = INT_GET(btp->count, ARCH_CONVERT) - 1, + for (fromidx = toidx = be32_to_cpu(btp->count) - 1, highstale = lfloghigh = -1; fromidx >= 0; fromidx--) { @@ -259,15 +259,15 @@ xfs_dir2_block_addname( blp[toidx] = blp[fromidx]; toidx--; } - lfloglow = toidx + 1 - (INT_GET(btp->stale, ARCH_CONVERT) - 1); - lfloghigh -= INT_GET(btp->stale, ARCH_CONVERT) - 1; - INT_MOD(btp->count, ARCH_CONVERT, -(INT_GET(btp->stale, ARCH_CONVERT) - 1)); + lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); + lfloghigh -= be32_to_cpu(btp->stale) - 1; + be32_add(&btp->count, -(be32_to_cpu(btp->stale) - 1)); xfs_dir2_data_make_free(tp, bp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), - (xfs_dir2_data_aoff_t)((INT_GET(btp->stale, ARCH_CONVERT) - 1) * sizeof(*blp)), + (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), &needlog, &needscan); - blp += INT_GET(btp->stale, ARCH_CONVERT) - 1; - INT_SET(btp->stale, ARCH_CONVERT, 1); + blp += be32_to_cpu(btp->stale) - 1; + btp->stale = cpu_to_be32(1); /* * If we now need to rebuild the bestfree map, do so. * This needs to happen before the next call to use_free. @@ -282,14 +282,14 @@ xfs_dir2_block_addname( * Set leaf logging boundaries to impossible state. * For the no-stale case they're set explicitly. */ - else if (INT_GET(btp->stale, ARCH_CONVERT)) { - lfloglow = INT_GET(btp->count, ARCH_CONVERT); + else if (btp->stale) { + lfloglow = be32_to_cpu(btp->count); lfloghigh = -1; } /* * Find the slot that's first lower than our hash value, -1 if none. */ - for (low = 0, high = INT_GET(btp->count, ARCH_CONVERT) - 1; low <= high; ) { + for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) { mid = (low + high) >> 1; if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval) break; @@ -317,7 +317,7 @@ xfs_dir2_block_addname( /* * Update the tail (entry count). */ - INT_MOD(btp->count, ARCH_CONVERT, +1); + be32_add(&btp->count, 1); /* * If we now need to rebuild the bestfree map, do so. * This needs to happen before the next call to use_free. @@ -349,7 +349,7 @@ xfs_dir2_block_addname( lowstale--) continue; for (highstale = mid + 1; - highstale < INT_GET(btp->count, ARCH_CONVERT) && + highstale < be32_to_cpu(btp->count) && INT_GET(blp[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || mid - lowstale > highstale - mid); highstale++) @@ -358,7 +358,7 @@ xfs_dir2_block_addname( * Move entries toward the low-numbered stale entry. */ if (lowstale >= 0 && - (highstale == INT_GET(btp->count, ARCH_CONVERT) || + (highstale == be32_to_cpu(btp->count) || mid - lowstale <= highstale - mid)) { if (mid - lowstale) memmove(&blp[lowstale], &blp[lowstale + 1], @@ -370,7 +370,7 @@ xfs_dir2_block_addname( * Move entries toward the high-numbered stale entry. 
*/ else { - ASSERT(highstale < INT_GET(btp->count, ARCH_CONVERT)); + ASSERT(highstale < be32_to_cpu(btp->count)); mid++; if (highstale - mid) memmove(&blp[mid + 1], &blp[mid], @@ -378,7 +378,7 @@ xfs_dir2_block_addname( lfloglow = MIN(mid, lfloglow); lfloghigh = MAX(highstale, lfloghigh); } - INT_MOD(btp->stale, ARCH_CONVERT, -1); + be32_add(&btp->stale, -1); } /* * Point to the new data entry. @@ -673,7 +673,7 @@ xfs_dir2_block_lookup_int( * Loop doing a binary search for our hash value. * Find our entry, ENOENT if it's not there. */ - for (low = 0, high = INT_GET(btp->count, ARCH_CONVERT) - 1; ; ) { + for (low = 0, high = be32_to_cpu(btp->count) - 1; ; ) { ASSERT(low <= high); mid = (low + high) >> 1; if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval) @@ -716,7 +716,7 @@ xfs_dir2_block_lookup_int( *entno = mid; return 0; } - } while (++mid < INT_GET(btp->count, ARCH_CONVERT) && INT_GET(blp[mid].hashval, ARCH_CONVERT) == hash); + } while (++mid < be32_to_cpu(btp->count) && INT_GET(blp[mid].hashval, ARCH_CONVERT) == hash); /* * No match, release the buffer and return ENOENT. */ @@ -777,7 +777,7 @@ xfs_dir2_block_removename( /* * Fix up the block tail. */ - INT_MOD(btp->stale, ARCH_CONVERT, +1); + be32_add(&btp->stale, 1); xfs_dir2_block_log_tail(tp, bp); /* * Remove the leaf entry by marking it stale. @@ -968,7 +968,7 @@ xfs_dir2_leaf_to_block( * Initialize the block tail. */ btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - INT_SET(btp->count, ARCH_CONVERT, INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)); + btp->count = cpu_to_be32(INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)); btp->stale = 0; xfs_dir2_block_log_tail(tp, dbp); /* @@ -980,8 +980,8 @@ xfs_dir2_leaf_to_block( continue; lep[to++] = leaf->ents[from]; } - ASSERT(to == INT_GET(btp->count, ARCH_CONVERT)); - xfs_dir2_block_log_leaf(tp, dbp, 0, INT_GET(btp->count, ARCH_CONVERT) - 1); + ASSERT(to == be32_to_cpu(btp->count)); + xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1); /* * Scan the bestfree if we need it and log the data block header. */ @@ -1114,7 +1114,7 @@ xfs_dir2_sf_to_block( * Fill in the tail. */ btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - INT_SET(btp->count, ARCH_CONVERT, INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2); /* ., .. */ + btp->count = cpu_to_be32(INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2); /* ., .. */ btp->stale = 0; blp = XFS_DIR2_BLOCK_LEAF_P(btp); endoffset = (uint)((char *)blp - (char *)block); @@ -1211,13 +1211,13 @@ xfs_dir2_sf_to_block( /* * Sort the leaf entries by hash value. */ - xfs_sort(blp, INT_GET(btp->count, ARCH_CONVERT), sizeof(*blp), xfs_dir2_block_sort); + xfs_sort(blp, be32_to_cpu(btp->count), sizeof(*blp), xfs_dir2_block_sort); /* * Log the leaf entry area and tail. * Already logged the header in data_init, ignore needlog. 
*/ ASSERT(needscan == 0); - xfs_dir2_block_log_leaf(tp, bp, 0, INT_GET(btp->count, ARCH_CONVERT) - 1); + xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1); xfs_dir2_block_log_tail(tp, bp); xfs_dir2_data_check(dp, bp); xfs_da_buf_done(bp); diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h index a2e5cb98a838..6722effd0b20 100644 --- a/fs/xfs/xfs_dir2_block.h +++ b/fs/xfs/xfs_dir2_block.h @@ -43,8 +43,8 @@ struct xfs_trans; #define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: for one block dirs */ typedef struct xfs_dir2_block_tail { - __uint32_t count; /* count of leaf entries */ - __uint32_t stale; /* count of stale lf entries */ + __be32 count; /* count of leaf entries */ + __be32 stale; /* count of stale lf entries */ } xfs_dir2_block_tail_t; /* @@ -75,8 +75,7 @@ xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block) static inline struct xfs_dir2_leaf_entry * xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp) { - return (((struct xfs_dir2_leaf_entry *) - (btp)) - INT_GET((btp)->count, ARCH_CONVERT)); + return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); } /* diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index acb61132dec4..0af757873be5 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -143,12 +143,12 @@ xfs_dir2_data_check( (xfs_dir2_data_aoff_t) ((char *)dep - (char *)d)); hash = xfs_da_hashname((char *)dep->name, dep->namelen); - for (i = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) { + for (i = 0; i < be32_to_cpu(btp->count); i++) { if (INT_GET(lep[i].address, ARCH_CONVERT) == addr && INT_GET(lep[i].hashval, ARCH_CONVERT) == hash) break; } - ASSERT(i < INT_GET(btp->count, ARCH_CONVERT)); + ASSERT(i < be32_to_cpu(btp->count)); } p += XFS_DIR2_DATA_ENTSIZE(dep->namelen); } @@ -157,14 +157,14 @@ xfs_dir2_data_check( */ ASSERT(freeseen == 7); if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { - for (i = stale = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) { + for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) stale++; if (i > 0) ASSERT(INT_GET(lep[i].hashval, ARCH_CONVERT) >= INT_GET(lep[i - 1].hashval, ARCH_CONVERT)); } - ASSERT(count == INT_GET(btp->count, ARCH_CONVERT) - INT_GET(btp->stale, ARCH_CONVERT)); - ASSERT(stale == INT_GET(btp->stale, ARCH_CONVERT)); + ASSERT(count == be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); + ASSERT(stale == be32_to_cpu(btp->stale)); } } #endif diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index cbd371d9e98d..63e81dc2737e 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -111,13 +111,13 @@ xfs_dir2_block_to_leaf( /* * Set the counts in the leaf header. */ - INT_COPY(leaf->hdr.count, btp->count, ARCH_CONVERT); /* INT_: type change */ - INT_COPY(leaf->hdr.stale, btp->stale, ARCH_CONVERT); /* INT_: type change */ + leaf->hdr.count = btp->count; + leaf->hdr.stale = btp->stale; /* * Could compact these but I think we always do the conversion * after squeezing out stale entries. 
*/ - memcpy(leaf->ents, blp, INT_GET(btp->count, ARCH_CONVERT) * sizeof(xfs_dir2_leaf_entry_t)); + memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); xfs_dir2_leaf_log_ents(tp, lbp, 0, INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1); needscan = 0; needlog = 1; diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 3a571d8cf1f3..35dd003051c5 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -98,7 +98,7 @@ xfs_dir2_block_sfsize( /* * Iterate over the block's data entries by using the leaf pointers. */ - for (i = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) { + for (i = 0; i < be32_to_cpu(btp->count); i++) { if ((addr = INT_GET(blp[i].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR) continue; /* -- cgit v1.2.3 From a818e5de7e21ddaa7352bb8c9fc785c7b4f3019f Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:28:07 +1100 Subject: [XFS] endianess annotations for xfs_dir2_leaf_hdr_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25492a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_da_btree.c | 4 +- fs/xfs/xfs_dir2_block.c | 6 +-- fs/xfs/xfs_dir2_leaf.c | 74 ++++++++++++++++---------------- fs/xfs/xfs_dir2_leaf.h | 4 +- fs/xfs/xfs_dir2_node.c | 112 ++++++++++++++++++++++++------------------------ 5 files changed, 100 insertions(+), 100 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 2f4acb9d9b9c..c443cbaaaa92 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -366,7 +366,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(XFS_DIR_IS_V2(mp)); ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); leaf = (xfs_dir2_leaf_t *)oldroot; - size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] - + size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - (char *)leaf); } memcpy(node, oldroot, size); @@ -1798,7 +1798,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, ASSERT(XFS_DIR_IS_V2(mp)); dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; dead_level = 0; - dead_hash = INT_GET(dead_leaf2->ents[INT_GET(dead_leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); + dead_hash = INT_GET(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval, ARCH_CONVERT); } else { ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); dead_node = (xfs_da_intnode_t *)dead_info; diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 6f3b99dfe55c..1dd1a7694e4b 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -939,7 +939,7 @@ xfs_dir2_leaf_to_block( * Size of the "leaf" area in the block. */ size = (uint)sizeof(block->tail) + - (uint)sizeof(*lep) * (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)); + (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); /* * Look at the last data entry. */ @@ -968,14 +968,14 @@ xfs_dir2_leaf_to_block( * Initialize the block tail. */ btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - btp->count = cpu_to_be32(INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)); + btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); btp->stale = 0; xfs_dir2_block_log_tail(tp, dbp); /* * Initialize the block leaf area. We compact out stale entries. 
*/ lep = XFS_DIR2_BLOCK_LEAF_P(btp); - for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) { + for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) continue; lep[to++] = leaf->ents[from]; diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 63e81dc2737e..0d366ab4db17 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -111,14 +111,14 @@ xfs_dir2_block_to_leaf( /* * Set the counts in the leaf header. */ - leaf->hdr.count = btp->count; - leaf->hdr.stale = btp->stale; + leaf->hdr.count = cpu_to_be16(be32_to_cpu(btp->count)); + leaf->hdr.stale = cpu_to_be16(be32_to_cpu(btp->stale)); /* * Could compact these but I think we always do the conversion * after squeezing out stale entries. */ memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir2_leaf_log_ents(tp, lbp, 0, INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1); + xfs_dir2_leaf_log_ents(tp, lbp, 0, be16_to_cpu(leaf->hdr.count) - 1); needscan = 0; needlog = 1; /* @@ -222,7 +222,7 @@ xfs_dir2_leaf_addname( * in a data block, improving the lookup of those entries. */ for (use_block = -1, lep = &leaf->ents[index]; - index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; + index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; index++, lep++) { if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) continue; @@ -266,15 +266,15 @@ xfs_dir2_leaf_addname( * If we don't have enough free bytes but we can make enough * by compacting out stale entries, we'll do that. */ - if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] < needbytes && - INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1) { + if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes && + be16_to_cpu(leaf->hdr.stale) > 1) { compact = 1; } /* * Otherwise if we don't have enough free bytes we need to * convert to node form. */ - else if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] < + else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes) { /* * Just checking or no space reservation, give up. @@ -330,8 +330,8 @@ xfs_dir2_leaf_addname( * There are stale entries, so we'll need log-low and log-high * impossibly bad values later. */ - else if (INT_GET(leaf->hdr.stale, ARCH_CONVERT)) { - lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT); + else if (be16_to_cpu(leaf->hdr.stale)) { + lfloglow = be16_to_cpu(leaf->hdr.count); lfloghigh = -1; } /* @@ -440,15 +440,15 @@ xfs_dir2_leaf_addname( /* * lep is still good as the index leaf entry. */ - if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT)) + if (index < be16_to_cpu(leaf->hdr.count)) memmove(lep + 1, lep, - (INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep)); + (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); /* * Record low and high logging indices for the leaf. */ lfloglow = index; - lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT); - INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1); + lfloghigh = be16_to_cpu(leaf->hdr.count); + be16_add(&leaf->hdr.count, 1); } /* * There are stale entries. @@ -478,7 +478,7 @@ xfs_dir2_leaf_addname( * lowstale entry would be better. 
*/ for (highstale = index; - highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) && + highstale < be16_to_cpu(leaf->hdr.count) && INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || @@ -490,7 +490,7 @@ xfs_dir2_leaf_addname( * If the low one is better, use it. */ if (lowstale >= 0 && - (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) || + (highstale == be16_to_cpu(leaf->hdr.count) || index - lowstale - 1 < highstale - index)) { ASSERT(index - lowstale - 1 >= 0); ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) == @@ -526,7 +526,7 @@ xfs_dir2_leaf_addname( lfloglow = MIN(index, lfloglow); lfloghigh = MAX(highstale, lfloghigh); } - INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1); + be16_add(&leaf->hdr.stale, -1); } /* * Fill in the new leaf entry. @@ -569,24 +569,24 @@ xfs_dir2_leaf_check( * Should factor in the size of the bests table as well. * We can deduce a value for that from di_size. */ - ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); + ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); /* * Leaves and bests don't overlap. */ - ASSERT((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] <= + ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <= (char *)XFS_DIR2_LEAF_BESTS_P(ltp)); /* * Check hash value order, count stale entries. */ - for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) { - if (i + 1 < INT_GET(leaf->hdr.count, ARCH_CONVERT)) + for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { + if (i + 1 < be16_to_cpu(leaf->hdr.count)) ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <= INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT)); if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) stale++; } - ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == stale); + ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); } #endif /* DEBUG */ @@ -611,7 +611,7 @@ xfs_dir2_leaf_compact( /* * Compress out the stale entries in place. */ - for (from = to = 0, loglow = -1; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) { + for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) { if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) continue; /* @@ -627,8 +627,8 @@ xfs_dir2_leaf_compact( /* * Update and log the header, log the leaf entries. */ - ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == from - to); - INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(INT_GET(leaf->hdr.stale, ARCH_CONVERT))); + ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to); + be16_add(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale))); leaf->hdr.stale = 0; xfs_dir2_leaf_log_header(args->trans, bp); if (loglow != -1) @@ -662,7 +662,7 @@ xfs_dir2_leaf_compact_x1( int to; /* destination copy index */ leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1); + ASSERT(be16_to_cpu(leaf->hdr.stale) > 1); index = *indexp; /* * Find the first stale entry before our index, if any. @@ -677,7 +677,7 @@ xfs_dir2_leaf_compact_x1( * Stop if the answer would be worse than lowstale. */ for (highstale = index; - highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) && + highstale < be16_to_cpu(leaf->hdr.count) && INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || index - lowstale > highstale - index); highstale++) @@ -686,7 +686,7 @@ xfs_dir2_leaf_compact_x1( * Pick the better of lowstale and highstale. 
*/ if (lowstale >= 0 && - (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) || + (highstale == be16_to_cpu(leaf->hdr.count) || index - lowstale <= highstale - index)) keepstale = lowstale; else @@ -695,7 +695,7 @@ xfs_dir2_leaf_compact_x1( * Copy the entries in place, removing all the stale entries * except keepstale. */ - for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) { + for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { /* * Notice the new value of index. */ @@ -730,8 +730,8 @@ xfs_dir2_leaf_compact_x1( /* * Adjust the leaf header values. */ - INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(from - to)); - INT_SET(leaf->hdr.stale, ARCH_CONVERT, 1); + be16_add(&leaf->hdr.count, -(from - to)); + leaf->hdr.stale = cpu_to_be16(1); /* * Remember the low/high stale value only in the "right" * direction. @@ -739,8 +739,8 @@ xfs_dir2_leaf_compact_x1( if (lowstale >= newindex) lowstale = -1; else - highstale = INT_GET(leaf->hdr.count, ARCH_CONVERT); - *highlogp = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1; + highstale = be16_to_cpu(leaf->hdr.count); + *highlogp = be16_to_cpu(leaf->hdr.count) - 1; *lowstalep = lowstale; *highstalep = highstale; } @@ -1373,7 +1373,7 @@ xfs_dir2_leaf_lookup_int( * looking to match the name. */ for (lep = &leaf->ents[index], dbp = NULL, curdb = -1; - index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; + index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; lep++, index++) { /* * Skip over stale leaf entries. @@ -1488,7 +1488,7 @@ xfs_dir2_leaf_removename( /* * We just mark the leaf entry stale by putting a null in it. */ - INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1); + be16_add(&leaf->hdr.stale, 1); xfs_dir2_leaf_log_header(tp, lbp); INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR); xfs_dir2_leaf_log_ents(tp, lbp, index, index); @@ -1645,7 +1645,7 @@ xfs_dir2_leaf_search_hash( * Note, the table cannot be empty, so we have to go through the loop. * Binary search the leaf entries looking for our hash value. */ - for (lep = leaf->ents, low = 0, high = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1, + for (lep = leaf->ents, low = 0, high = be16_to_cpu(leaf->hdr.count) - 1, hashwant = args->hashval; low <= high; ) { mid = (low + high) >> 1; @@ -1821,7 +1821,7 @@ xfs_dir2_node_to_leaf( * If not, release the buffer and give up. */ if ((uint)sizeof(leaf->hdr) + - (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)) * (uint)sizeof(leaf->ents[0]) + + (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) + be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) + (uint)sizeof(leaf->tail) > mp->m_dirblksize) { @@ -1832,7 +1832,7 @@ xfs_dir2_node_to_leaf( * If the leaf has any stale entries in it, compress them out. * The compact routine will log the header. 
*/ - if (INT_GET(leaf->hdr.stale, ARCH_CONVERT)) + if (be16_to_cpu(leaf->hdr.stale)) xfs_dir2_leaf_compact(args, lbp); else xfs_dir2_leaf_log_header(tp, lbp); diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h index fcd3b7dea0f6..a1db51b845b9 100644 --- a/fs/xfs/xfs_dir2_leaf.h +++ b/fs/xfs/xfs_dir2_leaf.h @@ -46,8 +46,8 @@ typedef __uint32_t xfs_dir2_dataptr_t; */ typedef struct xfs_dir2_leaf_hdr { xfs_da_blkinfo_t info; /* header for da routines */ - __uint16_t count; /* count of entries */ - __uint16_t stale; /* count of stale entries */ + __be16 count; /* count of entries */ + __be16 stale; /* count of stale entries */ } xfs_dir2_leaf_hdr_t; /* diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index c32894cb6223..40e4a2e27765 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -217,14 +217,14 @@ xfs_dir2_leafn_add( * a compact. */ - if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == XFS_DIR2_MAX_LEAF_ENTS(mp)) { + if (be16_to_cpu(leaf->hdr.count) == XFS_DIR2_MAX_LEAF_ENTS(mp)) { if (!leaf->hdr.stale) return XFS_ERROR(ENOSPC); - compact = INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1; + compact = be16_to_cpu(leaf->hdr.stale) > 1; } else compact = 0; ASSERT(index == 0 || INT_GET(leaf->ents[index - 1].hashval, ARCH_CONVERT) <= args->hashval); - ASSERT(index == INT_GET(leaf->hdr.count, ARCH_CONVERT) || + ASSERT(index == be16_to_cpu(leaf->hdr.count) || INT_GET(leaf->ents[index].hashval, ARCH_CONVERT) >= args->hashval); if (args->justcheck) @@ -242,7 +242,7 @@ xfs_dir2_leafn_add( * Set impossible logging indices for this case. */ else if (leaf->hdr.stale) { - lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT); + lfloglow = be16_to_cpu(leaf->hdr.count); lfloghigh = -1; } /* @@ -250,12 +250,12 @@ xfs_dir2_leafn_add( */ if (!leaf->hdr.stale) { lep = &leaf->ents[index]; - if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT)) + if (index < be16_to_cpu(leaf->hdr.count)) memmove(lep + 1, lep, - (INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep)); + (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); lfloglow = index; - lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT); - INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1); + lfloghigh = be16_to_cpu(leaf->hdr.count); + be16_add(&leaf->hdr.count, 1); } /* * There are stale entries. We'll use one for the new entry. @@ -281,7 +281,7 @@ xfs_dir2_leafn_add( * lowstale already found. */ for (highstale = index; - highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) && + highstale < be16_to_cpu(leaf->hdr.count) && INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || @@ -294,7 +294,7 @@ xfs_dir2_leafn_add( * Shift entries up toward the stale slot. */ if (lowstale >= 0 && - (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) || + (highstale == be16_to_cpu(leaf->hdr.count) || index - lowstale - 1 < highstale - index)) { ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR); @@ -323,7 +323,7 @@ xfs_dir2_leafn_add( lfloglow = MIN(index, lfloglow); lfloghigh = MAX(highstale, lfloghigh); } - INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1); + be16_add(&leaf->hdr.stale, -1); } /* * Insert the new entry, log everything. 
@@ -353,16 +353,16 @@ xfs_dir2_leafn_check( leaf = bp->data; mp = dp->i_mount; ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); - ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); - for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) { - if (i + 1 < INT_GET(leaf->hdr.count, ARCH_CONVERT)) { + ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); + for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { + if (i + 1 < be16_to_cpu(leaf->hdr.count)) { ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <= INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT)); } if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) stale++; } - ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == stale); + ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); } #endif /* DEBUG */ @@ -380,10 +380,10 @@ xfs_dir2_leafn_lasthash( leaf = bp->data; ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); if (count) - *count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) return 0; - return INT_GET(leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); + return INT_GET(leaf->ents[be16_to_cpu(leaf->hdr.count) - 1].hashval, ARCH_CONVERT); } /* @@ -421,7 +421,7 @@ xfs_dir2_leafn_lookup_int( leaf = bp->data; ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); #ifdef __KERNEL__ - ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > 0); + ASSERT(be16_to_cpu(leaf->hdr.count) > 0); #endif xfs_dir2_leafn_check(dp, bp); /* @@ -456,7 +456,7 @@ xfs_dir2_leafn_lookup_int( * Loop over leaf entries with the right hash value. */ for (lep = &leaf->ents[index]; - index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; + index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; lep++, index++) { /* * Skip stale leaf entries. @@ -619,7 +619,7 @@ xfs_dir2_leafn_lookup_int( * Return the final index, that will be the insertion point. */ *indexp = index; - ASSERT(index == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent); + ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent); return XFS_ERROR(ENOENT); } @@ -657,12 +657,12 @@ xfs_dir2_leafn_moveents( * destination leaf entries, open up a hole in the destination * to hold the new entries. */ - if (start_d < INT_GET(leaf_d->hdr.count, ARCH_CONVERT)) { + if (start_d < be16_to_cpu(leaf_d->hdr.count)) { memmove(&leaf_d->ents[start_d + count], &leaf_d->ents[start_d], - (INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - start_d) * + (be16_to_cpu(leaf_d->hdr.count) - start_d) * sizeof(xfs_dir2_leaf_entry_t)); xfs_dir2_leaf_log_ents(tp, bp_d, start_d + count, - count + INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - 1); + count + be16_to_cpu(leaf_d->hdr.count) - 1); } /* * If the source has stale leaves, count the ones in the copy range @@ -687,7 +687,7 @@ xfs_dir2_leafn_moveents( * If there are source entries after the ones we copied, * delete the ones we copied by sliding the next ones down. */ - if (start_s + count < INT_GET(leaf_s->hdr.count, ARCH_CONVERT)) { + if (start_s + count < be16_to_cpu(leaf_s->hdr.count)) { memmove(&leaf_s->ents[start_s], &leaf_s->ents[start_s + count], count * sizeof(xfs_dir2_leaf_entry_t)); xfs_dir2_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1); @@ -695,10 +695,10 @@ xfs_dir2_leafn_moveents( /* * Update the headers and log them. 
*/ - INT_MOD(leaf_s->hdr.count, ARCH_CONVERT, -(count)); - INT_MOD(leaf_s->hdr.stale, ARCH_CONVERT, -(stale)); - INT_MOD(leaf_d->hdr.count, ARCH_CONVERT, count); - INT_MOD(leaf_d->hdr.stale, ARCH_CONVERT, stale); + be16_add(&leaf_s->hdr.count, -(count)); + be16_add(&leaf_s->hdr.stale, -(stale)); + be16_add(&leaf_d->hdr.count, count); + be16_add(&leaf_d->hdr.stale, stale); xfs_dir2_leaf_log_header(tp, bp_s); xfs_dir2_leaf_log_header(tp, bp_d); xfs_dir2_leafn_check(args->dp, bp_s); @@ -721,11 +721,11 @@ xfs_dir2_leafn_order( leaf2 = leaf2_bp->data; ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); - if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0 && - INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0 && + if (be16_to_cpu(leaf1->hdr.count) > 0 && + be16_to_cpu(leaf2->hdr.count) > 0 && (INT_GET(leaf2->ents[0].hashval, ARCH_CONVERT) < INT_GET(leaf1->ents[0].hashval, ARCH_CONVERT) || - INT_GET(leaf2->ents[INT_GET(leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT) < - INT_GET(leaf1->ents[INT_GET(leaf1->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT))) + INT_GET(leaf2->ents[be16_to_cpu(leaf2->hdr.count) - 1].hashval, ARCH_CONVERT) < + INT_GET(leaf1->ents[be16_to_cpu(leaf1->hdr.count) - 1].hashval, ARCH_CONVERT))) return 1; return 0; } @@ -768,9 +768,9 @@ xfs_dir2_leafn_rebalance( } leaf1 = blk1->bp->data; leaf2 = blk2->bp->data; - oldsum = INT_GET(leaf1->hdr.count, ARCH_CONVERT) + INT_GET(leaf2->hdr.count, ARCH_CONVERT); + oldsum = be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count); #ifdef DEBUG - oldstale = INT_GET(leaf1->hdr.stale, ARCH_CONVERT) + INT_GET(leaf2->hdr.stale, ARCH_CONVERT); + oldstale = be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale); #endif mid = oldsum >> 1; /* @@ -780,8 +780,8 @@ xfs_dir2_leafn_rebalance( if (oldsum & 1) { xfs_dahash_t midhash; /* middle entry hash value */ - if (mid >= INT_GET(leaf1->hdr.count, ARCH_CONVERT)) - midhash = INT_GET(leaf2->ents[mid - INT_GET(leaf1->hdr.count, ARCH_CONVERT)].hashval, ARCH_CONVERT); + if (mid >= be16_to_cpu(leaf1->hdr.count)) + midhash = INT_GET(leaf2->ents[mid - be16_to_cpu(leaf1->hdr.count)].hashval, ARCH_CONVERT); else midhash = INT_GET(leaf1->ents[mid].hashval, ARCH_CONVERT); isleft = args->hashval <= midhash; @@ -797,30 +797,30 @@ xfs_dir2_leafn_rebalance( * Calculate moved entry count. Positive means left-to-right, * negative means right-to-left. Then move the entries. */ - count = INT_GET(leaf1->hdr.count, ARCH_CONVERT) - mid + (isleft == 0); + count = be16_to_cpu(leaf1->hdr.count) - mid + (isleft == 0); if (count > 0) xfs_dir2_leafn_moveents(args, blk1->bp, - INT_GET(leaf1->hdr.count, ARCH_CONVERT) - count, blk2->bp, 0, count); + be16_to_cpu(leaf1->hdr.count) - count, blk2->bp, 0, count); else if (count < 0) xfs_dir2_leafn_moveents(args, blk2->bp, 0, blk1->bp, - INT_GET(leaf1->hdr.count, ARCH_CONVERT), count); - ASSERT(INT_GET(leaf1->hdr.count, ARCH_CONVERT) + INT_GET(leaf2->hdr.count, ARCH_CONVERT) == oldsum); - ASSERT(INT_GET(leaf1->hdr.stale, ARCH_CONVERT) + INT_GET(leaf2->hdr.stale, ARCH_CONVERT) == oldstale); + be16_to_cpu(leaf1->hdr.count), count); + ASSERT(be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count) == oldsum); + ASSERT(be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale) == oldstale); /* * Mark whether we're inserting into the old or new leaf. 
*/ - if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) < INT_GET(leaf2->hdr.count, ARCH_CONVERT)) + if (be16_to_cpu(leaf1->hdr.count) < be16_to_cpu(leaf2->hdr.count)) state->inleaf = swap; - else if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > INT_GET(leaf2->hdr.count, ARCH_CONVERT)) + else if (be16_to_cpu(leaf1->hdr.count) > be16_to_cpu(leaf2->hdr.count)) state->inleaf = !swap; else state->inleaf = - swap ^ (blk1->index <= INT_GET(leaf1->hdr.count, ARCH_CONVERT)); + swap ^ (blk1->index <= be16_to_cpu(leaf1->hdr.count)); /* * Adjust the expected index for insertion. */ if (!state->inleaf) - blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT); + blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); /* * Finally sanity check just to make sure we are not returning a negative index @@ -883,7 +883,7 @@ xfs_dir2_leafn_remove( * Kill the leaf entry by marking it stale. * Log the leaf block changes. */ - INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1); + be16_add(&leaf->hdr.stale, 1); xfs_dir2_leaf_log_header(tp, bp); INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR); xfs_dir2_leaf_log_ents(tp, bp, index, index); @@ -1039,7 +1039,7 @@ xfs_dir2_leafn_remove( *rval = ((uint)sizeof(leaf->hdr) + (uint)sizeof(leaf->ents[0]) * - (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT))) < + (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale))) < mp->m_dir_magicpct; return 0; } @@ -1140,7 +1140,7 @@ xfs_dir2_leafn_toosmall( info = blk->bp->data; ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); leaf = (xfs_dir2_leaf_t *)info; - count = INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT); + count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); if (bytes > (state->blocksize >> 1)) { /* @@ -1194,11 +1194,11 @@ xfs_dir2_leafn_toosmall( * Count bytes in the two blocks combined. */ leaf = (xfs_dir2_leaf_t *)info; - count = INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT); + count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = state->blocksize - (state->blocksize >> 2); leaf = bp->data; ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); - count += INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT); + count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes -= count * (uint)sizeof(leaf->ents[0]); /* * Fits with at least 25% to spare. @@ -1262,21 +1262,21 @@ xfs_dir2_leafn_unbalance( * If there are any stale leaf entries, take this opportunity * to purge them. */ - if (INT_GET(drop_leaf->hdr.stale, ARCH_CONVERT)) + if (drop_leaf->hdr.stale) xfs_dir2_leaf_compact(args, drop_blk->bp); - if (INT_GET(save_leaf->hdr.stale, ARCH_CONVERT)) + if (save_leaf->hdr.stale) xfs_dir2_leaf_compact(args, save_blk->bp); /* * Move the entries from drop to the appropriate end of save. 
*/ - drop_blk->hashval = INT_GET(drop_leaf->ents[INT_GET(drop_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); + drop_blk->hashval = INT_GET(drop_leaf->ents[be16_to_cpu(drop_leaf->hdr.count) - 1].hashval, ARCH_CONVERT); if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp)) xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, 0, - INT_GET(drop_leaf->hdr.count, ARCH_CONVERT)); + be16_to_cpu(drop_leaf->hdr.count)); else xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, - INT_GET(save_leaf->hdr.count, ARCH_CONVERT), INT_GET(drop_leaf->hdr.count, ARCH_CONVERT)); - save_blk->hashval = INT_GET(save_leaf->ents[INT_GET(save_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); + be16_to_cpu(save_leaf->hdr.count), be16_to_cpu(drop_leaf->hdr.count)); + save_blk->hashval = INT_GET(save_leaf->ents[be16_to_cpu(save_leaf->hdr.count) - 1].hashval, ARCH_CONVERT); xfs_dir2_leafn_check(args->dp, save_blk->bp); } -- cgit v1.2.3 From 3c1f9c158050259cf3965cf900916ec49a288972 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:28:18 +1100 Subject: [XFS] endianess annotations for xfs_dir2_leaf_entry_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25493a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_da_btree.c | 2 +- fs/xfs/xfs_dir2_block.c | 52 +++++++++++++++++++++++--------------------- fs/xfs/xfs_dir2_data.c | 8 +++---- fs/xfs/xfs_dir2_leaf.c | 55 ++++++++++++++++++++++++----------------------- fs/xfs/xfs_dir2_leaf.h | 4 ++-- fs/xfs/xfs_dir2_node.c | 57 +++++++++++++++++++++++++------------------------ fs/xfs/xfs_dir2_sf.c | 2 +- 7 files changed, 93 insertions(+), 87 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index c443cbaaaa92..31796c7faba0 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1798,7 +1798,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, ASSERT(XFS_DIR_IS_V2(mp)); dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; dead_level = 0; - dead_hash = INT_GET(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval, ARCH_CONVERT); + dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); } else { ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); dead_node = (xfs_da_intnode_t *)dead_info; diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 1dd1a7694e4b..fa372b2b433d 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -246,7 +246,7 @@ xfs_dir2_block_addname( highstale = lfloghigh = -1; fromidx >= 0; fromidx--) { - if (INT_GET(blp[fromidx].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) { + if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) { if (highstale == -1) highstale = toidx; else { @@ -291,14 +291,14 @@ xfs_dir2_block_addname( */ for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) { mid = (low + high) >> 1; - if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval) + if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval) break; if (hash < args->hashval) low = mid + 1; else high = mid - 1; } - while (mid >= 0 && INT_GET(blp[mid].hashval, ARCH_CONVERT) >= args->hashval) { + while (mid >= 0 && be32_to_cpu(blp[mid].hashval) >= args->hashval) { mid--; } /* @@ -345,12 +345,12 @@ xfs_dir2_block_addname( else { for (lowstale = mid; lowstale >= 0 && - INT_GET(blp[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR; + be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR; lowstale--) 
continue; for (highstale = mid + 1; highstale < be32_to_cpu(btp->count) && - INT_GET(blp[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR && + be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || mid - lowstale > highstale - mid); highstale++) continue; @@ -387,8 +387,9 @@ xfs_dir2_block_addname( /* * Fill in the leaf entry. */ - INT_SET(blp[mid].hashval, ARCH_CONVERT, args->hashval); - INT_SET(blp[mid].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block)); + blp[mid].hashval = cpu_to_be32(args->hashval); + blp[mid].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, + (char *)dep - (char *)block)); xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); /* * Mark space for the data entry used. @@ -621,7 +622,7 @@ xfs_dir2_block_lookup( * Get the offset from the leaf entry, to point to the data. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT))); + ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); /* * Fill in inode number, release the block. */ @@ -676,7 +677,7 @@ xfs_dir2_block_lookup_int( for (low = 0, high = be32_to_cpu(btp->count) - 1; ; ) { ASSERT(low <= high); mid = (low + high) >> 1; - if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval) + if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval) break; if (hash < args->hashval) low = mid + 1; @@ -691,7 +692,7 @@ xfs_dir2_block_lookup_int( /* * Back up to the first one with the right hash value. */ - while (mid > 0 && INT_GET(blp[mid - 1].hashval, ARCH_CONVERT) == args->hashval) { + while (mid > 0 && be32_to_cpu(blp[mid - 1].hashval) == args->hashval) { mid--; } /* @@ -699,7 +700,7 @@ xfs_dir2_block_lookup_int( * right hash value looking for our name. */ do { - if ((addr = INT_GET(blp[mid].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR) + if ((addr = be32_to_cpu(blp[mid].address)) == XFS_DIR2_NULL_DATAPTR) continue; /* * Get pointer to the entry from the leaf. @@ -716,7 +717,7 @@ xfs_dir2_block_lookup_int( *entno = mid; return 0; } - } while (++mid < be32_to_cpu(btp->count) && INT_GET(blp[mid].hashval, ARCH_CONVERT) == hash); + } while (++mid < be32_to_cpu(btp->count) && be32_to_cpu(blp[mid].hashval) == hash); /* * No match, release the buffer and return ENOENT. */ @@ -766,7 +767,7 @@ xfs_dir2_block_removename( * Point to the data entry using the leaf entry. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT))); + ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); /* * Mark the data entry's space free. */ @@ -782,7 +783,7 @@ xfs_dir2_block_removename( /* * Remove the leaf entry by marking it stale. */ - INT_SET(blp[ent].address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR); + blp[ent].address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); xfs_dir2_block_log_leaf(tp, bp, ent, ent); /* * Fix up bestfree, log the header if necessary. @@ -842,7 +843,7 @@ xfs_dir2_block_replace( * Point to the data entry we need to change. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT))); + ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) != args->inumber); /* * Change the inode number to the new value. @@ -867,8 +868,8 @@ xfs_dir2_block_sort( la = a; lb = b; - return INT_GET(la->hashval, ARCH_CONVERT) < INT_GET(lb->hashval, ARCH_CONVERT) ? 
-1 : - (INT_GET(la->hashval, ARCH_CONVERT) > INT_GET(lb->hashval, ARCH_CONVERT) ? 1 : 0); + return be32_to_cpu(la->hashval) < be32_to_cpu(lb->hashval) ? -1 : + (be32_to_cpu(la->hashval) > be32_to_cpu(lb->hashval) ? 1 : 0); } /* @@ -976,7 +977,7 @@ xfs_dir2_leaf_to_block( */ lep = XFS_DIR2_BLOCK_LEAF_P(btp); for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { - if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) continue; lep[to++] = leaf->ents[from]; } @@ -1135,8 +1136,9 @@ xfs_dir2_sf_to_block( tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); xfs_dir2_data_log_entry(tp, bp, dep); - INT_SET(blp[0].hashval, ARCH_CONVERT, xfs_dir_hash_dot); - INT_SET(blp[0].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block)); + blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); + blp[0].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, + (char *)dep - (char *)block)); /* * Create entry for .. */ @@ -1148,8 +1150,9 @@ xfs_dir2_sf_to_block( tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); xfs_dir2_data_log_entry(tp, bp, dep); - INT_SET(blp[1].hashval, ARCH_CONVERT, xfs_dir_hash_dotdot); - INT_SET(blp[1].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block)); + blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); + blp[1].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, + (char *)dep - (char *)block)); offset = XFS_DIR2_DATA_FIRST_OFFSET; /* * Loop over existing entries, stuff them in. @@ -1197,8 +1200,9 @@ xfs_dir2_sf_to_block( tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); xfs_dir2_data_log_entry(tp, bp, dep); - INT_SET(blp[2 + i].hashval, ARCH_CONVERT, xfs_da_hashname((char *)sfep->name, sfep->namelen)); - INT_SET(blp[2 + i].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, + blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname( + (char *)sfep->name, sfep->namelen)); + blp[2 + i].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block)); offset = (int)((char *)(tagp + 1) - (char *)block); if (++i == INT_GET(sfp->hdr.count, ARCH_CONVERT)) diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 0af757873be5..084b134361aa 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -144,8 +144,8 @@ xfs_dir2_data_check( ((char *)dep - (char *)d)); hash = xfs_da_hashname((char *)dep->name, dep->namelen); for (i = 0; i < be32_to_cpu(btp->count); i++) { - if (INT_GET(lep[i].address, ARCH_CONVERT) == addr && - INT_GET(lep[i].hashval, ARCH_CONVERT) == hash) + if (be32_to_cpu(lep[i].address) == addr && + be32_to_cpu(lep[i].hashval) == hash) break; } ASSERT(i < be32_to_cpu(btp->count)); @@ -158,10 +158,10 @@ xfs_dir2_data_check( ASSERT(freeseen == 7); if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { - if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; if (i > 0) - ASSERT(INT_GET(lep[i].hashval, ARCH_CONVERT) >= INT_GET(lep[i - 1].hashval, ARCH_CONVERT)); + ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval)); } ASSERT(count == be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); ASSERT(stale == be32_to_cpu(btp->stale)); diff --git 
a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 0d366ab4db17..752fc67354ea 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -222,11 +222,11 @@ xfs_dir2_leaf_addname( * in a data block, improving the lookup of those entries. */ for (use_block = -1, lep = &leaf->ents[index]; - index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; + index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; index++, lep++) { - if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) continue; - i = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); + i = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); ASSERT(i < be32_to_cpu(ltp->bestcount)); ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); if (be16_to_cpu(bestsp[i]) >= length) { @@ -468,7 +468,7 @@ xfs_dir2_leaf_addname( */ for (lowstale = index - 1; lowstale >= 0 && - INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) != + be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR; lowstale--) continue; @@ -479,7 +479,7 @@ xfs_dir2_leaf_addname( */ for (highstale = index; highstale < be16_to_cpu(leaf->hdr.count) && - INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != + be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || index - lowstale - 1 >= highstale - index); @@ -493,7 +493,7 @@ xfs_dir2_leaf_addname( (highstale == be16_to_cpu(leaf->hdr.count) || index - lowstale - 1 < highstale - index)) { ASSERT(index - lowstale - 1 >= 0); - ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) == + ASSERT(be32_to_cpu(leaf->ents[lowstale].address) == XFS_DIR2_NULL_DATAPTR); /* * Copy entries up to cover the stale entry @@ -512,7 +512,7 @@ xfs_dir2_leaf_addname( */ else { ASSERT(highstale - index >= 0); - ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) == + ASSERT(be32_to_cpu(leaf->ents[highstale].address) == XFS_DIR2_NULL_DATAPTR); /* * Copy entries down to copver the stale entry @@ -531,8 +531,9 @@ xfs_dir2_leaf_addname( /* * Fill in the new leaf entry. */ - INT_SET(lep->hashval, ARCH_CONVERT, args->hashval); - INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, INT_GET(*tagp, ARCH_CONVERT))); + lep->hashval = cpu_to_be32(args->hashval); + lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, + INT_GET(*tagp, ARCH_CONVERT))); /* * Log the leaf fields and give up the buffers. */ @@ -581,9 +582,9 @@ xfs_dir2_leaf_check( */ for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { if (i + 1 < be16_to_cpu(leaf->hdr.count)) - ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <= - INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT)); - if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= + be32_to_cpu(leaf->ents[i + 1].hashval)); + if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; } ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); @@ -612,7 +613,7 @@ xfs_dir2_leaf_compact( * Compress out the stale entries in place. */ for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) { - if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) continue; /* * Only actually copy the entries that are different. 
@@ -669,7 +670,7 @@ xfs_dir2_leaf_compact_x1( */ for (lowstale = index - 1; lowstale >= 0 && - INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR; + be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR; lowstale--) continue; /* @@ -678,7 +679,7 @@ xfs_dir2_leaf_compact_x1( */ for (highstale = index; highstale < be16_to_cpu(leaf->hdr.count) && - INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR && + be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || index - lowstale > highstale - index); highstale++) continue; @@ -702,7 +703,7 @@ xfs_dir2_leaf_compact_x1( if (index == from) newindex = to; if (from != keepstale && - INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) { + be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) { if (from == to) *lowlogp = to; continue; @@ -1314,7 +1315,7 @@ xfs_dir2_leaf_lookup( */ dep = (xfs_dir2_data_entry_t *) ((char *)dbp->data + - XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT))); + XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address))); /* * Return the found inode number. */ @@ -1373,17 +1374,17 @@ xfs_dir2_leaf_lookup_int( * looking to match the name. */ for (lep = &leaf->ents[index], dbp = NULL, curdb = -1; - index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; + index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; lep++, index++) { /* * Skip over stale leaf entries. */ - if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) continue; /* * Get the new data block number. */ - newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); + newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); /* * If it's not the same as the old data block number, * need to pitch the old one and read the new one. @@ -1406,7 +1407,7 @@ xfs_dir2_leaf_lookup_int( */ dep = (xfs_dir2_data_entry_t *) ((char *)dbp->data + - XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT))); + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); /* * If it matches then return it. */ @@ -1471,9 +1472,9 @@ xfs_dir2_leaf_removename( * Point to the leaf entry, use that to point to the data entry. */ lep = &leaf->ents[index]; - db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); + db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); dep = (xfs_dir2_data_entry_t *) - ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT))); + ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); needscan = needlog = 0; oldbest = be16_to_cpu(data->hdr.bestfree[0].length); ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); @@ -1490,7 +1491,7 @@ xfs_dir2_leaf_removename( */ be16_add(&leaf->hdr.stale, 1); xfs_dir2_leaf_log_header(tp, lbp); - INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR); + lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); xfs_dir2_leaf_log_ents(tp, lbp, index, index); /* * Scan the freespace in the data block again if necessary, @@ -1604,7 +1605,7 @@ xfs_dir2_leaf_replace( */ dep = (xfs_dir2_data_entry_t *) ((char *)dbp->data + - XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT))); + XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address))); ASSERT(args->inumber != INT_GET(dep->inumber, ARCH_CONVERT)); /* * Put the new inode number in, log it. 
@@ -1649,7 +1650,7 @@ xfs_dir2_leaf_search_hash( hashwant = args->hashval; low <= high; ) { mid = (low + high) >> 1; - if ((hash = INT_GET(lep[mid].hashval, ARCH_CONVERT)) == hashwant) + if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant) break; if (hash < hashwant) low = mid + 1; @@ -1660,7 +1661,7 @@ xfs_dir2_leaf_search_hash( * Found one, back up through all the equal hash values. */ if (hash == hashwant) { - while (mid > 0 && INT_GET(lep[mid - 1].hashval, ARCH_CONVERT) == hashwant) { + while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) { mid--; } } diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h index a1db51b845b9..f57ca1162412 100644 --- a/fs/xfs/xfs_dir2_leaf.h +++ b/fs/xfs/xfs_dir2_leaf.h @@ -54,8 +54,8 @@ typedef struct xfs_dir2_leaf_hdr { * Leaf block entry. */ typedef struct xfs_dir2_leaf_entry { - xfs_dahash_t hashval; /* hash value of name */ - xfs_dir2_dataptr_t address; /* address of data entry */ + __be32 hashval; /* hash value of name */ + __be32 address; /* address of data entry */ } xfs_dir2_leaf_entry_t; /* diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 40e4a2e27765..062e4a99209c 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -223,9 +223,9 @@ xfs_dir2_leafn_add( compact = be16_to_cpu(leaf->hdr.stale) > 1; } else compact = 0; - ASSERT(index == 0 || INT_GET(leaf->ents[index - 1].hashval, ARCH_CONVERT) <= args->hashval); + ASSERT(index == 0 || be32_to_cpu(leaf->ents[index - 1].hashval) <= args->hashval); ASSERT(index == be16_to_cpu(leaf->hdr.count) || - INT_GET(leaf->ents[index].hashval, ARCH_CONVERT) >= args->hashval); + be32_to_cpu(leaf->ents[index].hashval) >= args->hashval); if (args->justcheck) return 0; @@ -271,7 +271,7 @@ xfs_dir2_leafn_add( */ for (lowstale = index - 1; lowstale >= 0 && - INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) != + be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR; lowstale--) continue; @@ -282,7 +282,7 @@ xfs_dir2_leafn_add( */ for (highstale = index; highstale < be16_to_cpu(leaf->hdr.count) && - INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != + be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || index - lowstale - 1 >= highstale - index); @@ -296,7 +296,7 @@ xfs_dir2_leafn_add( if (lowstale >= 0 && (highstale == be16_to_cpu(leaf->hdr.count) || index - lowstale - 1 < highstale - index)) { - ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) == + ASSERT(be32_to_cpu(leaf->ents[lowstale].address) == XFS_DIR2_NULL_DATAPTR); ASSERT(index - lowstale - 1 >= 0); if (index - lowstale - 1 > 0) @@ -312,7 +312,7 @@ xfs_dir2_leafn_add( * Shift entries down toward the stale slot. */ else { - ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) == + ASSERT(be32_to_cpu(leaf->ents[highstale].address) == XFS_DIR2_NULL_DATAPTR); ASSERT(highstale - index >= 0); if (highstale - index > 0) @@ -328,8 +328,9 @@ xfs_dir2_leafn_add( /* * Insert the new entry, log everything. 
*/ - INT_SET(lep->hashval, ARCH_CONVERT, args->hashval); - INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, args->blkno, args->index)); + lep->hashval = cpu_to_be32(args->hashval); + lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, + args->blkno, args->index)); xfs_dir2_leaf_log_header(tp, bp); xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh); xfs_dir2_leafn_check(dp, bp); @@ -356,10 +357,10 @@ xfs_dir2_leafn_check( ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { if (i + 1 < be16_to_cpu(leaf->hdr.count)) { - ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <= - INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT)); + ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= + be32_to_cpu(leaf->ents[i + 1].hashval)); } - if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; } ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); @@ -383,7 +384,7 @@ xfs_dir2_leafn_lasthash( *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) return 0; - return INT_GET(leaf->ents[be16_to_cpu(leaf->hdr.count) - 1].hashval, ARCH_CONVERT); + return be32_to_cpu(leaf->ents[be16_to_cpu(leaf->hdr.count) - 1].hashval); } /* @@ -456,17 +457,17 @@ xfs_dir2_leafn_lookup_int( * Loop over leaf entries with the right hash value. */ for (lep = &leaf->ents[index]; - index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; + index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; lep++, index++) { /* * Skip stale leaf entries. */ - if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) continue; /* * Pull the data block number from the entry. */ - newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); + newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); /* * For addname, we're looking for a place to put the new entry. * We want to use a data block with an entry of equal @@ -572,7 +573,7 @@ xfs_dir2_leafn_lookup_int( */ dep = (xfs_dir2_data_entry_t *) ((char *)curbp->data + - XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT))); + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); /* * Compare the entry, return it if it matches. 
*/ @@ -672,7 +673,7 @@ xfs_dir2_leafn_moveents( int i; /* temp leaf index */ for (i = start_s, stale = 0; i < start_s + count; i++) { - if (INT_GET(leaf_s->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR) stale++; } } else @@ -723,9 +724,9 @@ xfs_dir2_leafn_order( ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); if (be16_to_cpu(leaf1->hdr.count) > 0 && be16_to_cpu(leaf2->hdr.count) > 0 && - (INT_GET(leaf2->ents[0].hashval, ARCH_CONVERT) < INT_GET(leaf1->ents[0].hashval, ARCH_CONVERT) || - INT_GET(leaf2->ents[be16_to_cpu(leaf2->hdr.count) - 1].hashval, ARCH_CONVERT) < - INT_GET(leaf1->ents[be16_to_cpu(leaf1->hdr.count) - 1].hashval, ARCH_CONVERT))) + (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) || + be32_to_cpu(leaf2->ents[be16_to_cpu(leaf2->hdr.count) - 1].hashval) < + be32_to_cpu(leaf1->ents[be16_to_cpu(leaf1->hdr.count) - 1].hashval))) return 1; return 0; } @@ -781,9 +782,9 @@ xfs_dir2_leafn_rebalance( xfs_dahash_t midhash; /* middle entry hash value */ if (mid >= be16_to_cpu(leaf1->hdr.count)) - midhash = INT_GET(leaf2->ents[mid - be16_to_cpu(leaf1->hdr.count)].hashval, ARCH_CONVERT); + midhash = be32_to_cpu(leaf2->ents[mid - be16_to_cpu(leaf1->hdr.count)].hashval); else - midhash = INT_GET(leaf1->ents[mid].hashval, ARCH_CONVERT); + midhash = be32_to_cpu(leaf1->ents[mid].hashval); isleft = args->hashval <= midhash; } /* @@ -875,9 +876,9 @@ xfs_dir2_leafn_remove( /* * Extract the data block and offset from the entry. */ - db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); + db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); ASSERT(dblk->blkno == db); - off = XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)); + off = XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)); ASSERT(dblk->index == off); /* * Kill the leaf entry by marking it stale. @@ -885,7 +886,7 @@ xfs_dir2_leafn_remove( */ be16_add(&leaf->hdr.stale, 1); xfs_dir2_leaf_log_header(tp, bp); - INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR); + lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); xfs_dir2_leaf_log_ents(tp, bp, index, index); /* * Make the data entry free. Keep track of the longest freespace @@ -1269,14 +1270,14 @@ xfs_dir2_leafn_unbalance( /* * Move the entries from drop to the appropriate end of save. 
*/ - drop_blk->hashval = INT_GET(drop_leaf->ents[be16_to_cpu(drop_leaf->hdr.count) - 1].hashval, ARCH_CONVERT); + drop_blk->hashval = be32_to_cpu(drop_leaf->ents[be16_to_cpu(drop_leaf->hdr.count) - 1].hashval); if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp)) xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, 0, be16_to_cpu(drop_leaf->hdr.count)); else xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, be16_to_cpu(save_leaf->hdr.count), be16_to_cpu(drop_leaf->hdr.count)); - save_blk->hashval = INT_GET(save_leaf->ents[be16_to_cpu(save_leaf->hdr.count) - 1].hashval, ARCH_CONVERT); + save_blk->hashval = be32_to_cpu(save_leaf->ents[be16_to_cpu(save_leaf->hdr.count) - 1].hashval); xfs_dir2_leafn_check(args->dp, save_blk->bp); } @@ -1903,7 +1904,7 @@ xfs_dir2_node_replace( ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); dep = (xfs_dir2_data_entry_t *) ((char *)data + - XFS_DIR2_DATAPTR_TO_OFF(state->mp, INT_GET(lep->address, ARCH_CONVERT))); + XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address))); ASSERT(inum != INT_GET(dep->inumber, ARCH_CONVERT)); /* * Fill in the new inode number and log the entry. diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 35dd003051c5..d98a41d1fe63 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -99,7 +99,7 @@ xfs_dir2_block_sfsize( * Iterate over the block's data entries by using the leaf pointers. */ for (i = 0; i < be32_to_cpu(btp->count); i++) { - if ((addr = INT_GET(blp[i].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR) + if ((addr = be32_to_cpu(blp[i].address)) == XFS_DIR2_NULL_DATAPTR) continue; /* * Calculate the pointer to the entry at hand. -- cgit v1.2.3 From 3d693c6ed7892d066e8fb3311c6b74f7699326f9 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:28:27 +1100 Subject: [XFS] endianess annotations for XFS_DIR2_DATA_ENTRY_TAG_P SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25494a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_dir2_block.c | 26 +++++++++++++------------- fs/xfs/xfs_dir2_data.c | 10 +++++----- fs/xfs/xfs_dir2_data.h | 7 +++---- fs/xfs/xfs_dir2_leaf.c | 6 +++--- fs/xfs/xfs_dir2_node.c | 6 +++--- 5 files changed, 27 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index fa372b2b433d..25d3a04f2e46 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -81,7 +81,7 @@ xfs_dir2_block_addname( xfs_mount_t *mp; /* filesystem mount point */ int needlog; /* need to log header */ int needscan; /* need to rescan freespace */ - xfs_dir2_data_off_t *tagp; /* pointer to tag value */ + __be16 *tagp; /* pointer to tag value */ xfs_trans_t *tp; /* transaction structure */ xfs_dir2_trace_args("block_addname", args); @@ -120,11 +120,11 @@ xfs_dir2_block_addname( /* * Tag just before the first leaf entry. */ - tagp = (xfs_dir2_data_off_t *)blp - 1; + tagp = (__be16 *)blp - 1; /* * Data object just before the first leaf entry. */ - enddup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT)); + enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free then can't do this add without cleaning up: * the space before the first leaf entry needs to be free so it @@ -183,11 +183,11 @@ xfs_dir2_block_addname( /* * Tag just before the first leaf entry. */ - tagp = (xfs_dir2_data_off_t *)blp - 1; + tagp = (__be16 *)blp - 1; /* * Data object just before the first leaf entry. 
*/ - dup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT)); + dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free then the data will go where the * leaf data starts now, if it works at all. @@ -404,7 +404,7 @@ xfs_dir2_block_addname( dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); - INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); + *tagp = cpu_to_be16((char *)dep - (char *)block); /* * Clean up the bestfree array and log the header, tail, and entry. */ @@ -896,7 +896,7 @@ xfs_dir2_leaf_to_block( int needscan; /* need to scan for bestfree */ xfs_dir2_sf_hdr_t sfh; /* shortform header */ int size; /* bytes used */ - xfs_dir2_data_off_t *tagp; /* end of entry (tag) */ + __be16 *tagp; /* end of entry (tag) */ int to; /* block/leaf to index */ xfs_trans_t *tp; /* transaction pointer */ @@ -944,8 +944,8 @@ xfs_dir2_leaf_to_block( /* * Look at the last data entry. */ - tagp = (xfs_dir2_data_off_t *)((char *)block + mp->m_dirblksize) - 1; - dup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT)); + tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1; + dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free or is too short we can't do it. */ @@ -1044,7 +1044,7 @@ xfs_dir2_sf_to_block( int offset; /* target block offset */ xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ xfs_dir2_sf_t *sfp; /* shortform structure */ - xfs_dir2_data_off_t *tagp; /* end of data entry */ + __be16 *tagp; /* end of data entry */ xfs_trans_t *tp; /* transaction pointer */ xfs_dir2_trace_args("sf_to_block", args); @@ -1134,7 +1134,7 @@ xfs_dir2_sf_to_block( dep->namelen = 1; dep->name[0] = '.'; tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); - INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); + *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); blp[0].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, @@ -1148,7 +1148,7 @@ xfs_dir2_sf_to_block( dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); - INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); + *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); blp[1].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, @@ -1198,7 +1198,7 @@ xfs_dir2_sf_to_block( dep->namelen = sfep->namelen; memcpy(dep->name, sfep->name, dep->namelen); tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); - INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); + *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname( (char *)sfep->name, sfep->namelen)); diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 084b134361aa..bb3d03ff002b 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -134,7 +134,7 @@ xfs_dir2_data_check( dep = (xfs_dir2_data_entry_t *)p; ASSERT(dep->namelen != 0); ASSERT(xfs_dir_ino_validate(mp, INT_GET(dep->inumber, ARCH_CONVERT)) == 0); - ASSERT(INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) == + ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) == (char *)dep - (char *)d); count++; lastfree = 0; @@ -376,7 +376,7 @@ xfs_dir2_data_freescan( else { dep = 
(xfs_dir2_data_entry_t *)p; ASSERT((char *)dep - (char *)d == - INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT)); + be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep))); p += XFS_DIR2_DATA_ENTSIZE(dep->namelen); } } @@ -549,10 +549,10 @@ xfs_dir2_data_make_free( * the previous entry and see if it's free. */ if (offset > sizeof(d->hdr)) { - xfs_dir2_data_off_t *tagp; /* tag just before us */ + __be16 *tagp; /* tag just before us */ - tagp = (xfs_dir2_data_off_t *)((char *)d + offset) - 1; - prevdup = (xfs_dir2_data_unused_t *)((char *)d + INT_GET(*tagp, ARCH_CONVERT)); + tagp = (__be16 *)((char *)d + offset) - 1; + prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp)); if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) prevdup = NULL; } else diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h index 479b59f50dbc..0847cbb53e17 100644 --- a/fs/xfs/xfs_dir2_data.h +++ b/fs/xfs/xfs_dir2_data.h @@ -134,12 +134,11 @@ static inline int xfs_dir2_data_entsize(int n) * Pointer to an entry's tag word. */ #define XFS_DIR2_DATA_ENTRY_TAG_P(dep) xfs_dir2_data_entry_tag_p(dep) -static inline xfs_dir2_data_off_t * +static inline __be16 * xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep) { - return (xfs_dir2_data_off_t *) \ - ((char *)(dep) + XFS_DIR2_DATA_ENTSIZE((dep)->namelen) - \ - (uint)sizeof(xfs_dir2_data_off_t)); + return (__be16 *)((char *)dep + + XFS_DIR2_DATA_ENTSIZE(dep->namelen) - sizeof(__be16)); } /* diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 752fc67354ea..b5036512d02f 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -187,7 +187,7 @@ xfs_dir2_leaf_addname( int needbytes; /* leaf block bytes needed */ int needlog; /* need to log data header */ int needscan; /* need to rescan data free */ - xfs_dir2_data_off_t *tagp; /* end of data entry */ + __be16 *tagp; /* end of data entry */ xfs_trans_t *tp; /* transaction pointer */ xfs_dir2_db_t use_block; /* data block number */ @@ -411,7 +411,7 @@ xfs_dir2_leaf_addname( dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); - INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data)); + *tagp = cpu_to_be16((char *)dep - (char *)data); /* * Need to scan fix up the bestfree table. */ @@ -533,7 +533,7 @@ xfs_dir2_leaf_addname( */ lep->hashval = cpu_to_be32(args->hashval); lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, - INT_GET(*tagp, ARCH_CONVERT))); + be16_to_cpu(*tagp))); /* * Log the leaf fields and give up the buffers. */ diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 062e4a99209c..56b7cc89645e 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -1379,7 +1379,7 @@ xfs_dir2_node_addname_int( xfs_mount_t *mp; /* filesystem mount point */ int needlog; /* need to log data header */ int needscan; /* need to rescan data frees */ - xfs_dir2_data_off_t *tagp; /* data entry tag pointer */ + __be16 *tagp; /* data entry tag pointer */ xfs_trans_t *tp; /* transaction pointer */ dp = args->dp; @@ -1699,7 +1699,7 @@ xfs_dir2_node_addname_int( dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); - INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data)); + *tagp = cpu_to_be16((char *)dep - (char *)data); xfs_dir2_data_log_entry(tp, dbp, dep); /* * Rescan the block for bestfree if needed. 
@@ -1732,7 +1732,7 @@ xfs_dir2_node_addname_int( * Return the data block and offset in args, then drop the data block. */ args->blkno = (xfs_dablk_t)dbno; - args->index = INT_GET(*tagp, ARCH_CONVERT); + args->index = be16_to_cpu(*tagp); xfs_da_buf_done(dbp); return 0; } -- cgit v1.2.3 From 89da054424a775b4b257556eda8a300be1134d7c Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:28:40 +1100 Subject: [XFS] endianess annotations for xfs_da_blkinfo_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25495a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_attr.c | 17 ++-- fs/xfs/xfs_attr_leaf.c | 98 ++++++++--------------- fs/xfs/xfs_da_btree.c | 206 +++++++++++++++++++++++------------------------- fs/xfs/xfs_da_btree.h | 8 +- fs/xfs/xfs_dir.c | 16 ++-- fs/xfs/xfs_dir2_block.c | 2 +- fs/xfs/xfs_dir2_leaf.c | 20 ++--- fs/xfs/xfs_dir2_node.c | 28 +++---- fs/xfs/xfs_dir_leaf.c | 55 +++++++------ 9 files changed, 205 insertions(+), 245 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index acf1b7c9f1db..36b120d859af 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -1127,8 +1127,7 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) return(error); ASSERT(bp != NULL); leaf = bp->data; - if (unlikely(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - != XFS_ATTR_LEAF_MAGIC)) { + if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW, context->dp->i_mount, leaf); xfs_da_brelse(NULL, bp); @@ -1541,8 +1540,8 @@ xfs_attr_node_removename(xfs_da_args_t *args) XFS_ATTR_FORK); if (error) goto out; - ASSERT(INT_GET(((xfs_attr_leafblock_t *) - bp->data)->hdr.info.magic, ARCH_CONVERT) + ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *) + bp->data)->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { @@ -1763,7 +1762,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) return(error); if (bp) { node = bp->data; - switch (INT_GET(node->hdr.info.magic, ARCH_CONVERT)) { + switch (be16_to_cpu(node->hdr.info.magic)) { case XFS_DA_NODE_MAGIC: xfs_attr_trace_l_cn("wrong blk", context, node); xfs_da_brelse(NULL, bp); @@ -1817,10 +1816,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) return(XFS_ERROR(EFSCORRUPTED)); } node = bp->data; - if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) + if (be16_to_cpu(node->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) break; - if (unlikely(INT_GET(node->hdr.info.magic, ARCH_CONVERT) + if (unlikely(be16_to_cpu(node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)", XFS_ERRLEVEL_LOW, @@ -1858,7 +1857,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) */ for (;;) { leaf = bp->data; - if (unlikely(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) + if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)", XFS_ERRLEVEL_LOW, @@ -1869,7 +1868,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) error = xfs_attr_leaf_list_int(bp, context); if (error || !leaf->hdr.info.forw) break; /* not really an error, buffer full or EOF */ - cursor->blkno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT); + cursor->blkno = be32_to_cpu(leaf->hdr.info.forw); xfs_da_brelse(NULL, bp); error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index fe91eac4e2a7..d279d944e03d 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ 
b/fs/xfs/xfs_attr_leaf.c @@ -720,8 +720,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) int bytes, i; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); entry = &leaf->entries[0]; bytes = sizeof(struct xfs_attr_sf_hdr); @@ -766,8 +765,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) ASSERT(bp != NULL); memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount)); leaf = (xfs_attr_leafblock_t *)tmpbuffer; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); memset(bp->data, 0, XFS_LBSIZE(dp->i_mount)); /* @@ -875,8 +873,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) goto out; node = bp1->data; leaf = bp2->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); /* both on-disk, don't endian-flip twice */ node->btree[0].hashval = leaf->entries[INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval; @@ -920,7 +917,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) leaf = bp->data; memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); hdr = &leaf->hdr; - INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_ATTR_LEAF_MAGIC); + hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC); INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount)); if (!hdr->firstused) { INT_SET(hdr->firstused, ARCH_CONVERT, @@ -1004,8 +1001,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) int tablesize, entsize, sum, tmp, i; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT((args->index >= 0) && (args->index <= INT_GET(leaf->hdr.count, ARCH_CONVERT))); hdr = &leaf->hdr; @@ -1079,8 +1075,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) int tmp, i; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr = &leaf->hdr; ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE)); ASSERT((args->index >= 0) @@ -1279,10 +1274,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); leaf1 = blk1->bp->data; leaf2 = blk2->bp->data; - ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); - ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); args = state->args; /* @@ -1566,7 +1559,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) */ blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->data; - ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); leaf = (xfs_attr_leafblock_t *)info; count = INT_GET(leaf->hdr.count, ARCH_CONVERT); bytes = sizeof(xfs_attr_leaf_hdr_t) + @@ -1588,7 +1581,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) * Make altpath point to the block we want to keep and * path point to the block we want to drop (this one). 
*/ - forward = info->forw; + forward = (info->forw != 0); memcpy(&state->altpath, &state->path, sizeof(state->path)); error = xfs_da_path_shift(state, &state->altpath, forward, 0, &retval); @@ -1610,13 +1603,12 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) * to shrink an attribute list over time. */ /* start with smaller blk num */ - forward = (INT_GET(info->forw, ARCH_CONVERT) - < INT_GET(info->back, ARCH_CONVERT)); + forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back)); for (i = 0; i < 2; forward = !forward, i++) { if (forward) - blkno = INT_GET(info->forw, ARCH_CONVERT); + blkno = be32_to_cpu(info->forw); else - blkno = INT_GET(info->back, ARCH_CONVERT); + blkno = be32_to_cpu(info->back); if (blkno == 0) continue; error = xfs_da_read_buf(state->args->trans, state->args->dp, @@ -1630,8 +1622,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) bytes = state->blocksize - (state->blocksize>>2); bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT); leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); count += INT_GET(leaf->hdr.count, ARCH_CONVERT); bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT); bytes -= count * sizeof(xfs_attr_leaf_entry_t); @@ -1685,8 +1676,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_mount_t *mp; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr = &leaf->hdr; mp = args->trans->t_mountp; ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) @@ -1859,10 +1849,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC); drop_leaf = drop_blk->bp->data; save_leaf = save_blk->bp->data; - ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); - ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); drop_hdr = &drop_leaf->hdr; save_hdr = &save_leaf->hdr; @@ -1972,8 +1960,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_dahash_t hashval; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8)); @@ -2090,8 +2077,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_attr_leaf_name_remote_t *name_rmt; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8)); ASSERT(args->index < ((int)INT_GET(leaf->hdr.count, ARCH_CONVERT))); @@ -2159,10 +2145,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, /* * Set up environment. 
*/ - ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); - ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr_s = &leaf_s->hdr; hdr_d = &leaf_d->hdr; ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) @@ -2301,10 +2285,8 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) leaf1 = leaf1_bp->data; leaf2 = leaf2_bp->data; - ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC) && - (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC)); + ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) && + (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC)); if ( (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0) && ( (INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) < @@ -2327,8 +2309,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) xfs_attr_leafblock_t *leaf; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); if (count) *count = INT_GET(leaf->hdr.count, ARCH_CONVERT); if (!leaf->hdr.count) @@ -2348,8 +2329,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) xfs_attr_leaf_name_remote_t *name_rmt; int size; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, index); size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(name_loc->namelen, @@ -2596,8 +2576,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) ASSERT(bp != NULL); leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index < INT_GET(leaf->hdr.count, ARCH_CONVERT)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; @@ -2663,8 +2642,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) ASSERT(bp != NULL); leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index < INT_GET(leaf->hdr.count, ARCH_CONVERT)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; @@ -2736,15 +2714,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) } leaf1 = bp1->data; - ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index < INT_GET(leaf1->hdr.count, ARCH_CONVERT)); ASSERT(args->index >= 0); entry1 = &leaf1->entries[ args->index ]; leaf2 = bp2->data; - ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT(args->index2 < INT_GET(leaf2->hdr.count, ARCH_CONVERT)); ASSERT(args->index2 >= 0); entry2 = &leaf2->entries[ args->index2 ]; @@ -2842,9 +2818,9 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) * This is a depth-first traversal! 
*/ info = bp->data; - if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) { + if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { error = xfs_attr_node_inactive(trans, dp, bp, 1); - } else if (INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) { + } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { error = xfs_attr_leaf_inactive(trans, dp, bp); } else { error = XFS_ERROR(EIO); @@ -2892,8 +2868,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, } node = bp->data; - ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) - == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); parent_blkno = xfs_da_blkno(bp); /* save for re-read later */ count = INT_GET(node->hdr.count, ARCH_CONVERT); if (!count) { @@ -2927,12 +2902,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, * Invalidate the subtree, however we have to. */ info = child_bp->data; - if (INT_GET(info->magic, ARCH_CONVERT) - == XFS_DA_NODE_MAGIC) { + if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { error = xfs_attr_node_inactive(trans, dp, child_bp, level+1); - } else if (INT_GET(info->magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC) { + } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { error = xfs_attr_leaf_inactive(trans, dp, child_bp); } else { @@ -2991,8 +2964,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) int error, count, size, tmp, i; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) - == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); /* * Count the number of "remote" value extents. diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 31796c7faba0..4f3bb1cb9615 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -126,7 +126,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, node = bp->data; node->hdr.info.forw = 0; node->hdr.info.back = 0; - INT_SET(node->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC); + node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC); node->hdr.info.pad = 0; node->hdr.count = 0; INT_SET(node->hdr.level, ARCH_CONVERT, level); @@ -290,28 +290,28 @@ xfs_da_split(xfs_da_state_t *state) node = oldblk->bp->data; if (node->hdr.info.forw) { - if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) { + if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) { bp = addblk->bp; } else { ASSERT(state->extravalid); bp = state->extrablk.bp; } node = bp->data; - INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno); + node->hdr.info.back = cpu_to_be32(oldblk->blkno); xfs_da_log_buf(state->args->trans, bp, XFS_DA_LOGRANGE(node, &node->hdr.info, sizeof(node->hdr.info))); } node = oldblk->bp->data; - if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) { - if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) { + if (node->hdr.info.back) { + if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) { bp = addblk->bp; } else { ASSERT(state->extravalid); bp = state->extrablk.bp; } node = bp->data; - INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno); + node->hdr.info.forw = cpu_to_be32(oldblk->blkno); xfs_da_log_buf(state->args->trans, bp, XFS_DA_LOGRANGE(node, &node->hdr.info, sizeof(node->hdr.info))); @@ -359,12 +359,12 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(bp != NULL); node = bp->data; oldroot = blk1->bp->data; - if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == 
XFS_DA_NODE_MAGIC) { + if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) { size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] - (char *)oldroot); } else { ASSERT(XFS_DIR_IS_V2(mp)); - ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); leaf = (xfs_dir2_leaf_t *)oldroot; size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - (char *)leaf); @@ -392,7 +392,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, INT_SET(node->hdr.count, ARCH_CONVERT, 2); #ifdef DEBUG - if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) { + if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) { ASSERT(blk1->blkno >= mp->m_dirleafblk && blk1->blkno < mp->m_dirfreeblk); ASSERT(blk2->blkno >= mp->m_dirleafblk && @@ -424,7 +424,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, int useextra; node = oldblk->bp->data; - ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); /* * With V2 the extra block is data or freespace. @@ -524,8 +524,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node1 = node2; node2 = tmpnode; } - ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); - ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC); count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2; if (count == 0) return; @@ -622,7 +622,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, node = oldblk->bp->data; mp = state->mp; - ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT))); ASSERT(newblk->blkno != 0); if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) @@ -768,7 +768,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) ASSERT(args != NULL); ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); oldroot = root_blk->bp->data; - ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC); ASSERT(!oldroot->hdr.info.forw); ASSERT(!oldroot->hdr.info.back); @@ -791,10 +791,10 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) ASSERT(bp != NULL); blkinfo = bp->data; if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) { - ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) || - INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) || + be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC); } else { - ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC); } ASSERT(!blkinfo->forw); ASSERT(!blkinfo->back); @@ -830,7 +830,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) */ blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->data; - ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC); node = 
(xfs_da_intnode_t *)info; count = INT_GET(node->hdr.count, ARCH_CONVERT); if (count > (state->node_ents >> 1)) { @@ -849,7 +849,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) * Make altpath point to the block we want to keep and * path point to the block we want to drop (this one). */ - forward = info->forw; + forward = (info->forw != 0); memcpy(&state->altpath, &state->path, sizeof(state->path)); error = xfs_da_path_shift(state, &state->altpath, forward, 0, &retval); @@ -871,13 +871,12 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) * to shrink a directory over time. */ /* start with smaller blk num */ - forward = (INT_GET(info->forw, ARCH_CONVERT) - < INT_GET(info->back, ARCH_CONVERT)); + forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back)); for (i = 0; i < 2; forward = !forward, i++) { if (forward) - blkno = INT_GET(info->forw, ARCH_CONVERT); + blkno = be32_to_cpu(info->forw); else - blkno = INT_GET(info->back, ARCH_CONVERT); + blkno = be32_to_cpu(info->back); if (blkno == 0) continue; error = xfs_da_read_buf(state->args->trans, state->args->dp, @@ -891,7 +890,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) count -= state->node_ents >> 2; count -= INT_GET(node->hdr.count, ARCH_CONVERT); node = bp->data; - ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); count -= INT_GET(node->hdr.count, ARCH_CONVERT); xfs_da_brelse(state->args->trans, bp); if (count >= 0) @@ -973,7 +972,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) } for (blk--, level--; level >= 0; blk--, level--) { node = blk->bp->data; - ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); btree = &node->btree[ blk->index ]; if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash) break; @@ -1041,8 +1040,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, drop_node = drop_blk->bp->data; save_node = save_blk->bp->data; - ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); - ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); tp = state->args->trans; /* @@ -1138,15 +1137,15 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) return(error); } curr = blk->bp->data; - ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC || - INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) || - INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC); + ASSERT(be16_to_cpu(curr->magic) == XFS_DA_NODE_MAGIC || + be16_to_cpu(curr->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) || + be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC); /* * Search an intermediate node for a match. 
*/ - blk->magic = INT_GET(curr->magic, ARCH_CONVERT); - if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) { + blk->magic = be16_to_cpu(curr->magic); + if (blk->magic == XFS_DA_NODE_MAGIC) { node = blk->bp->data; blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); @@ -1193,15 +1192,15 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) blkno = INT_GET(btree->before, ARCH_CONVERT); } } - else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) { + else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) { blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); break; } - else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) { + else if (be16_to_cpu(curr->magic) == XFS_DIR_LEAF_MAGIC) { blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL); break; } - else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) { + else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) { blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); break; } @@ -1274,8 +1273,8 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) || old_blk->magic == XFS_ATTR_LEAF_MAGIC); - ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT)); - ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT)); + ASSERT(old_blk->magic == be16_to_cpu(old_info->magic)); + ASSERT(new_blk->magic == be16_to_cpu(new_info->magic)); ASSERT(old_blk->magic == new_blk->magic); switch (old_blk->magic) { @@ -1302,47 +1301,44 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, /* * Link new block in before existing block. */ - INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno); - new_info->back = old_info->back; /* INT_: direct copy */ - if (INT_GET(old_info->back, ARCH_CONVERT)) { + new_info->forw = cpu_to_be32(old_blk->blkno); + new_info->back = old_info->back; + if (old_info->back) { error = xfs_da_read_buf(args->trans, args->dp, - INT_GET(old_info->back, - ARCH_CONVERT), -1, &bp, - args->whichfork); + be32_to_cpu(old_info->back), + -1, &bp, args->whichfork); if (error) return(error); ASSERT(bp != NULL); tmp_info = bp->data; - ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT)); - ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno); - INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno); + ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic)); + ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); + tmp_info->forw = cpu_to_be32(new_blk->blkno); xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); xfs_da_buf_done(bp); } - INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno); + old_info->back = cpu_to_be32(new_blk->blkno); } else { /* * Link new block in after existing block. 
*/ - new_info->forw = old_info->forw; /* INT_: direct copy */ - INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno); - if (INT_GET(old_info->forw, ARCH_CONVERT)) { + new_info->forw = old_info->forw; + new_info->back = cpu_to_be32(old_blk->blkno); + if (old_info->forw) { error = xfs_da_read_buf(args->trans, args->dp, - INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp, - args->whichfork); + be32_to_cpu(old_info->forw), + -1, &bp, args->whichfork); if (error) return(error); ASSERT(bp != NULL); tmp_info = bp->data; - ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) - == INT_GET(old_info->magic, ARCH_CONVERT)); - ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT) - == old_blk->blkno); - INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno); + ASSERT(tmp_info->magic == old_info->magic); + ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno); + tmp_info->back = cpu_to_be32(new_blk->blkno); xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); xfs_da_buf_done(bp); } - INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno); + old_info->forw = cpu_to_be32(new_blk->blkno); } xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); @@ -1360,8 +1356,8 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) node1 = node1_bp->data; node2 = node2_bp->data; - ASSERT((INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) && - (INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC)); + ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) && + (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC)); if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) && ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) || @@ -1381,7 +1377,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) xfs_da_intnode_t *node; node = bp->data; - ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); if (count) *count = INT_GET(node->hdr.count, ARCH_CONVERT); if (!node->hdr.count) @@ -1411,50 +1407,47 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) || save_blk->magic == XFS_ATTR_LEAF_MAGIC); - ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT)); - ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT)); + ASSERT(save_blk->magic == be16_to_cpu(save_info->magic)); + ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic)); ASSERT(save_blk->magic == drop_blk->magic); - ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) || - (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno)); - ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) || - (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno)); + ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) || + (be32_to_cpu(save_info->back) == drop_blk->blkno)); + ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) || + (be32_to_cpu(drop_info->back) == save_blk->blkno)); /* * Unlink the leaf block from the doubly linked chain of leaves. 
*/ - if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) { - save_info->back = drop_info->back; /* INT_: direct copy */ - if (INT_GET(drop_info->back, ARCH_CONVERT)) { + if (be32_to_cpu(save_info->back) == drop_blk->blkno) { + save_info->back = drop_info->back; + if (drop_info->back) { error = xfs_da_read_buf(args->trans, args->dp, - INT_GET(drop_info->back, - ARCH_CONVERT), -1, &bp, - args->whichfork); + be32_to_cpu(drop_info->back), + -1, &bp, args->whichfork); if (error) return(error); ASSERT(bp != NULL); tmp_info = bp->data; - ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT)); - ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno); - INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno); + ASSERT(tmp_info->magic == save_info->magic); + ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno); + tmp_info->forw = cpu_to_be32(save_blk->blkno); xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info) - 1); xfs_da_buf_done(bp); } } else { - save_info->forw = drop_info->forw; /* INT_: direct copy */ - if (INT_GET(drop_info->forw, ARCH_CONVERT)) { + save_info->forw = drop_info->forw; + if (drop_info->forw) { error = xfs_da_read_buf(args->trans, args->dp, - INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp, - args->whichfork); + be32_to_cpu(drop_info->forw), + -1, &bp, args->whichfork); if (error) return(error); ASSERT(bp != NULL); tmp_info = bp->data; - ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) - == INT_GET(save_info->magic, ARCH_CONVERT)); - ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT) - == drop_blk->blkno); - INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno); + ASSERT(tmp_info->magic == save_info->magic); + ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno); + tmp_info->back = cpu_to_be32(save_blk->blkno); xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info) - 1); xfs_da_buf_done(bp); @@ -1497,7 +1490,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, for (blk = &path->blk[level]; level >= 0; blk--, level--) { ASSERT(blk->bp != NULL); node = blk->bp->data; - ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) { blk->index++; blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT); @@ -1536,11 +1529,11 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, return(error); ASSERT(blk->bp != NULL); info = blk->bp->data; - ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC || - INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) || - INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC); - blk->magic = INT_GET(info->magic, ARCH_CONVERT); - if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) { + ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC || + be16_to_cpu(info->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) || + be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); + blk->magic = be16_to_cpu(info->magic); + if (blk->magic == XFS_DA_NODE_MAGIC) { node = (xfs_da_intnode_t *)info; blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); if (forward) @@ -1788,19 +1781,19 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, /* * Get values from the moved block. 
*/ - if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) { + if (be16_to_cpu(dead_info->magic) == XFS_DIR_LEAF_MAGIC) { ASSERT(XFS_DIR_IS_V1(mp)); dead_leaf = (xfs_dir_leafblock_t *)dead_info; dead_level = 0; dead_hash = INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); - } else if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) { + } else if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) { ASSERT(XFS_DIR_IS_V2(mp)); dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; dead_level = 0; dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); } else { - ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC); dead_node = (xfs_da_intnode_t *)dead_info; dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT); dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); @@ -1809,19 +1802,19 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, /* * If the moved block has a left sibling, fix up the pointers. */ - if ((sib_blkno = INT_GET(dead_info->back, ARCH_CONVERT))) { + if ((sib_blkno = be32_to_cpu(dead_info->back))) { if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))) goto done; sib_info = sib_buf->data; if (unlikely( - INT_GET(sib_info->forw, ARCH_CONVERT) != last_blkno || - INT_GET(sib_info->magic, ARCH_CONVERT) != INT_GET(dead_info->magic, ARCH_CONVERT))) { + be32_to_cpu(sib_info->forw) != last_blkno || + sib_info->magic != dead_info->magic)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); goto done; } - INT_SET(sib_info->forw, ARCH_CONVERT, dead_blkno); + sib_info->forw = cpu_to_be32(dead_blkno); xfs_da_log_buf(tp, sib_buf, XFS_DA_LOGRANGE(sib_info, &sib_info->forw, sizeof(sib_info->forw))); @@ -1831,20 +1824,19 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, /* * If the moved block has a right sibling, fix up the pointers. 
*/ - if ((sib_blkno = INT_GET(dead_info->forw, ARCH_CONVERT))) { + if ((sib_blkno = be32_to_cpu(dead_info->forw))) { if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))) goto done; sib_info = sib_buf->data; if (unlikely( - INT_GET(sib_info->back, ARCH_CONVERT) != last_blkno - || INT_GET(sib_info->magic, ARCH_CONVERT) - != INT_GET(dead_info->magic, ARCH_CONVERT))) { + be32_to_cpu(sib_info->back) != last_blkno || + sib_info->magic != dead_info->magic)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); goto done; } - INT_SET(sib_info->back, ARCH_CONVERT, dead_blkno); + sib_info->back = cpu_to_be32(dead_blkno); xfs_da_log_buf(tp, sib_buf, XFS_DA_LOGRANGE(sib_info, &sib_info->back, sizeof(sib_info->back))); @@ -1861,7 +1853,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, goto done; par_node = par_buf->data; if (unlikely( - INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC || + be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC || (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1))) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", XFS_ERRLEVEL_LOW, mp); @@ -1898,7 +1890,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, continue; if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT)) break; - par_blkno = INT_GET(par_node->hdr.info.forw, ARCH_CONVERT); + par_blkno = be32_to_cpu(par_node->hdr.info.forw); xfs_da_brelse(tp, par_buf); par_buf = NULL; if (unlikely(par_blkno == 0)) { @@ -1912,7 +1904,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, par_node = par_buf->data; if (unlikely( INT_GET(par_node->hdr.level, ARCH_CONVERT) != level || - INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)) { + be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); @@ -2203,7 +2195,7 @@ xfs_da_do_buf( info = rbp->data; data = rbp->data; free = rbp->data; - magic = INT_GET(info->magic, ARCH_CONVERT); + magic = be16_to_cpu(info->magic); magic1 = be32_to_cpu(data->hdr.magic); if (unlikely( XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 41352113721a..e727bf456a12 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -45,10 +45,10 @@ struct zone; (XFS_DIR_IS_V1(mp) ? 
XFS_DIR_LEAF_MAGIC : XFS_DIR2_LEAFN_MAGIC) typedef struct xfs_da_blkinfo { - xfs_dablk_t forw; /* previous block in list */ - xfs_dablk_t back; /* following block in list */ - __uint16_t magic; /* validity check on block */ - __uint16_t pad; /* unused */ + __be32 forw; /* previous block in list */ + __be32 back; /* following block in list */ + __be16 magic; /* validity check on block */ + __be16 pad; /* unused */ } xfs_da_blkinfo_t; /* diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c index bb87d2a700a9..7a708e993db6 100644 --- a/fs/xfs/xfs_dir.c +++ b/fs/xfs/xfs_dir.c @@ -634,7 +634,7 @@ xfs_dir_leaf_removename(xfs_da_args_t *args, int *count, int *totallen) return(retval); ASSERT(bp != NULL); leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); retval = xfs_dir_leaf_lookup_int(bp, args, &index); if (retval == EEXIST) { (void)xfs_dir_leaf_remove(args->trans, bp, index); @@ -912,7 +912,7 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, return(error); if (bp) leaf = bp->data; - if (bp && INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) { + if (bp && be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) { xfs_dir_trace_g_dub("node: block not a leaf", dp, uio, bno); xfs_da_brelse(trans, bp); @@ -949,7 +949,7 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, if (bp == NULL) return(XFS_ERROR(EFSCORRUPTED)); node = bp->data; - if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) + if (be16_to_cpu(node->hdr.info.magic) != XFS_DA_NODE_MAGIC) break; btree = &node->btree[0]; xfs_dir_trace_g_dun("node: node detail", dp, uio, node); @@ -982,7 +982,7 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, */ for (;;) { leaf = bp->data; - if (unlikely(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC)) { + if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC)) { xfs_dir_trace_g_dul("node: not a leaf", dp, uio, leaf); xfs_da_brelse(trans, bp); XFS_CORRUPTION_ERROR("xfs_dir_node_getdents(1)", @@ -990,7 +990,7 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, return XFS_ERROR(EFSCORRUPTED); } xfs_dir_trace_g_dul("node: leaf detail", dp, uio, leaf); - if ((nextbno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT))) { + if ((nextbno = be32_to_cpu(leaf->hdr.info.forw))) { nextda = xfs_da_reada_buf(trans, dp, nextbno, XFS_DATA_FORK); } else @@ -1125,8 +1125,7 @@ xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio, (void *)((unsigned long)(uio->uio_offset >> 32)), (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)), (void *)(unsigned long)uio->uio_resid, - (void *)(unsigned long) - INT_GET(node->hdr.info.forw, ARCH_CONVERT), + (void *)(unsigned long)be32_to_cpu(node->hdr.info.forw), (void *)(unsigned long) INT_GET(node->hdr.count, ARCH_CONVERT), (void *)(unsigned long) @@ -1150,8 +1149,7 @@ xfs_dir_trace_g_dul(char *where, xfs_inode_t *dp, uio_t *uio, (void *)((unsigned long)(uio->uio_offset >> 32)), (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)), (void *)(unsigned long)uio->uio_resid, - (void *)(unsigned long) - INT_GET(leaf->hdr.info.forw, ARCH_CONVERT), + (void *)(unsigned long)be32_to_cpu(leaf->hdr.info.forw), (void *)(unsigned long) INT_GET(leaf->hdr.count, ARCH_CONVERT), (void *)(unsigned long) diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 25d3a04f2e46..aaf5644d8fdc 100644 --- a/fs/xfs/xfs_dir2_block.c 
+++ b/fs/xfs/xfs_dir2_block.c @@ -905,7 +905,7 @@ xfs_dir2_leaf_to_block( tp = args->trans; mp = dp->i_mount; leaf = lbp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); /* * If there are data blocks other than the first one, take this diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index b5036512d02f..08648b18265c 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -564,7 +564,7 @@ xfs_dir2_leaf_check( leaf = bp->data; mp = dp->i_mount; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); /* * This value is not restrictive enough. * Should factor in the size of the bests table as well. @@ -1172,7 +1172,7 @@ xfs_dir2_leaf_init( /* * Initialize the header. */ - INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, magic); + leaf->hdr.info.magic = cpu_to_be16(magic); leaf->hdr.info.forw = 0; leaf->hdr.info.back = 0; leaf->hdr.count = 0; @@ -1208,7 +1208,7 @@ xfs_dir2_leaf_log_bests( xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf); firstb = XFS_DIR2_LEAF_BESTS_P(ltp) + first; lastb = XFS_DIR2_LEAF_BESTS_P(ltp) + last; @@ -1231,8 +1231,8 @@ xfs_dir2_leaf_log_ents( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC || - INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || + be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); firstlep = &leaf->ents[first]; lastlep = &leaf->ents[last]; xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), @@ -1250,8 +1250,8 @@ xfs_dir2_leaf_log_header( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC || - INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || + be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), (uint)(sizeof(leaf->hdr) - 1)); } @@ -1270,7 +1270,7 @@ xfs_dir2_leaf_log_tail( mp = tp->t_mountp; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), (uint)(mp->m_dirblksize - 1)); @@ -1806,7 +1806,7 @@ xfs_dir2_node_to_leaf( return 0; lbp = state->path.blk[0].bp; leaf = lbp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); /* * Read the freespace block. */ @@ -1837,7 +1837,7 @@ xfs_dir2_node_to_leaf( xfs_dir2_leaf_compact(args, lbp); else xfs_dir2_leaf_log_header(tp, lbp); - INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAF1_MAGIC); + leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC); /* * Set up the leaf tail from the freespace block. 
*/ diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 56b7cc89645e..af556f16a0c7 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -164,7 +164,7 @@ xfs_dir2_leaf_to_node( *to = cpu_to_be16(off); } free->hdr.nused = cpu_to_be32(n); - INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAFN_MAGIC); + leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC); /* * Log everything. */ @@ -353,7 +353,7 @@ xfs_dir2_leafn_check( leaf = bp->data; mp = dp->i_mount; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { if (i + 1 < be16_to_cpu(leaf->hdr.count)) { @@ -379,7 +379,7 @@ xfs_dir2_leafn_lasthash( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); if (count) *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) @@ -420,7 +420,7 @@ xfs_dir2_leafn_lookup_int( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); #endif @@ -720,8 +720,8 @@ xfs_dir2_leafn_order( leaf1 = leaf1_bp->data; leaf2 = leaf2_bp->data; - ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); - ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); if (be16_to_cpu(leaf1->hdr.count) > 0 && be16_to_cpu(leaf2->hdr.count) > 0 && (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) || @@ -868,7 +868,7 @@ xfs_dir2_leafn_remove( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); /* * Point to the entry we're removing. */ @@ -1139,7 +1139,7 @@ xfs_dir2_leafn_toosmall( */ blk = &state->path.blk[state->path.active - 1]; info = blk->bp->data; - ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC); leaf = (xfs_dir2_leaf_t *)info; count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); @@ -1161,7 +1161,7 @@ xfs_dir2_leafn_toosmall( * Make altpath point to the block we want to keep and * path point to the block we want to drop (this one). */ - forward = info->forw; + forward = (info->forw != 0); memcpy(&state->altpath, &state->path, sizeof(state->path)); error = xfs_da_path_shift(state, &state->altpath, forward, 0, &rval); @@ -1177,9 +1177,9 @@ xfs_dir2_leafn_toosmall( * We prefer coalescing with the lower numbered sibling so as * to shrink a directory over time. */ - forward = INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT); + forward = be32_to_cpu(info->forw) < be32_to_cpu(info->back); for (i = 0, bp = NULL; i < 2; forward = !forward, i++) { - blkno = forward ?INT_GET( info->forw, ARCH_CONVERT) : INT_GET(info->back, ARCH_CONVERT); + blkno = forward ? 
be32_to_cpu(info->forw) : be32_to_cpu(info->back); if (blkno == 0) continue; /* @@ -1198,7 +1198,7 @@ xfs_dir2_leafn_toosmall( count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = state->blocksize - (state->blocksize >> 2); leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes -= count * (uint)sizeof(leaf->ents[0]); /* @@ -1257,8 +1257,8 @@ xfs_dir2_leafn_unbalance( ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); drop_leaf = drop_blk->bp->data; save_leaf = save_blk->bp->data; - ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); - ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); /* * If there are any stale leaf entries, take this opportunity * to purge them. diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c index e83074016abb..0b2eca590b36 100644 --- a/fs/xfs/xfs_dir_leaf.c +++ b/fs/xfs/xfs_dir_leaf.c @@ -644,7 +644,7 @@ xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs) ASSERT(bp != NULL); memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount)); leaf = (xfs_dir_leafblock_t *)tmpbuffer; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); memset(bp->data, 0, XFS_LBSIZE(dp->i_mount)); /* @@ -742,7 +742,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args) } node = bp1->data; leaf = bp2->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); INT_SET(node->btree[0].hashval, ARCH_CONVERT, INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)); xfs_da_buf_done(bp2); INT_SET(node->btree[0].before, ARCH_CONVERT, blkno); @@ -781,7 +781,7 @@ xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) leaf = bp->data; memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); hdr = &leaf->hdr; - INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_DIR_LEAF_MAGIC); + hdr->info.magic = cpu_to_be16(XFS_DIR_LEAF_MAGIC); INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount)); if (!hdr->firstused) INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount) - 1); @@ -860,7 +860,7 @@ xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index) int tablesize, entsize, sum, i, tmp, error; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); ASSERT((index >= 0) && (index <= INT_GET(leaf->hdr.count, ARCH_CONVERT))); hdr = &leaf->hdr; entsize = XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen); @@ -940,7 +940,7 @@ xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index, int tmp, i; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); hdr = &leaf->hdr; ASSERT((mapindex >= 0) && (mapindex < XFS_DIR_LEAF_MAPSIZE)); ASSERT((index >= 0) && (index <= INT_GET(hdr->count, ARCH_CONVERT))); @@ -1097,8 +1097,8 @@ xfs_dir_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(blk2->magic == XFS_DIR_LEAF_MAGIC); leaf1 = blk1->bp->data; leaf2 = 
blk2->bp->data; - ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); - ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); /* * Check ordering of blocks, reverse if it makes things simpler. @@ -1325,7 +1325,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action) */ blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->data; - ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(info->magic) == XFS_DIR_LEAF_MAGIC); leaf = (xfs_dir_leafblock_t *)info; count = INT_GET(leaf->hdr.count, ARCH_CONVERT); bytes = (uint)sizeof(xfs_dir_leaf_hdr_t) + @@ -1348,7 +1348,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action) * Make altpath point to the block we want to keep and * path point to the block we want to drop (this one). */ - forward = info->forw; + forward = (info->forw != 0); memcpy(&state->altpath, &state->path, sizeof(state->path)); error = xfs_da_path_shift(state, &state->altpath, forward, 0, &retval); @@ -1369,12 +1369,12 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action) * We prefer coalescing with the lower numbered sibling so as * to shrink a directory over time. */ - forward = (INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT)); /* start with smaller blk num */ + forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back)); /* start with smaller blk num */ for (i = 0; i < 2; forward = !forward, i++) { if (forward) - blkno = INT_GET(info->forw, ARCH_CONVERT); + blkno = be32_to_cpu(info->forw); else - blkno = INT_GET(info->back, ARCH_CONVERT); + blkno = be32_to_cpu(info->back); if (blkno == 0) continue; error = xfs_da_read_buf(state->args->trans, state->args->dp, @@ -1389,7 +1389,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action) bytes = state->blocksize - (state->blocksize>>2); bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); count += INT_GET(leaf->hdr.count, ARCH_CONVERT); bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); bytes -= count * ((uint)sizeof(xfs_dir_leaf_name_t) - 1); @@ -1447,7 +1447,7 @@ xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index) xfs_mount_t *mp; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); hdr = &leaf->hdr; mp = trans->t_mountp; ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) && (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8))); @@ -1599,8 +1599,8 @@ xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, ASSERT(save_blk->magic == XFS_DIR_LEAF_MAGIC); drop_leaf = drop_blk->bp->data; save_leaf = save_blk->bp->data; - ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); - ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); drop_hdr = &drop_leaf->hdr; save_hdr = &save_leaf->hdr; @@ -1695,7 +1695,7 @@ xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index) xfs_dahash_t hashval; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == 
XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8)); /* @@ -1782,8 +1782,8 @@ xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s, /* * Set up environment. */ - ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); - ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); hdr_s = &leaf_s->hdr; hdr_d = &leaf_d->hdr; ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) && (INT_GET(hdr_s->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8))); @@ -1883,8 +1883,8 @@ xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) leaf1 = leaf1_bp->data; leaf2 = leaf2_bp->data; - ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) && - (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC)); + ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC) && + (be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC)); if ((INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0) && ((INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) < INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) || @@ -1904,7 +1904,7 @@ xfs_dir_leaf_lasthash(xfs_dabuf_t *bp, int *count) xfs_dir_leafblock_t *leaf; leaf = bp->data; - ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); if (count) *count = INT_GET(leaf->hdr.count, ARCH_CONVERT); if (!leaf->hdr.count) @@ -1940,7 +1940,7 @@ xfs_dir_leaf_getdents_int( mp = dp->i_mount; leaf = bp->data; - if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) { + if (be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) { *eobp = 1; return XFS_ERROR(ENOENT); /* XXX wrong code */ } @@ -1992,7 +1992,7 @@ xfs_dir_leaf_getdents_int( if (i == INT_GET(leaf->hdr.count, ARCH_CONVERT)) { xfs_dir_trace_g_du("leaf: hash not found", dp, uio); - if (!INT_GET(leaf->hdr.info.forw, ARCH_CONVERT)) + if (!leaf->hdr.info.forw) uio->uio_offset = XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH); /* @@ -2047,8 +2047,7 @@ xfs_dir_leaf_getdents_int( xfs_dir_trace_g_duc("leaf: middle cookie ", dp, uio, p.cook.o); - } else if ((thishash = INT_GET(leaf->hdr.info.forw, - ARCH_CONVERT))) { + } else if ((thishash = be32_to_cpu(leaf->hdr.info.forw))) { xfs_dabuf_t *bp2; xfs_dir_leafblock_t *leaf2; @@ -2064,9 +2063,9 @@ xfs_dir_leaf_getdents_int( leaf2 = bp2->data; if (unlikely( - (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) + (be16_to_cpu(leaf2->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) - || (INT_GET(leaf2->hdr.info.back, ARCH_CONVERT) + || (be32_to_cpu(leaf2->hdr.info.back) != bno))) { /* GROT */ XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(3)", XFS_ERRLEVEL_LOW, mp, -- cgit v1.2.3 From 8f44e047a044df613bbc29837b9556e0c2e42e6b Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:28:47 +1100 Subject: [XFS] remove bogus INT_GET on u8 variables in xfs_dir2_block.c SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25496a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_dir2_block.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index aaf5644d8fdc..bd5cee6aa51a 100644 --- a/fs/xfs/xfs_dir2_block.c +++ 
b/fs/xfs/xfs_dir2_block.c @@ -1101,7 +1101,7 @@ xfs_dir2_sf_to_block( * Compute size of block "tail" area. */ i = (uint)sizeof(*btp) + - (INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); + (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); /* * The whole thing is initialized to free by the init routine. * Say we're using the leaf and tail area. @@ -1115,7 +1115,7 @@ xfs_dir2_sf_to_block( * Fill in the tail. */ btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - btp->count = cpu_to_be32(INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2); /* ., .. */ + btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */ btp->stale = 0; blp = XFS_DIR2_BLOCK_LEAF_P(btp); endoffset = (uint)((char *)blp - (char *)block); @@ -1157,7 +1157,7 @@ xfs_dir2_sf_to_block( /* * Loop over existing entries, stuff them in. */ - if ((i = 0) == INT_GET(sfp->hdr.count, ARCH_CONVERT)) + if ((i = 0) == sfp->hdr.count) sfep = NULL; else sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); @@ -1205,7 +1205,7 @@ xfs_dir2_sf_to_block( blp[2 + i].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block)); offset = (int)((char *)(tagp + 1) - (char *)block); - if (++i == INT_GET(sfp->hdr.count, ARCH_CONVERT)) + if (++i == sfp->hdr.count) sfep = NULL; else sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); -- cgit v1.2.3 From 918ae424e18666249cf32f16ba2803061bf1ebb7 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:28:54 +1100 Subject: [XFS] endianess annotations for xfs_attr_leaf_hdr_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25497a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_attr.c | 7 +- fs/xfs/xfs_attr_leaf.c | 414 ++++++++++++++++++++++--------------------------- fs/xfs/xfs_attr_leaf.h | 14 +- 3 files changed, 198 insertions(+), 237 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 36b120d859af..4a3f3cf6b20c 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -1772,8 +1772,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) leaf = bp->data; if (cursor->hashval > INT_GET(leaf->entries[ - INT_GET(leaf->hdr.count, - ARCH_CONVERT)-1].hashval, + be16_to_cpu(leaf->hdr.count)-1].hashval, ARCH_CONVERT)) { xfs_attr_trace_l_cl("wrong blk", context, leaf); @@ -2289,9 +2288,9 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, : 0, (__psunsigned_t)context->dupcnt, (__psunsigned_t)context->flags, - (__psunsigned_t)INT_GET(leaf->hdr.count, ARCH_CONVERT), + (__psunsigned_t)be16_to_cpu(leaf->hdr.count), (__psunsigned_t)INT_GET(leaf->entries[0].hashval, ARCH_CONVERT), - (__psunsigned_t)INT_GET(leaf->entries[INT_GET(leaf->hdr.count, ARCH_CONVERT)-1].hashval, ARCH_CONVERT)); + (__psunsigned_t)INT_GET(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval, ARCH_CONVERT)); } /* diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index d279d944e03d..31b55e58e2cf 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -724,7 +724,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) entry = &leaf->entries[0]; bytes = sizeof(struct xfs_attr_sf_hdr); - for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) { + for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { if (entry->flags & XFS_ATTR_INCOMPLETE) continue; /* don't copy partial entries */ if (!(entry->flags & XFS_ATTR_LOCAL)) @@ -808,7 +808,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) nargs.trans = args->trans; nargs.oknoent = 1; entry = 
&leaf->entries[0]; - for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) { + for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { if (entry->flags & XFS_ATTR_INCOMPLETE) continue; /* don't copy partial entries */ if (!entry->nameidx) @@ -876,7 +876,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); /* both on-disk, don't endian-flip twice */ node->btree[0].hashval = - leaf->entries[INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval; + leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; INT_SET(node->btree[0].before, ARCH_CONVERT, blkno); INT_SET(node->hdr.count, ARCH_CONVERT, 1); xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1); @@ -918,18 +918,15 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); hdr = &leaf->hdr; hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC); - INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount)); + hdr->firstused = cpu_to_be16(XFS_LBSIZE(dp->i_mount)); if (!hdr->firstused) { - INT_SET(hdr->firstused, ARCH_CONVERT, + hdr->firstused = cpu_to_be16( XFS_LBSIZE(dp->i_mount) - XFS_ATTR_LEAF_NAME_ALIGN); } - INT_SET(hdr->freemap[0].base, ARCH_CONVERT, - sizeof(xfs_attr_leaf_hdr_t)); - INT_SET(hdr->freemap[0].size, ARCH_CONVERT, - INT_GET(hdr->firstused, ARCH_CONVERT) - - INT_GET(hdr->freemap[0].base, - ARCH_CONVERT)); + hdr->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t)); + hdr->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr->firstused) - + sizeof(xfs_attr_leaf_hdr_t)); xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1); @@ -1003,7 +1000,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); ASSERT((args->index >= 0) - && (args->index <= INT_GET(leaf->hdr.count, ARCH_CONVERT))); + && (args->index <= be16_to_cpu(leaf->hdr.count))); hdr = &leaf->hdr; entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen, args->trans->t_mountp->m_sb.sb_blocksize, NULL); @@ -1012,26 +1009,25 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) * Search through freemap for first-fit on new name length. * (may need to figure in size of entry struct too) */ - tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1) + tablesize = (be16_to_cpu(hdr->count) + 1) * sizeof(xfs_attr_leaf_entry_t) + sizeof(xfs_attr_leaf_hdr_t); map = &hdr->freemap[XFS_ATTR_LEAF_MAPSIZE-1]; for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE-1; i >= 0; map--, i--) { - if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) { - sum += INT_GET(map->size, ARCH_CONVERT); + if (tablesize > be16_to_cpu(hdr->firstused)) { + sum += be16_to_cpu(map->size); continue; } if (!map->size) continue; /* no space in this map */ tmp = entsize; - if (INT_GET(map->base, ARCH_CONVERT) - < INT_GET(hdr->firstused, ARCH_CONVERT)) + if (be16_to_cpu(map->base) < be16_to_cpu(hdr->firstused)) tmp += sizeof(xfs_attr_leaf_entry_t); - if (INT_GET(map->size, ARCH_CONVERT) >= tmp) { + if (be16_to_cpu(map->size) >= tmp) { tmp = xfs_attr_leaf_add_work(bp, args, i); return(tmp); } - sum += INT_GET(map->size, ARCH_CONVERT); + sum += be16_to_cpu(map->size); } /* @@ -1052,7 +1048,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) * After compaction, the block is guaranteed to have only one * free region, in freemap[0]. If it is not big enough, give up. 
*/ - if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) + if (be16_to_cpu(hdr->freemap[0].size) < (entsize + sizeof(xfs_attr_leaf_entry_t))) return(XFS_ERROR(ENOSPC)); @@ -1078,40 +1074,39 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr = &leaf->hdr; ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE)); - ASSERT((args->index >= 0) - && (args->index <= INT_GET(hdr->count, ARCH_CONVERT))); + ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count))); /* * Force open some space in the entry array and fill it in. */ entry = &leaf->entries[args->index]; - if (args->index < INT_GET(hdr->count, ARCH_CONVERT)) { - tmp = INT_GET(hdr->count, ARCH_CONVERT) - args->index; + if (args->index < be16_to_cpu(hdr->count)) { + tmp = be16_to_cpu(hdr->count) - args->index; tmp *= sizeof(xfs_attr_leaf_entry_t); memmove((char *)(entry+1), (char *)entry, tmp); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); } - INT_MOD(hdr->count, ARCH_CONVERT, 1); + be16_add(&hdr->count, 1); /* * Allocate space for the new string (at the end of the run). */ map = &hdr->freemap[mapindex]; mp = args->trans->t_mountp; - ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp)); - ASSERT((INT_GET(map->base, ARCH_CONVERT) & 0x3) == 0); - ASSERT(INT_GET(map->size, ARCH_CONVERT) >= + ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp)); + ASSERT((be16_to_cpu(map->base) & 0x3) == 0); + ASSERT(be16_to_cpu(map->size) >= xfs_attr_leaf_newentsize(args->namelen, args->valuelen, mp->m_sb.sb_blocksize, NULL)); - ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp)); - ASSERT((INT_GET(map->size, ARCH_CONVERT) & 0x3) == 0); - INT_MOD(map->size, ARCH_CONVERT, + ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp)); + ASSERT((be16_to_cpu(map->size) & 0x3) == 0); + be16_add(&map->size, -xfs_attr_leaf_newentsize(args->namelen, args->valuelen, mp->m_sb.sb_blocksize, &tmp)); INT_SET(entry->nameidx, ARCH_CONVERT, - INT_GET(map->base, ARCH_CONVERT) - + INT_GET(map->size, ARCH_CONVERT)); + be16_to_cpu(map->base) + + be16_to_cpu(map->size)); INT_SET(entry->hashval, ARCH_CONVERT, args->hashval); entry->flags = tmp ? XFS_ATTR_LOCAL : 0; entry->flags |= (args->flags & ATTR_SECURE) ? 
XFS_ATTR_SECURE : @@ -1128,7 +1123,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) ASSERT((args->index == 0) || (INT_GET(entry->hashval, ARCH_CONVERT) >= INT_GET((entry-1)->hashval, ARCH_CONVERT))); - ASSERT((args->index == INT_GET(hdr->count, ARCH_CONVERT)-1) || + ASSERT((args->index == be16_to_cpu(hdr->count)-1) || (INT_GET(entry->hashval, ARCH_CONVERT) <= (INT_GET((entry+1)->hashval, ARCH_CONVERT)))); @@ -1167,27 +1162,23 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) * Update the control info for this leaf node */ if (INT_GET(entry->nameidx, ARCH_CONVERT) - < INT_GET(hdr->firstused, ARCH_CONVERT)) { + < be16_to_cpu(hdr->firstused)) { /* both on-disk, don't endian-flip twice */ hdr->firstused = entry->nameidx; } - ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) - >= ((INT_GET(hdr->count, ARCH_CONVERT) - * sizeof(*entry))+sizeof(*hdr))); - tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1) - * sizeof(xfs_attr_leaf_entry_t) + ASSERT(be16_to_cpu(hdr->firstused) >= + ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr))); + tmp = (be16_to_cpu(hdr->count)-1) * sizeof(xfs_attr_leaf_entry_t) + sizeof(xfs_attr_leaf_hdr_t); map = &hdr->freemap[0]; for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) { - if (INT_GET(map->base, ARCH_CONVERT) == tmp) { - INT_MOD(map->base, ARCH_CONVERT, - sizeof(xfs_attr_leaf_entry_t)); - INT_MOD(map->size, ARCH_CONVERT, - -sizeof(xfs_attr_leaf_entry_t)); + if (be16_to_cpu(map->base) == tmp) { + be16_add(&map->base, sizeof(xfs_attr_leaf_entry_t)); + be16_add(&map->size, + -((int)sizeof(xfs_attr_leaf_entry_t))); } } - INT_MOD(hdr->usedbytes, ARCH_CONVERT, - xfs_attr_leaf_entsize(leaf, args->index)); + be16_add(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index)); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); return(0); @@ -1218,28 +1209,25 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp) hdr_s = &leaf_s->hdr; hdr_d = &leaf_d->hdr; hdr_d->info = hdr_s->info; /* struct copy */ - INT_SET(hdr_d->firstused, ARCH_CONVERT, XFS_LBSIZE(mp)); + hdr_d->firstused = cpu_to_be16(XFS_LBSIZE(mp)); /* handle truncation gracefully */ if (!hdr_d->firstused) { - INT_SET(hdr_d->firstused, ARCH_CONVERT, + hdr_d->firstused = cpu_to_be16( XFS_LBSIZE(mp) - XFS_ATTR_LEAF_NAME_ALIGN); } hdr_d->usedbytes = 0; hdr_d->count = 0; hdr_d->holes = 0; - INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, - sizeof(xfs_attr_leaf_hdr_t)); - INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, - INT_GET(hdr_d->firstused, ARCH_CONVERT) - - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT)); + hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t)); + hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) - + sizeof(xfs_attr_leaf_hdr_t)); /* * Copy all entry's in the same (sorted) order, * but allocate name/value pairs packed and in sequence. */ xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0, - (int)INT_GET(hdr_s->count, ARCH_CONVERT), mp); - + be16_to_cpu(hdr_s->count), mp); xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); kmem_free(tmpbuffer, XFS_LBSIZE(mp)); @@ -1312,22 +1300,21 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, /* * Move any entries required from leaf to leaf: */ - if (count < INT_GET(hdr1->count, ARCH_CONVERT)) { + if (count < be16_to_cpu(hdr1->count)) { /* * Figure the total bytes to be added to the destination leaf. 
*/ /* number entries being moved */ - count = INT_GET(hdr1->count, ARCH_CONVERT) - count; - space = INT_GET(hdr1->usedbytes, ARCH_CONVERT) - totallen; + count = be16_to_cpu(hdr1->count) - count; + space = be16_to_cpu(hdr1->usedbytes) - totallen; space += count * sizeof(xfs_attr_leaf_entry_t); /* * leaf2 is the destination, compact it if it looks tight. */ - max = INT_GET(hdr2->firstused, ARCH_CONVERT) + max = be16_to_cpu(hdr2->firstused) - sizeof(xfs_attr_leaf_hdr_t); - max -= INT_GET(hdr2->count, ARCH_CONVERT) - * sizeof(xfs_attr_leaf_entry_t); + max -= be16_to_cpu(hdr2->count) * sizeof(xfs_attr_leaf_entry_t); if (space > max) { xfs_attr_leaf_compact(args->trans, blk2->bp); } @@ -1335,13 +1322,12 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, /* * Move high entries from leaf1 to low end of leaf2. */ - xfs_attr_leaf_moveents(leaf1, - INT_GET(hdr1->count, ARCH_CONVERT)-count, + xfs_attr_leaf_moveents(leaf1, be16_to_cpu(hdr1->count) - count, leaf2, 0, count, state->mp); xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); - } else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) { + } else if (count > be16_to_cpu(hdr1->count)) { /* * I assert that since all callers pass in an empty * second buffer, this code should never execute. @@ -1351,17 +1337,16 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Figure the total bytes to be added to the destination leaf. */ /* number entries being moved */ - count -= INT_GET(hdr1->count, ARCH_CONVERT); - space = totallen - INT_GET(hdr1->usedbytes, ARCH_CONVERT); + count -= be16_to_cpu(hdr1->count); + space = totallen - be16_to_cpu(hdr1->usedbytes); space += count * sizeof(xfs_attr_leaf_entry_t); /* * leaf1 is the destination, compact it if it looks tight. */ - max = INT_GET(hdr1->firstused, ARCH_CONVERT) + max = be16_to_cpu(hdr1->firstused) - sizeof(xfs_attr_leaf_hdr_t); - max -= INT_GET(hdr1->count, ARCH_CONVERT) - * sizeof(xfs_attr_leaf_entry_t); + max -= be16_to_cpu(hdr1->count) * sizeof(xfs_attr_leaf_entry_t); if (space > max) { xfs_attr_leaf_compact(args->trans, blk1->bp); } @@ -1370,8 +1355,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Move low entries from leaf2 to high end of leaf1. */ xfs_attr_leaf_moveents(leaf2, 0, leaf1, - (int)INT_GET(hdr1->count, ARCH_CONVERT), count, - state->mp); + be16_to_cpu(hdr1->count), count, state->mp); xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); @@ -1381,11 +1365,11 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Copy out last hashval in each block for B-tree code. */ blk1->hashval = - INT_GET(leaf1->entries[INT_GET(leaf1->hdr.count, - ARCH_CONVERT)-1].hashval, ARCH_CONVERT); + INT_GET(leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval, + ARCH_CONVERT); blk2->hashval = - INT_GET(leaf2->entries[INT_GET(leaf2->hdr.count, - ARCH_CONVERT)-1].hashval, ARCH_CONVERT); + INT_GET(leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval, + ARCH_CONVERT); /* * Adjust the expected index for insertion. @@ -1399,13 +1383,12 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * inserting. The index/blkno fields refer to the "old" entry, * while the index2/blkno2 fields refer to the "new" entry. 
*/ - if (blk1->index > INT_GET(leaf1->hdr.count, ARCH_CONVERT)) { + if (blk1->index > be16_to_cpu(leaf1->hdr.count)) { ASSERT(state->inleaf == 0); - blk2->index = blk1->index - - INT_GET(leaf1->hdr.count, ARCH_CONVERT); + blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); args->index = args->index2 = blk2->index; args->blkno = args->blkno2 = blk2->blkno; - } else if (blk1->index == INT_GET(leaf1->hdr.count, ARCH_CONVERT)) { + } else if (blk1->index == be16_to_cpu(leaf1->hdr.count)) { if (state->inleaf) { args->index = blk1->index; args->blkno = blk1->blkno; @@ -1413,7 +1396,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, args->blkno2 = blk2->blkno; } else { blk2->index = blk1->index - - INT_GET(leaf1->hdr.count, ARCH_CONVERT); + - be16_to_cpu(leaf1->hdr.count); args->index = args->index2 = blk2->index; args->blkno = args->blkno2 = blk2->blkno; } @@ -1457,15 +1440,14 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state, * Examine entries until we reduce the absolute difference in * byte usage between the two blocks to a minimum. */ - max = INT_GET(hdr1->count, ARCH_CONVERT) - + INT_GET(hdr2->count, ARCH_CONVERT); + max = be16_to_cpu(hdr1->count) + be16_to_cpu(hdr2->count); half = (max+1) * sizeof(*entry); - half += INT_GET(hdr1->usedbytes, ARCH_CONVERT) - + INT_GET(hdr2->usedbytes, ARCH_CONVERT) - + xfs_attr_leaf_newentsize( - state->args->namelen, - state->args->valuelen, - state->blocksize, NULL); + half += be16_to_cpu(hdr1->usedbytes) + + be16_to_cpu(hdr2->usedbytes) + + xfs_attr_leaf_newentsize( + state->args->namelen, + state->args->valuelen, + state->blocksize, NULL); half /= 2; lastdelta = state->blocksize; entry = &leaf1->entries[0]; @@ -1491,7 +1473,7 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state, /* * Wrap around into the second block if necessary. 
*/ - if (count == INT_GET(hdr1->count, ARCH_CONVERT)) { + if (count == be16_to_cpu(hdr1->count)) { leaf1 = leaf2; entry = &leaf1->entries[0]; index = 0; @@ -1561,10 +1543,10 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) info = blk->bp->data; ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); leaf = (xfs_attr_leafblock_t *)info; - count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + count = be16_to_cpu(leaf->hdr.count); bytes = sizeof(xfs_attr_leaf_hdr_t) + count * sizeof(xfs_attr_leaf_entry_t) + - INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT); + be16_to_cpu(leaf->hdr.usedbytes); if (bytes > (state->blocksize >> 1)) { *action = 0; /* blk over 50%, don't try to join */ return(0); @@ -1618,13 +1600,13 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) ASSERT(bp != NULL); leaf = (xfs_attr_leafblock_t *)info; - count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + count = be16_to_cpu(leaf->hdr.count); bytes = state->blocksize - (state->blocksize>>2); - bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT); + bytes -= be16_to_cpu(leaf->hdr.usedbytes); leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - count += INT_GET(leaf->hdr.count, ARCH_CONVERT); - bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT); + count += be16_to_cpu(leaf->hdr.count); + bytes -= be16_to_cpu(leaf->hdr.usedbytes); bytes -= count * sizeof(xfs_attr_leaf_entry_t); bytes -= sizeof(xfs_attr_leaf_hdr_t); xfs_da_brelse(state->args->trans, bp); @@ -1679,16 +1661,15 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr = &leaf->hdr; mp = args->trans->t_mountp; - ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) - && (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8))); + ASSERT((be16_to_cpu(hdr->count) > 0) + && (be16_to_cpu(hdr->count) < (XFS_LBSIZE(mp)/8))); ASSERT((args->index >= 0) - && (args->index < INT_GET(hdr->count, ARCH_CONVERT))); - ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) - >= ((INT_GET(hdr->count, ARCH_CONVERT) - * sizeof(*entry))+sizeof(*hdr))); + && (args->index < be16_to_cpu(hdr->count))); + ASSERT(be16_to_cpu(hdr->firstused) >= + ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr))); entry = &leaf->entries[args->index]; ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) - >= INT_GET(hdr->firstused, ARCH_CONVERT)); + >= be16_to_cpu(hdr->firstused)); ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp)); /* @@ -1697,33 +1678,30 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) * find smallest free region in case we need to replace it, * adjust any map that borders the entry table, */ - tablesize = INT_GET(hdr->count, ARCH_CONVERT) - * sizeof(xfs_attr_leaf_entry_t) + tablesize = be16_to_cpu(hdr->count) * sizeof(xfs_attr_leaf_entry_t) + sizeof(xfs_attr_leaf_hdr_t); map = &hdr->freemap[0]; - tmp = INT_GET(map->size, ARCH_CONVERT); + tmp = be16_to_cpu(map->size); before = after = -1; smallest = XFS_ATTR_LEAF_MAPSIZE - 1; entsize = xfs_attr_leaf_entsize(leaf, args->index); for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) { - ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp)); - ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp)); - if (INT_GET(map->base, ARCH_CONVERT) == tablesize) { - INT_MOD(map->base, ARCH_CONVERT, - -sizeof(xfs_attr_leaf_entry_t)); - INT_MOD(map->size, ARCH_CONVERT, - sizeof(xfs_attr_leaf_entry_t)); + ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp)); + ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp)); + if (be16_to_cpu(map->base) 
== tablesize) { + be16_add(&map->base, + -((int)sizeof(xfs_attr_leaf_entry_t))); + be16_add(&map->size, sizeof(xfs_attr_leaf_entry_t)); } - if ((INT_GET(map->base, ARCH_CONVERT) - + INT_GET(map->size, ARCH_CONVERT)) + if ((be16_to_cpu(map->base) + be16_to_cpu(map->size)) == INT_GET(entry->nameidx, ARCH_CONVERT)) { before = i; - } else if (INT_GET(map->base, ARCH_CONVERT) + } else if (be16_to_cpu(map->base) == (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) { after = i; - } else if (INT_GET(map->size, ARCH_CONVERT) < tmp) { - tmp = INT_GET(map->size, ARCH_CONVERT); + } else if (be16_to_cpu(map->size) < tmp) { + tmp = be16_to_cpu(map->size); smallest = i; } } @@ -1735,30 +1713,29 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) if ((before >= 0) || (after >= 0)) { if ((before >= 0) && (after >= 0)) { map = &hdr->freemap[before]; - INT_MOD(map->size, ARCH_CONVERT, entsize); - INT_MOD(map->size, ARCH_CONVERT, - INT_GET(hdr->freemap[after].size, - ARCH_CONVERT)); + be16_add(&map->size, entsize); + be16_add(&map->size, + be16_to_cpu(hdr->freemap[after].size)); hdr->freemap[after].base = 0; hdr->freemap[after].size = 0; } else if (before >= 0) { map = &hdr->freemap[before]; - INT_MOD(map->size, ARCH_CONVERT, entsize); + be16_add(&map->size, entsize); } else { map = &hdr->freemap[after]; /* both on-disk, don't endian flip twice */ map->base = entry->nameidx; - INT_MOD(map->size, ARCH_CONVERT, entsize); + be16_add(&map->size, entsize); } } else { /* * Replace smallest region (if it is smaller than free'd entry) */ map = &hdr->freemap[smallest]; - if (INT_GET(map->size, ARCH_CONVERT) < entsize) { - INT_SET(map->base, ARCH_CONVERT, + if (be16_to_cpu(map->size) < entsize) { + map->base = cpu_to_be16( INT_GET(entry->nameidx, ARCH_CONVERT)); - INT_SET(map->size, ARCH_CONVERT, entsize); + map->size = cpu_to_be16(entsize); } } @@ -1766,7 +1743,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) * Did we remove the first entry? */ if (INT_GET(entry->nameidx, ARCH_CONVERT) - == INT_GET(hdr->firstused, ARCH_CONVERT)) + == be16_to_cpu(hdr->firstused)) smallest = 1; else smallest = 0; @@ -1775,18 +1752,18 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) * Compress the remaining entries and zero out the removed stuff. 
*/ memset(XFS_ATTR_LEAF_NAME(leaf, args->index), 0, entsize); - INT_MOD(hdr->usedbytes, ARCH_CONVERT, -entsize); + be16_add(&hdr->usedbytes, -entsize); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index), entsize)); - tmp = (INT_GET(hdr->count, ARCH_CONVERT) - args->index) + tmp = (be16_to_cpu(hdr->count) - args->index) * sizeof(xfs_attr_leaf_entry_t); memmove((char *)entry, (char *)(entry+1), tmp); - INT_MOD(hdr->count, ARCH_CONVERT, -1); + be16_add(&hdr->count, -1); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); - entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)]; + entry = &leaf->entries[be16_to_cpu(hdr->count)]; memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t)); /* @@ -1798,18 +1775,17 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) if (smallest) { tmp = XFS_LBSIZE(mp); entry = &leaf->entries[0]; - for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; - i >= 0; entry++, i--) { + for (i = be16_to_cpu(hdr->count)-1; i >= 0; entry++, i--) { ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) - >= INT_GET(hdr->firstused, ARCH_CONVERT)); + >= be16_to_cpu(hdr->firstused)); ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp)); if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp) tmp = INT_GET(entry->nameidx, ARCH_CONVERT); } - INT_SET(hdr->firstused, ARCH_CONVERT, tmp); + hdr->firstused = cpu_to_be16(tmp); if (!hdr->firstused) { - INT_SET(hdr->firstused, ARCH_CONVERT, + hdr->firstused = cpu_to_be16( tmp - XFS_ATTR_LEAF_NAME_ALIGN); } } else { @@ -1823,9 +1799,8 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) * "join" the leaf with a sibling if so. */ tmp = sizeof(xfs_attr_leaf_hdr_t); - tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) - * sizeof(xfs_attr_leaf_entry_t); - tmp += INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT); + tmp += be16_to_cpu(leaf->hdr.count) * sizeof(xfs_attr_leaf_entry_t); + tmp += be16_to_cpu(leaf->hdr.usedbytes); return(tmp < mp->m_attr_magicpct); /* leaf is < 37% full */ } @@ -1858,9 +1833,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, * Save last hashval from dying block for later Btree fixup. */ drop_blk->hashval = - INT_GET(drop_leaf->entries[INT_GET(drop_leaf->hdr.count, - ARCH_CONVERT)-1].hashval, - ARCH_CONVERT); + INT_GET(drop_leaf->entries[be16_to_cpu(drop_leaf->hdr.count)-1] + .hashval, ARCH_CONVERT); /* * Check if we need a temp buffer, or can we do it in place. 
@@ -1874,12 +1848,11 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, */ if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) { xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, 0, - (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp); + be16_to_cpu(drop_hdr->count), mp); } else { xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, - INT_GET(save_hdr->count, ARCH_CONVERT), - (int)INT_GET(drop_hdr->count, ARCH_CONVERT), - mp); + be16_to_cpu(save_hdr->count), + be16_to_cpu(drop_hdr->count), mp); } } else { /* @@ -1893,28 +1866,24 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, tmp_hdr = &tmp_leaf->hdr; tmp_hdr->info = save_hdr->info; /* struct copy */ tmp_hdr->count = 0; - INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize); + tmp_hdr->firstused = cpu_to_be16(state->blocksize); if (!tmp_hdr->firstused) { - INT_SET(tmp_hdr->firstused, ARCH_CONVERT, + tmp_hdr->firstused = cpu_to_be16( state->blocksize - XFS_ATTR_LEAF_NAME_ALIGN); } tmp_hdr->usedbytes = 0; if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) { xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, 0, - (int)INT_GET(drop_hdr->count, ARCH_CONVERT), - mp); + be16_to_cpu(drop_hdr->count), mp); xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, - INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT), - (int)INT_GET(save_hdr->count, ARCH_CONVERT), - mp); + be16_to_cpu(tmp_leaf->hdr.count), + be16_to_cpu(save_hdr->count), mp); } else { xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, 0, - (int)INT_GET(save_hdr->count, ARCH_CONVERT), - mp); + be16_to_cpu(save_hdr->count), mp); xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, - INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT), - (int)INT_GET(drop_hdr->count, ARCH_CONVERT), - mp); + be16_to_cpu(tmp_leaf->hdr.count), + be16_to_cpu(drop_hdr->count), mp); } memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize); kmem_free(tmpbuffer, state->blocksize); @@ -1927,9 +1896,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, * Copy out last hashval in each block for B-tree code. */ save_blk->hashval = - INT_GET(save_leaf->entries[INT_GET(save_leaf->hdr.count, - ARCH_CONVERT)-1].hashval, - ARCH_CONVERT); + INT_GET(save_leaf->entries[be16_to_cpu(save_leaf->hdr.count)-1] + .hashval, ARCH_CONVERT); } /*======================================================================== @@ -1961,14 +1929,14 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) + ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); /* * Binary search. 
(note: small blocks will skip this loop) */ hashval = args->hashval; - probe = span = INT_GET(leaf->hdr.count, ARCH_CONVERT) / 2; + probe = span = be16_to_cpu(leaf->hdr.count) / 2; for (entry = &leaf->entries[probe]; span > 4; entry = &leaf->entries[probe]) { span /= 2; @@ -1981,7 +1949,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) } ASSERT((probe >= 0) && (!leaf->hdr.count - || (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)))); + || (probe < be16_to_cpu(leaf->hdr.count)))); ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT) == hashval)); @@ -1994,12 +1962,12 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) entry--; probe--; } - while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) + while ((probe < be16_to_cpu(leaf->hdr.count)) && (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) { entry++; probe++; } - if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) + if ((probe == be16_to_cpu(leaf->hdr.count)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) { args->index = probe; return(XFS_ERROR(ENOATTR)); @@ -2008,7 +1976,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) /* * Duplicate keys may be present, so search all of them for a match. */ - for ( ; (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) + for ( ; (probe < be16_to_cpu(leaf->hdr.count)) && (INT_GET(entry->hashval, ARCH_CONVERT) == hashval); entry++, probe++) { /* @@ -2078,9 +2046,9 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) + ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); - ASSERT(args->index < ((int)INT_GET(leaf->hdr.count, ARCH_CONVERT))); + ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); entry = &leaf->entries[args->index]; if (entry->flags & XFS_ATTR_LOCAL) { @@ -2149,26 +2117,25 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); hdr_s = &leaf_s->hdr; hdr_d = &leaf_d->hdr; - ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) - && (INT_GET(hdr_s->count, ARCH_CONVERT) - < (XFS_LBSIZE(mp)/8))); - ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >= - ((INT_GET(hdr_s->count, ARCH_CONVERT) + ASSERT((be16_to_cpu(hdr_s->count) > 0) && + (be16_to_cpu(hdr_s->count) < (XFS_LBSIZE(mp)/8))); + ASSERT(be16_to_cpu(hdr_s->firstused) >= + ((be16_to_cpu(hdr_s->count) * sizeof(*entry_s))+sizeof(*hdr_s))); - ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)); - ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= - ((INT_GET(hdr_d->count, ARCH_CONVERT) + ASSERT(be16_to_cpu(hdr_d->count) < (XFS_LBSIZE(mp)/8)); + ASSERT(be16_to_cpu(hdr_d->firstused) >= + ((be16_to_cpu(hdr_d->count) * sizeof(*entry_d))+sizeof(*hdr_d))); - ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT)); - ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT)); - ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT)); + ASSERT(start_s < be16_to_cpu(hdr_s->count)); + ASSERT(start_d <= be16_to_cpu(hdr_d->count)); + ASSERT(count <= be16_to_cpu(hdr_s->count)); /* * Move the entries in the destination leaf up to make a hole? 
*/ - if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) { - tmp = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d; + if (start_d < be16_to_cpu(hdr_d->count)) { + tmp = be16_to_cpu(hdr_d->count) - start_d; tmp *= sizeof(xfs_attr_leaf_entry_t); entry_s = &leaf_d->entries[start_d]; entry_d = &leaf_d->entries[start_d + count]; @@ -2184,7 +2151,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, desti = start_d; for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) { ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) - >= INT_GET(hdr_s->firstused, ARCH_CONVERT)); + >= be16_to_cpu(hdr_s->firstused)); tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i); #ifdef GROT /* @@ -2194,15 +2161,15 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, */ if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */ memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp); - INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp); - INT_MOD(hdr_s->count, ARCH_CONVERT, -1); + be16_add(&hdr_s->usedbytes, -tmp); + be16_add(&hdr_s->count, -1); entry_d--; /* to compensate for ++ in loop hdr */ desti--; if ((start_s + i) < offset) result++; /* insertion index adjustment */ } else { #endif /* GROT */ - INT_MOD(hdr_d->firstused, ARCH_CONVERT, -tmp); + be16_add(&hdr_d->firstused, -tmp); /* both on-disk, don't endian flip twice */ entry_d->hashval = entry_s->hashval; /* both on-disk, don't endian flip twice */ @@ -2215,14 +2182,14 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp)); memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp); - INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp); - INT_MOD(hdr_d->usedbytes, ARCH_CONVERT, tmp); - INT_MOD(hdr_s->count, ARCH_CONVERT, -1); - INT_MOD(hdr_d->count, ARCH_CONVERT, 1); - tmp = INT_GET(hdr_d->count, ARCH_CONVERT) + be16_add(&hdr_s->usedbytes, -tmp); + be16_add(&hdr_d->usedbytes, tmp); + be16_add(&hdr_s->count, -1); + be16_add(&hdr_d->count, 1); + tmp = be16_to_cpu(hdr_d->count) * sizeof(xfs_attr_leaf_entry_t) + sizeof(xfs_attr_leaf_hdr_t); - ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp); + ASSERT(be16_to_cpu(hdr_d->firstused) >= tmp); #ifdef GROT } #endif /* GROT */ @@ -2231,7 +2198,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, /* * Zero out the entries we just copied. */ - if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) { + if (start_s == be16_to_cpu(hdr_s->count)) { tmp = count * sizeof(xfs_attr_leaf_entry_t); entry_s = &leaf_s->entries[start_s]; ASSERT(((char *)entry_s + tmp) <= @@ -2242,15 +2209,14 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, * Move the remaining entries down to fill the hole, * then zero the entries at the top. 
*/ - tmp = INT_GET(hdr_s->count, ARCH_CONVERT) - count; + tmp = be16_to_cpu(hdr_s->count) - count; tmp *= sizeof(xfs_attr_leaf_entry_t); entry_s = &leaf_s->entries[start_s + count]; entry_d = &leaf_s->entries[start_s]; memmove((char *)entry_d, (char *)entry_s, tmp); tmp = count * sizeof(xfs_attr_leaf_entry_t); - entry_s = &leaf_s->entries[INT_GET(hdr_s->count, - ARCH_CONVERT)]; + entry_s = &leaf_s->entries[be16_to_cpu(hdr_s->count)]; ASSERT(((char *)entry_s + tmp) <= ((char *)leaf_s + XFS_LBSIZE(mp))); memset((char *)entry_s, 0, tmp); @@ -2259,14 +2225,11 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, /* * Fill in the freemap information */ - INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, - sizeof(xfs_attr_leaf_hdr_t)); - INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT, - INT_GET(hdr_d->count, ARCH_CONVERT) - * sizeof(xfs_attr_leaf_entry_t)); - INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, - INT_GET(hdr_d->firstused, ARCH_CONVERT) - - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT)); + hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t)); + be16_add(&hdr_d->freemap[0].base, be16_to_cpu(hdr_d->count) * + sizeof(xfs_attr_leaf_entry_t)); + hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) + - be16_to_cpu(hdr_d->freemap[0].base)); hdr_d->freemap[1].base = 0; hdr_d->freemap[2].base = 0; hdr_d->freemap[1].size = 0; @@ -2287,14 +2250,14 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) leaf2 = leaf2_bp->data; ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) && (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC)); - if ( (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0) - && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0) - && ( (INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) < - INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) - || (INT_GET(leaf2->entries[INT_GET(leaf2->hdr.count, - ARCH_CONVERT)-1].hashval, ARCH_CONVERT) < - INT_GET(leaf1->entries[INT_GET(leaf1->hdr.count, - ARCH_CONVERT)-1].hashval, ARCH_CONVERT))) ) { + if ((be16_to_cpu(leaf1->hdr.count) > 0) && + (be16_to_cpu(leaf2->hdr.count) > 0) && + ((INT_GET(leaf2->entries[0].hashval, ARCH_CONVERT) < + INT_GET(leaf1->entries[0].hashval, ARCH_CONVERT)) || + (INT_GET(leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval, + ARCH_CONVERT) < + INT_GET(leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval, + ARCH_CONVERT)))) { return(1); } return(0); @@ -2311,11 +2274,11 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); if (count) - *count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) return(0); - return(INT_GET(leaf->entries[INT_GET(leaf->hdr.count, - ARCH_CONVERT)-1].hashval, ARCH_CONVERT)); + return (INT_GET(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval, + ARCH_CONVERT)); } /* @@ -2392,8 +2355,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) */ if (context->resynch) { entry = &leaf->entries[0]; - for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); - entry++, i++) { + for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { if (INT_GET(entry->hashval, ARCH_CONVERT) == cursor->hashval) { if (cursor->offset == context->dupcnt) { @@ -2407,7 +2369,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) break; } } - if (i == INT_GET(leaf->hdr.count, ARCH_CONVERT)) { + if (i == be16_to_cpu(leaf->hdr.count)) { xfs_attr_trace_l_c("not found", 
context); return(0); } @@ -2421,7 +2383,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) * We have found our place, start copying out the new attributes. */ retval = 0; - for ( ; (i < INT_GET(leaf->hdr.count, ARCH_CONVERT)) + for ( ; (i < be16_to_cpu(leaf->hdr.count)) && (retval == 0); entry++, i++) { attrnames_t *namesp; @@ -2577,7 +2539,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(args->index < INT_GET(leaf->hdr.count, ARCH_CONVERT)); + ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; ASSERT(entry->flags & XFS_ATTR_INCOMPLETE); @@ -2643,7 +2605,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(args->index < INT_GET(leaf->hdr.count, ARCH_CONVERT)); + ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; @@ -2715,13 +2677,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) leaf1 = bp1->data; ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(args->index < INT_GET(leaf1->hdr.count, ARCH_CONVERT)); + ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); ASSERT(args->index >= 0); entry1 = &leaf1->entries[ args->index ]; leaf2 = bp2->data; ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(args->index2 < INT_GET(leaf2->hdr.count, ARCH_CONVERT)); + ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); ASSERT(args->index2 >= 0); entry2 = &leaf2->entries[ args->index2 ]; @@ -2971,7 +2933,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) */ count = 0; entry = &leaf->entries[0]; - for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) { + for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { if ( INT_GET(entry->nameidx, ARCH_CONVERT) && ((entry->flags & XFS_ATTR_LOCAL) == 0)) { name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); @@ -2999,7 +2961,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) */ lp = list; entry = &leaf->entries[0]; - for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) { + for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { if ( INT_GET(entry->nameidx, ARCH_CONVERT) && ((entry->flags & XFS_ATTR_LOCAL) == 0)) { name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 541e34109bb9..52b771052c57 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -73,17 +73,17 @@ struct xfs_trans; #define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ - __uint16_t base; /* base of free region */ - __uint16_t size; /* length of free region */ + __be16 base; /* base of free region */ + __be16 size; /* length of free region */ } xfs_attr_leaf_map_t; typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */ xfs_da_blkinfo_t info; /* block type, links, etc. 
*/ - __uint16_t count; /* count of active leaf_entry's */ - __uint16_t usedbytes; /* num bytes of names/values stored */ - __uint16_t firstused; /* first used byte in name area */ - __uint8_t holes; /* != 0 if blk needs compaction */ - __uint8_t pad1; + __be16 count; /* count of active leaf_entry's */ + __be16 usedbytes; /* num bytes of names/values stored */ + __be16 firstused; /* first used byte in name area */ + __u8 holes; /* != 0 if blk needs compaction */ + __u8 pad1; xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */ } xfs_attr_leaf_hdr_t; -- cgit v1.2.3 From 6b19f2d87da9908acf1e0f48b4e79cf8bc833811 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:29:02 +1100 Subject: [XFS] endianess annotations for xfs_attr_leaf_entry_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25498a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_attr.c | 14 +++--- fs/xfs/xfs_attr_leaf.c | 133 ++++++++++++++++++++++--------------------------- fs/xfs/xfs_attr_leaf.h | 19 ++++--- 3 files changed, 74 insertions(+), 92 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 4a3f3cf6b20c..98d0f4d10602 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -1770,17 +1770,14 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) break; case XFS_ATTR_LEAF_MAGIC: leaf = bp->data; - if (cursor->hashval > - INT_GET(leaf->entries[ - be16_to_cpu(leaf->hdr.count)-1].hashval, - ARCH_CONVERT)) { + if (cursor->hashval > be32_to_cpu(leaf->entries[ + be16_to_cpu(leaf->hdr.count)-1].hashval)) { xfs_attr_trace_l_cl("wrong blk", context, leaf); xfs_da_brelse(NULL, bp); bp = NULL; } else if (cursor->hashval <= - INT_GET(leaf->entries[0].hashval, - ARCH_CONVERT)) { + be32_to_cpu(leaf->entries[0].hashval)) { xfs_attr_trace_l_cl("maybe wrong blk", context, leaf); xfs_da_brelse(NULL, bp); @@ -2289,8 +2286,9 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, (__psunsigned_t)context->dupcnt, (__psunsigned_t)context->flags, (__psunsigned_t)be16_to_cpu(leaf->hdr.count), - (__psunsigned_t)INT_GET(leaf->entries[0].hashval, ARCH_CONVERT), - (__psunsigned_t)INT_GET(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval, ARCH_CONVERT)); + (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval), + (__psunsigned_t)be32_to_cpu(leaf->entries[ + be16_to_cpu(leaf->hdr.count)-1].hashval)); } /* diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 31b55e58e2cf..319285c24e42 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -819,7 +819,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) nargs.namelen = name_loc->namelen; nargs.value = (char *)&name_loc->nameval[nargs.namelen]; nargs.valuelen = INT_GET(name_loc->valuelen, ARCH_CONVERT); - nargs.hashval = INT_GET(entry->hashval, ARCH_CONVERT); + nargs.hashval = be32_to_cpu(entry->hashval); nargs.flags = (entry->flags & XFS_ATTR_SECURE) ? ATTR_SECURE : ((entry->flags & XFS_ATTR_ROOT) ? 
ATTR_ROOT : 0); xfs_attr_shortform_add(&nargs, forkoff); @@ -1104,10 +1104,9 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) be16_add(&map->size, -xfs_attr_leaf_newentsize(args->namelen, args->valuelen, mp->m_sb.sb_blocksize, &tmp)); - INT_SET(entry->nameidx, ARCH_CONVERT, - be16_to_cpu(map->base) - + be16_to_cpu(map->size)); - INT_SET(entry->hashval, ARCH_CONVERT, args->hashval); + entry->nameidx = cpu_to_be16(be16_to_cpu(map->base) + + be16_to_cpu(map->size)); + entry->hashval = cpu_to_be32(args->hashval); entry->flags = tmp ? XFS_ATTR_LOCAL : 0; entry->flags |= (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE : ((args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0); @@ -1120,12 +1119,10 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) } xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); - ASSERT((args->index == 0) || (INT_GET(entry->hashval, ARCH_CONVERT) - >= INT_GET((entry-1)->hashval, - ARCH_CONVERT))); + ASSERT((args->index == 0) || + (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval))); ASSERT((args->index == be16_to_cpu(hdr->count)-1) || - (INT_GET(entry->hashval, ARCH_CONVERT) - <= (INT_GET((entry+1)->hashval, ARCH_CONVERT)))); + (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); /* * Copy the attribute name and value into the new space. @@ -1161,8 +1158,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) /* * Update the control info for this leaf node */ - if (INT_GET(entry->nameidx, ARCH_CONVERT) - < be16_to_cpu(hdr->firstused)) { + if (be16_to_cpu(entry->nameidx) < be16_to_cpu(hdr->firstused)) { /* both on-disk, don't endian-flip twice */ hdr->firstused = entry->nameidx; } @@ -1364,12 +1360,10 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, /* * Copy out last hashval in each block for B-tree code. */ - blk1->hashval = - INT_GET(leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval, - ARCH_CONVERT); - blk2->hashval = - INT_GET(leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval, - ARCH_CONVERT); + blk1->hashval = be32_to_cpu( + leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval); + blk2->hashval = be32_to_cpu( + leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval); /* * Adjust the expected index for insertion. 
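/*
 * A recurring comment in this patch is "both on-disk, don't endian-flip
 * twice": when one big-endian field is copied straight into another
 * big-endian field (e.g. hdr->firstused = entry->nameidx above), no
 * conversion helper is used, since converting would corrupt the value.
 * Conversions appear only where a value crosses into CPU arithmetic or
 * comparisons.  Minimal illustration below; the structs are generic
 * stand-ins, not the real xfs_attr_leaf_* layouts.
 */
#include <stdint.h>
#include <arpa/inet.h>

struct disk_entry { uint32_t hashval; uint16_t nameidx; };	/* stored big-endian */
struct disk_hdr   { uint16_t firstused; };			/* stored big-endian */

/* Record the entry's name offset and return its hash in CPU order. */
static uint32_t note_entry(struct disk_hdr *hdr, const struct disk_entry *e)
{
	hdr->firstused = e->nameidx;	/* disk -> disk: plain copy, no byte swap */
	return ntohl(e->hashval);	/* disk -> CPU: convert before comparing */
}

int main(void)
{
	struct disk_entry e = { htonl(0xcafe), htons(64) };
	struct disk_hdr h;

	return (note_entry(&h, &e) == 0xcafe && h.firstused == htons(64)) ? 0 : 1;
}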
@@ -1668,9 +1662,8 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) ASSERT(be16_to_cpu(hdr->firstused) >= ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr))); entry = &leaf->entries[args->index]; - ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) - >= be16_to_cpu(hdr->firstused)); - ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp)); + ASSERT(be16_to_cpu(entry->nameidx) >= be16_to_cpu(hdr->firstused)); + ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); /* * Scan through free region table: @@ -1695,10 +1688,10 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) } if ((be16_to_cpu(map->base) + be16_to_cpu(map->size)) - == INT_GET(entry->nameidx, ARCH_CONVERT)) { + == be16_to_cpu(entry->nameidx)) { before = i; } else if (be16_to_cpu(map->base) - == (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) { + == (be16_to_cpu(entry->nameidx) + entsize)) { after = i; } else if (be16_to_cpu(map->size) < tmp) { tmp = be16_to_cpu(map->size); @@ -1733,8 +1726,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) */ map = &hdr->freemap[smallest]; if (be16_to_cpu(map->size) < entsize) { - map->base = cpu_to_be16( - INT_GET(entry->nameidx, ARCH_CONVERT)); + map->base = cpu_to_be16(be16_to_cpu(entry->nameidx)); map->size = cpu_to_be16(entsize); } } @@ -1742,8 +1734,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) /* * Did we remove the first entry? */ - if (INT_GET(entry->nameidx, ARCH_CONVERT) - == be16_to_cpu(hdr->firstused)) + if (be16_to_cpu(entry->nameidx) == be16_to_cpu(hdr->firstused)) smallest = 1; else smallest = 0; @@ -1776,12 +1767,12 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) tmp = XFS_LBSIZE(mp); entry = &leaf->entries[0]; for (i = be16_to_cpu(hdr->count)-1; i >= 0; entry++, i--) { - ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) - >= be16_to_cpu(hdr->firstused)); - ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) - < XFS_LBSIZE(mp)); - if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp) - tmp = INT_GET(entry->nameidx, ARCH_CONVERT); + ASSERT(be16_to_cpu(entry->nameidx) >= + be16_to_cpu(hdr->firstused)); + ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); + + if (be16_to_cpu(entry->nameidx) < tmp) + tmp = be16_to_cpu(entry->nameidx); } hdr->firstused = cpu_to_be16(tmp); if (!hdr->firstused) { @@ -1832,9 +1823,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, /* * Save last hashval from dying block for later Btree fixup. */ - drop_blk->hashval = - INT_GET(drop_leaf->entries[be16_to_cpu(drop_leaf->hdr.count)-1] - .hashval, ARCH_CONVERT); + drop_blk->hashval = be32_to_cpu( + drop_leaf->entries[be16_to_cpu(drop_leaf->hdr.count)-1].hashval); /* * Check if we need a temp buffer, or can we do it in place. @@ -1895,9 +1885,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, /* * Copy out last hashval in each block for B-tree code. 
*/ - save_blk->hashval = - INT_GET(save_leaf->entries[be16_to_cpu(save_leaf->hdr.count)-1] - .hashval, ARCH_CONVERT); + save_blk->hashval = be32_to_cpu( + save_leaf->entries[be16_to_cpu(save_leaf->hdr.count)-1].hashval); } /*======================================================================== @@ -1940,9 +1929,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) for (entry = &leaf->entries[probe]; span > 4; entry = &leaf->entries[probe]) { span /= 2; - if (INT_GET(entry->hashval, ARCH_CONVERT) < hashval) + if (be32_to_cpu(entry->hashval) < hashval) probe += span; - else if (INT_GET(entry->hashval, ARCH_CONVERT) > hashval) + else if (be32_to_cpu(entry->hashval) > hashval) probe -= span; else break; @@ -1950,25 +1939,23 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) ASSERT((probe >= 0) && (!leaf->hdr.count || (probe < be16_to_cpu(leaf->hdr.count)))); - ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT) - == hashval)); + ASSERT((span <= 4) || (be32_to_cpu(entry->hashval) == hashval)); /* * Since we may have duplicate hashval's, find the first matching * hashval in the leaf. */ - while ((probe > 0) && (INT_GET(entry->hashval, ARCH_CONVERT) - >= hashval)) { + while ((probe > 0) && (be32_to_cpu(entry->hashval) >= hashval)) { entry--; probe--; } - while ((probe < be16_to_cpu(leaf->hdr.count)) - && (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) { + while ((probe < be16_to_cpu(leaf->hdr.count)) && + (be32_to_cpu(entry->hashval) < hashval)) { entry++; probe++; } - if ((probe == be16_to_cpu(leaf->hdr.count)) - || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) { + if ((probe == be16_to_cpu(leaf->hdr.count)) || + (be32_to_cpu(entry->hashval) != hashval)) { args->index = probe; return(XFS_ERROR(ENOATTR)); } @@ -1976,8 +1963,8 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) /* * Duplicate keys may be present, so search all of them for a match. */ - for ( ; (probe < be16_to_cpu(leaf->hdr.count)) - && (INT_GET(entry->hashval, ARCH_CONVERT) == hashval); + for ( ; (probe < be16_to_cpu(leaf->hdr.count)) && + (be32_to_cpu(entry->hashval) == hashval); entry++, probe++) { /* * GROT: Add code to remove incomplete entries. 
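/*
 * The lookup path converted above keeps a fixed shape: binary-search the
 * sorted hashval array (each on-disk key goes through be32_to_cpu() before
 * the comparison), then, since different names may hash to the same value,
 * back up to the first entry with that hashval and scan forward through the
 * duplicates.  The stand-alone sketch below mirrors that probe/span
 * narrowing over a plain CPU-order array; the function name is made up for
 * illustration.
 */
#include <stdint.h>

/* Return the index of the first entry whose hash equals 'want',
 * or 'count' if there is none. */
static int first_match(const uint32_t *hash, int count, uint32_t want)
{
	int probe = count / 2, span = count / 2;

	while (span > 4) {			/* small arrays skip this loop */
		span /= 2;
		if (hash[probe] < want)
			probe += span;
		else if (hash[probe] > want)
			probe -= span;
		else
			break;
	}
	while (probe > 0 && hash[probe] >= want)	/* back up past duplicates */
		probe--;
	while (probe < count && hash[probe] < want)	/* settle on first >= want */
		probe++;
	return (probe < count && hash[probe] == want) ? probe : count;
}

int main(void)
{
	const uint32_t h[] = { 5, 9, 9, 9, 12, 20 };

	return first_match(h, 6, 9) == 1 ? 0 : 1;	/* first of the duplicate 9s */
}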
@@ -2150,7 +2137,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, entry_d = &leaf_d->entries[start_d]; desti = start_d; for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) { - ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + ASSERT(be16_to_cpu(entry_s->nameidx) >= be16_to_cpu(hdr_s->firstused)); tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i); #ifdef GROT @@ -2175,11 +2162,11 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, /* both on-disk, don't endian flip twice */ entry_d->nameidx = hdr_d->firstused; entry_d->flags = entry_s->flags; - ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp + ASSERT(be16_to_cpu(entry_d->nameidx) + tmp <= XFS_LBSIZE(mp)); memmove(XFS_ATTR_LEAF_NAME(leaf_d, desti), XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp); - ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp + ASSERT(be16_to_cpu(entry_s->nameidx) + tmp <= XFS_LBSIZE(mp)); memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp); be16_add(&hdr_s->usedbytes, -tmp); @@ -2252,12 +2239,12 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC)); if ((be16_to_cpu(leaf1->hdr.count) > 0) && (be16_to_cpu(leaf2->hdr.count) > 0) && - ((INT_GET(leaf2->entries[0].hashval, ARCH_CONVERT) < - INT_GET(leaf1->entries[0].hashval, ARCH_CONVERT)) || - (INT_GET(leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval, - ARCH_CONVERT) < - INT_GET(leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval, - ARCH_CONVERT)))) { + ((be32_to_cpu(leaf2->entries[0].hashval) < + be32_to_cpu(leaf1->entries[0].hashval)) || + (be32_to_cpu(leaf2->entries[ + be16_to_cpu(leaf2->hdr.count)-1].hashval) < + be32_to_cpu(leaf1->entries[ + be16_to_cpu(leaf1->hdr.count)-1].hashval)))) { return(1); } return(0); @@ -2277,8 +2264,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) return(0); - return (INT_GET(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval, - ARCH_CONVERT)); + return be32_to_cpu(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval); } /* @@ -2356,15 +2342,14 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) if (context->resynch) { entry = &leaf->entries[0]; for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { - if (INT_GET(entry->hashval, ARCH_CONVERT) - == cursor->hashval) { + if (be32_to_cpu(entry->hashval) == cursor->hashval) { if (cursor->offset == context->dupcnt) { context->dupcnt = 0; break; } context->dupcnt++; - } else if (INT_GET(entry->hashval, ARCH_CONVERT) - > cursor->hashval) { + } else if (be32_to_cpu(entry->hashval) > + cursor->hashval) { context->dupcnt = 0; break; } @@ -2387,8 +2372,8 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) && (retval == 0); entry++, i++) { attrnames_t *namesp; - if (INT_GET(entry->hashval, ARCH_CONVERT) != cursor->hashval) { - cursor->hashval = INT_GET(entry->hashval, ARCH_CONVERT); + if (be32_to_cpu(entry->hashval) != cursor->hashval) { + cursor->hashval = be32_to_cpu(entry->hashval); cursor->offset = 0; } @@ -2554,7 +2539,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) namelen = name_rmt->namelen; name = (char *)name_rmt->name; } - ASSERT(INT_GET(entry->hashval, ARCH_CONVERT) == args->hashval); + ASSERT(be32_to_cpu(entry->hashval) == args->hashval); ASSERT(namelen == args->namelen); ASSERT(memcmp(name, args->name, namelen) == 0); #endif /* DEBUG */ @@ -2706,7 +2691,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) namelen2 = 
name_rmt->namelen; name2 = (char *)name_rmt->name; } - ASSERT(INT_GET(entry1->hashval, ARCH_CONVERT) == INT_GET(entry2->hashval, ARCH_CONVERT)); + ASSERT(be32_to_cpu(entry1->hashval) == be32_to_cpu(entry2->hashval)); ASSERT(namelen1 == namelen2); ASSERT(memcmp(name1, name2, namelen1) == 0); #endif /* DEBUG */ @@ -2934,8 +2919,8 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) count = 0; entry = &leaf->entries[0]; for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { - if ( INT_GET(entry->nameidx, ARCH_CONVERT) - && ((entry->flags & XFS_ATTR_LOCAL) == 0)) { + if (be16_to_cpu(entry->nameidx) && + ((entry->flags & XFS_ATTR_LOCAL) == 0)) { name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); if (name_rmt->valueblk) count++; @@ -2962,8 +2947,8 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) lp = list; entry = &leaf->entries[0]; for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { - if ( INT_GET(entry->nameidx, ARCH_CONVERT) - && ((entry->flags & XFS_ATTR_LOCAL) == 0)) { + if (be16_to_cpu(entry->nameidx) && + ((entry->flags & XFS_ATTR_LOCAL) == 0)) { name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); if (name_rmt->valueblk) { /* both on-disk, don't endian flip twice */ diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 52b771052c57..7fbe4880bb83 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -89,10 +89,10 @@ typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */ } xfs_attr_leaf_hdr_t; typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */ - xfs_dahash_t hashval; /* hash value of name */ - __uint16_t nameidx; /* index into buffer of name/value */ - __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */ - __uint8_t pad2; /* unused pad byte */ + __be32 hashval; /* hash value of name */ + __be16 nameidx; /* index into buffer of name/value */ + __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */ + __u8 pad2; /* unused pad byte */ } xfs_attr_leaf_entry_t; typedef struct xfs_attr_leaf_name_local { @@ -143,8 +143,8 @@ typedef struct xfs_attr_leafblock { static inline xfs_attr_leaf_name_remote_t * xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) { - return (xfs_attr_leaf_name_remote_t *) &((char *) - (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)]; + return (xfs_attr_leaf_name_remote_t *) + &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)]; } #define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx) \ @@ -152,16 +152,15 @@ xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) static inline xfs_attr_leaf_name_local_t * xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) { - return (xfs_attr_leaf_name_local_t *) &((char *) - (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)]; + return (xfs_attr_leaf_name_local_t *) + &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)]; } #define XFS_ATTR_LEAF_NAME(leafp,idx) \ xfs_attr_leaf_name(leafp,idx) static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx) { - return (&((char *) - (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)]); + return &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)]; } /* -- cgit v1.2.3 From 053b5758cbc096a3f718858f4da9341afbc56b7d Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:29:09 +1100 Subject: [XFS] endianess annotations for xfs_attr_leaf_name_local_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25499a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- 
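As in the neighbouring patches of this series, the "endianess annotations" amount to retyping raw on-disk integers (__uint16_t, __uint8_t) as __be16/__u8 and routing every access through be16_to_cpu()/cpu_to_be16(). Because __be16 is a sparse-checkable ("bitwise") type, a forgotten conversion then shows up as a static type warning instead of silent corruption on little-endian machines. The sketch below only illustrates the before/after shape; the struct and helper names are generic stand-ins, not the exact XFS ones.

#include <stdint.h>
#include <arpa/inet.h>

/* Before: plain integer; every caller must remember the byte swap. */
struct name_local_old {
	uint16_t valuelen;	/* big-endian on disk, but the type doesn't say so */
	uint8_t  namelen;
};

/* After: the field type records "big-endian on disk". */
typedef uint16_t be16_x;	/* plays the role of the kernel's __be16 */
struct name_local_new {
	be16_x  valuelen;
	uint8_t namelen;
};

/* All reads go through one conversion point (stand-in for be16_to_cpu). */
static inline uint16_t valuelen_cpu(const struct name_local_new *n)
{
	return ntohs(n->valuelen);
}

int main(void)
{
	struct name_local_new n = { htons(12), 3 };

	return valuelen_cpu(&n) == 12 ? 0 : 1;
}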
fs/xfs/xfs_attr_leaf.c | 18 ++++++++---------- fs/xfs/xfs_attr_leaf.h | 6 +++--- 2 files changed, 11 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 319285c24e42..3adfc5dcfe7d 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -732,11 +732,11 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) return(0); - if (INT_GET(name_loc->valuelen, ARCH_CONVERT) >= XFS_ATTR_SF_ENTSIZE_MAX) + if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) return(0); bytes += sizeof(struct xfs_attr_sf_entry)-1 + name_loc->namelen - + INT_GET(name_loc->valuelen, ARCH_CONVERT); + + be16_to_cpu(name_loc->valuelen); } if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) && (bytes == sizeof(struct xfs_attr_sf_hdr))) @@ -818,7 +818,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) nargs.name = (char *)name_loc->nameval; nargs.namelen = name_loc->namelen; nargs.value = (char *)&name_loc->nameval[nargs.namelen]; - nargs.valuelen = INT_GET(name_loc->valuelen, ARCH_CONVERT); + nargs.valuelen = be16_to_cpu(name_loc->valuelen); nargs.hashval = be32_to_cpu(entry->hashval); nargs.flags = (entry->flags & XFS_ATTR_SECURE) ? ATTR_SECURE : ((entry->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0); @@ -1136,10 +1136,10 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) if (entry->flags & XFS_ATTR_LOCAL) { name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index); name_loc->namelen = args->namelen; - INT_SET(name_loc->valuelen, ARCH_CONVERT, args->valuelen); + name_loc->valuelen = cpu_to_be16(args->valuelen); memcpy((char *)name_loc->nameval, args->name, args->namelen); memcpy((char *)&name_loc->nameval[args->namelen], args->value, - INT_GET(name_loc->valuelen, ARCH_CONVERT)); + be16_to_cpu(name_loc->valuelen)); } else { name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index); name_rmt->namelen = args->namelen; @@ -2042,7 +2042,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index); ASSERT(name_loc->namelen == args->namelen); ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0); - valuelen = INT_GET(name_loc->valuelen, ARCH_CONVERT); + valuelen = be16_to_cpu(name_loc->valuelen); if (args->flags & ATTR_KERNOVAL) { args->valuelen = valuelen; return(0); @@ -2282,8 +2282,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, index); size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(name_loc->namelen, - INT_GET(name_loc->valuelen, - ARCH_CONVERT)); + be16_to_cpu(name_loc->valuelen)); } else { name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, index); size = XFS_ATTR_LEAF_ENTSIZE_REMOTE(name_rmt->namelen); @@ -2402,8 +2401,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) retval = xfs_attr_put_listent(context, namesp, (char *)name_loc->nameval, (int)name_loc->namelen, - (int)INT_GET(name_loc->valuelen, - ARCH_CONVERT)); + be16_to_cpu(name_loc->valuelen)); } } else { name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 7fbe4880bb83..ad6f791ec58a 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -96,9 +96,9 @@ typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */ } xfs_attr_leaf_entry_t; typedef struct xfs_attr_leaf_name_local 
{ - __uint16_t valuelen; /* number of bytes in value */ - __uint8_t namelen; /* length of name bytes */ - __uint8_t nameval[1]; /* name/value bytes */ + __be16 valuelen; /* number of bytes in value */ + __u8 namelen; /* length of name bytes */ + __u8 nameval[1]; /* name/value bytes */ } xfs_attr_leaf_name_local_t; typedef struct xfs_attr_leaf_name_remote { -- cgit v1.2.3 From c0f054e7a44b4bbed8e16dd459f33df63dad98ef Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:29:18 +1100 Subject: [XFS] endianess annotations for xfs_attr_leaf_name_remote_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25500a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_attr_leaf.c | 24 ++++++++++-------------- fs/xfs/xfs_attr_leaf.h | 8 ++++---- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 3adfc5dcfe7d..531417b2c74c 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -2006,11 +2006,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) ((entry->flags & XFS_ATTR_ROOT) != 0)) continue; args->index = probe; - args->rmtblkno - = INT_GET(name_rmt->valueblk, ARCH_CONVERT); + args->rmtblkno = be32_to_cpu(name_rmt->valueblk); args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, - INT_GET(name_rmt->valuelen, - ARCH_CONVERT)); + be32_to_cpu(name_rmt->valuelen)); return(XFS_ERROR(EEXIST)); } } @@ -2057,8 +2055,8 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index); ASSERT(name_rmt->namelen == args->namelen); ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); - valuelen = INT_GET(name_rmt->valuelen, ARCH_CONVERT); - args->rmtblkno = INT_GET(name_rmt->valueblk, ARCH_CONVERT); + valuelen = be32_to_cpu(name_rmt->valuelen); + args->rmtblkno = be32_to_cpu(name_rmt->valueblk); args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen); if (args->flags & ATTR_KERNOVAL) { args->valuelen = valuelen; @@ -2413,8 +2411,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) retval = xfs_attr_put_listent(context, namesp, (char *)name_rmt->name, (int)name_rmt->namelen, - (int)INT_GET(name_rmt->valuelen, - ARCH_CONVERT)); + be32_to_cpu(name_rmt->valuelen)); } } if (retval == 0) { @@ -2549,8 +2546,8 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) if (args->rmtblkno) { ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index); - INT_SET(name_rmt->valueblk, ARCH_CONVERT, args->rmtblkno); - INT_SET(name_rmt->valuelen, ARCH_CONVERT, args->valuelen); + name_rmt->valueblk = cpu_to_be32(args->rmtblkno); + name_rmt->valuelen = cpu_to_be32(args->valuelen); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); } @@ -2703,8 +2700,8 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) if (args->rmtblkno) { ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf1, args->index); - INT_SET(name_rmt->valueblk, ARCH_CONVERT, args->rmtblkno); - INT_SET(name_rmt->valuelen, ARCH_CONVERT, args->valuelen); + name_rmt->valueblk = cpu_to_be32(args->rmtblkno); + name_rmt->valuelen = cpu_to_be32(args->valuelen); xfs_da_log_buf(args->trans, bp1, XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt))); } @@ -2953,8 +2950,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) lp->valueblk = name_rmt->valueblk; INT_SET(lp->valuelen, ARCH_CONVERT, XFS_B_TO_FSB(dp->i_mount, - 
INT_GET(name_rmt->valuelen, - ARCH_CONVERT))); + be32_to_cpu(name_rmt->valuelen))); lp++; } } diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index ad6f791ec58a..51c3ee156b2f 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -102,10 +102,10 @@ typedef struct xfs_attr_leaf_name_local { } xfs_attr_leaf_name_local_t; typedef struct xfs_attr_leaf_name_remote { - xfs_dablk_t valueblk; /* block number of value bytes */ - __uint32_t valuelen; /* number of bytes in value */ - __uint8_t namelen; /* length of name bytes */ - __uint8_t name[1]; /* name bytes */ + __be32 valueblk; /* block number of value bytes */ + __be32 valuelen; /* number of bytes in value */ + __u8 namelen; /* length of name bytes */ + __u8 name[1]; /* name bytes */ } xfs_attr_leaf_name_remote_t; typedef struct xfs_attr_leafblock { -- cgit v1.2.3 From 3b244aa81ecb06859e0c16abf4c26404c1d86591 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:29:25 +1100 Subject: [XFS] endianess annotations for xfs_attr_shortform_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25501a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_attr_leaf.c | 55 +++++++++++++++++++++++--------------------------- fs/xfs/xfs_attr_sf.h | 8 ++++---- fs/xfs/xfs_inode.c | 2 +- 3 files changed, 30 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 531417b2c74c..37d2e10f874c 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -194,7 +194,7 @@ xfs_attr_shortform_create(xfs_da_args_t *args) xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK); hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data; hdr->count = 0; - INT_SET(hdr->totsize, ARCH_CONVERT, sizeof(*hdr)); + hdr->totsize = cpu_to_be16(sizeof(*hdr)); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); } @@ -224,8 +224,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) ASSERT(ifp->if_flags & XFS_IFINLINE); sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; sfe = &sf->list[0]; - for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); - sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { + for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { #ifdef DEBUG if (sfe->namelen != args->namelen) continue; @@ -248,13 +247,13 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) sfe = (xfs_attr_sf_entry_t *)((char *)sf + offset); sfe->namelen = args->namelen; - INT_SET(sfe->valuelen, ARCH_CONVERT, args->valuelen); + sfe->valuelen = args->valuelen; sfe->flags = (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE : ((args->flags & ATTR_ROOT) ? 
XFS_ATTR_ROOT : 0); memcpy(sfe->nameval, args->name, args->namelen); memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); - INT_MOD(sf->hdr.count, ARCH_CONVERT, 1); - INT_MOD(sf->hdr.totsize, ARCH_CONVERT, size); + sf->hdr.count++; + be16_add(&sf->hdr.totsize, size); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); xfs_sbversion_add_attr2(mp, args->trans); @@ -277,7 +276,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) base = sizeof(xfs_attr_sf_hdr_t); sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; sfe = &sf->list[0]; - end = INT_GET(sf->hdr.count, ARCH_CONVERT); + end = sf->hdr.count; for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), base += size, i++) { size = XFS_ATTR_SF_ENTSIZE(sfe); @@ -300,11 +299,11 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) * Fix up the attribute fork data, covering the hole */ end = base + size; - totsize = INT_GET(sf->hdr.totsize, ARCH_CONVERT); + totsize = be16_to_cpu(sf->hdr.totsize); if (end != totsize) memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end); - INT_MOD(sf->hdr.count, ARCH_CONVERT, -1); - INT_MOD(sf->hdr.totsize, ARCH_CONVERT, -size); + sf->hdr.count--; + be16_add(&sf->hdr.totsize, -size); /* * Fix up the start offset of the attribute fork @@ -360,7 +359,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) ASSERT(ifp->if_flags & XFS_IFINLINE); sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; sfe = &sf->list[0]; - for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); + for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { if (sfe->namelen != args->namelen) continue; @@ -391,7 +390,7 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) ASSERT(args->dp->i_d.di_aformat == XFS_IFINLINE); sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data; sfe = &sf->list[0]; - for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); + for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { if (sfe->namelen != args->namelen) continue; @@ -404,14 +403,14 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) ((sfe->flags & XFS_ATTR_ROOT) != 0)) continue; if (args->flags & ATTR_KERNOVAL) { - args->valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT); + args->valuelen = sfe->valuelen; return(XFS_ERROR(EEXIST)); } - if (args->valuelen < INT_GET(sfe->valuelen, ARCH_CONVERT)) { - args->valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT); + if (args->valuelen < sfe->valuelen) { + args->valuelen = sfe->valuelen; return(XFS_ERROR(ERANGE)); } - args->valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT); + args->valuelen = sfe->valuelen; memcpy(args->value, &sfe->nameval[args->namelen], args->valuelen); return(XFS_ERROR(EEXIST)); @@ -438,7 +437,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) dp = args->dp; ifp = dp->i_afp; sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; - size = INT_GET(sf->hdr.totsize, ARCH_CONVERT); + size = be16_to_cpu(sf->hdr.totsize); tmpbuffer = kmem_alloc(size, KM_SLEEP); ASSERT(tmpbuffer != NULL); memcpy(tmpbuffer, ifp->if_u1.if_data, size); @@ -481,11 +480,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) nargs.oknoent = 1; sfe = &sf->list[0]; - for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) { + for (i = 0; i < sf->hdr.count; i++) { nargs.name = (char *)sfe->nameval; nargs.namelen = sfe->namelen; nargs.value = (char *)&sfe->nameval[nargs.namelen]; - nargs.valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT); + nargs.valuelen = sfe->valuelen; nargs.hashval = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); nargs.flags = (sfe->flags & 
XFS_ATTR_SECURE) ? ATTR_SECURE : @@ -560,10 +559,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) * If the buffer is large enough, do not bother with sorting. * Note the generous fudge factor of 16 overhead bytes per entry. */ - if ((dp->i_afp->if_bytes + INT_GET(sf->hdr.count, ARCH_CONVERT) * 16) - < context->bufsize) { - for (i = 0, sfe = &sf->list[0]; - i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) { + if ((dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize) { + for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { attrnames_t *namesp; if (((context->flags & ATTR_SECURE) != 0) != @@ -584,14 +581,13 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) if (context->flags & ATTR_KERNOVAL) { ASSERT(context->flags & ATTR_KERNAMELS); context->count += namesp->attr_namelen + - INT_GET(sfe->namelen, ARCH_CONVERT) + 1; + sfe->namelen + 1; } else { if (xfs_attr_put_listent(context, namesp, (char *)sfe->nameval, (int)sfe->namelen, - (int)INT_GET(sfe->valuelen, - ARCH_CONVERT))) + (int)sfe->valuelen)) break; } sfe = XFS_ATTR_SF_NEXTENTRY(sfe); @@ -603,7 +599,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) /* * It didn't all fit, so we have to sort everything on hashval. */ - sbsize = INT_GET(sf->hdr.count, ARCH_CONVERT) * sizeof(*sbuf); + sbsize = sf->hdr.count * sizeof(*sbuf); sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP); /* @@ -611,8 +607,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) * the relevant info from only those that match into a buffer. */ nsbuf = 0; - for (i = 0, sfe = &sf->list[0]; - i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) { + for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { if (unlikely( ((char *)sfe < (char *)sf) || ((char *)sfe >= ((char *)sf + dp->i_afp->if_bytes)))) { @@ -696,7 +691,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) } else { if (xfs_attr_put_listent(context, namesp, sbp->name, sbp->namelen, - INT_GET(sbp->valuelen, ARCH_CONVERT))) + sbp->valuelen)) break; } cursor->offset++; diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h index ffed6ca81a52..f67f917803b1 100644 --- a/fs/xfs/xfs_attr_sf.h +++ b/fs/xfs/xfs_attr_sf.h @@ -32,8 +32,8 @@ struct xfs_inode; */ typedef struct xfs_attr_shortform { struct xfs_attr_sf_hdr { /* constant-structure header block */ - __uint16_t totsize; /* total bytes in shortform list */ - __uint8_t count; /* count of active entries */ + __be16 totsize; /* total bytes in shortform list */ + __u8 count; /* count of active entries */ } hdr; struct xfs_attr_sf_entry { __uint8_t namelen; /* actual length of name (no NULL) */ @@ -66,8 +66,8 @@ typedef struct xfs_attr_sf_sort { #define XFS_ATTR_SF_NEXTENTRY(sfep) /* next entry in struct */ \ ((xfs_attr_sf_entry_t *)((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep))) #define XFS_ATTR_SF_TOTSIZE(dp) /* total space in use */ \ - (INT_GET(((xfs_attr_shortform_t *) \ - ((dp)->i_afp->if_u1.if_data))->hdr.totsize, ARCH_CONVERT)) + (be16_to_cpu(((xfs_attr_shortform_t *) \ + ((dp)->i_afp->if_u1.if_data))->hdr.totsize)) #if defined(XFS_ATTR_TRACE) /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b1e95707e7cf..7d0ded05a467 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -506,7 +506,7 @@ xfs_iformat( switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) { case XFS_DINODE_FMT_LOCAL: atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); - size = (int)INT_GET(atp->hdr.totsize, ARCH_CONVERT); + size = be16_to_cpu(atp->hdr.totsize); error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); break; case 
XFS_DINODE_FMT_EXTENTS: -- cgit v1.2.3 From 984a081a7c89ea7e1b6f47cbc0e5c8ef67ad6e09 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:29:31 +1100 Subject: [XFS] store xfs_attr_sf_sort in native endian SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25502a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_attr_leaf.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 37d2e10f874c..e4071eb1e6cf 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -513,11 +513,9 @@ xfs_attr_shortform_compare(const void *a, const void *b) sa = (xfs_attr_sf_sort_t *)a; sb = (xfs_attr_sf_sort_t *)b; - if (INT_GET(sa->hash, ARCH_CONVERT) - < INT_GET(sb->hash, ARCH_CONVERT)) { + if (sa->hash < sb->hash) { return(-1); - } else if (INT_GET(sa->hash, ARCH_CONVERT) - > INT_GET(sb->hash, ARCH_CONVERT)) { + } else if (sa->hash > sb->hash) { return(1); } else { return(sa->entno - sb->entno); @@ -631,8 +629,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) continue; } sbp->entno = i; - INT_SET(sbp->hash, ARCH_CONVERT, - xfs_da_hashname((char *)sfe->nameval, sfe->namelen)); + sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); sbp->name = (char *)sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ @@ -655,12 +652,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) cursor->initted = 1; cursor->blkno = 0; for (sbp = sbuf, i = 0; i < nsbuf; i++, sbp++) { - if (INT_GET(sbp->hash, ARCH_CONVERT) == cursor->hashval) { + if (sbp->hash == cursor->hashval) { if (cursor->offset == count) { break; } count++; - } else if (INT_GET(sbp->hash, ARCH_CONVERT) > cursor->hashval) { + } else if (sbp->hash > cursor->hashval) { break; } } @@ -680,8 +677,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) ((sbp->flags & XFS_ATTR_ROOT) ? 
&attr_trusted : &attr_user); - if (cursor->hashval != INT_GET(sbp->hash, ARCH_CONVERT)) { - cursor->hashval = INT_GET(sbp->hash, ARCH_CONVERT); + if (cursor->hashval != sbp->hash) { + cursor->hashval = sbp->hash; cursor->offset = 0; } if (context->flags & ATTR_KERNOVAL) { -- cgit v1.2.3 From d7929ff670c802dc68d6149d3d0cc5667e18daec Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:29:36 +1100 Subject: [XFS] store xfs_attr_inactive_list_t in native endian SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25503a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_attr_leaf.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index e4071eb1e6cf..b3d5c35b6047 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -2938,11 +2938,9 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) ((entry->flags & XFS_ATTR_LOCAL) == 0)) { name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); if (name_rmt->valueblk) { - /* both on-disk, don't endian flip twice */ - lp->valueblk = name_rmt->valueblk; - INT_SET(lp->valuelen, ARCH_CONVERT, - XFS_B_TO_FSB(dp->i_mount, - be32_to_cpu(name_rmt->valuelen))); + lp->valueblk = be32_to_cpu(name_rmt->valueblk); + lp->valuelen = XFS_B_TO_FSB(dp->i_mount, + be32_to_cpu(name_rmt->valuelen)); lp++; } } @@ -2955,10 +2953,8 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) error = 0; for (lp = list, i = 0; i < count; i++, lp++) { tmp = xfs_attr_leaf_freextent(trans, dp, - INT_GET(lp->valueblk, - ARCH_CONVERT), - INT_GET(lp->valuelen, - ARCH_CONVERT)); + lp->valueblk, lp->valuelen); + if (error == 0) error = tmp; /* save only the 1st errno */ } -- cgit v1.2.3 From 403432dcb5daa03c1f1c961adb7d2a5daebea94b Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:29:46 +1100 Subject: [XFS] endianess annotations for xfs_da_node_entry_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25504a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_attr.c | 13 ++++---- fs/xfs/xfs_attr_leaf.c | 6 ++-- fs/xfs/xfs_da_btree.c | 86 +++++++++++++++++++++++++------------------------- fs/xfs/xfs_da_btree.h | 4 +-- fs/xfs/xfs_dir.c | 8 ++--- fs/xfs/xfs_dir_leaf.c | 2 +- 6 files changed, 59 insertions(+), 60 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 98d0f4d10602..5d61c628d0db 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -1829,9 +1829,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) i < INT_GET(node->hdr.count, ARCH_CONVERT); btree++, i++) { if (cursor->hashval - <= INT_GET(btree->hashval, - ARCH_CONVERT)) { - cursor->blkno = INT_GET(btree->before, ARCH_CONVERT); + <= be32_to_cpu(btree->hashval)) { + cursor->blkno = be32_to_cpu(btree->before); xfs_attr_trace_l_cb("descending", context, btree); break; @@ -2228,8 +2227,8 @@ xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context, (__psunsigned_t)context->dupcnt, (__psunsigned_t)context->flags, (__psunsigned_t)INT_GET(node->hdr.count, ARCH_CONVERT), - (__psunsigned_t)INT_GET(node->btree[0].hashval, ARCH_CONVERT), - (__psunsigned_t)INT_GET(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval, ARCH_CONVERT)); + (__psunsigned_t)be32_to_cpu(node->btree[0].hashval), + (__psunsigned_t)be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval)); } /* @@ -2256,8 +2255,8 @@ xfs_attr_trace_l_cb(char *where, struct 
xfs_attr_list_context *context, : 0, (__psunsigned_t)context->dupcnt, (__psunsigned_t)context->flags, - (__psunsigned_t)INT_GET(btree->hashval, ARCH_CONVERT), - (__psunsigned_t)INT_GET(btree->before, ARCH_CONVERT), + (__psunsigned_t)be32_to_cpu(btree->hashval), + (__psunsigned_t)be32_to_cpu(btree->before), (__psunsigned_t)NULL); } diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index b3d5c35b6047..70594bceffec 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -869,7 +869,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) /* both on-disk, don't endian-flip twice */ node->btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; - INT_SET(node->btree[0].before, ARCH_CONVERT, blkno); + node->btree[0].before = cpu_to_be32(blkno); INT_SET(node->hdr.count, ARCH_CONVERT, 1); xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1); error = 0; @@ -2809,7 +2809,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, xfs_da_brelse(*trans, bp); return(0); } - child_fsb = INT_GET(node->btree[0].before, ARCH_CONVERT); + child_fsb = be32_to_cpu(node->btree[0].before); xfs_da_brelse(*trans, bp); /* no locks for later trans */ /* @@ -2869,7 +2869,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, &bp, XFS_ATTR_FORK); if (error) return(error); - child_fsb = INT_GET(node->btree[i+1].before, ARCH_CONVERT); + child_fsb = be32_to_cpu(node->btree[i+1].before); xfs_da_brelse(*trans, bp); } /* diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 4f3bb1cb9615..d32670c09f39 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -385,10 +385,10 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, if (error) return(error); node = bp->data; - INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval); - INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno); - INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval); - INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno); + node->btree[0].hashval = cpu_to_be32(blk1->hashval); + node->btree[0].before = cpu_to_be32(blk1->blkno); + node->btree[1].hashval = cpu_to_be32(blk2->hashval); + node->btree[1].before = cpu_to_be32(blk2->blkno); INT_SET(node->hdr.count, ARCH_CONVERT, 2); #ifdef DEBUG @@ -517,9 +517,9 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Swap the nodes around if that makes it simpler. 
*/ if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) && - ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) || - (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) < - INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) { + ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) || + (be32_to_cpu(node2->btree[INT_GET(node2->hdr.count, ARCH_CONVERT)-1].hashval) < + be32_to_cpu(node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)-1].hashval)))) { tmpnode = node1; node1 = node2; node2 = tmpnode; @@ -596,8 +596,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, */ node1 = blk1->bp->data; node2 = blk2->bp->data; - blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); - blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + blk1->hashval = be32_to_cpu(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval); + blk2->hashval = be32_to_cpu(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval); /* * Adjust the expected index for insertion. @@ -638,8 +638,8 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree); memmove(btree + 1, btree, tmp); } - INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval); - INT_SET(btree->before, ARCH_CONVERT, newblk->blkno); + btree->hashval = cpu_to_be32(newblk->hashval); + btree->before = cpu_to_be32(newblk->blkno); xfs_da_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree))); INT_MOD(node->hdr.count, ARCH_CONVERT, +1); @@ -649,7 +649,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, /* * Copy the last hash value from the oldblk to propagate upwards. */ - oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + oldblk->hashval = be32_to_cpu(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval); } /*======================================================================== @@ -782,7 +782,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) * Read in the (only) child block, then copy those bytes into * the root block's buffer and free the original child block. 
*/ - child = INT_GET(oldroot->btree[ 0 ].before, ARCH_CONVERT); + child = be32_to_cpu(oldroot->btree[0].before); ASSERT(child != 0); error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp, args->whichfork); @@ -974,14 +974,14 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) node = blk->bp->data; ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); btree = &node->btree[ blk->index ]; - if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash) + if (be32_to_cpu(btree->hashval) == lasthash) break; blk->hashval = lasthash; - INT_SET(btree->hashval, ARCH_CONVERT, lasthash); + btree->hashval = cpu_to_be32(lasthash); xfs_da_log_buf(state->args->trans, blk->bp, XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); - lasthash = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + lasthash = be32_to_cpu(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval); } } @@ -1022,7 +1022,7 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) * Copy the last hash value from the block to propagate upwards. */ btree--; - drop_blk->hashval = INT_GET(btree->hashval, ARCH_CONVERT); + drop_blk->hashval = be32_to_cpu(btree->hashval); } /* @@ -1048,9 +1048,9 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, * If the dying block has lower hashvals, then move all the * elements in the remaining block up to make a hole. */ - if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) || - (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) < - INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT))) + if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) || + (be32_to_cpu(drop_node->btree[INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1].hashval) < + be32_to_cpu(save_node->btree[INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval))) { btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ]; tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t); @@ -1082,7 +1082,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, /* * Save the last hashval in the remaining block for upward propagation. */ - save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + save_blk->hashval = be32_to_cpu(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval); } /*======================================================================== @@ -1147,7 +1147,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) blk->magic = be16_to_cpu(curr->magic); if (blk->magic == XFS_DA_NODE_MAGIC) { node = blk->bp->data; - blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + blk->hashval = be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval); /* * Binary search. 
(note: small blocks will skip loop) @@ -1158,25 +1158,25 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) for (btree = &node->btree[probe]; span > 4; btree = &node->btree[probe]) { span /= 2; - if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval) + if (be32_to_cpu(btree->hashval) < hashval) probe += span; - else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval) + else if (be32_to_cpu(btree->hashval) > hashval) probe -= span; else break; } ASSERT((probe >= 0) && (probe < max)); - ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval)); + ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval)); /* * Since we may have duplicate hashval's, find the first * matching hashval in the node. */ - while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) { + while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) { btree--; probe--; } - while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) { + while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) { btree++; probe++; } @@ -1186,10 +1186,10 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) */ if (probe == max) { blk->index = max-1; - blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT); + blkno = be32_to_cpu(node->btree[max-1].before); } else { blk->index = probe; - blkno = INT_GET(btree->before, ARCH_CONVERT); + blkno = be32_to_cpu(btree->before); } } else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) { @@ -1359,10 +1359,10 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) && (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC)); if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) && - ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < - INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) || - (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) < - INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) { + ((be32_to_cpu(node2->btree[0].hashval) < + be32_to_cpu(node1->btree[0].hashval)) || + (be32_to_cpu(node2->btree[INT_GET(node2->hdr.count, ARCH_CONVERT)-1].hashval) < + be32_to_cpu(node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)-1].hashval)))) { return(1); } return(0); @@ -1382,7 +1382,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) *count = INT_GET(node->hdr.count, ARCH_CONVERT); if (!node->hdr.count) return(0); - return(INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)); + return be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval); } /* @@ -1493,11 +1493,11 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) { blk->index++; - blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT); + blkno = be32_to_cpu(node->btree[blk->index].before); break; } else if (!forward && (blk->index > 0)) { blk->index--; - blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT); + blkno = be32_to_cpu(node->btree[blk->index].before); break; } } @@ -1535,12 +1535,12 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, blk->magic = be16_to_cpu(info->magic); if (blk->magic == XFS_DA_NODE_MAGIC) { node = (xfs_da_intnode_t *)info; - blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, 
ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + blk->hashval = be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval); if (forward) blk->index = 0; else blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1; - blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT); + blkno = be32_to_cpu(node->btree[blk->index].before); } else { ASSERT(level == path->active-1); blk->index = 0; @@ -1796,7 +1796,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC); dead_node = (xfs_da_intnode_t *)dead_info; dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT); - dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); + dead_hash = be32_to_cpu(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval); } sib_buf = par_buf = NULL; /* @@ -1863,7 +1863,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, level = INT_GET(par_node->hdr.level, ARCH_CONVERT); for (entno = 0; entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) && - INT_GET(par_node->btree[entno].hashval, ARCH_CONVERT) < dead_hash; + be32_to_cpu(par_node->btree[entno].hashval) < dead_hash; entno++) continue; if (unlikely(entno == INT_GET(par_node->hdr.count, ARCH_CONVERT))) { @@ -1872,7 +1872,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, error = XFS_ERROR(EFSCORRUPTED); goto done; } - par_blkno = INT_GET(par_node->btree[entno].before, ARCH_CONVERT); + par_blkno = be32_to_cpu(par_node->btree[entno].before); if (level == dead_level + 1) break; xfs_da_brelse(tp, par_buf); @@ -1885,7 +1885,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, for (;;) { for (; entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) && - INT_GET(par_node->btree[entno].before, ARCH_CONVERT) != last_blkno; + be32_to_cpu(par_node->btree[entno].before) != last_blkno; entno++) continue; if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT)) @@ -1915,7 +1915,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, /* * Update the parent entry pointing to the moved block. 
*/ - INT_SET(par_node->btree[entno].before, ARCH_CONVERT, dead_blkno); + par_node->btree[entno].before = cpu_to_be32(dead_blkno); xfs_da_log_buf(tp, par_buf, XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before, sizeof(par_node->btree[entno].before))); diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index e727bf456a12..6343c3a4dbae 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -69,8 +69,8 @@ typedef struct xfs_da_intnode { __uint16_t level; /* level above leaves (leaf == 0) */ } hdr; struct xfs_da_node_entry { - xfs_dahash_t hashval; /* hash value for this descendant */ - xfs_dablk_t before; /* Btree block before this key */ + __be32 hashval; /* hash value for this descendant */ + __be32 before; /* Btree block before this key */ } btree[1]; /* variable sized array of keys */ } xfs_da_intnode_t; typedef struct xfs_da_node_hdr xfs_da_node_hdr_t; diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c index 7a708e993db6..8d1975a541b1 100644 --- a/fs/xfs/xfs_dir.c +++ b/fs/xfs/xfs_dir.c @@ -954,8 +954,8 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, btree = &node->btree[0]; xfs_dir_trace_g_dun("node: node detail", dp, uio, node); for (i = 0; i < INT_GET(node->hdr.count, ARCH_CONVERT); btree++, i++) { - if (INT_GET(btree->hashval, ARCH_CONVERT) >= cookhash) { - bno = INT_GET(btree->before, ARCH_CONVERT); + if (be32_to_cpu(btree->hashval) >= cookhash) { + bno = be32_to_cpu(btree->before); break; } } @@ -1129,9 +1129,9 @@ xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio, (void *)(unsigned long) INT_GET(node->hdr.count, ARCH_CONVERT), (void *)(unsigned long) - INT_GET(node->btree[0].hashval, ARCH_CONVERT), + be32_to_cpu(node->btree[0].hashval), (void *)(unsigned long) - INT_GET(node->btree[last].hashval, ARCH_CONVERT), + be32_to_cpu(node->btree[last].hashval), NULL, NULL, NULL); } diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c index 0b2eca590b36..3c58834fa6a7 100644 --- a/fs/xfs/xfs_dir_leaf.c +++ b/fs/xfs/xfs_dir_leaf.c @@ -745,7 +745,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args) ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); INT_SET(node->btree[0].hashval, ARCH_CONVERT, INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)); xfs_da_buf_done(bp2); - INT_SET(node->btree[0].before, ARCH_CONVERT, blkno); + node->btree[0].before = cpu_to_be32(blkno); INT_SET(node->hdr.count, ARCH_CONVERT, 1); xfs_da_log_buf(args->trans, bp1, XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0]))); -- cgit v1.2.3 From fac80cce0ecc6b10ae165af5b6b9b03151083044 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:29:56 +1100 Subject: [XFS] endianess annotations for xfs_da_node_hdr_t SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25505a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_attr.c | 10 ++-- fs/xfs/xfs_attr_leaf.c | 4 +- fs/xfs/xfs_da_btree.c | 139 ++++++++++++++++++++++++------------------------- fs/xfs/xfs_da_btree.h | 4 +- fs/xfs/xfs_dir.c | 8 +-- fs/xfs/xfs_dir_leaf.c | 6 ++- 6 files changed, 86 insertions(+), 85 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 5d61c628d0db..093fac476bda 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -1825,8 +1825,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) return(XFS_ERROR(EFSCORRUPTED)); } btree = node->btree; - for (i = 0; - i < INT_GET(node->hdr.count, ARCH_CONVERT); + for (i = 0; i < be16_to_cpu(node->hdr.count); btree++, 
i++) { if (cursor->hashval <= be32_to_cpu(btree->hashval)) { @@ -1836,7 +1835,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) break; } } - if (i == INT_GET(node->hdr.count, ARCH_CONVERT)) { + if (i == be16_to_cpu(node->hdr.count)) { xfs_da_brelse(NULL, bp); return(0); } @@ -2226,9 +2225,10 @@ xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context, : 0, (__psunsigned_t)context->dupcnt, (__psunsigned_t)context->flags, - (__psunsigned_t)INT_GET(node->hdr.count, ARCH_CONVERT), + (__psunsigned_t)be16_to_cpu(node->hdr.count), (__psunsigned_t)be32_to_cpu(node->btree[0].hashval), - (__psunsigned_t)be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval)); + (__psunsigned_t)be32_to_cpu(node->btree[ + be16_to_cpu(node->hdr.count)-1].hashval)); } /* diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 70594bceffec..717682747bd2 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -870,7 +870,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) node->btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; node->btree[0].before = cpu_to_be32(blkno); - INT_SET(node->hdr.count, ARCH_CONVERT, 1); + node->hdr.count = cpu_to_be16(1); xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1); error = 0; out: @@ -2804,7 +2804,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, node = bp->data; ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); parent_blkno = xfs_da_blkno(bp); /* save for re-read later */ - count = INT_GET(node->hdr.count, ARCH_CONVERT); + count = be16_to_cpu(node->hdr.count); if (!count) { xfs_da_brelse(*trans, bp); return(0); diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index d32670c09f39..4bae3a76c678 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -129,7 +129,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC); node->hdr.info.pad = 0; node->hdr.count = 0; - INT_SET(node->hdr.level, ARCH_CONVERT, level); + node->hdr.level = cpu_to_be16(level); xfs_da_log_buf(tp, bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); @@ -360,7 +360,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node = bp->data; oldroot = blk1->bp->data; if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) { - size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] - + size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - (char *)oldroot); } else { ASSERT(XFS_DIR_IS_V2(mp)); @@ -381,7 +381,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, error = xfs_da_node_create(args, args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0, - INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork); + be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork); if (error) return(error); node = bp->data; @@ -389,7 +389,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node->btree[0].before = cpu_to_be32(blk1->blkno); node->btree[1].hashval = cpu_to_be32(blk2->hashval); node->btree[1].before = cpu_to_be32(blk2->blkno); - INT_SET(node->hdr.count, ARCH_CONVERT, 2); + node->hdr.count = cpu_to_be16(2); #ifdef DEBUG if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) { @@ -435,7 +435,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, /* * Do we have to split the node? 
*/ - if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > state->node_ents) { + if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) { /* * Allocate a new node, add to the doubly linked chain of * nodes, then move some of our excess entries into it. @@ -472,7 +472,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, * If we had double-split op below us, then add the extra block too. */ node = oldblk->bp->data; - if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) { + if (oldblk->index <= be16_to_cpu(node->hdr.count)) { oldblk->index++; xfs_da_node_add(state, oldblk, addblk); if (useextra) { @@ -516,17 +516,17 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Figure out how many entries need to move, and in which direction. * Swap the nodes around if that makes it simpler. */ - if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) && + if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) || - (be32_to_cpu(node2->btree[INT_GET(node2->hdr.count, ARCH_CONVERT)-1].hashval) < - be32_to_cpu(node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)-1].hashval)))) { + (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) < + be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) { tmpnode = node1; node1 = node2; node2 = tmpnode; } ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC); ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC); - count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2; + count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2; if (count == 0) return; tp = state->args->trans; @@ -537,7 +537,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, /* * Move elements in node2 up to make a hole. */ - if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) { + if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) { tmp *= (uint)sizeof(xfs_da_node_entry_t); btree_s = &node2->btree[0]; btree_d = &node2->btree[count]; @@ -548,13 +548,12 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Move the req'd B-tree elements from high in node1 to * low in node2. */ - INT_MOD(node2->hdr.count, ARCH_CONVERT, count); + be16_add(&node2->hdr.count, count); tmp = count * (uint)sizeof(xfs_da_node_entry_t); - btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count]; + btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count]; btree_d = &node2->btree[0]; memcpy(btree_d, btree_s, tmp); - INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count)); - + be16_add(&node1->hdr.count, -count); } else { /* * Move the req'd B-tree elements from low in node2 to @@ -563,21 +562,21 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, count = -count; tmp = count * (uint)sizeof(xfs_da_node_entry_t); btree_s = &node2->btree[0]; - btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)]; + btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)]; memcpy(btree_d, btree_s, tmp); - INT_MOD(node1->hdr.count, ARCH_CONVERT, count); + be16_add(&node1->hdr.count, count); xfs_da_log_buf(tp, blk1->bp, XFS_DA_LOGRANGE(node1, btree_d, tmp)); /* * Move elements in node2 down to fill the hole. 
*/ - tmp = INT_GET(node2->hdr.count, ARCH_CONVERT) - count; + tmp = be16_to_cpu(node2->hdr.count) - count; tmp *= (uint)sizeof(xfs_da_node_entry_t); btree_s = &node2->btree[count]; btree_d = &node2->btree[0]; memmove(btree_d, btree_s, tmp); - INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count)); + be16_add(&node2->hdr.count, -count); } /* @@ -588,7 +587,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_da_log_buf(tp, blk2->bp, XFS_DA_LOGRANGE(node2, &node2->hdr, sizeof(node2->hdr) + - sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT))); + sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count))); /* * Record the last hashval from each block for upward propagation. @@ -596,15 +595,15 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, */ node1 = blk1->bp->data; node2 = blk2->bp->data; - blk1->hashval = be32_to_cpu(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval); - blk2->hashval = be32_to_cpu(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval); + blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval); + blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval); /* * Adjust the expected index for insertion. */ - if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) { - blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT); - blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1; /* make it invalid */ + if (blk1->index >= be16_to_cpu(node1->hdr.count)) { + blk2->index = blk1->index - be16_to_cpu(node1->hdr.count); + blk1->index = be16_to_cpu(node1->hdr.count) + 1; /* make it invalid */ } } @@ -623,7 +622,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, node = oldblk->bp->data; mp = state->mp; ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); - ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT))); + ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); ASSERT(newblk->blkno != 0); if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) ASSERT(newblk->blkno >= mp->m_dirleafblk && @@ -634,22 +633,22 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, */ tmp = 0; btree = &node->btree[ oldblk->index ]; - if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) { - tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree); + if (oldblk->index < be16_to_cpu(node->hdr.count)) { + tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree); memmove(btree + 1, btree, tmp); } btree->hashval = cpu_to_be32(newblk->hashval); btree->before = cpu_to_be32(newblk->blkno); xfs_da_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree))); - INT_MOD(node->hdr.count, ARCH_CONVERT, +1); + be16_add(&node->hdr.count, 1); xfs_da_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); /* * Copy the last hash value from the oldblk to propagate upwards. */ - oldblk->hashval = be32_to_cpu(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval); + oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval); } /*======================================================================== @@ -775,7 +774,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) /* * If the root has more than one child, then don't do anything. 
*/ - if (INT_GET(oldroot->hdr.count, ARCH_CONVERT) > 1) + if (be16_to_cpu(oldroot->hdr.count) > 1) return(0); /* @@ -790,7 +789,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) return(error); ASSERT(bp != NULL); blkinfo = bp->data; - if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) { + if (be16_to_cpu(oldroot->hdr.level) == 1) { ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) || be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC); } else { @@ -832,7 +831,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) info = blk->bp->data; ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC); node = (xfs_da_intnode_t *)info; - count = INT_GET(node->hdr.count, ARCH_CONVERT); + count = be16_to_cpu(node->hdr.count); if (count > (state->node_ents >> 1)) { *action = 0; /* blk over 50%, don't try to join */ return(0); /* blk over 50%, don't try to join */ @@ -888,10 +887,10 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) node = (xfs_da_intnode_t *)info; count = state->node_ents; count -= state->node_ents >> 2; - count -= INT_GET(node->hdr.count, ARCH_CONVERT); + count -= be16_to_cpu(node->hdr.count); node = bp->data; ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); - count -= INT_GET(node->hdr.count, ARCH_CONVERT); + count -= be16_to_cpu(node->hdr.count); xfs_da_brelse(state->args->trans, bp); if (count >= 0) break; /* fits with at least 25% to spare */ @@ -981,7 +980,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) xfs_da_log_buf(state->args->trans, blk->bp, XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); - lasthash = be32_to_cpu(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval); + lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); } } @@ -996,25 +995,25 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) int tmp; node = drop_blk->bp->data; - ASSERT(drop_blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)); + ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count)); ASSERT(drop_blk->index >= 0); /* * Copy over the offending entry, or just zero it out. */ btree = &node->btree[drop_blk->index]; - if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) { - tmp = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1; + if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) { + tmp = be16_to_cpu(node->hdr.count) - drop_blk->index - 1; tmp *= (uint)sizeof(xfs_da_node_entry_t); memmove(btree, btree + 1, tmp); xfs_da_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, btree, tmp)); - btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ]; + btree = &node->btree[be16_to_cpu(node->hdr.count)-1]; } memset((char *)btree, 0, sizeof(xfs_da_node_entry_t)); xfs_da_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); - INT_MOD(node->hdr.count, ARCH_CONVERT, -1); + be16_add(&node->hdr.count, -1); xfs_da_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); @@ -1049,31 +1048,31 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, * elements in the remaining block up to make a hole. 
*/ if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) || - (be32_to_cpu(drop_node->btree[INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1].hashval) < - be32_to_cpu(save_node->btree[INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval))) + (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) < + be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval))) { - btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ]; - tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t); + btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)]; + tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); memmove(btree, &save_node->btree[0], tmp); btree = &save_node->btree[0]; xfs_da_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, btree, - (INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) * + (be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) * sizeof(xfs_da_node_entry_t))); } else { - btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ]; + btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)]; xfs_da_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, btree, - INT_GET(drop_node->hdr.count, ARCH_CONVERT) * + be16_to_cpu(drop_node->hdr.count) * sizeof(xfs_da_node_entry_t))); } /* * Move all the B-tree elements from drop_blk to save_blk. */ - tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t); + tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); memcpy(btree, &drop_node->btree[0], tmp); - INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT)); + be16_add(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count)); xfs_da_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, &save_node->hdr, @@ -1082,7 +1081,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, /* * Save the last hashval in the remaining block for upward propagation. */ - save_blk->hashval = be32_to_cpu(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval); + save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval); } /*======================================================================== @@ -1147,12 +1146,12 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) blk->magic = be16_to_cpu(curr->magic); if (blk->magic == XFS_DA_NODE_MAGIC) { node = blk->bp->data; - blk->hashval = be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval); + blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); /* * Binary search. 
(note: small blocks will skip loop) */ - max = INT_GET(node->hdr.count, ARCH_CONVERT); + max = be16_to_cpu(node->hdr.count); probe = span = max / 2; hashval = args->hashval; for (btree = &node->btree[probe]; span > 4; @@ -1358,11 +1357,11 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) node2 = node2_bp->data; ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) && (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC)); - if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) && + if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) || - (be32_to_cpu(node2->btree[INT_GET(node2->hdr.count, ARCH_CONVERT)-1].hashval) < - be32_to_cpu(node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)-1].hashval)))) { + (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) < + be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) { return(1); } return(0); @@ -1379,10 +1378,10 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) node = bp->data; ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); if (count) - *count = INT_GET(node->hdr.count, ARCH_CONVERT); + *count = be16_to_cpu(node->hdr.count); if (!node->hdr.count) return(0); - return be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval); + return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); } /* @@ -1491,7 +1490,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, ASSERT(blk->bp != NULL); node = blk->bp->data; ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); - if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) { + if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { blk->index++; blkno = be32_to_cpu(node->btree[blk->index].before); break; @@ -1535,11 +1534,11 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, blk->magic = be16_to_cpu(info->magic); if (blk->magic == XFS_DA_NODE_MAGIC) { node = (xfs_da_intnode_t *)info; - blk->hashval = be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval); + blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); if (forward) blk->index = 0; else - blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1; + blk->index = be16_to_cpu(node->hdr.count)-1; blkno = be32_to_cpu(node->btree[blk->index].before); } else { ASSERT(level == path->active-1); @@ -1795,8 +1794,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, } else { ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC); dead_node = (xfs_da_intnode_t *)dead_info; - dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT); - dead_hash = be32_to_cpu(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval); + dead_level = be16_to_cpu(dead_node->hdr.level); + dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval); } sib_buf = par_buf = NULL; /* @@ -1854,19 +1853,19 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, par_node = par_buf->data; if (unlikely( be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC || - (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1))) { + (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); goto done; } - level = 
INT_GET(par_node->hdr.level, ARCH_CONVERT); + level = be16_to_cpu(par_node->hdr.level); for (entno = 0; - entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) && + entno < be16_to_cpu(par_node->hdr.count) && be32_to_cpu(par_node->btree[entno].hashval) < dead_hash; entno++) continue; - if (unlikely(entno == INT_GET(par_node->hdr.count, ARCH_CONVERT))) { + if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); @@ -1884,11 +1883,11 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, */ for (;;) { for (; - entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) && + entno < be16_to_cpu(par_node->hdr.count) && be32_to_cpu(par_node->btree[entno].before) != last_blkno; entno++) continue; - if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT)) + if (entno < be16_to_cpu(par_node->hdr.count)) break; par_blkno = be32_to_cpu(par_node->hdr.info.forw); xfs_da_brelse(tp, par_buf); @@ -1903,7 +1902,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, goto done; par_node = par_buf->data; if (unlikely( - INT_GET(par_node->hdr.level, ARCH_CONVERT) != level || + be16_to_cpu(par_node->hdr.level) != level || be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", XFS_ERRLEVEL_LOW, mp); diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 6343c3a4dbae..243a730d5ec8 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -65,8 +65,8 @@ typedef struct xfs_da_blkinfo { typedef struct xfs_da_intnode { struct xfs_da_node_hdr { /* constant-structure header block */ xfs_da_blkinfo_t info; /* block type, links, etc. */ - __uint16_t count; /* count of active entries */ - __uint16_t level; /* level above leaves (leaf == 0) */ + __be16 count; /* count of active entries */ + __be16 level; /* level above leaves (leaf == 0) */ } hdr; struct xfs_da_node_entry { __be32 hashval; /* hash value for this descendant */ diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c index 8d1975a541b1..9cc702a839a3 100644 --- a/fs/xfs/xfs_dir.c +++ b/fs/xfs/xfs_dir.c @@ -953,13 +953,13 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, break; btree = &node->btree[0]; xfs_dir_trace_g_dun("node: node detail", dp, uio, node); - for (i = 0; i < INT_GET(node->hdr.count, ARCH_CONVERT); btree++, i++) { + for (i = 0; i < be16_to_cpu(node->hdr.count); btree++, i++) { if (be32_to_cpu(btree->hashval) >= cookhash) { bno = be32_to_cpu(btree->before); break; } } - if (i == INT_GET(node->hdr.count, ARCH_CONVERT)) { + if (i == be16_to_cpu(node->hdr.count)) { xfs_da_brelse(trans, bp); xfs_dir_trace_g_du("node: hash beyond EOF", dp, uio); @@ -1118,7 +1118,7 @@ void xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio, xfs_da_intnode_t *node) { - int last = INT_GET(node->hdr.count, ARCH_CONVERT) - 1; + int last = be16_to_cpu(node->hdr.count) - 1; xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUN, where, (void *)dp, (void *)dp->i_mount, @@ -1127,7 +1127,7 @@ xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio, (void *)(unsigned long)uio->uio_resid, (void *)(unsigned long)be32_to_cpu(node->hdr.info.forw), (void *)(unsigned long) - INT_GET(node->hdr.count, ARCH_CONVERT), + be16_to_cpu(node->hdr.count), (void *)(unsigned long) be32_to_cpu(node->btree[0].hashval), (void *)(unsigned long) diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c index 3c58834fa6a7..ac9ac700acf7 100644 --- a/fs/xfs/xfs_dir_leaf.c +++ 
b/fs/xfs/xfs_dir_leaf.c @@ -743,10 +743,12 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args) node = bp1->data; leaf = bp2->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC); - INT_SET(node->btree[0].hashval, ARCH_CONVERT, INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)); + node->btree[0].hashval = cpu_to_be32( + INT_GET(leaf->entries[ + INT_GET(leaf->hdr.count, ARCH_CONVERT)-1].hashval, ARCH_CONVERT)); xfs_da_buf_done(bp2); node->btree[0].before = cpu_to_be32(blkno); - INT_SET(node->hdr.count, ARCH_CONVERT, 1); + node->hdr.count = cpu_to_be16(1); xfs_da_log_buf(args->trans, bp1, XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0]))); xfs_da_buf_done(bp1); -- cgit v1.2.3 From b2fc6ad01beb550f75457b7d811ff84dc81b210b Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 17 Mar 2006 17:30:01 +1100 Subject: [XFS] remove bogus INT_GET for u8 variables in xfs_dir_leaf.c SGI-PV: 943272 SGI-Modid: xfs-linux-melb:xfs-kern:25506a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_dir_leaf.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c index ac9ac700acf7..ee88751c3be6 100644 --- a/fs/xfs/xfs_dir_leaf.c +++ b/fs/xfs/xfs_dir_leaf.c @@ -176,7 +176,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args) ASSERT(dp->i_df.if_u1.if_data != NULL); sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data; sfe = &sf->list[0]; - for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) { + for (i = sf->hdr.count-1; i >= 0; i--) { if (sfe->namelen == args->namelen && args->name[0] == sfe->name[0] && memcmp(args->name, sfe->name, args->namelen) == 0) @@ -193,7 +193,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args) XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber); sfe->namelen = args->namelen; memcpy(sfe->name, args->name, sfe->namelen); - INT_MOD(sf->hdr.count, ARCH_CONVERT, +1); + sf->hdr.count++; dp->i_d.di_size += size; xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); @@ -227,7 +227,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args) base = sizeof(xfs_dir_sf_hdr_t); sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data; sfe = &sf->list[0]; - for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) { + for (i = sf->hdr.count-1; i >= 0; i--) { size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe); if (sfe->namelen == args->namelen && sfe->name[0] == args->name[0] && @@ -245,7 +245,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args) memmove(&((char *)sf)[base], &((char *)sf)[base+size], dp->i_d.di_size - (base+size)); } - INT_MOD(sf->hdr.count, ARCH_CONVERT, -1); + sf->hdr.count--; xfs_idata_realloc(dp, -size, XFS_DATA_FORK); dp->i_d.di_size -= size; @@ -288,7 +288,7 @@ xfs_dir_shortform_lookup(xfs_da_args_t *args) return(XFS_ERROR(EEXIST)); } sfe = &sf->list[0]; - for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) { + for (i = sf->hdr.count-1; i >= 0; i--) { if (sfe->namelen == args->namelen && sfe->name[0] == args->name[0] && memcmp(args->name, sfe->name, args->namelen) == 0) { @@ -375,7 +375,7 @@ xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs) goto out; sfe = &sf->list[0]; - for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) { + for (i = 0; i < sf->hdr.count; i++) { args.name = (char *)(sfe->name); args.namelen = sfe->namelen; args.hashval = xfs_da_hashname((char *)(sfe->name), @@ -428,7 +428,7 @@ xfs_dir_shortform_getdents(xfs_inode_t *dp, uio_t *uio, int *eofp, sf = 
(xfs_dir_shortform_t *)dp->i_df.if_u1.if_data; cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset); want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset); - nsbuf = INT_GET(sf->hdr.count, ARCH_CONVERT) + 2; + nsbuf = sf->hdr.count + 2; sbsize = (nsbuf + 1) * sizeof(*sbuf); sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP); @@ -460,8 +460,7 @@ xfs_dir_shortform_getdents(xfs_inode_t *dp, uio_t *uio, int *eofp, /* * Scan the directory data for the rest of the entries. */ - for (i = 0, sfe = &sf->list[0]; - i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) { + for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { if (unlikely( ((char *)sfe < (char *)sf) || @@ -600,7 +599,7 @@ xfs_dir_shortform_replace(xfs_da_args_t *args) } ASSERT(args->namelen != 1 || args->name[0] != '.'); sfe = &sf->list[0]; - for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) { + for (i = sf->hdr.count-1; i >= 0; i--) { if (sfe->namelen == args->namelen && sfe->name[0] == args->name[0] && memcmp(args->name, sfe->name, args->namelen) == 0) { -- cgit v1.2.3 From 6cc8fef4cbeb0b65d225d7b599c75eb5b40a6534 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Mon, 20 Mar 2006 13:25:48 +1100 Subject: [XFS] Fix compiler warning from xfs_file_compat_invis_ioctl prototype. SGI-PV: 904196 SGI-Modid: xfs-linux-melb:xfs-kern:25509a Signed-off-by: Andrew Morton Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_ioctl32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 8bdb33ffc032..02de6e62ee37 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -19,6 +19,6 @@ #define __XFS_IOCTL32_H__ extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long); -extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned); +extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long); #endif /* __XFS_IOCTL32_H__ */ -- cgit v1.2.3 From b92dccf65bab3b6b7deb79ff3321dc256eb0f53b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:03 -0500 Subject: NFS: Fix a busy inodes issue... The nfs_open_context may live longer than the file descriptor that spawned it, so it needs to carry a reference to the vfsmount. If not, then generic_shutdown_super() may end up being called before reads and writes have been flushed out. Make a couple of functions static while we're at it... 
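The underlying rule here is the usual VFS lifetime one: an object that can outlive the file descriptor that created it must hold its own references on both the dentry and the vfsmount, and drop them only when the object itself is freed. The sketch below is illustrative only -- the type and function names are invented, and it assumes the 2.6-era dget()/dput() and mntget()/mntput() helpers -- but it shows the minimal shape of the pattern the nfs_open_context changes in the diff follow.

#include <linux/dcache.h>
#include <linux/mount.h>
#include <linux/slab.h>

/* Illustrative only: pins a (vfsmount, dentry) pair for as long as the
 * object exists, so the mount cannot be torn down underneath it. */
struct pinned_path {
	struct vfsmount	*mnt;
	struct dentry	*dentry;
};

static struct pinned_path *pinned_path_get(struct vfsmount *mnt,
					   struct dentry *dentry)
{
	struct pinned_path *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (p != NULL) {
		p->mnt = mntget(mnt);		/* mount stays busy */
		p->dentry = dget(dentry);	/* dentry/inode stay alive */
	}
	return p;
}

static void pinned_path_put(struct pinned_path *p)
{
	dput(p->dentry);
	mntput(p->mnt);
	kfree(p);
}

put_nfs_open_context() in the diff below does exactly this teardown, the existing dput() plus the new mntput(), once the last reference to the context goes away.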
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 10 ++++++---- include/linux/nfs_fs.h | 4 +--- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a77ee95b7efb..8d5b6691ae3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -973,7 +973,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) return err; } -struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred) +static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) { struct nfs_open_context *ctx; @@ -981,6 +981,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp if (ctx != NULL) { atomic_set(&ctx->count, 1); ctx->dentry = dget(dentry); + ctx->vfsmnt = mntget(mnt); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; @@ -1011,6 +1012,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) if (ctx->cred != NULL) put_rpccred(ctx->cred); dput(ctx->dentry); + mntput(ctx->vfsmnt); kfree(ctx); } } @@ -1019,7 +1021,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { struct inode *inode = filp->f_dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); @@ -1051,7 +1053,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c return ctx; } -void nfs_file_clear_open_context(struct file *filp) +static void nfs_file_clear_open_context(struct file *filp) { struct inode *inode = filp->f_dentry->d_inode; struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; @@ -1076,7 +1078,7 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); - ctx = alloc_nfs_open_context(filp->f_dentry, cred); + ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b4dc6e2e10c9..1161725d75e1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -78,6 +78,7 @@ struct nfs_access_entry { struct nfs4_state; struct nfs_open_context { atomic_t count; + struct vfsmount *vfsmnt; struct dentry *dentry; struct rpc_cred *cred; struct nfs4_state *state; @@ -311,12 +312,9 @@ extern void nfs_begin_attr_update(struct inode *); extern void nfs_end_attr_update(struct inode *); extern void nfs_begin_data_update(struct inode *); extern void nfs_end_data_update(struct inode *); -extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); -extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); -extern void nfs_file_clear_open_context(struct file *filp); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. 
*/ extern u32 root_nfs_parse_addr(char *name); /*__init*/ -- cgit v1.2.3 From cd52ed35535ef443f08bf5cd3331d350272885b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:04 -0500 Subject: NFS: Avoid races between writebacks and truncation Currently, there is no serialisation between NFS asynchronous writebacks and truncation at the page level due to the fact that nfs_sync_inode() cannot lock the pages that it is about to write out. This means that it is possible to be flushing out data (and calling something like set_page_writeback()) while the page cache is busy evicting the page. Oops... Use the hooks provided in try_to_release_page() to ensure that dirty pages are always written back to storage before we evict them. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 13 +++++++++++++ fs/nfs/pagelist.c | 10 ++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7a79fbe9f539..387809f2d188 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -316,6 +316,17 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse return status; } +static int nfs_invalidate_page(struct page *page, unsigned long offset) +{ + /* FIXME: we really should cancel any unstarted writes on this page */ + return 1; +} + +static int nfs_release_page(struct page *page, gfp_t gfp) +{ + return !nfs_wb_page(page->mapping->host, page); +} + struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, @@ -324,6 +335,8 @@ struct address_space_operations nfs_file_aops = { .writepages = nfs_writepages, .prepare_write = nfs_prepare_write, .commit_write = nfs_commit_write, + .invalidatepage = nfs_invalidate_page, + .releasepage = nfs_release_page, #ifdef CONFIG_NFS_DIRECTIO .direct_IO = nfs_direct_IO, #endif diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d53857b148e2..d6e076c9dbe1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -85,6 +85,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, atomic_set(&req->wb_complete, 0); req->wb_index = page->index; page_cache_get(page); + BUG_ON(PagePrivate(page)); + BUG_ON(!PageLocked(page)); + BUG_ON(page->mapping->host != inode); + SetPagePrivate(page); req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; @@ -147,8 +151,10 @@ void nfs_clear_page_writeback(struct nfs_page *req) */ void nfs_clear_request(struct nfs_page *req) { - if (req->wb_page) { - page_cache_release(req->wb_page); + struct page *page = req->wb_page; + if (page != NULL) { + ClearPagePrivate(page); + page_cache_release(page); req->wb_page = NULL; } } -- cgit v1.2.3 From 1dd594b21b2d98e56f2b1fe92bb222276b28de41 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 20 Mar 2006 13:44:04 -0500 Subject: NFS: Fix buglet in fs/nfs/write.c I've been reading through fs/nfs/write.c trying to track down a bug that seems to be related to pages loosing a refcount and getting freed too early (you interested in detail??) and I spotted a little bug which the following patch should fix. 
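The leak is the classic one in a lookup-or-allocate retry loop: a candidate request is allocated speculatively, and the early return taken when the wait on the existing request fails forgot to release it. The stand-alone sketch below is user-space C with every name invented and error handling reduced to an int; it only loosely mirrors the control flow of nfs_update_request(), and the single release added on the error path is the whole point.

#include <errno.h>
#include <stddef.h>
#include <stdlib.h>

struct req { int dummy; };

static struct req *req_alloc(void)     { return calloc(1, sizeof(struct req)); }
static void req_release(struct req *r) { free(r); }
/* Stubs standing in for the page-cache lookup and the interruptible wait. */
static struct req *find_busy_req(void) { return NULL; }
static int wait_on_req(struct req *r)  { (void)r; return -EINTR; }

struct req *update_request(int *err)
{
	struct req *new = NULL, *req;

	*err = 0;
	for (;;) {
		req = find_busy_req();
		if (req == NULL) {
			if (new != NULL)
				return new;	/* caller now owns it */
			new = req_alloc();
			if (new == NULL) {
				*err = -ENOMEM;
				return NULL;
			}
			continue;
		}
		*err = wait_on_req(req);
		req_release(req);
		if (*err < 0) {
			if (new != NULL)	/* without this, 'new' leaks */
				req_release(new);
			return NULL;
		}
		/* wait succeeded: retry the lookup from the top */
	}
}

In the diff that follows, the same release shows up as the added nfs_release_request(new) under the error < 0 check.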
Signed-off-by: Neil Brown Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9449b6835509..d6ad449041eb 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -653,8 +653,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); - if (error < 0) + if (error < 0) { + if (new) + nfs_release_request(new); return ERR_PTR(error); + } continue; } spin_unlock(&nfsi->req_lock); -- cgit v1.2.3 From 47831f35b83e43c804215712dd0c834c92e8a441 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:05 -0500 Subject: VFS: Fix __posix_lock_file() copy of private lock area The struct file_lock->fl_u area must be copied using the fl_copy_lock() operation. Signed-off-by: Trond Myklebust --- fs/locks.c | 53 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 909eab8fb1d0..d2c5306e3db0 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -153,6 +153,21 @@ static struct file_lock *locks_alloc_lock(void) return kmem_cache_alloc(filelock_cache, SLAB_KERNEL); } +static void locks_release_private(struct file_lock *fl) +{ + if (fl->fl_ops) { + if (fl->fl_ops->fl_release_private) + fl->fl_ops->fl_release_private(fl); + fl->fl_ops = NULL; + } + if (fl->fl_lmops) { + if (fl->fl_lmops->fl_release_private) + fl->fl_lmops->fl_release_private(fl); + fl->fl_lmops = NULL; + } + +} + /* Free a lock which is not in use. */ static void locks_free_lock(struct file_lock *fl) { @@ -169,18 +184,7 @@ static void locks_free_lock(struct file_lock *fl) if (!list_empty(&fl->fl_link)) panic("Attempting to free lock on active lock list"); - if (fl->fl_ops) { - if (fl->fl_ops->fl_release_private) - fl->fl_ops->fl_release_private(fl); - fl->fl_ops = NULL; - } - - if (fl->fl_lmops) { - if (fl->fl_lmops->fl_release_private) - fl->fl_lmops->fl_release_private(fl); - fl->fl_lmops = NULL; - } - + locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } @@ -218,11 +222,27 @@ static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags) locks_init_lock(lock); } +static void locks_copy_private(struct file_lock *new, struct file_lock *fl) +{ + if (fl->fl_ops) { + if (fl->fl_ops->fl_copy_lock) + fl->fl_ops->fl_copy_lock(new, fl); + new->fl_ops = fl->fl_ops; + } + if (fl->fl_lmops) { + if (fl->fl_lmops->fl_copy_lock) + fl->fl_lmops->fl_copy_lock(new, fl); + new->fl_lmops = fl->fl_lmops; + } +} + /* * Initialize a new lock from an existing file_lock structure. 
*/ void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { + locks_release_private(new); + new->fl_owner = fl->fl_owner; new->fl_pid = fl->fl_pid; new->fl_file = fl->fl_file; @@ -232,10 +252,8 @@ void locks_copy_lock(struct file_lock *new, struct file_lock *fl) new->fl_end = fl->fl_end; new->fl_ops = fl->fl_ops; new->fl_lmops = fl->fl_lmops; - if (fl->fl_ops && fl->fl_ops->fl_copy_lock) - fl->fl_ops->fl_copy_lock(new, fl); - if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock) - fl->fl_lmops->fl_copy_lock(new, fl); + + locks_copy_private(new, fl); } EXPORT_SYMBOL(locks_copy_lock); @@ -904,7 +922,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request) fl->fl_start = request->fl_start; fl->fl_end = request->fl_end; fl->fl_type = request->fl_type; - fl->fl_u = request->fl_u; + locks_release_private(fl); + locks_copy_private(fl, request); request = fl; added = 1; } -- cgit v1.2.3 From 36943fa4b2701b9ef2d60084c85ecbe634aec252 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:05 -0500 Subject: NLM: nlm_alloc_call should not immediately fail on signal Currently, nlm_alloc_call tests for a signal before it even tries to allocate memory. Fix it so that it tries at least once. Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 970b6a6aa337..615a988a92a7 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -291,14 +291,15 @@ nlmclnt_alloc_call(void) { struct nlm_rqst *call; - while (!signalled()) { - call = (struct nlm_rqst *) kmalloc(sizeof(struct nlm_rqst), GFP_KERNEL); - if (call) { - memset(call, 0, sizeof(*call)); + for(;;) { + call = kzalloc(sizeof(*call), GFP_KERNEL); + if (call != NULL) { locks_init_lock(&call->a_args.lock.fl); locks_init_lock(&call->a_res.lock.fl); return call; } + if (signalled()) + break; printk("nlmclnt_alloc_call: failed, waiting for memory\n"); schedule_timeout_interruptible(5*HZ); } -- cgit v1.2.3 From 7bab377fcb495ee2e5a1cd69d235f8d84c76e3af Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:06 -0500 Subject: lockd: Don't expose the process pid to the NLM server Instead we use the nlm_lockowner->pid. Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 10 +++++++++- fs/lockd/clntproc.c | 7 +++++-- fs/lockd/svclock.c | 1 + fs/lockd/xdr.c | 13 ++++++++----- fs/lockd/xdr4.c | 17 ++++++++++------- include/linux/lockd/xdr.h | 1 + 6 files changed, 34 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index da6354baa0b8..8ae79ae4b998 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -125,7 +125,15 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) list_for_each_entry(block, &nlm_blocked, b_list) { struct file_lock *fl_blocked = block->b_lock; - if (!nlm_compare_locks(fl_blocked, fl)) + if (fl_blocked->fl_start != fl->fl_start) + continue; + if (fl_blocked->fl_end != fl->fl_end) + continue; + /* + * Careful! The NLM server will return the 32-bit "pid" that + * we put on the wire: in this case the lockowner "pid". 
+ */ + if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; if (!nlm_cmp_addr(&block->b_host->h_addr, addr)) continue; diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 615a988a92a7..acc3eb13a02b 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -132,8 +132,10 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh)); lock->caller = system_utsname.nodename; lock->oh.data = req->a_owner; - lock->oh.len = sprintf(req->a_owner, "%d@%s", - current->pid, system_utsname.nodename); + lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", + (unsigned int)fl->fl_u.nfs_fl.owner->pid, + system_utsname.nodename); + lock->svid = fl->fl_u.nfs_fl.owner->pid; locks_copy_lock(&lock->fl, fl); } @@ -159,6 +161,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) /* set default data area */ call->a_args.lock.oh.data = call->a_owner; + call->a_args.lock.svid = lock->fl.fl_pid; if (lock->oh.len > NLMCLNT_OHSIZE) { void *data = kmalloc(lock->oh.len, GFP_KERNEL); diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 9cfced65d4a2..a525a141dd3b 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -397,6 +397,7 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, (long long)fl->fl_end); conflock->caller = "somehost"; /* FIXME */ conflock->oh.len = 0; /* don't return OH info */ + conflock->svid = fl->fl_pid; conflock->fl = *fl; return nlm_lck_denied; } diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 200fbda2c6d1..1e984ab14d3f 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -131,10 +131,11 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock) || !(p = nlm_decode_fh(p, &lock->fh)) || !(p = nlm_decode_oh(p, &lock->oh))) return NULL; + lock->svid = ntohl(*p++); locks_init_lock(fl); fl->fl_owner = current->files; - fl->fl_pid = ntohl(*p++); + fl->fl_pid = (pid_t)lock->svid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ start = ntohl(*p++); @@ -174,7 +175,7 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock) else len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1); - *p++ = htonl(fl->fl_pid); + *p++ = htonl(lock->svid); *p++ = htonl(start); *p++ = htonl(len); @@ -197,7 +198,7 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp) struct file_lock *fl = &resp->lock.fl; *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one; - *p++ = htonl(fl->fl_pid); + *p++ = htonl(resp->lock.svid); /* Encode owner handle. 
*/ if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) @@ -298,7 +299,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); - lock->fl.fl_pid = ~(u32) 0; + lock->svid = ~(u32) 0; + lock->fl.fl_pid = (pid_t)lock->svid; if (!(p = nlm_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, @@ -415,7 +417,8 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) memset(&resp->lock, 0, sizeof(resp->lock)); locks_init_lock(fl); excl = ntohl(*p++); - fl->fl_pid = ntohl(*p++); + resp->lock.svid = ntohl(*p++); + fl->fl_pid = (pid_t)resp->lock.svid; if (!(p = nlm_decode_oh(p, &resp->lock.oh))) return -EIO; diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index fdcf105a5303..906ddc203186 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -130,10 +130,11 @@ nlm4_decode_lock(u32 *p, struct nlm_lock *lock) || !(p = nlm4_decode_fh(p, &lock->fh)) || !(p = nlm4_decode_oh(p, &lock->oh))) return NULL; + lock->svid = ntohl(*p++); locks_init_lock(fl); fl->fl_owner = current->files; - fl->fl_pid = ntohl(*p++); + fl->fl_pid = (pid_t)lock->svid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ p = xdr_decode_hyper(p, &start); @@ -167,7 +168,7 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock) || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX)) return NULL; - *p++ = htonl(fl->fl_pid); + *p++ = htonl(lock->svid); start = loff_t_to_s64(fl->fl_start); if (fl->fl_end == OFFSET_MAX) @@ -198,7 +199,7 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp) struct file_lock *fl = &resp->lock.fl; *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one; - *p++ = htonl(fl->fl_pid); + *p++ = htonl(resp->lock.svid); /* Encode owner handle. 
*/ if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) @@ -212,8 +213,8 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp) p = xdr_encode_hyper(p, start); p = xdr_encode_hyper(p, len); - dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n", - resp->status, fl->fl_pid, fl->fl_type, + dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n", + resp->status, (int)resp->lock.svid, fl->fl_type, (long long)fl->fl_start, (long long)fl->fl_end); } @@ -303,7 +304,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); - lock->fl.fl_pid = ~(u32) 0; + lock->svid = ~(u32) 0; + lock->fl.fl_pid = (pid_t)lock->svid; if (!(p = nlm4_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, @@ -420,7 +422,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) memset(&resp->lock, 0, sizeof(resp->lock)); locks_init_lock(fl); excl = ntohl(*p++); - fl->fl_pid = ntohl(*p++); + resp->lock.svid = ntohl(*p++); + fl->fl_pid = (pid_t)resp->lock.svid; if (!(p = nlm4_decode_oh(p, &resp->lock.oh))) return -EIO; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d7a5cc4cfa97..bb0a0f1caa91 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -28,6 +28,7 @@ struct nlm_lock { int len; /* length of "caller" */ struct nfs_fh fh; struct xdr_netobj oh; + u32 svid; struct file_lock fl; }; -- cgit v1.2.3 From 755c1e20cd2ad56e5c567fa05769eb98a3eef72b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:06 -0500 Subject: NFS: writes should not clobber utimes() calls Ensure that we flush out writes in the case when someone calls utimes() in order to set the file times. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8d5b6691ae3d..5746dc841a6c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -859,11 +859,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) lock_kernel(); nfs_begin_data_update(inode); - /* Write all dirty data if we're changing file permissions or size */ - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { - filemap_write_and_wait(inode->i_mapping); - nfs_wb_all(inode); - } + /* Write all dirty data */ + filemap_write_and_wait(inode->i_mapping); + nfs_wb_all(inode); /* * Return any delegations if we're going to change ACLs */ -- cgit v1.2.3 From ca62b9c3f7b8679ada4de94d2ab7098c6860c3d7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:07 -0500 Subject: NFSv4: Don't invalidate cached attributes if change attribute is unchanged Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5746dc841a6c..0e1ef97bcf54 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1299,34 +1299,35 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; + /* Has the inode gone and changed behind our back? */ + if (nfsi->fileid != fattr->fileid + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + return -EIO; + } + /* Are we in the process of updating data on the server? 
*/ data_unstable = nfs_caches_unstable(inode); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && - nfsi->change_attr != fattr->change_attr) { + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) { + if (nfsi->change_attr == fattr->change_attr) + goto out; nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } - /* Has the inode gone and changed behind our back? */ - if (nfsi->fileid != fattr->fileid - || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { - return -EIO; - } - - cur_size = i_size_read(inode); - new_isize = nfs_size_to_loff_t(fattr->size); - /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } + + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); if (cur_size != new_isize) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (nfsi->npages == 0) @@ -1343,6 +1344,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (inode->i_nlink != fattr->nlink) nfsi->cache_validity |= NFS_INO_INVALID_ATTR; +out: if (!timespec_equal(&inode->i_atime, &fattr->atime)) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; @@ -1481,15 +1483,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_change_attribute = jiffies; } - if ((fattr->valid & NFS_ATTR_FATTR_V4) - && nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - nfsi->change_attr = fattr->change_attr; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = jiffies; - } - /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; @@ -1519,6 +1512,17 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blksize = fattr->du.nfs2.blocksize; } + if ((fattr->valid & NFS_ATTR_FATTR_V4)) { + if (nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + nfsi->change_attr = fattr->change_attr; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->cache_change_attribute = jiffies; + } else + invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA); + } + /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); -- cgit v1.2.3 From c8d149f3dbd582a101aa7da7bdd6c3316efd11b4 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 20 Mar 2006 13:44:07 -0500 Subject: NFS: "const static" vs "static const" in nfs4 My previous "const static" vs "static const" cleanup missed a single case, patch below takes care of it. 
Signed-off-by: Jesper Juhl Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f8c0066e02e1..305bea201cd3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2958,7 +2958,7 @@ static void nfs4_delegreturn_release(void *calldata) kfree(calldata); } -const static struct rpc_call_ops nfs4_delegreturn_ops = { +static const struct rpc_call_ops nfs4_delegreturn_ops = { .rpc_call_prepare = nfs4_delegreturn_prepare, .rpc_call_done = nfs4_delegreturn_done, .rpc_release = nfs4_delegreturn_release, -- cgit v1.2.3 From fb374d24f225f38f13dbffb65dd7ec72daf08dba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:08 -0500 Subject: NFS: reduce the number of false cache invalidations. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0e1ef97bcf54..24988f430e4b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1328,11 +1328,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (nfsi->npages == 0) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } + if (cur_size != new_isize && nfsi->npages == 0) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) -- cgit v1.2.3 From 12de3b35ea549c5819f287508d7afab0bf3ac44d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:09 -0500 Subject: SUNRPC: Ensure that rpc_mkpipe returns a refcounted dentry If not, we cannot guarantee that idmap->idmap_dentry, gss_auth->dentry and clnt->cl_dentry are valid dentries. 
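To illustrate the contract this change establishes for callers: rpc_mkdir() and rpc_mkpipe() now hand back a dentry with a reference held on the caller's behalf, and the caller must drop that reference itself once it stops storing the pointer. The sketch below uses made-up names (example_pipe, example_pipe_create, example_pipe_destroy); only rpc_mkpipe(), rpc_unlink() and dput() are real interfaces here.

#include <linux/err.h>
#include <linux/dcache.h>
#include <linux/sunrpc/rpc_pipe_fs.h>

struct example_pipe {
	struct dentry *dentry;
	char path[48];
	struct rpc_pipe_ops *ops;
};

static int example_pipe_create(struct example_pipe *ep)
{
	/* After this patch the returned dentry is refcounted for us. */
	ep->dentry = rpc_mkpipe(ep->path, ep, ep->ops, 0);
	if (IS_ERR(ep->dentry))
		return PTR_ERR(ep->dentry);
	return 0;
}

static void example_pipe_destroy(struct example_pipe *ep)
{
	/* Drop our reference before forgetting the pointer, then unlink. */
	dput(ep->dentry);
	ep->dentry = NULL;
	rpc_unlink(ep->path);
}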
Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 2 ++ net/sunrpc/auth_gss/auth_gss.c | 2 ++ net/sunrpc/clnt.c | 3 +++ net/sunrpc/rpc_pipe.c | 4 ++-- 4 files changed, 9 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 821edd30333b..32c95a0d93f3 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -132,6 +132,8 @@ nfs_idmap_delete(struct nfs4_client *clp) if (!idmap) return; + dput(idmap->idmap_dentry); + idmap->idmap_dentry = NULL; rpc_unlink(idmap->idmap_path); clp->cl_idmap = NULL; kfree(idmap); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index bb46efd92e57..900ef31f5a0e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -721,6 +721,8 @@ gss_destroy(struct rpc_auth *auth) gss_auth = container_of(auth, struct gss_auth, rpc_auth); rpc_unlink(gss_auth->path); + dput(gss_auth->dentry); + gss_auth->dentry = NULL; gss_mech_put(gss_auth->mech); rpcauth_free_credcache(auth); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d78479782045..a6d7c332dd49 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -240,6 +240,7 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_autobind = 0; new->cl_oneshot = 0; new->cl_dead = 0; + dget(new->cl_dentry); rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); @@ -314,6 +315,8 @@ rpc_destroy_client(struct rpc_clnt *clnt) if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: + if (clnt->cl_dentry) + dput(clnt->cl_dentry); kfree(clnt); return 0; } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 567abbe25bc8..72b22172f0af 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -669,7 +669,7 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client) out: mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); - return dentry; + return dget(dentry); err_depopulate: rpc_depopulate(dentry); __rpc_rmdir(dir, dentry); @@ -733,7 +733,7 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) out: mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); - return dentry; + return dget(dentry); err_dput: dput(dentry); dentry = ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 967b9281361481aecf323563886ef972ee88c681 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:09 -0500 Subject: NFSv4: Do not call rpciod_down() before call to destroy_nfsv4_state() The reason is that the idmapper cleanup may call flush_workqueue() on rpciod_workqueue. 
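Condensed, the fixed teardown order in nfs4_kill_super() is the following; the comments are added here for illustration and are not part of the patch.

	rpc_shutdown_client(server->client);
	destroy_nfsv4_state(server);	/* idmap cleanup may flush_workqueue()
					 * on rpciod_workqueue, so rpciod must
					 * still be up at this point */
	rpciod_down();			/* release rpciod only afterwards */
	kfree(server->hostname);
	kfree(server);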
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 24988f430e4b..b81149eb26e5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2025,10 +2025,11 @@ static void nfs4_kill_super(struct super_block *sb) if (server->client != NULL && !IS_ERR(server->client)) rpc_shutdown_client(server->client); - rpciod_down(); /* release rpciod */ destroy_nfsv4_state(server); + rpciod_down(); + kfree(server->hostname); kfree(server); } -- cgit v1.2.3 From a162a6b804b48c605d1fd35e1861a5d32d00ad3f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:10 -0500 Subject: NFSv4: Kill braindead gcc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nfs4_open_revalidate: 'res' may be used uninitialized nfs4_callback_compound: ‘hdr_res.nops’ may be used uninitialized 'op_nr’ may be used uninitialized encode_getattr_res: ‘savep’ may be used uninitialized Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 28 ++++++++++++++++------------ fs/nfs/nfs4proc.c | 2 +- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 7c33b9a81a94..05c38cf40b69 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res) static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res) { - uint32_t *savep; + uint32_t *savep = NULL; unsigned status = res->status; if (unlikely(status != 0)) @@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp, struct xdr_stream *xdr_in, void *argp, struct xdr_stream *xdr_out, void *resp) { - struct callback_op *op; - unsigned int op_nr; + struct callback_op *op = &callback_ops[0]; + unsigned int op_nr = OP_CB_ILLEGAL; unsigned int status = 0; long maxlen; unsigned res; dprintk("%s: start\n", __FUNCTION__); status = decode_op_hdr(xdr_in, &op_nr); - if (unlikely(status != 0)) { - op_nr = OP_CB_ILLEGAL; - op = &callback_ops[0]; - } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) { - op_nr = OP_CB_ILLEGAL; - op = &callback_ops[0]; - status = htonl(NFS4ERR_OP_ILLEGAL); - } else - op = &callback_ops[op_nr]; + if (likely(status == 0)) { + switch (op_nr) { + case OP_CB_GETATTR: + case OP_CB_RECALL: + op = &callback_ops[op_nr]; + break; + default: + op_nr = OP_CB_ILLEGAL; + op = &callback_ops[0]; + status = htonl(NFS4ERR_OP_ILLEGAL); + } + } maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { @@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp decode_compound_hdr_arg(&xdr_in, &hdr_arg); hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; + hdr_res.nops = NULL; encode_compound_hdr_res(&xdr_out, &hdr_res); for (;;) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 305bea201cd3..77a565eba562 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -908,7 +908,7 @@ out_put_state_owner: static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) { struct nfs4_exception exception = { }; - struct nfs4_state *res; + struct nfs4_state *res = ERR_PTR(-EIO); int err; do { -- cgit v1.2.3 From bd6475454c774bd9dbe6078d94bbf72b1d3b65f4 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Mon, 20 Mar 2006 13:44:10 -0500 Subject: NFS: 
kzalloc conversion in fs/nfs this converts fs/nfs to kzalloc() usage. compile tested with make allyesconfig Signed-off-by: Eric Sesterhenn Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 4 +--- fs/nfs/inode.c | 6 ++---- fs/nfs/unlink.c | 3 +-- fs/nfs/write.c | 6 ++---- 4 files changed, 6 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 32c95a0d93f3..b89d27f93d66 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -101,11 +101,9 @@ nfs_idmap_new(struct nfs4_client *clp) if (clp->cl_idmap != NULL) return; - if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) + if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) return; - memset(idmap, 0, sizeof(*idmap)); - snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), "%s/idmap", clp->cl_rpcclient->cl_pathname); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b81149eb26e5..521d1dcb4cf0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1638,10 +1638,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, #endif /* CONFIG_NFS_V3 */ s = ERR_PTR(-ENOMEM); - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) goto out_err; - memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); @@ -1942,10 +1941,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, return ERR_PTR(-EINVAL); } - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) return ERR_PTR(-ENOMEM); - memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index a65c7b53d558..0e28189c2151 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry) struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode); int status = -ENOMEM; - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out; - memset(data, 0, sizeof(*data)); data->cred = rpcauth_lookupcred(clnt->cl_auth, 0); if (IS_ERR(data->cred)) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d6ad449041eb..92ecf24455c3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -100,10 +100,8 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) p->pagevec = &p->page_array[0]; else { size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { + p->pagevec = kzalloc(size, GFP_NOFS); + if (!p->pagevec) { mempool_free(p, nfs_commit_mempool); p = NULL; } -- cgit v1.2.3 From c9d5128a10a4974f72674ff3463da4db439e8b04 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Mar 2006 13:44:11 -0500 Subject: NFS: sem2mutex idmap.c semaphore to mutex conversion. the conversion was generated via scripts, and the result was validated automatically via a script as well. build and boot tested. 
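For readers unfamiliar with the conversion, the script applies a one-to-one mapping from semaphores used as sleeping locks to the new mutex API. A generic before/after sketch (the struct and functions are invented for illustration, not taken from idmap.c):

#include <linux/mutex.h>

struct foo {
	struct mutex lock;		/* was: struct semaphore lock; */
};

static void foo_init(struct foo *f)
{
	mutex_init(&f->lock);		/* was: init_MUTEX(&f->lock); */
}

static void foo_update(struct foo *f)
{
	mutex_lock(&f->lock);		/* was: down(&f->lock); */
	/* ... protected section ... */
	mutex_unlock(&f->lock);		/* was: up(&f->lock); */
}

Besides the cosmetic change, a real mutex lets the kernel's mutex debugging catch misuse, such as unlocking from a task that does not hold the lock.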
Signed-off-by: Ingo Molnar Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b89d27f93d66..3fab5b0cfc5a 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -35,6 +35,7 @@ */ #include +#include #include #include #include @@ -74,8 +75,8 @@ struct idmap { struct dentry *idmap_dentry; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; - struct semaphore idmap_lock; /* Serializes upcalls */ - struct semaphore idmap_im_lock; /* Protects the hashtable */ + struct mutex idmap_lock; /* Serializes upcalls */ + struct mutex idmap_im_lock; /* Protects the hashtable */ struct idmap_hashtable idmap_user_hash; struct idmap_hashtable idmap_group_hash; }; @@ -114,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp) return; } - init_MUTEX(&idmap->idmap_lock); - init_MUTEX(&idmap->idmap_im_lock); + mutex_init(&idmap->idmap_lock); + mutex_init(&idmap->idmap_im_lock); init_waitqueue_head(&idmap->idmap_wq); idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; @@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, if (namelen >= IDMAP_NAMESZ) return -EINVAL; - down(&idmap->idmap_lock); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_lock); + mutex_lock(&idmap->idmap_im_lock); he = idmap_lookup_name(h, name, namelen); if (he != NULL) { @@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, } set_current_state(TASK_UNINTERRUPTIBLE); - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); schedule(); current->state = TASK_RUNNING; remove_wait_queue(&idmap->idmap_wq, &wq); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); if (im->im_status & IDMAP_STATUS_SUCCESS) { *id = im->im_id; @@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, out: memset(im, 0, sizeof(*im)); - up(&idmap->idmap_im_lock); - up(&idmap->idmap_lock); + mutex_unlock(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_lock); return (ret); } @@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, im = &idmap->idmap_im; - down(&idmap->idmap_lock); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_lock); + mutex_lock(&idmap->idmap_im_lock); he = idmap_lookup_id(h, id); if (he != 0) { @@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, } set_current_state(TASK_UNINTERRUPTIBLE); - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); schedule(); current->state = TASK_RUNNING; remove_wait_queue(&idmap->idmap_wq, &wq); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); if (im->im_status & IDMAP_STATUS_SUCCESS) { if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) @@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, out: memset(im, 0, sizeof(*im)); - up(&idmap->idmap_im_lock); - up(&idmap->idmap_lock); + mutex_unlock(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_lock); return ret; } @@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (copy_from_user(&im_in, src, mlen) != 0) return (-EFAULT); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); ret = mlen; im->im_status = im_in.im_status; @@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) idmap_update_entry(he, im_in.im_name, namelen_in, 
im_in.im_id); ret = mlen; out: - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); return ret; } @@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) if (msg->errno >= 0) return; - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); im->im_status = IDMAP_STATUS_LOOKUPFAIL; wake_up(&idmap->idmap_wq); - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); } /* -- cgit v1.2.3 From b4629fe2f094b719847f31be1ee5ab38300038b2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:12 -0500 Subject: VFS: New /proc file /proc/self/mountstats Create a new file under /proc/self, called mountstats, where mounted file systems can export information (configuration options, performance counters, and so on). Use a mechanism similar to /proc/mounts and s_ops->show_options. This mechanism does not violate namespace security, and is safe to use while other processes are unmounting file systems. Thanks to Mike Waychison for his review and comments. Test-plan: Test concurrent mount/unmount operations while cat'ing /proc/self/mountstats. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/namespace.c | 38 ++++++++++++++++++++++++++++++++++++++ fs/proc/base.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 1 + 3 files changed, 78 insertions(+) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 39c81a8d6316..71e75bcf4d28 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -399,6 +399,44 @@ struct seq_operations mounts_op = { .show = show_vfsmnt }; +static int show_vfsstat(struct seq_file *m, void *v) +{ + struct vfsmount *mnt = v; + int err = 0; + + /* device */ + if (mnt->mnt_devname) { + seq_puts(m, "device "); + mangle(m, mnt->mnt_devname); + } else + seq_puts(m, "no device"); + + /* mount point */ + seq_puts(m, " mounted on "); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); + + /* file system type */ + seq_puts(m, "with fstype "); + mangle(m, mnt->mnt_sb->s_type->name); + + /* optional statistics */ + if (mnt->mnt_sb->s_op->show_stats) { + seq_putc(m, ' '); + err = mnt->mnt_sb->s_op->show_stats(m, mnt); + } + + seq_putc(m, '\n'); + return err; +} + +struct seq_operations mountstats_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_vfsstat, +}; + /** * may_umount_tree - check if a mount tree is busy * @mnt: root of mount tree diff --git a/fs/proc/base.c b/fs/proc/base.c index 20feb7568deb..8f1f49ceebec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -104,6 +104,7 @@ enum pid_directory_inos { PROC_TGID_MAPS, PROC_TGID_NUMA_MAPS, PROC_TGID_MOUNTS, + PROC_TGID_MOUNTSTATS, PROC_TGID_WCHAN, #ifdef CONFIG_MMU PROC_TGID_SMAPS, @@ -144,6 +145,7 @@ enum pid_directory_inos { PROC_TID_MAPS, PROC_TID_NUMA_MAPS, PROC_TID_MOUNTS, + PROC_TID_MOUNTSTATS, PROC_TID_WCHAN, #ifdef CONFIG_MMU PROC_TID_SMAPS, @@ -201,6 +203,7 @@ static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), + E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR), #ifdef CONFIG_MMU E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), #endif @@ -732,6 +735,38 @@ static struct file_operations proc_mounts_operations = { .poll = mounts_poll, }; +extern struct seq_operations mountstats_op; +static int mountstats_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &mountstats_op); + + if (!ret) { + 
struct seq_file *m = file->private_data; + struct namespace *namespace; + task_lock(task); + namespace = task->namespace; + if (namespace) + get_namespace(namespace); + task_unlock(task); + + if (namespace) + m->private = namespace; + else { + seq_release(inode, file); + ret = -EINVAL; + } + } + return ret; +} + +static struct file_operations proc_mountstats_operations = { + .open = mountstats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mounts_release, +}; + #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ static ssize_t proc_info_read(struct file * file, char __user * buf, @@ -1730,6 +1765,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir, inode->i_fop = &proc_smaps_operations; break; #endif + case PROC_TID_MOUNTSTATS: + case PROC_TGID_MOUNTSTATS: + inode->i_fop = &proc_mountstats_operations; + break; #ifdef CONFIG_SECURITY case PROC_TID_ATTR: inode->i_nlink = 2; diff --git a/include/linux/fs.h b/include/linux/fs.h index 128d0082522c..be21e860a9f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1086,6 +1086,7 @@ struct super_operations { void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); + int (*show_stats)(struct seq_file *, struct vfsmount *); ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); -- cgit v1.2.3 From 7a480e250c7ca9187275d8574ae9e48a6b602cb9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:12 -0500 Subject: NFS: show retransmit settings when displaying mount options Sometimes it's important to know the exact RPC retransmit settings the kernel is using for an NFS mount point. Add this facility to the NFS client's show_options method. Test plan: Set various retransmit settings via the mount command, and check that the settings are reflected in /proc/mounts. 
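One detail worth spelling out in the hunk below is the unit conversion: retrans_timeo holds the RPC to_initval in jiffies, while the timeo= mount option is expressed in tenths of a second, hence the 10U * value / HZ scaling. A small illustration (the helper name is invented):

#include <linux/param.h>	/* HZ */

static unsigned long timeo_in_tenths(unsigned long retrans_timeo)
{
	/* jiffies -> tenths of a second, matching the timeo= mount option */
	return 10U * retrans_timeo / HZ;
}

/* Worked example, assuming HZ == 1000: a 60 second initial timeout is
 * 60000 jiffies, and 10U * 60000 / 1000 == 600, reported as ",timeo=600". */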
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 ++++++++ include/linux/nfs_fs_sb.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 521d1dcb4cf0..48683d4bf0f6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -396,6 +396,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + /* create transport and client */ xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { @@ -629,6 +632,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) proto = buf; } seq_printf(m, ",proto=%s", proto); + seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", nfss->retrans_count); seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); return 0; @@ -1800,6 +1805,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 3d3a305488cf..a522ab97358d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -26,6 +26,8 @@ struct nfs_server { unsigned int acregmax; unsigned int acdirmin; unsigned int acdirmax; + unsigned long retrans_timeo; /* retransmit timeout */ + unsigned int retrans_count; /* number of retransmit tries */ unsigned int namelen; char * hostname; /* remote hostname */ struct nfs_fh fh; -- cgit v1.2.3 From c8bded96aa8735823e53c95a26177987ebb19a90 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:13 -0500 Subject: NFS: clean up some mount options Get rid of "lock" and "posix", and spell out "vers=". Test plan: None. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 48683d4bf0f6..827d69255b1b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -591,10 +591,9 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; @@ -603,7 +602,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) char buf[12]; char *proto; - seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); if (nfss->acregmin != 3*HZ) -- cgit v1.2.3 From d9ef5a8c26aab09762afce43df64736720b4860e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:13 -0500 Subject: NFS: introduce mechanism for tracking NFS client metrics Add a per-superblock performance counter facility to the NFS client. This facility mimics the counters available for block devices and for networking. 
Expose these new counters via the new /proc/self/mountstats interface. Thanks to Andrew Morton and Trond Myklebust for their review and comments. Test plan: fsx and iozone on UP and SMP systems, with and without pre-emption. Watch for memory overwrite bugs, and performance loss (significantly more CPU required per op). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 103 +++++++++++++++++++++++++++++-- fs/nfs/iostat.h | 152 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs_sb.h | 3 + 3 files changed, 252 insertions(+), 6 deletions(-) create mode 100644 fs/nfs/iostat.h (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 827d69255b1b..86b756f44e27 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -42,6 +42,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 @@ -65,6 +66,7 @@ static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static int nfs_show_stats(struct seq_file *, struct vfsmount *); static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -78,6 +80,7 @@ static struct super_operations nfs_sops = { .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -290,6 +293,12 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) } sb->s_root->d_op = server->rpc_ops->dentry_ops; + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + no_root_error = -ENOMEM; + goto out_no_root; + } + /* Get some general file system info */ if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) @@ -582,7 +591,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf) } -static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { static struct proc_nfs_info { int flag; @@ -598,20 +607,19 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); char buf[12]; char *proto; seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ) + if (nfss->acregmin != 3*HZ || showdefaults) seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ) + if (nfss->acregmax != 60*HZ || showdefaults) seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ) + if (nfss->acdirmin != 30*HZ || showdefaults) seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ) + if (nfss->acdirmax != 60*HZ || showdefaults) seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) @@ -633,8 +641,89 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",proto=%s", proto); seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); seq_printf(m, ",retrans=%u", nfss->retrans_count); +} + +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + 
nfs_show_mount_options(m, nfss, 0); + seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); + + return 0; +} + +static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) +{ + int i, cpu; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + struct rpc_auth *auth = nfss->client->cl_auth; + struct nfs_iostats totals = { }; + + seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); + + /* + * Display all mount option settings + */ + seq_printf(m, "\n\topts:\t"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); + nfs_show_mount_options(m, nfss, 1); + + seq_printf(m, "\n\tcaps:\t"); + seq_printf(m, "caps=0x%x", nfss->caps); + seq_printf(m, ",wtmult=%d", nfss->wtmult); + seq_printf(m, ",dtsize=%d", nfss->dtsize); + seq_printf(m, ",bsize=%d", nfss->bsize); + seq_printf(m, ",namelen=%d", nfss->namelen); + +#ifdef CONFIG_NFS_V4 + if (nfss->rpc_ops->version == 4) { + seq_printf(m, "\n\tnfsv4:\t"); + seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); + seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + } +#endif + + /* + * Display security flavor in effect for this mount + */ + seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + if (auth->au_flavor) + seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + + /* + * Display superblock I/O counters + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct nfs_iostats *stats; + + if (!cpu_possible(cpu)) + continue; + + preempt_disable(); + stats = per_cpu_ptr(nfss->io_stats, cpu); + + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + totals.events[i] += stats->events[i]; + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + totals.bytes[i] += stats->bytes[i]; + + preempt_enable(); + } + + seq_printf(m, "\n\tevents:\t"); + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + seq_printf(m, "%lu ", totals.events[i]); + seq_printf(m, "\n\tbytes:\t"); + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + return 0; } @@ -1742,6 +1831,7 @@ static struct super_operations nfs4_sops = { .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -2015,6 +2105,7 @@ out_err: out_free: kfree(server->mnt_path); kfree(server->hostname); + nfs_free_iostats(server->io_stats); kfree(server); return s; } diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h new file mode 100644 index 000000000000..dc080e50ec57 --- /dev/null +++ b/fs/nfs/iostat.h @@ -0,0 +1,152 @@ +/* + * linux/fs/nfs/iostat.h + * + * Declarations for NFS client per-mount statistics + * + * Copyright (C) 2005, 2006 Chuck Lever + * + * NFS client per-mount statistics provide information about the health of + * the NFS client and the health of each NFS mount point. Generally these + * are not for detailed problem diagnosis, but simply to indicate that there + * is a problem. + * + * These counters are not meant to be human-readable, but are meant to be + * integrated into system monitoring tools such as "sar" and "iostat". As + * such, the counters are sampled by the tools over time, and are never + * zeroed after a file system is mounted. Moving averages can be computed + * by the tools by taking the difference between two instantaneous samples + * and dividing that by the time between the samples. 
+ */ + +#ifndef _NFS_IOSTAT +#define _NFS_IOSTAT + +#define NFS_IOSTAT_VERS "1.0" + +/* + * NFS byte counters + * + * 1. SERVER - the number of payload bytes read from or written to the + * server by the NFS client via an NFS READ or WRITE request. + * + * 2. NORMAL - the number of bytes read or written by applications via + * the read(2) and write(2) system call interfaces. + * + * 3. DIRECT - the number of bytes read or written from files opened + * with the O_DIRECT flag. + * + * These counters give a view of the data throughput into and out of the NFS + * client. Comparing the number of bytes requested by an application with the + * number of bytes the client requests from the server can provide an + * indication of client efficiency (per-op, cache hits, etc). + * + * These counters can also help characterize which access methods are in + * use. DIRECT by itself shows whether there is any O_DIRECT traffic. + * NORMAL + DIRECT shows how much data is going through the system call + * interface. A large amount of SERVER traffic without much NORMAL or + * DIRECT traffic shows that applications are using mapped files. + * + * NFS page counters + * + * These count the number of pages read or written via nfs_readpage(), + * nfs_readpages(), or their write equivalents. + */ +enum nfs_stat_bytecounters { + NFSIOS_NORMALREADBYTES = 0, + NFSIOS_NORMALWRITTENBYTES, + NFSIOS_DIRECTREADBYTES, + NFSIOS_DIRECTWRITTENBYTES, + NFSIOS_SERVERREADBYTES, + NFSIOS_SERVERWRITTENBYTES, + NFSIOS_READPAGES, + NFSIOS_WRITEPAGES, + __NFSIOS_BYTESMAX, +}; + +/* + * NFS event counters + * + * These counters provide a low-overhead way of monitoring client activity + * without enabling NFS trace debugging. The counters show the rate at + * which VFS requests are made, and how often the client invalidates its + * data and attribute caches. This allows system administrators to monitor + * such things as how close-to-open is working, and answer questions such + * as "why are there so many GETATTR requests on the wire?" + * + * They also count anamolous events such as short reads and writes, silly + * renames due to close-after-delete, and operations that change the size + * of a file (such operations can often be the source of data corruption + * if applications aren't using file locking properly). 
+ */ +enum nfs_stat_eventcounters { + NFSIOS_INODEREVALIDATE = 0, + NFSIOS_DENTRYREVALIDATE, + NFSIOS_DATAINVALIDATE, + NFSIOS_ATTRINVALIDATE, + NFSIOS_VFSOPEN, + NFSIOS_VFSLOOKUP, + NFSIOS_VFSACCESS, + NFSIOS_VFSUPDATEPAGE, + NFSIOS_VFSREADPAGE, + NFSIOS_VFSREADPAGES, + NFSIOS_VFSWRITEPAGE, + NFSIOS_VFSWRITEPAGES, + NFSIOS_VFSGETDENTS, + NFSIOS_VFSSETATTR, + NFSIOS_VFSFLUSH, + NFSIOS_VFSFSYNC, + NFSIOS_VFSLOCK, + NFSIOS_VFSRELEASE, + NFSIOS_CONGESTIONWAIT, + NFSIOS_SETATTRTRUNC, + NFSIOS_EXTENDWRITE, + NFSIOS_SILLYRENAME, + NFSIOS_SHORTREAD, + NFSIOS_SHORTWRITE, + __NFSIOS_COUNTSMAX, +}; + +#ifdef __KERNEL__ + +#include +#include + +struct nfs_iostats { + unsigned long long bytes[__NFSIOS_BYTESMAX]; + unsigned long events[__NFSIOS_COUNTSMAX]; +} ____cacheline_aligned; + +static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats->events[stat] ++; + put_cpu_no_resched(); +} + +static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats->bytes[stat] += addend; + put_cpu_no_resched(); +} + +static inline struct nfs_iostats *nfs_alloc_iostats(void) +{ + return alloc_percpu(struct nfs_iostats); +} + +static inline void nfs_free_iostats(struct nfs_iostats *stats) +{ + free_percpu(stats); +} + +#endif +#endif diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a522ab97358d..d65e69a06b72 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -4,6 +4,8 @@ #include #include +struct nfs_iostats; + /* * NFS client parameters stored in the superblock. */ @@ -12,6 +14,7 @@ struct nfs_server { struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ + struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ unsigned int caps; /* server capabilities */ -- cgit v1.2.3 From 91d5b47023b608227d605d1e916b29dd0215bff7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:14 -0500 Subject: NFS: add I/O performance counters Invoke the byte and event counter macros where we want to count bytes and events. Clean-up: fix a possible NULL dereference in nfs_lock, and simplify nfs_file_open. Test-plan: fsx and iozone on UP and SMP systems, with and without pre-emption. Watch for memory overwrite bugs, and performance loss (significantly more CPU required per op). 
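The instrumentation below follows one uniform pattern: bump an event counter on entry to a VFS operation, and add to a byte (or page) counter wherever a transfer size is known. A hypothetical entry point, using only the helpers and enum values from fs/nfs/iostat.h introduced above:

#include <linux/fs.h>
#include "iostat.h"

static ssize_t example_read_entry(struct inode *inode, size_t count)
{
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);		/* one event */
	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);	/* bytes requested */
	/* ... hand off to the normal read path ... */
	return count;
}

Because the counters are per-CPU and never zeroed, the cost per call is a couple of increments with no locking; monitoring tools are expected to sample the values periodically and take differences over time.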
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 ++++++++ fs/nfs/direct.c | 7 +++++++ fs/nfs/file.c | 20 +++++++++----------- fs/nfs/inode.c | 8 ++++++++ fs/nfs/read.c | 12 ++++++++++++ fs/nfs/write.c | 18 ++++++++++++++++++ 6 files changed, 62 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a1554bead692..151b8dd0ac3b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -34,6 +34,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "iostat.h" #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ @@ -507,6 +508,8 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct nfs_fattr fattr; long res; + nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); + lock_kernel(); res = nfs_revalidate_inode(NFS_SERVER(inode), inode); @@ -713,6 +716,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; + nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; if (!inode) { @@ -844,6 +848,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru dfprintk(VFS, "NFS: lookup(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); if (dentry->d_name.len > NFS_SERVER(dir)->namelen) @@ -1241,6 +1246,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, atomic_read(&dentry->d_count)); + nfs_inc_stats(dir, NFSIOS_SILLYRENAME); #ifdef NFS_PARANOIA if (!dentry->d_inode) @@ -1640,6 +1646,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) struct rpc_cred *cred; int res = 0; + nfs_inc_stats(inode, NFSIOS_VFSACCESS); + if (mask == 0) goto out; /* Is this sys_access() ? 
*/ diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4e9b3a1b36c5..fc07ce4885da 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -54,6 +54,8 @@ #include #include +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_VFS #define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) @@ -67,6 +69,7 @@ struct nfs_direct_req { struct kref kref; /* release manager */ struct list_head list; /* nfs_read_data structs */ wait_queue_head_t wait; /* wait for i/o completion */ + struct inode * inode; /* target file of I/O */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ atomic_t complete, /* i/os we're waiting for */ @@ -357,7 +360,9 @@ static ssize_t nfs_direct_read_seg(struct inode *inode, dreq->pages = pages; dreq->npages = nr_pages; + dreq->inode = inode; + nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, file_offset); @@ -572,6 +577,7 @@ static ssize_t nfs_direct_write(struct inode *inode, return page_count; } + nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size); result = nfs_direct_write_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); nfs_free_user_pages(pages, page_count, 0); @@ -581,6 +587,7 @@ static ssize_t nfs_direct_write(struct inode *inode, break; return result; } + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); tot_bytes += result; file_offset += result; if (result < size) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 387809f2d188..1cf07e4ad136 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -32,6 +32,7 @@ #include #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -102,18 +103,15 @@ static int nfs_check_flags(int flags) static int nfs_file_open(struct inode *inode, struct file *filp) { - struct nfs_server *server = NFS_SERVER(inode); - int (*open)(struct inode *, struct file *); int res; res = nfs_check_flags(filp->f_flags); if (res) return res; + nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - /* Do NFSv4 open() call */ - if ((open = server->rpc_ops->file_open) != NULL) - res = open(inode, filp); + res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); unlock_kernel(); return res; } @@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp) /* Ensure that dirty pages are flushed out with the right creds */ if (filp->f_mode & FMODE_WRITE) filemap_fdatawrite(filp->f_mapping); + nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return NFS_PROTO(inode)->file_release(inode, filp); } @@ -199,6 +198,7 @@ nfs_file_flush(struct file *file) if ((file->f_mode & FMODE_WRITE) == 0) return 0; + nfs_inc_stats(inode, NFSIOS_VFSFLUSH); lock_kernel(); /* Ensure that data+attribute caches are up to date after close() */ status = nfs_wb_all(inode); @@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) (unsigned long) count, (unsigned long) pos); result = nfs_revalidate_file(inode, iocb->ki_filp); + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); return result; @@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync) dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); lock_kernel(); status = nfs_wb_all(inode); if (!status) { @@ -378,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t if (!count) goto out; + nfs_add_stats(inode, 
NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, buf, count, pos); out: return result; @@ -517,9 +520,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); - - if (!inode) - return -EINVAL; + nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && @@ -544,9 +545,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags); - if (!inode) - return -EINVAL; - /* * No BSD flocks over NFS allowed. * Note: we could try to fake a POSIX lock request here by diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 86b756f44e27..8ee74111e519 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -753,6 +753,8 @@ static void nfs_zap_caches_locked(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; @@ -940,6 +942,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) struct nfs_fattr fattr; int error; + nfs_inc_stats(inode, NFSIOS_VFSSETATTR); + if (attr->ia_valid & ATTR_SIZE) { if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) attr->ia_valid &= ~ATTR_SIZE; @@ -993,6 +997,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); inode->i_size = attr->ia_size; vmtruncate(inode, attr->ia_size); } @@ -1278,6 +1283,7 @@ int nfs_attribute_timeout(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; @@ -1294,6 +1300,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); if (S_ISREG(inode->i_mode)) nfs_sync_mapping(mapping); invalidate_inode_pages2(mapping); @@ -1615,6 +1622,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 05eb43fadf8e..ae3ddd24cf8f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -31,6 +31,8 @@ #include +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_PAGECACHE static int nfs_pagein_one(struct list_head *, struct inode *); @@ -133,6 +135,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, } count -= result; rdata->args.pgbase += result; + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result); + /* Note: result == 0 should only happen if we're caching * a write that extends the file and punches a hole. 
*/ @@ -458,8 +462,11 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", task->tk_pid, status); + nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); + /* Is this a short read? */ if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { + nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count != 0) { /* Yes, so retry the read at the end of the data */ @@ -491,6 +498,9 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page->index); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); + nfs_add_stats(inode, NFSIOS_READPAGES, 1); + /* * Try to flush any pending writes to the file.. * @@ -570,6 +580,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, inode->i_sb->s_id, (long long)NFS_FILEID(inode), nr_pages); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); if (filp == NULL) { desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); @@ -582,6 +593,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); if (!ret) + nfs_add_stats(inode, NFSIOS_READPAGES, err); ret = err; } put_nfs_open_context(desc.ctx); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 92ecf24455c3..e7c8361cf201 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,6 +63,7 @@ #include #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -134,6 +135,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); if (i_size >= end) return; + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); i_size_write(inode, end); } @@ -223,6 +225,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, wdata->args.pgbase += result; written += result; count -= result; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); } while (count); /* Update file length */ nfs_grow_file(page, offset, written); @@ -279,6 +282,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) int priority = wb_priority(wbc); int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); + nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + /* * Note: We need to ensure that we have a reference to the inode * if we are to do asynchronous writes. 
If not, waiting @@ -343,6 +349,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct inode *inode = mapping->host; int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); + err = generic_writepages(mapping, wbc); if (err) return err; @@ -354,6 +362,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); if (err < 0) goto out; + nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); wbc->nr_to_write -= err; if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) { err = nfs_wait_on_requests(inode, 0, 0); @@ -596,6 +605,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) if (!bdi_write_congested(bdi)) return 0; + + nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT); + if (intr) { struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); sigset_t oldset; @@ -749,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page, struct nfs_page *req; int status = 0; + nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); + dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, count, @@ -1152,6 +1166,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); + nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not @@ -1177,6 +1193,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) if (task->tk_status >= 0 && resp->count < argp->count) { static unsigned long complain; + nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); + /* Has the server at least made some progress? */ if (resp->count != 0) { /* Was this an NFSv2 write or an NFSv3 stable write? */ -- cgit v1.2.3 From 006ea73e5fa82915d0ac7a3f15ee7c688433236d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:14 -0500 Subject: NFS: add hooks to account for NFSERR_JUKEBOX errors Make an inode or an nfs_server struct available in the logic that handles JUKEBOX/DELAY type errors so the NFS client can account for them. This patch is split out from the main nfs iostat patch to highlight minor architectural changes required to support this statistic. Test plan: None. 
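Stripped of the retry machinery, the architectural change is simply that the error handler gains an inode argument so the event can be charged to the right mount's counters. A trimmed sketch (hypothetical function name; compare nfs3_async_handle_jukebox() in the hunk below):

#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/sunrpc/sched.h>
#include "iostat.h"

static int example_count_delay(struct rpc_task *task, struct inode *inode)
{
	if (task->tk_status != -EJUKEBOX)
		return 0;
	nfs_inc_stats(inode, NFSIOS_DELAY);	/* needs the inode to reach the server stats */
	return 1;	/* caller is expected to retry, as the real code does */
}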
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/iostat.h | 19 +++++++++++++++---- fs/nfs/nfs3proc.c | 13 ++++++++----- fs/nfs/nfs4proc.c | 5 ++++- 3 files changed, 27 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index dc080e50ec57..7a7495153317 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -103,6 +103,7 @@ enum nfs_stat_eventcounters { NFSIOS_SILLYRENAME, NFSIOS_SHORTREAD, NFSIOS_SHORTWRITE, + NFSIOS_DELAY, __NFSIOS_COUNTSMAX, }; @@ -116,28 +117,38 @@ struct nfs_iostats { unsigned long events[__NFSIOS_COUNTSMAX]; } ____cacheline_aligned; -static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat) { struct nfs_iostats *iostats; int cpu; cpu = get_cpu(); - iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats = per_cpu_ptr(server->io_stats, cpu); iostats->events[stat] ++; put_cpu_no_resched(); } -static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +{ + nfs_inc_server_stats(NFS_SERVER(inode), stat); +} + +static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend) { struct nfs_iostats *iostats; int cpu; cpu = get_cpu(); - iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats = per_cpu_ptr(server->io_stats, cpu); iostats->bytes[stat] += addend; put_cpu_no_resched(); } +static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +{ + nfs_add_server_stats(NFS_SERVER(inode), stat, addend); +} + static inline struct nfs_iostats *nfs_alloc_iostats(void) { return alloc_percpu(struct nfs_iostats); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ed67567f0556..7204ba5b2bf8 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -19,6 +19,8 @@ #include #include +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_PROC extern struct rpc_procinfo nfs3_procedures[]; @@ -58,10 +60,11 @@ nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, i nfs3_rpc_wrapper(clnt, msg, flags) static int -nfs3_async_handle_jukebox(struct rpc_task *task) +nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) { if (task->tk_status != -EJUKEBOX) return 0; + nfs_inc_stats(inode, NFSIOS_DELAY); task->tk_status = 0; rpc_restart_call(task); rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); @@ -447,7 +450,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task) struct rpc_message *msg = &task->tk_msg; struct nfs_fattr *dir_attr; - if (nfs3_async_handle_jukebox(task)) + if (nfs3_async_handle_jukebox(task, dir->d_inode)) return 1; if (msg->rpc_argp) { dir_attr = (struct nfs_fattr*)msg->rpc_resp; @@ -748,7 +751,7 @@ static void nfs3_read_done(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - if (nfs3_async_handle_jukebox(task)) + if (nfs3_async_handle_jukebox(task, data->inode)) return; /* Call back common NFS readpage processing */ if (task->tk_status >= 0) @@ -786,7 +789,7 @@ static void nfs3_write_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - if (nfs3_async_handle_jukebox(task)) + if (nfs3_async_handle_jukebox(task, data->inode)) return; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); 
@@ -833,7 +836,7 @@ static void nfs3_commit_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - if (nfs3_async_handle_jukebox(task)) + if (nfs3_async_handle_jukebox(task, data->inode)) return; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 77a565eba562..f1ff4fa6cce5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -51,6 +51,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -2755,8 +2756,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) rpc_wake_up_task(task); task->tk_status = 0; return -EAGAIN; - case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: + nfs_inc_server_stats((struct nfs_server *) server, + NFSIOS_DELAY); + case -NFS4ERR_GRACE: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; return -EAGAIN; -- cgit v1.2.3 From 67ec9f46b889bfb1ab0a4e307d53929d5f0692bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:15 -0500 Subject: NFS: report how long an NFS file system has been mounted Add a field in nfs_server to record a timestamp when a mount succeeds. Report the number of seconds the file system has been mounted via nfs_show_stats(). Test plan: Mount an NFS file system, watch the mountstats reports and compare with clock time. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 +++++ include/linux/nfs_fs_sb.h | 1 + 2 files changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8ee74111e519..1b2d782374b5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -299,6 +299,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) goto out_no_root; } + /* mount time stamp, in seconds */ + server->mount_time = jiffies; + /* Get some general file system info */ if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) @@ -674,6 +677,8 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); nfs_show_mount_options(m, nfss, 1); + seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); + seq_printf(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); seq_printf(m, ",wtmult=%d", nfss->wtmult); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d65e69a06b72..65dec21af774 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -35,6 +35,7 @@ struct nfs_server { char * hostname; /* remote hostname */ struct nfs_fh fh; struct sockaddr_in addr; + unsigned long mount_time; /* when this fs was mounted */ #ifdef CONFIG_NFS_V4 /* Our own IP address, as a null-terminated string. * This is used to generate the clientid, and the callback address. -- cgit v1.2.3 From 4ece3a2d18fd7fe1d4972284a8c98c569020093f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:22 -0500 Subject: NFS: add RPC I/O statistics to /proc/self/mountstats NFS client now shows various RPC I/O metrics in /proc/self/mountstats. Test plan: Mount/umount while doing "cat /proc/self/mountstats", multiple iterations of connectathon locking suite. Test with NFS version 2, 3, and 4. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1b2d782374b5..b9f35ca266c2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -728,6 +729,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, "\n\tbytes:\t"); for (i = 0; i < __NFSIOS_BYTESMAX; i++) seq_printf(m, "%Lu ", totals.bytes[i]); + seq_printf(m, "\n"); + + rpc_print_iostats(m, nfss->client); return 0; } -- cgit v1.2.3 From cc0175c1dc1de8f6af0eb0631dcc5b999a6fcc42 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:22 -0500 Subject: SUNRPC: display human-readable procedure name in rpc_iostats output Add fields to the rpc_procinfo struct that allow the display of a human-readable name for each procedure in the rpc_iostats output. Also fix it so that the NFSv4 stats are broken up correctly by sub-procedure number. NFSv4 uses only two real RPC procedures: NULL, and COMPOUND. Test plan: Mount with NFSv2, NFSv3, and NFSv4, and do "cat /proc/self/mountstats". Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/mon.c | 4 ++++ fs/lockd/xdr.c | 4 +++- fs/lockd/xdr4.c | 4 +++- fs/nfs/mount_clnt.c | 4 ++++ fs/nfs/nfs2xdr.c | 4 +++- fs/nfs/nfs3xdr.c | 6 +++++- fs/nfs/nfs4xdr.c | 2 ++ fs/nfsd/nfs4callback.c | 2 ++ include/linux/sunrpc/clnt.h | 2 ++ net/sunrpc/pmap_clnt.c | 6 ++++++ net/sunrpc/stats.c | 14 ++++++++++++-- 11 files changed, 46 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 0edc03e67966..84ee39e6a3a2 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -214,12 +214,16 @@ static struct rpc_procinfo nsm_procedures[] = { .p_encode = (kxdrproc_t) xdr_encode_mon, .p_decode = (kxdrproc_t) xdr_decode_stat_res, .p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2, + .p_statidx = SM_MON, + .p_name = "MONITOR", }, [SM_UNMON] = { .p_proc = SM_UNMON, .p_encode = (kxdrproc_t) xdr_encode_unmon, .p_decode = (kxdrproc_t) xdr_decode_stat, .p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2, + .p_statidx = SM_UNMON, + .p_name = "UNMONITOR", }, }; diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 1e984ab14d3f..766ce06146b5 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -546,7 +546,9 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) .p_proc = NLMPROC_##proc, \ .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \ .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \ - .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \ + .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \ + .p_statidx = NLMPROC_##proc, \ + .p_name = #proc, \ } static struct rpc_procinfo nlm_procedures[] = { diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 906ddc203186..36eb175ec335 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -551,7 +551,9 @@ nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) .p_proc = NLMPROC_##proc, \ .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \ .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \ - .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2 \ + .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \ + .p_statidx = NLMPROC_##proc, \ + .p_name = #proc, \ } static struct rpc_procinfo nlm4_procedures[] = { diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index db99b8f678f8..4a1340358223 100644 --- 
a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -137,6 +137,8 @@ static struct rpc_procinfo mnt_procedures[] = { .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus, .p_bufsiz = MNT_dirpath_sz << 2, + .p_statidx = MNTPROC_MNT, + .p_name = "MOUNT", }, }; @@ -146,6 +148,8 @@ static struct rpc_procinfo mnt3_procedures[] = { .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, .p_bufsiz = MNT_dirpath_sz << 2, + .p_statidx = MOUNTPROC3_MNT, + .p_name = "MOUNT", }, }; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 7fc0560c89c9..8cdc792ff3c7 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat) .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ - .p_timer = timer \ + .p_timer = timer, \ + .p_statidx = NFSPROC_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs_procedures[] = { PROC(GETATTR, fhandle, attrstat, 1), diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index b6c0b5012bce..2d8701a230f0 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ - .p_timer = timer \ + .p_timer = timer, \ + .p_statidx = NFS3PROC_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs3_procedures[] = { @@ -1150,6 +1152,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, .p_timer = 1, + .p_name = "GETACL", }, [ACLPROC3_SETACL] = { .p_proc = ACLPROC3_SETACL, @@ -1157,6 +1160,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, .p_timer = 0, + .p_name = "SETACL", }, }; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4bbf5ef57785..b95675349ba3 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat) .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ + .p_statidx = NFSPROC4_CLNT_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs4_procedures[] = { diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d828662d737d..4f391cbf2fd1 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -326,6 +326,8 @@ out: .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ + .p_statidx = NFSPROC4_CB_##call, \ + .p_name = #proc, \ } static struct rpc_procinfo nfs4_cb_procedures[] = { diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 0f3662002ffc..3bec751ee249 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -101,6 +101,8 @@ struct rpc_procinfo { unsigned int p_bufsiz; /* req. 
buffer size */ unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ + u32 p_statidx; /* Which procedure to account */ + char * p_name; /* name of procedure */ }; #define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt)) diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 332cc5d362d5..efa00bd9ff9e 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -261,6 +261,8 @@ static struct rpc_procinfo pmap_procedures[] = { .p_decode = (kxdrproc_t) xdr_decode_bool, .p_bufsiz = 4, .p_count = 1, + .p_statidx = PMAP_SET, + .p_name = "SET", }, [PMAP_UNSET] = { .p_proc = PMAP_UNSET, @@ -268,6 +270,8 @@ static struct rpc_procinfo pmap_procedures[] = { .p_decode = (kxdrproc_t) xdr_decode_bool, .p_bufsiz = 4, .p_count = 1, + .p_statidx = PMAP_UNSET, + .p_name = "UNSET", }, [PMAP_GETPORT] = { .p_proc = PMAP_GETPORT, @@ -275,6 +279,8 @@ static struct rpc_procinfo pmap_procedures[] = { .p_decode = (kxdrproc_t) xdr_decode_port, .p_bufsiz = 4, .p_count = 1, + .p_statidx = PMAP_GETPORT, + .p_name = "GETPORT", }, }; diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 24ac7163b9c7..53746793eca2 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -151,7 +151,7 @@ void rpc_count_iostats(struct rpc_task *task) if (!stats || !req) return; - op_metrics = &stats[task->tk_msg.rpc_proc->p_proc]; + op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; op_metrics->om_ops++; op_metrics->om_ntrans += req->rq_ntrans; @@ -176,6 +176,16 @@ void rpc_count_iostats(struct rpc_task *task) op_metrics->om_execute += execute; } +void _print_name(struct seq_file *seq, unsigned int op, struct rpc_procinfo *procs) +{ + if (procs[op].p_name) + seq_printf(seq, "\t%12s: ", procs[op].p_name); + else if (op == 0) + seq_printf(seq, "\t NULL: "); + else + seq_printf(seq, "\t%12u: ", op); +} + #define MILLISECS_PER_JIFFY (1000UL / HZ) void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) @@ -197,7 +207,7 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) seq_printf(seq, "\tper-op statistics\n"); for (op = 0; op < maxproc; op++) { struct rpc_iostats *metrics = &stats[op]; - seq_printf(seq, "%12u: ", op); + _print_name(seq, op, clnt->cl_procinfo); seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n", metrics->om_ops, metrics->om_ntrans, -- cgit v1.2.3 From dead28da8e3fb32601d38fb32b7021122e0a3d21 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:23 -0500 Subject: SUNRPC: eliminate rpc_call() Clean-up: replace rpc_call() helper with direct call to rpc_call_sync. This makes NFSv2 and NFSv3 synchronous calls more computationally efficient, and reduces stack consumption in functions that used to invoke rpc_call more than once. Test plan: Compile kernel with CONFIG_NFS enabled. Connectathon on NFS version 2, version 3, and version 4 mount points. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/mon.c | 7 ++- fs/nfs/mount_clnt.c | 13 +++- fs/nfs/nfs3acl.c | 16 +++-- fs/nfs/nfs3proc.c | 140 +++++++++++++++++++++++++++++++------------ fs/nfs/proc.c | 107 ++++++++++++++++++++++++++------- include/linux/sunrpc/clnt.h | 14 ----- include/linux/sunrpc/sched.h | 1 - net/sunrpc/pmap_clnt.c | 34 +++++++---- 8 files changed, 237 insertions(+), 95 deletions(-) (limited to 'fs') diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 84ee39e6a3a2..5dd52b70859a 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -35,6 +35,10 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) struct rpc_clnt *clnt; int status; struct nsm_args args; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = res, + }; clnt = nsm_create(); if (IS_ERR(clnt)) { @@ -49,7 +53,8 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) args.proc = NLMPROC_NSM_NOTIFY; memset(res, 0, sizeof(*res)); - status = rpc_call(clnt, proc, &args, res, 0); + msg.rpc_proc = &clnt->cl_procinfo[proc]; + status = rpc_call_sync(clnt, &msg, 0); if (status < 0) printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n", status); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 4a1340358223..c44d87bdddb3 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, struct mnt_fhstatus result = { .fh = fh }; + struct rpc_message msg = { + .rpc_argp = path, + .rpc_resp = &result, + }; char hostname[32]; int status; - int call; dprintk("NFS: nfs_mount(%08x:%s)\n", (unsigned)ntohl(addr->sin_addr.s_addr), path); @@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, if (IS_ERR(mnt_clnt)) return PTR_ERR(mnt_clnt); - call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT; - status = rpc_call(mnt_clnt, call, path, &result, 0); + if (version == NFS_MNT3_VERSION) + msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; + else + msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; + + status = rpc_call_sync(mnt_clnt, &msg, 0); return status < 0? status : (result.status? -EACCES : 0); } diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 6a5bbc0ae941..33287879bd23 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) struct nfs3_getaclres res = { .fattr = &fattr, }; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = &res, + }; struct posix_acl *acl; int status, count; @@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) return NULL; dprintk("NFS call getacl\n"); - status = rpc_call(server->client_acl, ACLPROC3_GETACL, - &args, &res, 0); + msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; + status = rpc_call_sync(server->client_acl, &msg, 0); dprintk("NFS reply getacl: %d\n", status); /* pages may have been allocated at the xdr layer. 
*/ @@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, .acl_access = acl, .pages = pages, }; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = &fattr, + }; int status, count; status = -EOPNOTSUPP; @@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, dprintk("NFS call setacl\n"); nfs_begin_data_update(inode); - status = rpc_call(server->client_acl, ACLPROC3_SETACL, - &args, &fattr, 0); + msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; + status = rpc_call_sync(server->client_acl, &msg, 0); spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; spin_unlock(&inode->i_lock); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7204ba5b2bf8..740f8b1ab04d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -43,21 +43,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) return res; } -static inline int -nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) -{ - struct rpc_message msg = { - .rpc_proc = &clnt->cl_procinfo[proc], - .rpc_argp = argp, - .rpc_resp = resp, - }; - return nfs3_rpc_wrapper(clnt, &msg, flags); -} - -#define rpc_call(clnt, proc, argp, resp, flags) \ - nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags) -#define rpc_call_sync(clnt, msg, flags) \ - nfs3_rpc_wrapper(clnt, msg, flags) +#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags) static int nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) @@ -75,14 +61,21 @@ static int do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("%s: call fsinfo\n", __FUNCTION__); nfs_fattr_init(info->fattr); - status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0); + status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); if (!(info->fattr->valid & NFS_ATTR_FATTR)) { - status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0); + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_resp = info->fattr; + status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); } return status; @@ -110,12 +103,16 @@ static int nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; int status; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call(server->client, NFS3PROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -129,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, .fh = NFS_FH(inode), .sattr = sattr, }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR], + .rpc_argp = &arg, + .rpc_resp = fattr, + }; int status; dprintk("NFS call setattr\n"); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); @@ -155,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name, .fh = 
fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call lookup %s\n", name->name); nfs_fattr_init(&dir_attr); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0); - if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_argp = fhandle; + msg.rpc_resp = fattr; + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + } dprintk("NFS reply lookup: %d\n", status); if (status >= 0) status = nfs_refresh_inode(dir, &dir_attr); @@ -183,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], .rpc_argp = &arg, .rpc_resp = &res, - .rpc_cred = entry->cred + .rpc_cred = entry->cred, }; int mode = entry->mask; int status; @@ -229,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page, .pglen = pglen, .pages = &page }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK], + .rpc_argp = &args, + .rpc_resp = &fattr, + }; int status; dprintk("NFS call readlink\n"); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, - &args, &fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); dprintk("NFS reply readlink: %d\n", status); return status; @@ -330,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; mode_t mode = sattr->ia_mode; int status; @@ -346,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, again: nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0); - nfs_post_op_update_inode(dir, &dir_attr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_refresh_inode(dir, &dir_attr); /* If the server doesn't support the exclusive creation semantics, * try again with simple 'guarded' mode. 
*/ @@ -477,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, .fromattr = &old_dir_attr, .toattr = &new_dir_attr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); nfs_fattr_init(&old_dir_attr); nfs_fattr_init(&new_dir_attr); - status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); nfs_post_op_update_inode(old_dir, &old_dir_attr); nfs_post_op_update_inode(new_dir, &new_dir_attr); dprintk("NFS reply rename: %d\n", status); @@ -503,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) .dir_attr = &dir_attr, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_LINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call link %s\n", name->name); nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); nfs_post_op_update_inode(inode, &fattr); dprintk("NFS reply link: %d\n", status); @@ -534,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, .fh = fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; if (path->len > NFS3_MAXPATHLEN) @@ -541,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, dprintk("NFS call symlink %s -> %s\n", name->name, path->name); nfs_fattr_init(&dir_attr); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply symlink: %d\n", status); return status; @@ -563,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int mode = sattr->ia_mode; int status; @@ -572,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; @@ -594,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name) .name = name->name, .len = name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR], + .rpc_argp = &arg, + .rpc_resp = &dir_attr, + }; int status; dprintk("NFS call rmdir %s\n", name->name); nfs_fattr_init(&dir_attr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply rmdir: %d\n", status); return status; @@ -675,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fh, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD], + .rpc_argp = &arg, + .rpc_resp = &res, + }; mode_t mode = sattr->ia_mode; int status; @@ -693,7 +742,7 @@ nfs3_proc_mknod(struct 
inode *dir, struct dentry *dentry, struct iattr *sattr, nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; @@ -710,11 +759,16 @@ static int nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *stat) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT], + .rpc_argp = fhandle, + .rpc_resp = stat, + }; int status; dprintk("NFS call fsstat\n"); nfs_fattr_init(stat->fattr); - status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply statfs: %d\n", status); return status; } @@ -723,11 +777,16 @@ static int nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); + status = rpc_call_sync(server->client_sys, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; } @@ -736,11 +795,16 @@ static int nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_pathconf *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("NFS call pathconf\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply pathconf: %d\n", status); return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f5150d71c03d..2b051ab8bea8 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, { struct nfs_fattr *fattr = info->fattr; struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; int status; dprintk("%s: call getattr\n", __FUNCTION__); nfs_fattr_init(fattr); - status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); + status = rpc_call_sync(server->client_sys, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); if (status) return status; dprintk("%s: call statfs\n", __FUNCTION__); - status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0); + msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; + msg.rpc_resp = &fsinfo; + status = rpc_call_sync(server->client_sys, &msg, 0); dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); if (status) return status; @@ -90,12 +97,16 @@ static int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; int status; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call(server->client, NFSPROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, .fh = NFS_FH(inode), .sattr = sattr }; + struct rpc_message msg = { + .rpc_proc = 
&nfs_procedures[NFSPROC_SETATTR], + .rpc_argp = &arg, + .rpc_resp = fattr, + }; int status; /* Mask out the non-modebit related stuff from attr->ia_mode */ @@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, dprintk("NFS call setattr\n"); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); @@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name, .fh = fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_LOOKUP], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call lookup %s\n", name->name); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page, .pglen = pglen, .pages = &page }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_READLINK], + .rpc_argp = &args, + }; int status; dprintk("NFS call readlink\n"); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); dprintk("NFS reply readlink: %d\n", status); return status; } @@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; nfs_fattr_init(&fattr); dprintk("NFS call create %s\n", dentry->d_name.name); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); dprintk("NFS reply create: %d\n", status); @@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status, mode; dprintk("NFS call mknod %s\n", dentry->d_name.name); @@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, } nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == -EINVAL && S_ISFIFO(mode)) { sattr->ia_mode = mode; nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); } if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); @@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name) struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_REMOVE], .rpc_argp = &arg, - .rpc_resp = NULL, - .rpc_cred = NULL }; int status; @@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, .toname = new_name->name, .tolen = new_name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_RENAME], + .rpc_argp = &arg, + }; int status; dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0); + 
status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); nfs_mark_for_revalidate(old_dir); nfs_mark_for_revalidate(new_dir); dprintk("NFS reply rename: %d\n", status); @@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) .toname = name->name, .tolen = name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_LINK], + .rpc_argp = &arg, + }; int status; dprintk("NFS call link %s\n", name->name); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_mark_for_revalidate(inode); nfs_mark_for_revalidate(dir); dprintk("NFS reply link: %d\n", status); @@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, .tolen = path->len, .sattr = sattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK], + .rpc_argp = &arg, + }; int status; if (path->len > NFS2_MAXPATHLEN) @@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, dprintk("NFS call symlink %s -> %s\n", name->name, path->name); nfs_fattr_init(fattr); fhandle->size = 0; - status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); dprintk("NFS reply symlink: %d\n", status); return status; @@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_MKDIR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); @@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name) .name = name->name, .len = name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_RMDIR], + .rpc_argp = &arg, + }; int status; dprintk("NFS call rmdir %s\n", name->name); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); dprintk("NFS reply rmdir: %d\n", status); return status; @@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .fh = NFS_FH(dir), .cookie = cookie, .count = count, - .pages = &page + .pages = &page, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READDIR], .rpc_argp = &arg, - .rpc_resp = NULL, - .rpc_cred = cred + .rpc_cred = cred, }; int status; @@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *stat) { struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_STATFS], + .rpc_argp = fhandle, + .rpc_resp = &fsinfo, + }; int status; dprintk("NFS call statfs\n"); nfs_fattr_init(stat->fattr); - status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply statfs: %d\n", status); if (status) goto out; @@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_STATFS], + .rpc_argp = fhandle, + .rpc_resp = &fsinfo, + 
}; int status; dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); if (status) goto out; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 3bec751ee249..e37c06128e51 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -140,20 +140,6 @@ size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); -static __inline__ -int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) -{ - struct rpc_message msg = { - .rpc_proc = &clnt->cl_procinfo[proc], - .rpc_argp = argp, - .rpc_resp = resp, - .rpc_cred = NULL - }; - return rpc_call_sync(clnt, &msg, flags); -} - -extern void rpciod_wake_up(void); - /* * Helper function for NFSroot support */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 45a64ae963ee..82a91bb22362 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -276,7 +276,6 @@ void * rpc_malloc(struct rpc_task *, size_t); void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); -void rpciod_wake_up(void); int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); #ifdef RPC_DEBUG void rpc_show_tasks(void); diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index efa00bd9ff9e..d25b054ec921 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -104,6 +104,11 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) .pm_prot = prot, .pm_port = 0 }; + struct rpc_message msg = { + .rpc_proc = &pmap_procedures[PMAP_GETPORT], + .rpc_argp = &map, + .rpc_resp = &map.pm_port, + }; struct rpc_clnt *pmap_clnt; char hostname[32]; int status; @@ -117,7 +122,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) return PTR_ERR(pmap_clnt); /* Setup the call info struct */ - status = rpc_call(pmap_clnt, PMAP_GETPORT, &map, &map.pm_port, 0); + status = rpc_call_sync(pmap_clnt, &msg, 0); if (status >= 0) { if (map.pm_port != 0) @@ -162,16 +167,27 @@ pmap_getport_done(struct rpc_task *task) int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) { - struct sockaddr_in sin; - struct rpc_portmap map; + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + struct rpc_portmap map = { + .pm_prog = prog, + .pm_vers = vers, + .pm_prot = prot, + .pm_port = port, + }; + struct rpc_message msg = { + .rpc_proc = &pmap_procedures[port ? PMAP_SET : PMAP_UNSET], + .rpc_argp = &map, + .rpc_resp = okay, + }; struct rpc_clnt *pmap_clnt; int error = 0; dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n", prog, vers, prot, port); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); if (IS_ERR(pmap_clnt)) { error = PTR_ERR(pmap_clnt); @@ -179,13 +195,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) return error; } - map.pm_prog = prog; - map.pm_vers = vers; - map.pm_prot = prot; - map.pm_port = port; - - error = rpc_call(pmap_clnt, port? 
PMAP_SET : PMAP_UNSET, - &map, okay, 0); + error = rpc_call_sync(pmap_clnt, &msg, 0); if (error < 0) { printk(KERN_WARNING -- cgit v1.2.3 From 1e7cb3dc12dbbac690d78c84f9c7cb11132ed121 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:24 -0500 Subject: NFS: directory trace messages Reuse NFSDBG_DIRCACHE and NFSDBG_LOOKUPCACHE to provide additional diagnostic messages that trace the operation of the NFS client's directory behavior. A few new messages are now generated when NFSDBG_VFS is active, as well, to trace normal VFS activity. This compromise provides better trace debugging for those who use pre-built kernels, without adding a lot of extra noise to the standard debug settings. Test-plan: Enable NFS trace debugging with flags 1, 2, or 4. You should be able to see different types of trace messages with each flag setting. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 96 +++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 151b8dd0ac3b..609185a15c99 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -130,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp) { int res = 0; + dfprintk(VFS, "NFS: opendir(%s/%ld)\n", + inode->i_sb->s_id, inode->i_ino); + lock_kernel(); /* Call generic open code in order to cache credentials */ if (!res) @@ -173,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) unsigned long timestamp; int error; - dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); + dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", + __FUNCTION__, (long long)desc->entry->cookie, + page->index); again: timestamp = jiffies; @@ -245,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc) status; while((status = dir_decode(desc)) == 0) { - dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); + dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", + __FUNCTION__, (unsigned long long)entry->cookie); if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { @@ -253,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc) schedule(); } } - dfprintk(VFS, "NFS: find_dirent() returns %d\n", status); return status; } @@ -277,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc) if (status) break; - dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); + dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", + (unsigned long long)entry->cookie, desc->current_index); if (desc->file->f_pos == desc->current_index) { *desc->dir_cookie = entry->cookie; @@ -289,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc) schedule(); } } - dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status); return status; } @@ -304,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) struct page *page; int status; - dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); + dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", + __FUNCTION__, desc->page_index, + (long long) *desc->dir_cookie); page = read_cache_page(inode->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); @@ -325,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) if (status < 0) dir_page_release(desc); out: - dfprintk(VFS, "NFS: 
find_dirent_page() returns %d\n", status); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status); return status; read_error: page_cache_release(page); @@ -347,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) /* Always search-by-index from the beginning of the cache */ if (*desc->dir_cookie == 0) { - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); + dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n", + (long long)desc->file->f_pos); desc->page_index = 0; desc->entry->cookie = desc->entry->prev_cookie = 0; desc->entry->eof = 0; desc->current_index = 0; } else - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", + (unsigned long long)*desc->dir_cookie); for (;;) { res = find_dirent_page(desc); @@ -366,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) schedule(); } } - dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res); + + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res); return res; } @@ -391,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int loop_count = 0, res; - dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", + (unsigned long long)entry->cookie); for(;;) { unsigned d_type = DT_UNKNOWN; @@ -428,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", + (unsigned long long)*desc->dir_cookie, res); return res; } @@ -454,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", + (unsigned long long)*desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { @@ -486,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->entry->cookie = desc->entry->prev_cookie = 0; desc->entry->eof = 0; out: - dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", + __FUNCTION__, status); return status; out_release: dir_page_release(desc); @@ -508,6 +522,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct nfs_fattr fattr; long res; + dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (long long)filp->f_pos); nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); lock_kernel(); @@ -569,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } } unlock_kernel(); - if (res < 0) - return res; - return 0; + if (res > 0) + res = 0; + dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + res); + return res; } loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) @@ -602,6 +622,10 @@ out: */ int nfs_fsync_dir(struct file *filp, 
struct dentry *dentry, int datasync) { + dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + datasync); + return 0; } @@ -726,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) } if (is_bad_inode(inode)) { - dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); goto out_bad; } @@ -759,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) out_valid: unlock_kernel(); dput(parent); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 1; out_zap_parent: nfs_zap_caches(dir); @@ -775,6 +803,9 @@ out_zap_parent: d_drop(dentry); unlock_kernel(); dput(parent); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 0; } @@ -917,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry struct dentry *res = NULL; int error; + dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + /* Check that we are indeed trying to open this file */ if (!is_atomic_open(dir, nd)) goto no_open; @@ -1124,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, int error; int open_flags = 0; - dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: create(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; @@ -1158,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) struct iattr attr; int status; - dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); if (!new_valid_dev(rdev)) return -EINVAL; @@ -1191,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct iattr attr; int error; - dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; @@ -1217,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; - dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); lock_kernel(); nfs_begin_data_update(dir); @@ -1274,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name); sillycounter++; sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); - dfprintk(VFS, "trying to rename %s to %s\n", - dentry->d_name.name, silly); + dfprintk(VFS, "NFS: trying to rename %s to %s\n", + dentry->d_name.name, silly); sdentry = lookup_one_len(silly, dentry->d_parent, slen); /* @@ -1687,13 +1721,15 @@ force_lookup: res = PTR_ERR(cred); unlock_kernel(); out: + dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", + inode->i_sb->s_id, inode->i_ino, mask, res); return res; out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) res = 
generic_permission(inode, mask, NULL); unlock_kernel(); - return res; + goto out; } /* -- cgit v1.2.3 From 15dadef9460ad8d3b1d5ede1c1697dc79af44a72 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2006 13:44:24 -0500 Subject: lockd: clean up nlmsvc_lock Slightly more consistent dprintk error reporting, consolidate some up()'s. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index a525a141dd3b..42dd105456c5 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -300,6 +300,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct file_lock *conflock; struct nlm_block *block; int error; + u32 ret; dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", file->f_file->f_dentry->d_inode->i_sb->s_id, @@ -329,24 +330,28 @@ again: dprintk("lockd: posix_lock_file returned %d\n", -error); switch(-error) { case 0: - return nlm_granted; + ret = nlm_granted; + goto out; case EDEADLK: - return nlm_deadlock; + ret = nlm_deadlock; + goto out; case EAGAIN: - return nlm_lck_denied; + ret = nlm_lck_denied; + goto out; default: /* includes ENOLCK */ - return nlm_lck_denied_nolocks; + ret = nlm_lck_denied_nolocks; + goto out; } } if (!wait) { - up(&file->f_sema); - return nlm_lck_denied; + ret = nlm_lck_denied; + goto out_unlock; } if (posix_locks_deadlock(&lock->fl, conflock)) { - up(&file->f_sema); - return nlm_deadlock; + ret = nlm_deadlock; + goto out_unlock; } /* If we don't have a block, create and initialize it. Then @@ -371,8 +376,12 @@ again: posix_block_lock(conflock, &block->b_call.a_args.lock.fl); } + ret = nlm_lck_blocked; +out_unlock: up(&file->f_sema); - return nlm_lck_blocked; +out: + dprintk("lockd: nlmsvc_lock returned %u\n", ret); + return ret; } /* @@ -535,8 +544,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) dprintk("lockd: lock still blocked\n"); nlmsvc_insert_block(block, NLM_NEVER); posix_block_lock(conflock, &lock->fl); - up(&file->f_sema); - return; + goto out_unlock; } /* Alright, no conflicting lock. Now lock it for real. If the @@ -547,8 +555,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", -error, __FUNCTION__); nlmsvc_insert_block(block, 10 * HZ); - up(&file->f_sema); - return; + goto out_unlock; } callback: @@ -565,6 +572,7 @@ callback: if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops) < 0) nlm_release_host(block->b_call.a_host); +out_unlock: up(&file->f_sema); } -- cgit v1.2.3 From 5de0e5024a4e21251fd80dbfdb83316ce97086bc Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2006 13:44:25 -0500 Subject: lockd: simplify nlmsvc_grant_blocked Reorganize nlmsvc_grant_blocked() to make full use of posix_lock_file(). Note that there's no need for separate calls to posix_test_lock(), posix_locks_deadlock(), or posix_block_lock(). Signed-off-by: Andy Adamson Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 42dd105456c5..58bbfede94ec 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -519,7 +519,6 @@ nlmsvc_grant_blocked(struct nlm_block *block) { struct nlm_file *file = block->b_file; struct nlm_lock *lock = &block->b_call.a_args.lock; - struct file_lock *conflock; int error; dprintk("lockd: grant blocked lock %p\n", block); @@ -539,19 +538,15 @@ nlmsvc_grant_blocked(struct nlm_block *block) } /* Try the lock operation again */ - if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) { - /* Bummer, we blocked again */ + error = posix_lock_file(file->f_file, &lock->fl); + switch (error) { + case 0: + break; + case -EAGAIN: dprintk("lockd: lock still blocked\n"); nlmsvc_insert_block(block, NLM_NEVER); - posix_block_lock(conflock, &lock->fl); goto out_unlock; - } - - /* Alright, no conflicting lock. Now lock it for real. If the - * following yields an error, this is most probably due to low - * memory. Retry the lock in a few seconds. - */ - if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) { + default: printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", -error, __FUNCTION__); nlmsvc_insert_block(block, 10 * HZ); -- cgit v1.2.3 From a85f193e2fb7d53e48ae6a9d9ea990bfb4cea555 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2006 13:44:25 -0500 Subject: lockd: make nlmsvc_lock use only posix_lock_file Reorganize nlmsvc_lock() to make full use of posix_lock_file(), which does eveything nlmsvc_lock() needs - no need to call posix_test_lock(), posix_locks_deadlock(), or posix_block_lock() separately. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 58bbfede94ec..f5398097b84b 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -297,7 +297,6 @@ u32 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_lock *lock, int wait, struct nlm_cookie *cookie) { - struct file_lock *conflock; struct nlm_block *block; int error; u32 ret; @@ -320,14 +319,15 @@ again: /* Lock file against concurrent access */ down(&file->f_sema); - if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) { - error = posix_lock_file(file->f_file, &lock->fl); + error = posix_lock_file(file->f_file, &lock->fl); + + dprintk("lockd: posix_lock_file returned %d\n", error); + if (error != -EAGAIN) { if (block) nlmsvc_delete_block(block, 0); up(&file->f_sema); - dprintk("lockd: posix_lock_file returned %d\n", -error); switch(-error) { case 0: ret = nlm_granted; @@ -335,9 +335,6 @@ again: case EDEADLK: ret = nlm_deadlock; goto out; - case EAGAIN: - ret = nlm_lck_denied; - goto out; default: /* includes ENOLCK */ ret = nlm_lck_denied_nolocks; goto out; @@ -349,11 +346,6 @@ again: goto out_unlock; } - if (posix_locks_deadlock(&lock->fl, conflock)) { - ret = nlm_deadlock; - goto out_unlock; - } - /* If we don't have a block, create and initialize it. Then * retry because we may have slept in kmalloc. 
*/ /* We have to release f_sema as nlmsvc_create_block may try to @@ -369,13 +361,6 @@ again: /* Append to list of blocked */ nlmsvc_insert_block(block, NLM_NEVER); - if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) { - /* Now add block to block list of the conflicting lock - if we haven't done so. */ - dprintk("lockd: blocking on this lock.\n"); - posix_block_lock(conflock, &block->b_call.a_args.lock.fl); - } - ret = nlm_lck_blocked; out_unlock: up(&file->f_sema); -- cgit v1.2.3 From 2e0af86f618c697b44e2d67dff151256c58201c4 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2006 13:44:26 -0500 Subject: locks: remove unused posix_block_lock posix_lock_file() is used to add a blocked lock to Lockd's block, so posix_block_lock() is no longer needed. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/locks.c | 15 --------------- include/linux/fs.h | 1 - 2 files changed, 16 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index d2c5306e3db0..cb940b142c5f 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1954,21 +1954,6 @@ void locks_remove_flock(struct file *filp) unlock_kernel(); } -/** - * posix_block_lock - blocks waiting for a file lock - * @blocker: the lock which is blocking - * @waiter: the lock which conflicts and has to wait - * - * lockd needs to block waiting for locks. - */ -void -posix_block_lock(struct file_lock *blocker, struct file_lock *waiter) -{ - locks_insert_block(blocker, waiter); -} - -EXPORT_SYMBOL(posix_block_lock); - /** * posix_unblock_lock - stop waiting for a file lock * @filp: how the file was opened diff --git a/include/linux/fs.h b/include/linux/fs.h index be21e860a9f2..b01482c721ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -757,7 +757,6 @@ extern void locks_remove_flock(struct file *); extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); -extern void posix_block_lock(struct file_lock *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); extern int posix_locks_deadlock(struct file_lock *, struct file_lock *); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); -- cgit v1.2.3 From 8dc7c3115b611c00006eac3ee5b108296432aab7 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2006 13:44:26 -0500 Subject: locks,lockd: fix race in nlmsvc_testlock posix_test_lock() returns a pointer to a struct file_lock which is unprotected and can be removed while in use by the caller. Move the conflicting lock from the return to a parameter, and copy the conflicting lock. In most cases the caller ends up putting the copy of the conflicting lock on the stack. On i386, sizeof(struct file_lock) appears to be about 100 bytes. We're assuming that's reasonable. Signed-off-by: Andy Adamson Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 12 +++++------- fs/locks.c | 21 +++++++++++++-------- fs/nfs/file.c | 7 +++---- fs/nfsd/nfs4state.c | 13 ++++++------- include/linux/fs.h | 2 +- 5 files changed, 28 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index f5398097b84b..d683dd022e08 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -376,8 +376,6 @@ u32 nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, struct nlm_lock *conflock) { - struct file_lock *fl; - dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", file->f_file->f_dentry->d_inode->i_sb->s_id, file->f_file->f_dentry->d_inode->i_ino, @@ -385,14 +383,14 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); - if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) { + if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) { dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n", - fl->fl_type, (long long)fl->fl_start, - (long long)fl->fl_end); + conflock->fl.fl_type, + (long long)conflock->fl.fl_start, + (long long)conflock->fl.fl_end); conflock->caller = "somehost"; /* FIXME */ conflock->oh.len = 0; /* don't return OH info */ - conflock->svid = fl->fl_pid; - conflock->fl = *fl; + conflock->svid = conflock->fl.fl_pid; return nlm_lck_denied; } diff --git a/fs/locks.c b/fs/locks.c index cb940b142c5f..231b23c12c3d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -672,8 +672,9 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w return result; } -struct file_lock * -posix_test_lock(struct file *filp, struct file_lock *fl) +int +posix_test_lock(struct file *filp, struct file_lock *fl, + struct file_lock *conflock) { struct file_lock *cfl; @@ -684,9 +685,13 @@ posix_test_lock(struct file *filp, struct file_lock *fl) if (posix_locks_conflict(cfl, fl)) break; } + if (cfl) { + locks_copy_lock(conflock, cfl); + unlock_kernel(); + return 1; + } unlock_kernel(); - - return (cfl); + return 0; } EXPORT_SYMBOL(posix_test_lock); @@ -1563,7 +1568,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) */ int fcntl_getlk(struct file *filp, struct flock __user *l) { - struct file_lock *fl, file_lock; + struct file_lock *fl, cfl, file_lock; struct flock flock; int error; @@ -1587,7 +1592,7 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) else fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); } else { - fl = posix_test_lock(filp, &file_lock); + fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL); } flock.l_type = F_UNLCK; @@ -1717,7 +1722,7 @@ out: */ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) { - struct file_lock *fl, file_lock; + struct file_lock *fl, cfl, file_lock; struct flock64 flock; int error; @@ -1741,7 +1746,7 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) else fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); } else { - fl = posix_test_lock(filp, &file_lock); + fl = (posix_test_lock(filp, &file_lock, &cfl) ? 
&cfl : NULL); } flock.l_type = F_UNLCK; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1cf07e4ad136..ee140c53dba6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -392,15 +392,14 @@ out_swapfile: static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) { - struct file_lock *cfl; + struct file_lock cfl; struct inode *inode = filp->f_mapping->host; int status = 0; lock_kernel(); /* Try local locking first */ - cfl = posix_test_lock(filp, fl); - if (cfl != NULL) { - locks_copy_lock(fl, cfl); + if (posix_test_lock(filp, fl, &cfl)) { + locks_copy_lock(fl, &cfl); goto out; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1143cfb64549..f6ab762bea99 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2639,7 +2639,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock struct nfs4_stateid *lock_stp; struct file *filp; struct file_lock file_lock; - struct file_lock *conflock; + struct file_lock conflock; int status = 0; unsigned int strhashval; @@ -2775,11 +2775,11 @@ conflicting_lock: /* XXX There is a race here. Future patch needed to provide * an atomic posix_lock_and_test_file */ - if (!(conflock = posix_test_lock(filp, &file_lock))) { + if (!posix_test_lock(filp, &file_lock, &conflock)) { status = nfserr_serverfault; goto out; } - nfs4_set_lock_denied(conflock, &lock->lk_denied); + nfs4_set_lock_denied(&conflock, &lock->lk_denied); out: if (status && lock->lk_is_new && lock_sop) release_stateowner(lock_sop); @@ -2800,7 +2800,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock struct inode *inode; struct file file; struct file_lock file_lock; - struct file_lock *conflicting_lock; + struct file_lock conflock; int status; if (nfs4_in_grace()) @@ -2864,10 +2864,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock file.f_dentry = current_fh->fh_dentry; status = nfs_ok; - conflicting_lock = posix_test_lock(&file, &file_lock); - if (conflicting_lock) { + if (posix_test_lock(&file, &file_lock, &conflock)) { status = nfserr_denied; - nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied); + nfs4_set_lock_denied(&conflock, &lockt->lt_denied); } out: nfs4_unlock_state(); diff --git a/include/linux/fs.h b/include/linux/fs.h index b01482c721ae..8ef4dd788a83 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -754,7 +754,7 @@ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); -extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); +extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); -- cgit v1.2.3 From 7117bf3dfb10b534a017260d9fc643bc1d0afd2a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 13:44:26 -0500 Subject: lockd: Remove FL_LOCKD flag Currently lockd identifies its own locks using the FL_LOCKD flag. This doesn't scale well to multiple lock managers--if we did this in nfsv4 too, for example, we'd be left with only one free flag bit. Instead, we just check whether the file manager ops (fl_lmops) set on this lock are our own. 
The only use for this is in nlm_traverse_locks, which uses it to find locks that need cleaning up when freeing a host or a file. In the long run it might be nice to do reference counting instead of traversing all the locks like this.... Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 2 -- fs/lockd/svcsubs.c | 2 +- include/linux/fs.h | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index d683dd022e08..d50946dcddd9 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -313,8 +313,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, /* Get existing block (in case client is busy-waiting) */ block = nlmsvc_lookup_block(file, lock, 0); - lock->fl.fl_flags |= FL_LOCKD; - again: /* Lock file against concurrent access */ down(&file->f_sema); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 62f4a385177f..601e5b3dfe20 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -182,7 +182,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action) again: file->f_locks = 0; for (fl = inode->i_flock; fl; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_LOCKD)) + if (fl->fl_lmops != &nlmsvc_lock_operations) continue; /* update current lock count */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 8ef4dd788a83..d2cffee8fc11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -667,7 +667,6 @@ extern spinlock_t files_lock; #define FL_POSIX 1 #define FL_FLOCK 2 #define FL_ACCESS 8 /* not trying to lock, just looking */ -#define FL_LOCKD 16 /* lock held by rpc.lockd */ #define FL_LEASE 32 /* lease held on this file */ #define FL_SLEEP 128 /* A blocking lock */ -- cgit v1.2.3 From 788e7a89a03e364855583c0ab4649b94925efbb9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:27 -0500 Subject: NFS: Cleanup of NFS write code in preparation for asynchronous o_direct This patch inverts the callback hierarchy for NFS write calls. Instead of having the NFSv2/v3/v4-specific code set up the RPC callback ops, we allow the original caller to do so. This allows for more flexibility w.r.t. how to set up and tear down the nfs_write_data structure while still allowing the NFSv3/v4 code to perform error handling. The greater flexibility is needed by the asynchronous O_DIRECT code, which wants to be able to hold on to the original nfs_write_data structures after the WRITE RPC call has completed in order to be able to replay them if the COMMIT call determines that the server has rebooted. 
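A rough, stand-alone C sketch of that inversion may help before reading the diff that follows. It is illustrative only: the struct and function names below are invented for the sketch, not kernel symbols, but the shape matches the patch, where the generic layer owns the single RPC completion callback and merely consults a per-version write_done hook from the ops table (the role nfs_writeback_done and NFS_PROTO(inode)->write_done play in the diff).

#include <stdio.h>

/* Per-version hook table, standing in for nfs_rpc_ops. */
struct version_ops {
	const char *name;
	/* Return 0 to let generic completion run, nonzero to stop (e.g. a retry). */
	int (*write_done)(int rpc_status);
};

static int v3_write_done(int rpc_status) { (void)rpc_status; return 0; }
static int v4_write_done(int rpc_status) { return rpc_status < 0; }

static const struct version_ops v3 = { "v3", v3_write_done };
static const struct version_ops v4 = { "v4", v4_write_done };

/* The single generic completion callback; version code no longer wraps it. */
static void generic_writeback_done(const struct version_ops *ops, int rpc_status)
{
	if (ops->write_done(rpc_status) != 0) {
		printf("%s: version code kept the request, generic path skipped\n",
		       ops->name);
		return;
	}
	printf("%s: generic writeback completion, status %d\n",
	       ops->name, rpc_status);
}

int main(void)
{
	generic_writeback_done(&v3, 0);
	generic_writeback_done(&v4, -5);
	return 0;
}

The payoff is the one named in the changelog: because the caller now supplies the completion ops, the O_DIRECT code can keep the write data alive after completion without the version-specific code needing to know.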
Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 66 +++++++++-------------------------- fs/nfs/nfs4proc.c | 54 +++++++---------------------- fs/nfs/proc.c | 24 +++---------- fs/nfs/write.c | 92 +++++++++++++++++++++++++++++++++++-------------- include/linux/nfs_fs.h | 7 ---- include/linux/nfs_xdr.h | 3 +- 6 files changed, 103 insertions(+), 143 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 740f8b1ab04d..c4f7de8830e9 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -849,29 +849,17 @@ nfs3_proc_read_setup(struct nfs_read_data *data) rpc_call_setup(task, &msg, 0); } -static void nfs3_write_done(struct rpc_task *task, void *calldata) +static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - if (nfs3_async_handle_jukebox(task, data->inode)) - return; + return -EAGAIN; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_write_ops = { - .rpc_call_done = nfs3_write_done, - .rpc_release = nfs_writedata_release, -}; - -static void -nfs3_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs3_proc_write_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int stable; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], .rpc_argp = &data->args, @@ -879,45 +867,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how) .rpc_cred = data->cred, }; + data->args.stable = NFS_UNSTABLE; if (how & FLUSH_STABLE) { - if (!NFS_I(inode)->ncommit) - stable = NFS_FILE_SYNC; - else - stable = NFS_DATA_SYNC; - } else - stable = NFS_UNSTABLE; - data->args.stable = stable; - - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + data->args.stable = NFS_FILE_SYNC; + if (NFS_I(data->inode)->ncommit) + data->args.stable = NFS_DATA_SYNC; + } /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs3_commit_done(struct rpc_task *task, void *calldata) +static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - if (nfs3_async_handle_jukebox(task, data->inode)) - return; + return -EAGAIN; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_commit_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_commit_ops = { - .rpc_call_done = nfs3_commit_done, - .rpc_release = nfs_commit_release, -}; - -static void -nfs3_proc_commit_setup(struct nfs_write_data *data, int how) +static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], .rpc_argp = &data->args, @@ -925,12 +896,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how) .rpc_cred = data->cred, }; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - - /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static int @@ -970,7 +936,9 @@ struct nfs_rpc_ops nfs_v3_clientops = { .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, .write_setup = nfs3_proc_write_setup, + .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, + .commit_done = nfs3_commit_done, .file_open = nfs_open, .file_release = nfs_release, .lock = nfs3_proc_lock, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f1ff4fa6cce5..ef4dc315ecc2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2388,32 +2388,23 @@ nfs4_proc_read_setup(struct nfs_read_data *data) rpc_call_setup(task, &msg, 0); } -static void nfs4_write_done(struct rpc_task *task, void *calldata) +static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { rpc_restart_call(task); - return; + return -EAGAIN; } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), data->timestamp); nfs_post_op_update_inode(inode, data->res.fattr); } - /* Call back common NFS writeback processing */ - nfs_writeback_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs4_write_ops = { - .rpc_call_done = nfs4_write_done, - .rpc_release = nfs_writedata_release, -}; - -static void -nfs4_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs4_proc_write_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], .rpc_argp = &data->args, @@ -2423,7 +2414,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how) struct inode *inode = data->inode; struct nfs_server *server = NFS_SERVER(inode); int stable; - int flags; if (how & FLUSH_STABLE) { if (!NFS_I(inode)->ncommit) @@ -2438,57 +2428,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how) data->timestamp = jiffies; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs4_commit_done(struct rpc_task *task, void *calldata) +static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { rpc_restart_call(task); - return; + return -EAGAIN; } if (task->tk_status >= 0) nfs_post_op_update_inode(inode, data->res.fattr); - /* Call back common NFS writeback processing */ - nfs_commit_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs4_commit_ops = { - .rpc_call_done = nfs4_commit_done, - .rpc_release = nfs_commit_release, -}; - -static void -nfs4_proc_commit_setup(struct nfs_write_data *data, int how) +static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], .rpc_argp = &data->args, .rpc_resp = &data->res, .rpc_cred = data->cred, }; - struct inode *inode = data->inode; - struct nfs_server *server = NFS_SERVER(inode); - int flags; + struct nfs_server *server = NFS_SERVER(data->inode); data->args.bitmask = server->attr_bitmask; data->res.server = server; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } /* @@ -3648,7 +3618,9 @@ struct nfs_rpc_ops nfs_v4_clientops = { .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, .write_setup = nfs4_proc_write_setup, + .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, + .commit_done = nfs4_commit_done, .file_open = nfs_open, .file_release = nfs_release, .lock = nfs4_proc_lock, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 2b051ab8bea8..608aa5932a1d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -654,26 +654,15 @@ nfs_proc_read_setup(struct nfs_read_data *data) rpc_call_setup(task, &msg, 0); } -static void nfs_write_done(struct rpc_task *task, void *calldata) +static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs_write_ops = { - .rpc_call_done = nfs_write_done, - .rpc_release = nfs_writedata_release, -}; - -static void -nfs_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs_proc_write_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_WRITE], .rpc_argp = &data->args, @@ -684,12 +673,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how) /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ data->args.stable = NFS_FILE_SYNC; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static void @@ -736,6 +721,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .decode_dirent = nfs_decode_dirent, .read_setup = nfs_proc_read_setup, .write_setup = nfs_proc_write_setup, + .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, .file_open = nfs_open, .file_release = nfs_release, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e7c8361cf201..5912274ff1a1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -77,12 +77,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct inode *, struct page *, unsigned int, unsigned int); -static void nfs_writeback_done_partial(struct nfs_write_data *, int); -static void nfs_writeback_done_full(struct nfs_write_data *, int); +static int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); static int nfs_wait_on_write_congestion(struct address_space *, int); static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how); +static const struct rpc_call_ops nfs_write_partial_ops; +static const struct rpc_call_ops nfs_write_full_ops; +static const struct rpc_call_ops nfs_commit_ops; static kmem_cache_t *nfs_wdata_cachep; mempool_t *nfs_wdata_mempool; @@ -872,10 +874,12 @@ static inline int flush_task_priority(int how) */ static void nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, int how) { struct inode *inode; + int flags; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -896,6 +900,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); + /* Set up the initial task struct. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); NFS_PROTO(inode)->write_setup(data, how); data->task.tk_priority = flush_task_priority(how); @@ -959,14 +966,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) list_del_init(&data->pages); data->pagevec[0] = page; - data->complete = nfs_writeback_done_partial; if (nbytes > wsize) { - nfs_write_rpcsetup(req, data, wsize, offset, how); + nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + wsize, offset, how); offset += wsize; nbytes -= wsize; } else { - nfs_write_rpcsetup(req, data, nbytes, offset, how); + nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + nbytes, offset, how); nbytes = 0; } nfs_execute_write(data); @@ -1020,9 +1028,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) } req = nfs_list_entry(data->pages.next); - data->complete = nfs_writeback_done_full; /* Set up the argument struct */ - nfs_write_rpcsetup(req, data, count, 0, how); + nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); nfs_execute_write(data); return 0; @@ -1066,8 +1073,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how) /* * Handle a write reply that flushed part of a page. 
*/ -static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) +static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) { + struct nfs_write_data *data = calldata; struct nfs_page *req = data->req; struct page *page = req->wb_page; @@ -1077,11 +1085,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) req->wb_bytes, (long long)req_offset(req)); - if (status < 0) { + if (nfs_writeback_done(task, data) != 0) + return; + + if (task->tk_status < 0) { ClearPageUptodate(page); SetPageError(page); - req->wb_context->error = status; - dprintk(", error = %d\n", status); + req->wb_context->error = task->tk_status; + dprintk(", error = %d\n", task->tk_status); } else { #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (data->verf.committed < NFS_FILE_SYNC) { @@ -1102,6 +1113,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) nfs_writepage_release(req); } +static const struct rpc_call_ops nfs_write_partial_ops = { + .rpc_call_done = nfs_writeback_done_partial, + .rpc_release = nfs_writedata_release, +}; + /* * Handle a write reply that flushes a whole page. * @@ -1109,11 +1125,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) * writebacks since the page->count is kept > 1 for as long * as the page has a write request pending. */ -static void nfs_writeback_done_full(struct nfs_write_data *data, int status) +static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) { + struct nfs_write_data *data = calldata; struct nfs_page *req; struct page *page; + if (nfs_writeback_done(task, data) != 0) + return; + /* Update attributes as result of writeback. */ while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1126,13 +1146,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) req->wb_bytes, (long long)req_offset(req)); - if (status < 0) { + if (task->tk_status < 0) { ClearPageUptodate(page); SetPageError(page); - req->wb_context->error = status; + req->wb_context->error = task->tk_status; end_page_writeback(page); nfs_inode_remove_request(req); - dprintk(", error = %d\n", status); + dprintk(", error = %d\n", task->tk_status); goto next; } end_page_writeback(page); @@ -1154,18 +1174,28 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) } } +static const struct rpc_call_ops nfs_write_full_ops = { + .rpc_call_done = nfs_writeback_done_full, + .rpc_release = nfs_writedata_release, +}; + + /* * This function is called when the WRITE call is complete. 
*/ -void nfs_writeback_done(struct rpc_task *task, void *calldata) +static int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; + int status; dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); + /* Call the NFS version-specific code */ + status = NFS_PROTO(data->inode)->write_done(task, data); + if (status != 0) + return status; nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -1210,7 +1240,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) argp->stable = NFS_FILE_SYNC; } rpc_restart_call(task); - return; + return -EAGAIN; } if (time_before(complain, jiffies)) { printk(KERN_WARNING @@ -1221,11 +1251,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) /* Can't do anything about it except throw an error. */ task->tk_status = -EIO; } - - /* - * Process the nfs_page list - */ - data->complete(data, task->tk_status); + return 0; } @@ -1239,10 +1265,12 @@ void nfs_commit_release(void *wdata) * Set up the argument/result storage required for the RPC call. */ static void nfs_commit_rpcsetup(struct list_head *head, - struct nfs_write_data *data, int how) + struct nfs_write_data *data, + int how) { struct nfs_page *first; struct inode *inode; + int flags; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -1262,7 +1290,10 @@ static void nfs_commit_rpcsetup(struct list_head *head, data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); - + + /* Set up the initial task struct. */ + flags = (how & FLUSH_SYNC) ? 
0 : RPC_TASK_ASYNC; + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data); NFS_PROTO(inode)->commit_setup(data, how); data->task.tk_priority = flush_task_priority(how); @@ -1303,7 +1334,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) /* * COMMIT call returned */ -void nfs_commit_done(struct rpc_task *task, void *calldata) +static void nfs_commit_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; struct nfs_page *req; @@ -1312,6 +1343,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_commit_done (status %d)\n", task->tk_pid, task->tk_status); + /* Call the NFS version-specific code */ + if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) + return; + while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); @@ -1345,6 +1380,11 @@ void nfs_commit_done(struct rpc_task *task, void *calldata) } sub_page_state(nr_unstable,res); } + +static const struct rpc_call_ops nfs_commit_ops = { + .rpc_call_done = nfs_commit_done, + .rpc_release = nfs_commit_release, +}; #endif static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b71da4d4b137..782e59765696 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -407,13 +407,6 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); -extern void nfs_writeback_done(struct rpc_task *task, void *data); -extern void nfs_writedata_release(void *data); - -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern void nfs_commit_done(struct rpc_task *, void *data); -extern void nfs_commit_release(void *data); -#endif /* * Try to write back everything synchronously (but check the diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6d6f69ec5675..277750cc70c0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -714,7 +714,6 @@ struct nfs_write_data { #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif - void (*complete) (struct nfs_write_data *, int); struct page *page_array[NFS_PAGEVEC_SIZE + 1]; }; @@ -770,7 +769,9 @@ struct nfs_rpc_ops { u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, int how); + int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, int how); + int (*commit_done) (struct rpc_task *, struct nfs_write_data *); int (*file_open) (struct inode *, struct file *); int (*file_release) (struct inode *, struct file *); int (*lock)(struct file *, int, struct file_lock *); -- cgit v1.2.3 From ec06c096edec0755534c7126f4caded69de131c2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:27 -0500 Subject: NFS: Cleanup of NFS read code Same callback hierarchy inversion as for the NFS write calls. This patch is not strictly speaking needed by the O_DIRECT code, but avoids confusing differences between the asynchronous read and write code. 
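The retry contract shared by the read and write hooks is worth spelling out: the version-specific *_done hook may restart the RPC and return -EAGAIN, in which case the generic completion handler bails out without touching the request. Below is a minimal stand-alone sketch of that contract, with invented names (only nfs_readpage_result and nfs_writeback_done are real symbols from these patches), assuming a hook that restarts once on a recoverable error.

#include <stdio.h>
#include <errno.h>

static int restarted;

/* Stands in for a version-specific read_done/write_done hook. */
static int version_done(int rpc_status)
{
	if (rpc_status == -EIO && !restarted) {
		restarted = 1;			/* pretend the RPC was re-queued */
		printf("recoverable error, restarting the call\n");
		return -EAGAIN;			/* tell the generic handler to bail out */
	}
	return 0;				/* safe to finish the request */
}

/* Stands in for the generic completion (nfs_readpage_result/nfs_writeback_done). */
static void generic_done(int rpc_status)
{
	if (version_done(rpc_status) != 0)
		return;				/* restarted: pages are still in flight */
	printf("completing request, status %d\n", rpc_status);
}

int main(void)
{
	generic_done(-EIO);	/* first attempt: the hook restarts it */
	generic_done(0);	/* second attempt: completes normally */
	return 0;
}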
Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 17 +++++++++++---- fs/nfs/nfs3proc.c | 27 +++++------------------ fs/nfs/nfs4proc.c | 33 ++++++++-------------------- fs/nfs/proc.c | 25 +++++---------------- fs/nfs/read.c | 58 ++++++++++++++++++++++++++++++++++--------------- include/linux/nfs_fs.h | 4 ++-- include/linux/nfs_xdr.h | 2 +- 7 files changed, 77 insertions(+), 89 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fc07ce4885da..3f87a72bd137 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -218,14 +218,17 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int * until the RPCs complete. This could be long *after* we are woken up in * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). */ -static void nfs_direct_read_result(struct nfs_read_data *data, int status) +static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - if (likely(status >= 0)) + if (nfs_readpage_result(task, data) != 0) + return; + if (likely(task->tk_status >= 0)) atomic_add(data->res.count, &dreq->count); else - atomic_set(&dreq->error, status); + atomic_set(&dreq->error, task->tk_status); if (unlikely(atomic_dec_and_test(&dreq->complete))) { nfs_free_user_pages(dreq->pages, dreq->npages, 1); @@ -234,6 +237,11 @@ static void nfs_direct_read_result(struct nfs_read_data *data, int status) } } +static const struct rpc_call_ops nfs_read_direct_ops = { + .rpc_call_done = nfs_direct_read_result, + .rpc_release = nfs_readdata_release, +}; + /** * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read * @dreq: address of nfs_direct_req struct for this request @@ -280,10 +288,11 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, data->res.eof = 0; data->res.count = bytes; + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_read_direct_ops, data); NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long) inode; - data->complete = nfs_direct_read_result; lock_kernel(); rpc_execute(&data->task); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c4f7de8830e9..cf186f0d2b3b 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -811,29 +811,18 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); -static void nfs3_read_done(struct rpc_task *task, void *calldata) +static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; - if (nfs3_async_handle_jukebox(task, data->inode)) - return; + return -EAGAIN; /* Call back common NFS readpage processing */ if (task->tk_status >= 0) nfs_refresh_inode(data->inode, &data->fattr); - nfs_readpage_result(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_read_ops = { - .rpc_call_done = nfs3_read_done, - .rpc_release = nfs_readdata_release, -}; - -static void -nfs3_proc_read_setup(struct nfs_read_data *data) +static void nfs3_proc_read_setup(struct nfs_read_data *data) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_READ], .rpc_argp = &data->args, @@ -841,12 +830,7 @@ nfs3_proc_read_setup(struct nfs_read_data *data) .rpc_cred = data->cred, }; - /* N.B. Do we need to test? 
Never called for swapfile inode */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -935,6 +919,7 @@ struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, + .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ef4dc315ecc2..bad1eae5608a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2345,47 +2345,31 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static void nfs4_read_done(struct rpc_task *task, void *calldata) +static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; - struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(data->inode); - if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + if (nfs4_async_handle_error(task, server) == -EAGAIN) { rpc_restart_call(task); - return; + return -EAGAIN; } if (task->tk_status > 0) - renew_lease(NFS_SERVER(inode), data->timestamp); - /* Call back common NFS readpage processing */ - nfs_readpage_result(task, calldata); + renew_lease(server, data->timestamp); + return 0; } -static const struct rpc_call_ops nfs4_read_ops = { - .rpc_call_done = nfs4_read_done, - .rpc_release = nfs_readdata_release, -}; - -static void -nfs4_proc_read_setup(struct nfs_read_data *data) +static void nfs4_proc_read_setup(struct nfs_read_data *data) { - struct rpc_task *task = &data->task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], .rpc_argp = &data->args, .rpc_resp = &data->res, .rpc_cred = data->cred, }; - struct inode *inode = data->inode; - int flags; data->timestamp = jiffies; - /* N.B. Do we need to test? Never called for swapfile inode */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - - /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -3617,6 +3601,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .pathconf = nfs4_proc_pathconf, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, + .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 608aa5932a1d..9dd85cac2df0 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -613,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); -static void nfs_read_done(struct rpc_task *task, void *calldata) +static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; - if (task->tk_status >= 0) { nfs_refresh_inode(data->inode, data->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 @@ -625,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata) if (data->args.offset + data->args.count >= data->res.fattr->size) data->res.eof = 1; } - nfs_readpage_result(task, calldata); + return 0; } -static const struct rpc_call_ops nfs_read_ops = { - .rpc_call_done = nfs_read_done, - .rpc_release = nfs_readdata_release, -}; - -static void -nfs_proc_read_setup(struct nfs_read_data *data) +static void nfs_proc_read_setup(struct nfs_read_data *data) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READ], .rpc_argp = &data->args, @@ -646,12 +635,7 @@ nfs_proc_read_setup(struct nfs_read_data *data) .rpc_cred = data->cred, }; - /* N.B. Do we need to test? Never called for swapfile inode */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - - /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -720,6 +704,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs_decode_dirent, .read_setup = nfs_proc_read_setup, + .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index ae3ddd24cf8f..2da255f0247f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -36,8 +36,8 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE static int nfs_pagein_one(struct list_head *, struct inode *); -static void nfs_readpage_result_partial(struct nfs_read_data *, int); -static void nfs_readpage_result_full(struct nfs_read_data *, int); +static const struct rpc_call_ops nfs_read_partial_ops; +static const struct rpc_call_ops nfs_read_full_ops; static kmem_cache_t *nfs_rdata_cachep; mempool_t *nfs_rdata_mempool; @@ -200,9 +200,11 @@ static void nfs_readpage_release(struct nfs_page *req) * Set up the NFS read request struct */ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset) { struct inode *inode; + int flags; data->req = req; data->inode = inode = req->wb_context->dentry->d_inode; @@ -220,6 +222,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->res.eof = 0; nfs_fattr_init(&data->fattr); + /* Set up the initial task struct. */ + flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long)inode; @@ -307,14 +312,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) list_del_init(&data->pages); data->pagevec[0] = page; - data->complete = nfs_readpage_result_partial; if (nbytes > rsize) { - nfs_read_rpcsetup(req, data, rsize, offset); + nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + rsize, offset); offset += rsize; nbytes -= rsize; } else { - nfs_read_rpcsetup(req, data, nbytes, offset); + nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + nbytes, offset); nbytes = 0; } nfs_execute_read(data); @@ -360,8 +366,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) } req = nfs_list_entry(data->pages.next); - data->complete = nfs_readpage_result_full; - nfs_read_rpcsetup(req, data, count, 0); + nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); nfs_execute_read(data); return 0; @@ -395,12 +400,15 @@ nfs_pagein_list(struct list_head *head, int rpages) /* * Handle a read reply that fills part of a page. 
*/ -static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) +static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; struct nfs_page *req = data->req; struct page *page = req->wb_page; - if (status >= 0) { + if (nfs_readpage_result(task, data) != 0) + return; + if (task->tk_status >= 0) { unsigned int request = data->args.count; unsigned int result = data->res.count; @@ -419,20 +427,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) } } +static const struct rpc_call_ops nfs_read_partial_ops = { + .rpc_call_done = nfs_readpage_result_partial, + .rpc_release = nfs_readdata_release, +}; + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -static void nfs_readpage_result_full(struct nfs_read_data *data, int status) +static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; unsigned int count = data->res.count; + if (nfs_readpage_result(task, data) != 0) + return; while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); struct page *page = req->wb_page; nfs_list_remove_request(req); - if (status >= 0) { + if (task->tk_status >= 0) { if (count < PAGE_CACHE_SIZE) { if (count < req->wb_bytes) memclear_highpage_flush(page, @@ -448,19 +464,27 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status) } } +static const struct rpc_call_ops nfs_read_full_ops = { + .rpc_call_done = nfs_readpage_result_full, + .rpc_release = nfs_readdata_release, +}; + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). 
*/ -void nfs_readpage_result(struct rpc_task *task, void *calldata) +int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; struct nfs_readargs *argp = &data->args; struct nfs_readres *resp = &data->res; - int status = task->tk_status; + int status; dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", - task->tk_pid, status); + task->tk_pid, task->tk_status); + + status = NFS_PROTO(data->inode)->read_done(task, data); + if (status != 0) + return status; nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); @@ -474,14 +498,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata) argp->pgbase += resp->count; argp->count -= resp->count; rpc_restart_call(task); - return; + return -EAGAIN; } task->tk_status = -EIO; } spin_lock(&data->inode->i_lock); NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&data->inode->i_lock); - data->complete(data, status); + return 0; } /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 782e59765696..f55827be4f8e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -492,8 +492,8 @@ static inline void nfs_writedata_free(struct nfs_write_data *p) extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); -extern void nfs_readpage_result(struct rpc_task *, void *); -extern void nfs_readdata_release(void *data); +extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); +extern void nfs_readdata_release(void *data); /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 277750cc70c0..7fafc4c546b7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -695,7 +695,6 @@ struct nfs_read_data { #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif - void (*complete) (struct nfs_read_data *, int); struct page *page_array[NFS_PAGEVEC_SIZE + 1]; }; @@ -768,6 +767,7 @@ struct nfs_rpc_ops { struct nfs_pathconf *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); + int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, int how); int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, int how); -- cgit v1.2.3 From b8a32e2b8b7fefff994c89d398b6ac920a195b43 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:28 -0500 Subject: NFS: clean up NFS client's a_ops->direct_IO method The NFS client's a_ops->direct_IO method, nfs_direct_IO, is required to be present to allow NFS files to be opened with O_DIRECT, but is never called because the NFS client shunts reads and writes to files opened with O_DIRECT directly to its own routines. Gut the nfs_direct_IO function. This eliminates the only part of the NFS client's direct I/O path that requires support for multi-segment iovs, allowing further simplification in subsequent patches. Test plan: Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx-odirect ops. OraSim. 
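To make the first sentence of that changelog concrete: the VFS of this era refuses an O_DIRECT open when the file's address_space operations provide no ->direct_IO method, which is why the stub must remain even though it is never called. The following user-space check is an assumed test snippet, not part of the patch; the path is a placeholder, and the EINVAL interpretation in the comment reflects the usual behaviour rather than anything stated in the commit.

#define _GNU_SOURCE		/* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* Point this at a file on an NFS mount to exercise the gate. */
	const char *path = argc > 1 ? argv[1] : "/mnt/nfs/testfile";
	int fd = open(path, O_RDWR | O_CREAT | O_DIRECT, 0644);

	if (fd < 0) {
		/* EINVAL here is typically the "no ->direct_IO" / unsupported case. */
		printf("open(%s, O_DIRECT): %s\n", path, strerror(errno));
		return 1;
	}
	printf("open(%s, O_DIRECT) succeeded, fd %d\n", path, fd);
	close(fd);
	return 0;
}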
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 70 +++++++++++++++++++-------------------------------------- 1 file changed, 23 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3f87a72bd137..8096d326bd79 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -78,6 +78,29 @@ struct nfs_direct_req { }; +/** + * nfs_direct_IO - NFS address space operation for direct I/O + * @rw: direction (read or write) + * @iocb: target I/O control block + * @iov: array of vectors that define I/O buffer + * @pos: offset in file to begin the operation + * @nr_segs: size of iovec array + * + * The presence of this routine in the address space ops vector means + * the NFS client supports direct I/O. However, we shunt off direct + * read and write requests before the VFS gets them, so this method + * should never be called. + */ +ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) +{ + struct dentry *dentry = iocb->ki_filp->f_dentry; + + dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", + dentry->d_name.name, (long long) pos, nr_segs); + + return -EINVAL; +} + /** * nfs_get_user_pages - find and set up pages underlying user's buffer * rw: direction (read or write) @@ -605,53 +628,6 @@ static ssize_t nfs_direct_write(struct inode *inode, return tot_bytes; } -/** - * nfs_direct_IO - NFS address space operation for direct I/O - * rw: direction (read or write) - * @iocb: target I/O control block - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * - */ -ssize_t -nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t file_offset, unsigned long nr_segs) -{ - ssize_t result = -EINVAL; - struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx; - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; - - /* - * No support for async yet - */ - if (!is_sync_kiocb(iocb)) - return result; - - ctx = (struct nfs_open_context *)file->private_data; - switch (rw) { - case READ: - dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n", - dentry->d_name.name, file_offset, nr_segs); - - result = nfs_direct_read(inode, ctx, iov, - file_offset, nr_segs); - break; - case WRITE: - dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n", - dentry->d_name.name, file_offset, nr_segs); - - result = nfs_direct_write(inode, ctx, iov, - file_offset, nr_segs); - break; - default: - break; - } - return result; -} - /** * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block -- cgit v1.2.3 From d4cc948ba97980c55a308eab167a695109796456 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:28 -0500 Subject: NFS: update comments and function definitions in fs/nfs/direct.c Update to latest coding style standards. Remove block comments on statically defined functions, and place function definitions all on one line. Test plan: Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 125 +++++++------------------------------------------------- 1 file changed, 15 insertions(+), 110 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8096d326bd79..d38c3dc052a7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -101,16 +101,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } -/** - * nfs_get_user_pages - find and set up pages underlying user's buffer - * rw: direction (read or write) - * user_addr: starting address of this segment of user's buffer - * count: size of this segment - * @pages: returned array of page struct pointers underlying user's buffer - */ -static inline int -nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, - struct page ***pages) +static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) { int result = -ENOMEM; unsigned long page_count; @@ -147,14 +138,7 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, return result; } -/** - * nfs_free_user_pages - tear down page struct array - * @pages: array of page struct pointers underlying target buffer - * @npages: number of pages in the array - * @do_dirty: dirty the pages as we release them - */ -static void -nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) { int i; for (i = 0; i < npages; i++) { @@ -166,22 +150,13 @@ nfs_free_user_pages(struct page **pages, int npages, int do_dirty) kfree(pages); } -/** - * nfs_direct_req_release - release nfs_direct_req structure for direct read - * @kref: kref object embedded in an nfs_direct_req structure - * - */ static void nfs_direct_req_release(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); kmem_cache_free(nfs_direct_cachep, dreq); } -/** - * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read - * @count: count of bytes for the read request - * @rsize: local rsize setting - * +/* * Note we also set the number of requests we have in the dreq when we are * done. This prevents races with I/O completion so we will always wait * until all requests have been dispatched and completed. @@ -232,11 +207,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int return dreq; } -/** - * nfs_direct_read_result - handle a read reply for a direct read request - * @data: address of NFS READ operation control block - * @status: status of this NFS READ operation - * +/* * We must hold a reference to all the pages in this direct read request * until the RPCs complete. This could be long *after* we are woken up in * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). 
@@ -265,21 +236,11 @@ static const struct rpc_call_ops nfs_read_direct_ops = { .rpc_release = nfs_readdata_release, }; -/** - * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read - * @dreq: address of nfs_direct_req struct for this request - * @inode: target inode - * @ctx: target file open context - * @user_addr: starting address of this segment of user's buffer - * @count: size of this segment - * @file_offset: offset in file to begin the operation - * +/* * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, - struct inode *inode, struct nfs_open_context *ctx, - unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset) { struct list_head *list = &dreq->list; struct page **pages = dreq->pages; @@ -337,11 +298,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, } while (count != 0); } -/** - * nfs_direct_read_wait - wait for I/O completion for direct reads - * @dreq: request on which we are to wait - * @intr: whether or not this wait can be interrupted - * +/* * Collects and returns the final error value/byte-count. */ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) @@ -364,22 +321,7 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) return (ssize_t) result; } -/** - * nfs_direct_read_seg - Read in one iov segment. Generate separate - * read RPCs for each "rsize" bytes. - * @inode: target inode - * @ctx: target file open context - * @user_addr: starting address of this segment of user's buffer - * @count: size of this segment - * @file_offset: offset in file to begin the operation - * @pages: array of addresses of page structs defining user's buffer - * @nr_pages: number of pages in the array - * - */ -static ssize_t nfs_direct_read_seg(struct inode *inode, - struct nfs_open_context *ctx, unsigned long user_addr, - size_t count, loff_t file_offset, struct page **pages, - unsigned int nr_pages) +static ssize_t nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) { ssize_t result; sigset_t oldset; @@ -404,22 +346,11 @@ static ssize_t nfs_direct_read_seg(struct inode *inode, return result; } -/** - * nfs_direct_read - For each iov segment, map the user's buffer - * then generate read RPCs. - * @inode: target inode - * @ctx: target file open context - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * +/* * We've already pushed out any non-direct writes so that this read * will see them when we read from the server. */ -static ssize_t -nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, - const struct iovec *iov, loff_t file_offset, - unsigned long nr_segs) +static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { ssize_t tot_bytes = 0; unsigned long seg = 0; @@ -457,21 +388,7 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, return tot_bytes; } -/** - * nfs_direct_write_seg - Write out one iov segment. Generate separate - * write RPCs for each "wsize" bytes, then commit. 
- * @inode: target inode - * @ctx: target file open context - * user_addr: starting address of this segment of user's buffer - * count: size of this segment - * file_offset: offset in file to begin the operation - * @pages: array of addresses of page structs defining user's buffer - * nr_pages: size of pages array - */ -static ssize_t nfs_direct_write_seg(struct inode *inode, - struct nfs_open_context *ctx, unsigned long user_addr, - size_t count, loff_t file_offset, struct page **pages, - int nr_pages) +static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { const unsigned int wsize = NFS_SERVER(inode)->wsize; size_t request; @@ -573,22 +490,12 @@ sync_retry: goto retry; } -/** - * nfs_direct_write - For each iov segment, map the user's buffer - * then generate write and commit RPCs. - * @inode: target inode - * @ctx: target file open context - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * +/* * Upon return, generic_file_direct_IO invalidates any cached pages * that non-direct readers might access, so they will pick up these * writes immediately. */ -static ssize_t nfs_direct_write(struct inode *inode, - struct nfs_open_context *ctx, const struct iovec *iov, - loff_t file_offset, unsigned long nr_segs) +static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { ssize_t tot_bytes = 0; unsigned long seg = 0; @@ -649,8 +556,7 @@ static ssize_t nfs_direct_write(struct inode *inode, * client must read the updated atime from the server back into its * cache. */ -ssize_t -nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { ssize_t retval = -EINVAL; loff_t *ppos = &iocb->ki_pos; @@ -717,8 +623,7 @@ out: * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t -nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { ssize_t retval; struct file *file = iocb->ki_filp; -- cgit v1.2.3 From 5dd602f20688e08c85ac91e0451c4e6321ed25d7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:29 -0500 Subject: NFS: use size_t type for holding rsize bytes in NFS O_DIRECT read path size_t is used for holding byte counts, so use it for variables storing rsize. Note that the write path will be updated as we add support for async O_DIRECT writes. Test plan: Need to verify that existing comparisons against new size_t variables behave correctly. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d38c3dc052a7..8f5d2dfd5a8a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -161,7 +161,7 @@ static void nfs_direct_req_release(struct kref *kref) * done. This prevents races with I/O completion so we will always wait * until all requests have been dispatched and completed. 
*/ -static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize) +static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) { struct list_head *list; struct nfs_direct_req *dreq; @@ -244,14 +244,14 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, struct inode * { struct list_head *list = &dreq->list; struct page **pages = dreq->pages; + size_t rsize = NFS_SERVER(inode)->rsize; unsigned int curpage, pgbase; - unsigned int rsize = NFS_SERVER(inode)->rsize; curpage = 0; pgbase = user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; - unsigned int bytes; + size_t bytes; bytes = rsize; if (count < rsize) -- cgit v1.2.3 From 0cdd80d07fb0f558dfdb30f6e0b9905f5e5475f1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:29 -0500 Subject: NFS: remove support for multi-segment iovs in the direct read path Eliminate the persistent use of automatic storage in all parts of the NFS client's direct read path to pave the way for introducing support for aio against files opened with the O_DIRECT flag. Test plan: Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 66 +++++++++++++-------------------------------------------- 1 file changed, 15 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8f5d2dfd5a8a..6ecde9602f99 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -321,7 +321,7 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) return (ssize_t) result; } -static ssize_t nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) +static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) { ssize_t result; sigset_t oldset; @@ -346,48 +346,6 @@ static ssize_t nfs_direct_read_seg(struct inode *inode, struct nfs_open_context return result; } -/* - * We've already pushed out any non-direct writes so that this read - * will see them when we read from the server. 
- */ -static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) -{ - ssize_t tot_bytes = 0; - unsigned long seg = 0; - - while ((seg < nr_segs) && (tot_bytes >= 0)) { - ssize_t result; - int page_count; - struct page **pages; - const struct iovec *vec = &iov[seg++]; - unsigned long user_addr = (unsigned long) vec->iov_base; - size_t size = vec->iov_len; - - page_count = nfs_get_user_pages(READ, user_addr, size, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - if (tot_bytes > 0) - break; - return page_count; - } - - result = nfs_direct_read_seg(inode, ctx, user_addr, size, - file_offset, pages, page_count); - - if (result <= 0) { - if (tot_bytes > 0) - break; - return result; - } - tot_bytes += result; - file_offset += result; - if (result < size) - break; - } - - return tot_bytes; -} - static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { const unsigned int wsize = NFS_SERVER(inode)->wsize; @@ -559,16 +517,13 @@ static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ct ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { ssize_t retval = -EINVAL; - loff_t *ppos = &iocb->ki_pos; + int page_count; + struct page **pages; struct file *file = iocb->ki_filp; struct nfs_open_context *ctx = (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - struct iovec iov = { - .iov_base = buf, - .iov_len = count, - }; dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, @@ -580,7 +535,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, if (count < 0) goto out; retval = -EFAULT; - if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len)) + if (!access_ok(VERIFY_WRITE, buf, count)) goto out; retval = 0; if (!count) @@ -590,9 +545,18 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, if (retval) goto out; - retval = nfs_direct_read(inode, ctx, &iov, pos, 1); + page_count = nfs_get_user_pages(READ, (unsigned long) buf, + count, &pages); + if (page_count < 0) { + nfs_free_user_pages(pages, 0, 0); + retval = page_count; + goto out; + } + + retval = nfs_direct_read(inode, ctx, (unsigned long) buf, count, pos, + pages, page_count); if (retval > 0) - *ppos = pos + retval; + iocb->ki_pos = pos + retval; out: return retval; -- cgit v1.2.3 From 99514f8fdda2beef1ca922b7f9d89c1a2c57fec0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:30 -0500 Subject: NFS: make iocb available everywhere in direct read path Pass the iocb argument all the way down to the direct read request scheduler, and make it available in nfs_direct_read_result. Test plan: Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx-odirect ops. OraSim. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6ecde9602f99..6cbddc51acbc 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -68,6 +68,8 @@ static kmem_cache_t *nfs_direct_cachep; struct nfs_direct_req { struct kref kref; /* release manager */ struct list_head list; /* nfs_read_data structs */ + struct file * filp; /* file descriptor */ + struct kiocb * iocb; /* controlling i/o request */ wait_queue_head_t wait; /* wait for i/o completion */ struct inode * inode; /* target file of I/O */ struct page ** pages; /* pages in our buffer */ @@ -240,8 +242,12 @@ static const struct rpc_call_ops nfs_read_direct_ops = { * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset) { + struct file *file = dreq->filp; + struct inode *inode = file->f_mapping->host; + struct nfs_open_context *ctx = (struct nfs_open_context *) + file->private_data; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; size_t rsize = NFS_SERVER(inode)->rsize; @@ -321,10 +327,11 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) return (ssize_t) result; } -static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) +static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) { ssize_t result; sigset_t oldset; + struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; @@ -335,11 +342,11 @@ static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx dreq->pages = pages; dreq->npages = nr_pages; dreq->inode = inode; + dreq->filp = iocb->ki_filp; nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, - file_offset); + nfs_direct_read_schedule(dreq, user_addr, count, file_offset); result = nfs_direct_read_wait(dreq, clnt->cl_intr); rpc_clnt_sigunmask(clnt, &oldset); @@ -520,10 +527,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, int page_count; struct page **pages; struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx = - (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, @@ -553,7 +557,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, goto out; } - retval = nfs_direct_read(inode, ctx, (unsigned long) buf, count, pos, + retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, pages, page_count); if (retval > 0) iocb->ki_pos = pos + retval; -- cgit v1.2.3 From 487b83723ed4d4eaafd5109f36560da4f15c6578 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:30 -0500 Subject: NFS: support EIOCBQUEUED 
return in direct read path For async iocb's, the NFS direct read path should return EIOCBQUEUED and call aio_complete when all the requested reads are finished. The synchronous part of the NFS direct read path behaves exactly as it was before. Test plan: aio-stress with "-O". OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6cbddc51acbc..094456c3df90 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -177,6 +177,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) kref_init(&dreq->kref); init_waitqueue_head(&dreq->wait); INIT_LIST_HEAD(&dreq->list); + dreq->iocb = NULL; atomic_set(&dreq->count, 0); atomic_set(&dreq->error, 0); @@ -213,6 +214,10 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) * We must hold a reference to all the pages in this direct read request * until the RPCs complete. This could be long *after* we are woken up in * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). + * + * In addition, synchronous I/O uses a stack-allocated iocb. Thus we + * can't trust the iocb is still valid here if this is a synchronous + * request. If the waiter is woken prematurely, the iocb is long gone. */ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { @@ -228,7 +233,13 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) if (unlikely(atomic_dec_and_test(&dreq->complete))) { nfs_free_user_pages(dreq->pages, dreq->npages, 1); - wake_up(&dreq->wait); + if (dreq->iocb) { + long res = atomic_read(&dreq->error); + if (!res) + res = atomic_read(&dreq->count); + aio_complete(dreq->iocb, res, 0); + } else + wake_up(&dreq->wait); kref_put(&dreq->kref, nfs_direct_req_release); } } @@ -309,8 +320,13 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long */ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) { - int result = 0; + int result = -EIOCBQUEUED; + + /* Async requests don't wait here */ + if (dreq->iocb) + goto out; + result = 0; if (intr) { result = wait_event_interruptible(dreq->wait, (atomic_read(&dreq->complete) == 0)); @@ -323,6 +339,7 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) if (!result) result = atomic_read(&dreq->count); +out: kref_put(&dreq->kref, nfs_direct_req_release); return (ssize_t) result; } @@ -343,6 +360,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size dreq->npages = nr_pages; dreq->inode = inode; dreq->filp = iocb->ki_filp; + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); @@ -534,8 +553,6 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); - if (!is_sync_kiocb(iocb)) - goto out; if (count < 0) goto out; retval = -EFAULT; -- cgit v1.2.3 From bc0fb201b34b12e2d16e8cbd5bb078c1db936304 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:31 -0500 Subject: NFS: create common routine for waiting for direct I/O to complete We're about to add asynchrony to the NFS direct write path. Begin by abstracting out the common pieces in the read path. 
The first piece is nfs_direct_read_wait, which works the same whether the process is waiting for a read or a write. Test plan: Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 57 ++++++++++++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 094456c3df90..2593f47eaff0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -158,6 +158,30 @@ static void nfs_direct_req_release(struct kref *kref) kmem_cache_free(nfs_direct_cachep, dreq); } +/* + * Collects and returns the final error value/byte-count. + */ +static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) +{ + int result = -EIOCBQUEUED; + + /* Async requests don't wait here */ + if (dreq->iocb) + goto out; + + result = wait_event_interruptible(dreq->wait, + (atomic_read(&dreq->complete) == 0)); + + if (!result) + result = atomic_read(&dreq->error); + if (!result) + result = atomic_read(&dreq->count); + +out: + kref_put(&dreq->kref, nfs_direct_req_release); + return (ssize_t) result; +} + /* * Note we also set the number of requests we have in the dreq when we are * done. This prevents races with I/O completion so we will always wait @@ -213,7 +237,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) /* * We must hold a reference to all the pages in this direct read request * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). * * In addition, synchronous I/O uses a stack-allocated iocb. Thus we * can't trust the iocb is still valid here if this is a synchronous @@ -315,35 +339,6 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long } while (count != 0); } -/* - * Collects and returns the final error value/byte-count. - */ -static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) -{ - int result = -EIOCBQUEUED; - - /* Async requests don't wait here */ - if (dreq->iocb) - goto out; - - result = 0; - if (intr) { - result = wait_event_interruptible(dreq->wait, - (atomic_read(&dreq->complete) == 0)); - } else { - wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); - } - - if (!result) - result = atomic_read(&dreq->error); - if (!result) - result = atomic_read(&dreq->count); - -out: - kref_put(&dreq->kref, nfs_direct_req_release); - return (ssize_t) result; -} - static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) { ssize_t result; @@ -366,7 +361,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); nfs_direct_read_schedule(dreq, user_addr, count, file_offset); - result = nfs_direct_read_wait(dreq, clnt->cl_intr); + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; -- cgit v1.2.3 From 93619e5989173614bef0013b0bb8a3fe3dbd5a95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:31 -0500 Subject: NFS: create common routine for allocating nfs_direct_req Factor out a small common piece of the path that allocate nfs_direct_req structures. Test plan: Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2593f47eaff0..489f736d0f5d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -152,6 +152,24 @@ static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) kfree(pages); } +static inline struct nfs_direct_req *nfs_direct_req_alloc(void) +{ + struct nfs_direct_req *dreq; + + dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + if (!dreq) + return NULL; + + kref_init(&dreq->kref); + init_waitqueue_head(&dreq->wait); + INIT_LIST_HEAD(&dreq->list); + dreq->iocb = NULL; + atomic_set(&dreq->count, 0); + atomic_set(&dreq->error, 0); + + return dreq; +} + static void nfs_direct_req_release(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); @@ -194,17 +212,10 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) unsigned int reads = 0; unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + dreq = nfs_direct_req_alloc(); if (!dreq) return NULL; - kref_init(&dreq->kref); - init_waitqueue_head(&dreq->wait); - INIT_LIST_HEAD(&dreq->list); - dreq->iocb = NULL; - atomic_set(&dreq->count, 0); - atomic_set(&dreq->error, 0); - list = &dreq->list; for(;;) { struct nfs_read_data *data = nfs_readdata_alloc(rpages); -- cgit v1.2.3 From 63ab46abc70b01cb0711301f5ddb08c1c0bb9b1c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:31 -0500 Subject: NFS: create common routine for handling direct I/O completion Factor out the common piece of completing an NFS direct I/O request. Test plan: Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 489f736d0f5d..4df21ce28e17 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -200,6 +200,30 @@ out: return (ssize_t) result; } +/* + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. This could be long *after* we are woken up in + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). + * + * In addition, synchronous I/O uses a stack-allocated iocb. Thus we + * can't trust the iocb is still valid here if this is a synchronous + * request. If the waiter is woken prematurely, the iocb is long gone. + */ +static void nfs_direct_complete(struct nfs_direct_req *dreq) +{ + nfs_free_user_pages(dreq->pages, dreq->npages, 1); + + if (dreq->iocb) { + long res = atomic_read(&dreq->error); + if (!res) + res = atomic_read(&dreq->count); + aio_complete(dreq->iocb, res, 0); + } else + wake_up(&dreq->wait); + + kref_put(&dreq->kref, nfs_direct_req_release); +} + /* * Note we also set the number of requests we have in the dreq when we are * done. This prevents races with I/O completion so we will always wait @@ -245,15 +269,6 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) return dreq; } -/* - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. 
This could be long *after* we are woken up in - * nfs_direct_wait (for instance, if someone hits ^C on a slow server). - * - * In addition, synchronous I/O uses a stack-allocated iocb. Thus we - * can't trust the iocb is still valid here if this is a synchronous - * request. If the waiter is woken prematurely, the iocb is long gone. - */ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; @@ -266,17 +281,8 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) else atomic_set(&dreq->error, task->tk_status); - if (unlikely(atomic_dec_and_test(&dreq->complete))) { - nfs_free_user_pages(dreq->pages, dreq->npages, 1); - if (dreq->iocb) { - long res = atomic_read(&dreq->error); - if (!res) - res = atomic_read(&dreq->count); - aio_complete(dreq->iocb, res, 0); - } else - wake_up(&dreq->wait); - kref_put(&dreq->kref, nfs_direct_req_release); - } + if (unlikely(atomic_dec_and_test(&dreq->complete))) + nfs_direct_complete(dreq); } static const struct rpc_call_ops nfs_read_direct_ops = { -- cgit v1.2.3 From 462d5b3296b56289efec426499a83faad4c08d9e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:32 -0500 Subject: NFS: make direct write path generate write requests concurrently Duplicate infrastructure from direct read path that will allow write path to generate multiple write requests concurrently. This will enable us to add support for aio in this path. Temporarily we will lose the ability to do UNSTABLE writes followed by a COMMIT in the direct write path. However, all applications I am aware of that use NFS O_DIRECT currently write in relatively small chunks, so this should not be inconvenient in any way. Test plan: Millions of fsx-odirect ops. OraSim. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 240 ++++++++++++++++++++++++++++++++----------------- fs/nfs/write.c | 3 +- include/linux/nfs_fs.h | 2 + 3 files changed, 162 insertions(+), 83 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4df21ce28e17..dea3239cdded 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -384,106 +384,185 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size return result; } -static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) +static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) { - const unsigned int wsize = NFS_SERVER(inode)->wsize; - size_t request; - int curpage, need_commit; - ssize_t result, tot_bytes; - struct nfs_writeverf first_verf; - struct nfs_write_data *wdata; - - wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); - if (!wdata) - return -ENOMEM; + struct list_head *list; + struct nfs_direct_req *dreq; + unsigned int writes = 0; + unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - wdata->inode = inode; - wdata->cred = ctx->cred; - wdata->args.fh = NFS_FH(inode); - wdata->args.context = ctx; - wdata->args.stable = NFS_UNSTABLE; - if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) - wdata->args.stable = NFS_FILE_SYNC; - wdata->res.fattr = &wdata->fattr; - wdata->res.verf = &wdata->verf; + dreq = nfs_direct_req_alloc(); + if (!dreq) + return NULL; + + list = &dreq->list; + for(;;) { + struct nfs_write_data *data = nfs_writedata_alloc(wpages); + + if (unlikely(!data)) { + while (!list_empty(list)) { + data = list_entry(list->next, + struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_free(data); + } + kref_put(&dreq->kref, nfs_direct_req_release); + return NULL; + } + + INIT_LIST_HEAD(&data->pages); + list_add(&data->pages, list); + + data->req = (struct nfs_page *) dreq; + writes++; + if (nbytes <= wsize) + break; + nbytes -= wsize; + } + kref_get(&dreq->kref); + atomic_set(&dreq->complete, writes); + return dreq; +} + +/* + * Collects and returns the final error value/byte-count. + */ +static ssize_t nfs_direct_write_wait(struct nfs_direct_req *dreq, int intr) +{ + int result = 0; + + if (intr) { + result = wait_event_interruptible(dreq->wait, + (atomic_read(&dreq->complete) == 0)); + } else { + wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); + } + + if (!result) + result = atomic_read(&dreq->error); + if (!result) + result = atomic_read(&dreq->count); + + kref_put(&dreq->kref, nfs_direct_req_release); + return (ssize_t) result; +} + +static void nfs_direct_write_result(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + int status = task->tk_status; + + if (nfs_writeback_done(task, data) != 0) + return; + /* If the server fell back to an UNSTABLE write, it's an error. 
*/ + if (unlikely(data->res.verf->committed != NFS_FILE_SYNC)) + status = -EIO; + + if (likely(status >= 0)) + atomic_add(data->res.count, &dreq->count); + else + atomic_set(&dreq->error, status); + + if (unlikely(atomic_dec_and_test(&dreq->complete))) + nfs_direct_complete(dreq); +} + +static const struct rpc_call_ops nfs_write_direct_ops = { + .rpc_call_done = nfs_direct_write_result, + .rpc_release = nfs_writedata_release, +}; + +/* + * For each nfs_write_data struct that was allocated on the list, dispatch + * an NFS WRITE operation + * + * XXX: For now, support only FILE_SYNC writes. Later we may add + * support for UNSTABLE + COMMIT. + */ +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset) +{ + struct list_head *list = &dreq->list; + struct page **pages = dreq->pages; + size_t wsize = NFS_SERVER(inode)->wsize; + unsigned int curpage, pgbase; - nfs_begin_data_update(inode); -retry: - need_commit = 0; - tot_bytes = 0; curpage = 0; - request = count; - wdata->args.pgbase = user_addr & ~PAGE_MASK; - wdata->args.offset = file_offset; + pgbase = user_addr & ~PAGE_MASK; do { - wdata->args.count = request; - if (wdata->args.count > wsize) - wdata->args.count = wsize; - wdata->args.pages = &pages[curpage]; + struct nfs_write_data *data; + size_t bytes; + + bytes = wsize; + if (count < wsize) + bytes = count; + + data = list_entry(list->next, struct nfs_write_data, pages); + list_del_init(&data->pages); + + data->inode = inode; + data->cred = ctx->cred; + data->args.fh = NFS_FH(inode); + data->args.context = ctx; + data->args.offset = file_offset; + data->args.pgbase = pgbase; + data->args.pages = &pages[curpage]; + data->args.count = bytes; + data->res.fattr = &data->fattr; + data->res.count = bytes; + + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_write_direct_ops, data); + NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); - dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", - wdata->args.count, (long long) wdata->args.offset, - user_addr + tot_bytes, wdata->args.pgbase, curpage); + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long) inode; lock_kernel(); - result = NFS_PROTO(inode)->write(wdata); + rpc_execute(&data->task); unlock_kernel(); - if (result <= 0) { - if (tot_bytes > 0) - break; - goto out; - } + dfprintk(VFS, "NFS: %4d initiated direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + bytes, + (unsigned long long)data->args.offset); - if (tot_bytes == 0) - memcpy(&first_verf.verifier, &wdata->verf.verifier, - sizeof(first_verf.verifier)); - if (wdata->verf.committed != NFS_FILE_SYNC) { - need_commit = 1; - if (memcmp(&first_verf.verifier, &wdata->verf.verifier, - sizeof(first_verf.verifier))) - goto sync_retry; - } + file_offset += bytes; + pgbase += bytes; + curpage += pgbase >> PAGE_SHIFT; + pgbase &= ~PAGE_MASK; - tot_bytes += result; + count -= bytes; + } while (count != 0); +} - /* in case of a short write: stop now, let the app recover */ - if (result < wdata->args.count) - break; +static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) +{ + ssize_t result; + sigset_t oldset; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_direct_req *dreq; - wdata->args.offset += 
result; - wdata->args.pgbase += result; - curpage += wdata->args.pgbase >> PAGE_SHIFT; - wdata->args.pgbase &= ~PAGE_MASK; - request -= result; - } while (request != 0); + dreq = nfs_direct_write_alloc(count, NFS_SERVER(inode)->wsize); + if (!dreq) + return -ENOMEM; - /* - * Commit data written so far, even in the event of an error - */ - if (need_commit) { - wdata->args.count = tot_bytes; - wdata->args.offset = file_offset; + dreq->pages = pages; + dreq->npages = nr_pages; - lock_kernel(); - result = NFS_PROTO(inode)->commit(wdata); - unlock_kernel(); + nfs_begin_data_update(inode); - if (result < 0 || memcmp(&first_verf.verifier, - &wdata->verf.verifier, - sizeof(first_verf.verifier)) != 0) - goto sync_retry; - } - result = tot_bytes; + rpc_clnt_sigmask(clnt, &oldset); + nfs_direct_write_schedule(dreq, inode, ctx, user_addr, count, + file_offset); + result = nfs_direct_write_wait(dreq, clnt->cl_intr); + rpc_clnt_sigunmask(clnt, &oldset); -out: nfs_end_data_update(inode); - nfs_writedata_free(wdata); - return result; -sync_retry: - wdata->args.stable = NFS_FILE_SYNC; - goto retry; + return result; } /* @@ -515,7 +594,6 @@ static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ct nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size); result = nfs_direct_write_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); - nfs_free_user_pages(pages, page_count, 0); if (result <= 0) { if (tot_bytes > 0) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5912274ff1a1..875f5b060533 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -77,7 +77,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct inode *, struct page *, unsigned int, unsigned int); -static int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); static int nfs_wait_on_write_congestion(struct address_space *, int); static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, @@ -1183,7 +1182,7 @@ static const struct rpc_call_ops nfs_write_full_ops = { /* * This function is called when the WRITE call is complete. */ -static int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) +int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f55827be4f8e..6c130a6b0f4d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -407,6 +407,8 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); +extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); +extern void nfs_writedata_release(void *); /* * Try to write back everything synchronously (but check the -- cgit v1.2.3 From 47989d7454398827500d0e73766270986a3b488f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:32 -0500 Subject: NFS: remove support for multi-segment iovs in the direct write path Eliminate the persistent use of automatic storage in all parts of the NFS client's direct write path to pave the way for introducing support for aio against files opened with the O_DIRECT flag. 
Test plan: Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 68 +++++++++++++++------------------------------------------ 1 file changed, 17 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index dea3239cdded..9a7d45907054 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -510,6 +510,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode data->args.count = bytes; data->res.fattr = &data->fattr; data->res.count = bytes; + data->res.verf = &data->verf; rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs_write_direct_ops, data); @@ -538,7 +539,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode } while (count != 0); } -static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) +static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { ssize_t result; sigset_t oldset; @@ -552,6 +553,8 @@ static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context dreq->pages = pages; dreq->npages = nr_pages; + nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); + nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); @@ -565,50 +568,6 @@ static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context return result; } -/* - * Upon return, generic_file_direct_IO invalidates any cached pages - * that non-direct readers might access, so they will pick up these - * writes immediately. 
- */ -static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) -{ - ssize_t tot_bytes = 0; - unsigned long seg = 0; - - while ((seg < nr_segs) && (tot_bytes >= 0)) { - ssize_t result; - int page_count; - struct page **pages; - const struct iovec *vec = &iov[seg++]; - unsigned long user_addr = (unsigned long) vec->iov_base; - size_t size = vec->iov_len; - - page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - if (tot_bytes > 0) - break; - return page_count; - } - - nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size); - result = nfs_direct_write_seg(inode, ctx, user_addr, size, - file_offset, pages, page_count); - - if (result <= 0) { - if (tot_bytes > 0) - break; - return result; - } - nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); - tot_bytes += result; - file_offset += result; - if (result < size) - break; - } - return tot_bytes; -} - /** * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block @@ -701,14 +660,13 @@ out: ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { ssize_t retval; + int page_count; + struct page **pages; struct file *file = iocb->ki_filp; struct nfs_open_context *ctx = (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - struct iovec iov = { - .iov_base = (char __user *)buf, - }; dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, @@ -729,17 +687,25 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t retval = 0; if (!count) goto out; - iov.iov_len = count, retval = -EFAULT; - if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) + if (!access_ok(VERIFY_READ, buf, count)) goto out; retval = nfs_sync_mapping(mapping); if (retval) goto out; - retval = nfs_direct_write(inode, ctx, &iov, pos, 1); + page_count = nfs_get_user_pages(WRITE, (unsigned long) buf, + count, &pages); + if (page_count < 0) { + nfs_free_user_pages(pages, 0, 0); + retval = page_count; + goto out; + } + + retval = nfs_direct_write(inode, ctx, (unsigned long) buf, count, + pos, pages, page_count); if (mapping->nrpages) invalidate_inode_pages2(mapping); if (retval > 0) -- cgit v1.2.3 From c89f2ee5f9223b864725f7344f24a037dfa76568 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:33 -0500 Subject: NFS: make iocb available everywhere in direct write path Pass the iocb argument all the way down to the direct write request scheduler, and make it available in nfs_direct_write_result. Test plan: Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 46 ++++++++++++++-------------------------------- 1 file changed, 14 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9a7d45907054..9d57a299824c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -424,29 +424,6 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize return dreq; } -/* - * Collects and returns the final error value/byte-count. 
- */ -static ssize_t nfs_direct_write_wait(struct nfs_direct_req *dreq, int intr) -{ - int result = 0; - - if (intr) { - result = wait_event_interruptible(dreq->wait, - (atomic_read(&dreq->complete) == 0)); - } else { - wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); - } - - if (!result) - result = atomic_read(&dreq->error); - if (!result) - result = atomic_read(&dreq->count); - - kref_put(&dreq->kref, nfs_direct_req_release); - return (ssize_t) result; -} - static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -480,8 +457,12 @@ static const struct rpc_call_ops nfs_write_direct_ops = { * XXX: For now, support only FILE_SYNC writes. Later we may add * support for UNSTABLE + COMMIT. */ -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset) { + struct file *file = dreq->filp; + struct inode *inode = file->f_mapping->host; + struct nfs_open_context *ctx = (struct nfs_open_context *) + file->private_data; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; size_t wsize = NFS_SERVER(inode)->wsize; @@ -539,10 +520,11 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode } while (count != 0); } -static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) +static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { ssize_t result; sigset_t oldset; + struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; @@ -552,15 +534,18 @@ static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ct dreq->pages = pages; dreq->npages = nr_pages; + dreq->inode = inode; + dreq->filp = iocb->ki_filp; + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_write_schedule(dreq, inode, ctx, user_addr, count, - file_offset); - result = nfs_direct_write_wait(dreq, clnt->cl_intr); + nfs_direct_write_schedule(dreq, user_addr, count, file_offset); + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); nfs_end_data_update(inode); @@ -663,10 +648,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t int page_count; struct page **pages; struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx = - (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, @@ -704,7 +686,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t goto out; } - retval = nfs_direct_write(inode, ctx, (unsigned long) buf, count, + retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos, pages, page_count); if (mapping->nrpages) invalidate_inode_pages2(mapping); -- cgit v1.2.3 From 9eafa8cc521b489f205bf7b0634c99e34e046606 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:33 -0500 Subject: 
NFS: support EIOCBQUEUED return in direct write path For async iocb's, the NFS direct write path now returns EIOCBQUEUED, and calls aio_complete when all the requested writes are finished. The synchronous part of the NFS direct write path behaves exactly as it was before. Shared mapped NFS files will have some coherency difficulties when accessed concurrently with aio+dio. Will need to explore how this is handled in the local file system case. Test plan: aio-stress with "-O". OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9d57a299824c..df86e526702f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -441,8 +441,10 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) else atomic_set(&dreq->error, status); - if (unlikely(atomic_dec_and_test(&dreq->complete))) + if (unlikely(atomic_dec_and_test(&dreq->complete))) { + nfs_end_data_update(data->inode); nfs_direct_complete(dreq); + } } static const struct rpc_call_ops nfs_write_direct_ops = { @@ -548,8 +550,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); - nfs_end_data_update(inode); - return result; } @@ -655,10 +655,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); - retval = -EINVAL; - if (!is_sync_kiocb(iocb)) - goto out; - retval = generic_write_checks(file, &pos, &count, 0); if (retval) goto out; @@ -688,8 +684,18 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos, pages, page_count); + + /* + * XXX: nfs_end_data_update() already ensures this file's + * cached data is subsequently invalidated. Do we really + * need to call invalidate_inode_pages2() again here? + * + * For aio writes, this invalidation will almost certainly + * occur before the writes complete. Kind of racey. + */ if (mapping->nrpages) invalidate_inode_pages2(mapping); + if (retval > 0) iocb->ki_pos = pos + retval; -- cgit v1.2.3 From 88467055f7654302c12df74e5fe4d12516656a39 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:34 -0500 Subject: NFS: clean up comments and tab damage in direct.c Clean up tab damage and comments. Replace "file_offset" with more commonly used "pos". Test plan: Compile with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 54 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index df86e526702f..bcbc213b4033 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -7,11 +7,11 @@ * * There are important applications whose performance or correctness * depends on uncached access to file data. Database clusters - * (multiple copies of the same instance running on separate hosts) + * (multiple copies of the same instance running on separate hosts) * implement their own cache coherency protocol that subsumes file - * system cache protocols. Applications that process datasets - * considerably larger than the client's memory do not always benefit - * from a local cache. 
A streaming video server, for instance, has no + * system cache protocols. Applications that process datasets + * considerably larger than the client's memory do not always benefit + * from a local cache. A streaming video server, for instance, has no * need to cache the contents of a file. * * When an application requests uncached I/O, all read and write requests @@ -34,6 +34,7 @@ * 08 Jun 2003 Port to 2.5 APIs --cel * 31 Mar 2004 Handle direct I/O without VFS support --cel * 15 Sep 2004 Parallel async reads --cel + * 04 May 2005 support O_DIRECT with aio --cel * */ @@ -67,11 +68,11 @@ static kmem_cache_t *nfs_direct_cachep; */ struct nfs_direct_req { struct kref kref; /* release manager */ - struct list_head list; /* nfs_read_data structs */ + struct list_head list; /* nfs_read/write_data structs */ struct file * filp; /* file descriptor */ struct kiocb * iocb; /* controlling i/o request */ wait_queue_head_t wait; /* wait for i/o completion */ - struct inode * inode; /* target file of I/O */ + struct inode * inode; /* target file of i/o */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ atomic_t complete, /* i/os we're waiting for */ @@ -110,7 +111,6 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz size_t array_size; /* set an arbitrary limit to prevent type overflow */ - /* XXX: this can probably be as large as INT_MAX */ if (size > MAX_DIRECTIO_SIZE) { *pages = NULL; return -EFBIG; @@ -294,7 +294,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = { * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct file *file = dreq->filp; struct inode *inode = file->f_mapping->host; @@ -322,7 +322,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; - data->args.offset = file_offset; + data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = &pages[curpage]; data->args.count = bytes; @@ -347,7 +347,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long bytes, (unsigned long long)data->args.offset); - file_offset += bytes; + pos += bytes; pgbase += bytes; curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; @@ -356,7 +356,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long } while (count != 0); } -static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) +static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) { ssize_t result; sigset_t oldset; @@ -377,7 +377,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_read_schedule(dreq, user_addr, count, file_offset); + nfs_direct_read_schedule(dreq, user_addr, count, pos); result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); @@ -459,7 +459,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = { * XXX: For now, support only FILE_SYNC writes. 
Later we may add * support for UNSTABLE + COMMIT. */ -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct file *file = dreq->filp; struct inode *inode = file->f_mapping->host; @@ -487,7 +487,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; - data->args.offset = file_offset; + data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = &pages[curpage]; data->args.count = bytes; @@ -513,7 +513,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long bytes, (unsigned long long)data->args.offset); - file_offset += bytes; + pos += bytes; pgbase += bytes; curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; @@ -522,7 +522,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long } while (count != 0); } -static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) +static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) { ssize_t result; sigset_t oldset; @@ -546,7 +546,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_write_schedule(dreq, user_addr, count, file_offset); + nfs_direct_write_schedule(dreq, user_addr, count, pos); result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); @@ -557,18 +557,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @buf: user's buffer into which to read data - * count: number of bytes to read - * pos: byte offset in file where reading starts + * @count: number of bytes to read + * @pos: byte offset in file where reading starts * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if * the request starts before the end of the file. For that check * to work, we must generate a GETATTR before each direct read, and * even then there is a window between the GETATTR and the subsequent - * READ where the file size could change. So our preference is simply + * READ where the file size could change. Our preference is simply * to do all reads the application wants, and the server will take * care of managing the end of file boundary. 
- * + * * This function also eliminates unnecessarily updating the file's * atime locally, as the NFS server sets the file's atime, and this * client must read the updated atime from the server back into its @@ -621,8 +621,8 @@ out: * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @buf: user's buffer from which to write data - * count: number of bytes to write - * pos: byte offset in file where writing starts + * @count: number of bytes to write + * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -703,6 +703,10 @@ out: return retval; } +/** + * nfs_init_directcache - create a slab cache for nfs_direct_req structures + * + */ int nfs_init_directcache(void) { nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", @@ -715,6 +719,10 @@ int nfs_init_directcache(void) return 0; } +/** + * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures + * + */ void nfs_destroy_directcache(void) { if (kmem_cache_destroy(nfs_direct_cachep)) -- cgit v1.2.3 From 15ce4a0c1ce0d5e288398cb9e5493fd4e55e2025 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:34 -0500 Subject: NFS: Replace atomic_t variables in nfs_direct_req with a single spin lock Three atomic_t variables cause a lot of bus locking. Because they are all used in the same places in the code, just use a single spin lock. Now that the atomic_t variables are gone, we can remove the request size limitation since the code no longer depends on the limited width of atomic_t on some platforms. Test plan: Compile with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx operations, iozone, OraSim. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 81 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bcbc213b4033..3de7c4b07968 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -58,7 +58,6 @@ #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_VFS -#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty); static kmem_cache_t *nfs_direct_cachep; @@ -68,6 +67,8 @@ static kmem_cache_t *nfs_direct_cachep; */ struct nfs_direct_req { struct kref kref; /* release manager */ + + /* I/O parameters */ struct list_head list; /* nfs_read/write_data structs */ struct file * filp; /* file descriptor */ struct kiocb * iocb; /* controlling i/o request */ @@ -75,12 +76,14 @@ struct nfs_direct_req { struct inode * inode; /* target file of i/o */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ - atomic_t complete, /* i/os we're waiting for */ - count, /* bytes actually processed */ + + /* completion state */ + spinlock_t lock; /* protect completion state */ + int outstanding; /* i/os we're waiting for */ + ssize_t count, /* bytes actually processed */ error; /* any reported error */ }; - /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -110,12 +113,6 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz unsigned long page_count; size_t array_size; - /* set an arbitrary limit to prevent type overflow */ - if (size > MAX_DIRECTIO_SIZE) { - *pages = NULL; - return -EFBIG; - } - page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; page_count -= user_addr >> PAGE_SHIFT; @@ -164,8 +161,10 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) init_waitqueue_head(&dreq->wait); INIT_LIST_HEAD(&dreq->list); dreq->iocb = NULL; - atomic_set(&dreq->count, 0); - atomic_set(&dreq->error, 0); + spin_lock_init(&dreq->lock); + dreq->outstanding = 0; + dreq->count = 0; + dreq->error = 0; return dreq; } @@ -181,19 +180,18 @@ static void nfs_direct_req_release(struct kref *kref) */ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) { - int result = -EIOCBQUEUED; + ssize_t result = -EIOCBQUEUED; /* Async requests don't wait here */ if (dreq->iocb) goto out; - result = wait_event_interruptible(dreq->wait, - (atomic_read(&dreq->complete) == 0)); + result = wait_event_interruptible(dreq->wait, (dreq->outstanding == 0)); if (!result) - result = atomic_read(&dreq->error); + result = dreq->error; if (!result) - result = atomic_read(&dreq->count); + result = dreq->count; out: kref_put(&dreq->kref, nfs_direct_req_release); @@ -214,9 +212,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) nfs_free_user_pages(dreq->pages, dreq->npages, 1); if (dreq->iocb) { - long res = atomic_read(&dreq->error); + long res = (long) dreq->error; if (!res) - res = atomic_read(&dreq->count); + res = (long) dreq->count; aio_complete(dreq->iocb, res, 0); } else wake_up(&dreq->wait); @@ -233,7 +231,6 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) { struct list_head *list; struct nfs_direct_req *dreq; - unsigned int reads = 0; unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; dreq = nfs_direct_req_alloc(); @@ -259,13 +256,12 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t 
rsize) list_add(&data->pages, list); data->req = (struct nfs_page *) dreq; - reads++; + dreq->outstanding++; if (nbytes <= rsize) break; nbytes -= rsize; } kref_get(&dreq->kref); - atomic_set(&dreq->complete, reads); return dreq; } @@ -276,13 +272,21 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) if (nfs_readpage_result(task, data) != 0) return; + + spin_lock(&dreq->lock); + if (likely(task->tk_status >= 0)) - atomic_add(data->res.count, &dreq->count); + dreq->count += data->res.count; else - atomic_set(&dreq->error, task->tk_status); + dreq->error = task->tk_status; + + if (--dreq->outstanding) { + spin_unlock(&dreq->lock); + return; + } - if (unlikely(atomic_dec_and_test(&dreq->complete))) - nfs_direct_complete(dreq); + spin_unlock(&dreq->lock); + nfs_direct_complete(dreq); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -388,7 +392,6 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize { struct list_head *list; struct nfs_direct_req *dreq; - unsigned int writes = 0; unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; dreq = nfs_direct_req_alloc(); @@ -414,16 +417,19 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize list_add(&data->pages, list); data->req = (struct nfs_page *) dreq; - writes++; + dreq->outstanding++; if (nbytes <= wsize) break; nbytes -= wsize; } kref_get(&dreq->kref); - atomic_set(&dreq->complete, writes); return dreq; } +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. + */ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -436,15 +442,22 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) if (unlikely(data->res.verf->committed != NFS_FILE_SYNC)) status = -EIO; + spin_lock(&dreq->lock); + if (likely(status >= 0)) - atomic_add(data->res.count, &dreq->count); + dreq->count += data->res.count; else - atomic_set(&dreq->error, status); + dreq->error = status; - if (unlikely(atomic_dec_and_test(&dreq->complete))) { - nfs_end_data_update(data->inode); - nfs_direct_complete(dreq); + if (--dreq->outstanding) { + spin_unlock(&dreq->lock); + return; } + + spin_unlock(&dreq->lock); + + nfs_end_data_update(data->inode); + nfs_direct_complete(dreq); } static const struct rpc_call_ops nfs_write_direct_ops = { -- cgit v1.2.3 From a37ec012d7fd352648c8455d3396ea24001efcd3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:35 -0500 Subject: NFS: fix data_update accounting in NFS direct I/O path ^C against "iozone -I" is hitting the assertion in nfs_clear_inode(). Test plan: "iozone -i0 -I -a -c" against a slow server, then control C. This should not cause an oops. 
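[Editor's sketch, not part of the patch: the three added lines below pin the target inode with igrab() when the direct request is set up and release it with iput() when the request completes, so the inode is not cleared while asynchronous direct I/O still references it. A stripped-down illustration of that lifetime rule; the sketch_* names are hypothetical.]

#include <linux/fs.h>

/* Hold an inode reference for as long as async direct I/O may use it. */
struct direct_req_sketch {
	struct inode *inode;	/* target file of i/o */
};

static void sketch_start(struct direct_req_sketch *req, struct inode *inode)
{
	igrab(inode);		/* reference is dropped in sketch_complete() */
	req->inode = inode;
}

static void sketch_complete(struct direct_req_sketch *req)
{
	/* ... wake the waiter or call aio_complete() here ... */
	iput(req->inode);	/* release the reference taken in sketch_start() */
}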
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3de7c4b07968..737990dd4dfe 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -219,6 +219,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) } else wake_up(&dreq->wait); + iput(dreq->inode); kref_put(&dreq->kref, nfs_direct_req_release); } @@ -374,6 +375,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size dreq->pages = pages; dreq->npages = nr_pages; + igrab(inode); dreq->inode = inode; dreq->filp = iocb->ki_filp; if (!is_sync_kiocb(iocb)) @@ -549,6 +551,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz dreq->pages = pages; dreq->npages = nr_pages; + igrab(inode); dreq->inode = inode; dreq->filp = iocb->ki_filp; if (!is_sync_kiocb(iocb)) -- cgit v1.2.3 From e17b1fc4b35399935f00a635206e183d9292fe4f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:35 -0500 Subject: NFS: Make nfs_commit_alloc() extern We need to use nfs_commit_alloc() in fs/nfs/direct.c. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 4 ++-- include/linux/nfs_fs.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 875f5b060533..f7c8be0beccf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -91,7 +91,7 @@ static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) +struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); @@ -112,7 +112,7 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) return p; } -static inline void nfs_commit_free(struct nfs_write_data *p) +void nfs_commit_free(struct nfs_write_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6c130a6b0f4d..423f202b881c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -410,6 +410,11 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); extern void nfs_writedata_release(void *); +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount); +void nfs_commit_free(struct nfs_write_data *p); +#endif + /* * Try to write back everything synchronously (but check the * return value!) -- cgit v1.2.3 From fad61490419b3e494f300e9b2579810ef3bcda31 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:36 -0500 Subject: nfs: Use UNSTABLE + COMMIT for NFS O_DIRECT writes Currently NFS O_DIRECT writes use FILE_SYNC so that a COMMIT is not necessary. This simplifies the internal logic, but this could be a difficult workload for some servers. Instead, let's send UNSTABLE writes, and after they all complete, send a COMMIT for the dirty range. After the COMMIT returns successfully, then do the wake_up or fire off aio_complete(). Test plan: Async direct I/O tests against Solaris (or any server that requires committed unstable writes). Reboot server during test. 
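[Editor's sketch, not part of the patch: the rewritten write path below behaves as a small state machine. WRITEs go out UNSTABLE; when the last reply arrives the request either sends one COMMIT for the dirty range, resends the saved writes as stable writes if a verifier mismatch shows the server lost data, or completes immediately if every reply was already FILE_SYNC. A compressed illustration, with hypothetical SKETCH_* names mirroring the NFS_ODIRECT_* flags in the diff:]

/* Completion flags, mirroring NFS_ODIRECT_DO_COMMIT / _RESCHED_WRITES. */
#define SKETCH_DO_COMMIT	1	/* an unstable reply was received */
#define SKETCH_RESCHED_WRITES	2	/* write verification failed */

struct sketch_req { int flags; };

static void sketch_send_commit(struct sketch_req *req)   { /* COMMIT the written range */ }
static void sketch_resend_writes(struct sketch_req *req) { /* resend saved writes as stable */ }
static void sketch_finish(struct sketch_req *req)        { /* wake_up() waiter or aio_complete() */ }

/* Runs after the last WRITE reply (and again after a COMMIT reply). */
static void sketch_write_complete(struct sketch_req *req)
{
	int flags = req->flags;

	req->flags = 0;
	switch (flags) {
	case SKETCH_DO_COMMIT:		/* all data reached the server unstable */
		sketch_send_commit(req);
		break;
	case SKETCH_RESCHED_WRITES:	/* verifier changed: data must be resent */
		sketch_resend_writes(req);
		break;
	default:			/* FILE_SYNC replies or a successful COMMIT */
		sketch_finish(req);
	}
}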
Based on an earlier patch by Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 224 +++++++++++++++++++++++++++++++++++++++++++------ include/linux/nfs_fs.h | 1 + 2 files changed, 200 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 737990dd4dfe..f0f2053c7a61 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -69,11 +69,15 @@ struct nfs_direct_req { struct kref kref; /* release manager */ /* I/O parameters */ - struct list_head list; /* nfs_read/write_data structs */ + struct list_head list, /* nfs_read/write_data structs */ + rewrite_list; /* saved nfs_write_data structs */ struct file * filp; /* file descriptor */ struct kiocb * iocb; /* controlling i/o request */ wait_queue_head_t wait; /* wait for i/o completion */ struct inode * inode; /* target file of i/o */ + unsigned long user_addr; /* location of user's buffer */ + size_t user_count; /* total bytes to move */ + loff_t pos; /* starting offset in file */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ @@ -82,8 +86,18 @@ struct nfs_direct_req { int outstanding; /* i/os we're waiting for */ ssize_t count, /* bytes actually processed */ error; /* any reported error */ + + /* commit state */ + struct nfs_write_data * commit_data; /* special write_data for commits */ + int flags; +#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ +#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ + struct nfs_writeverf verf; /* unstable write verifier */ }; +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync); +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); + /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -160,11 +174,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) kref_init(&dreq->kref); init_waitqueue_head(&dreq->wait); INIT_LIST_HEAD(&dreq->list); + INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; spin_lock_init(&dreq->lock); dreq->outstanding = 0; dreq->count = 0; dreq->error = 0; + dreq->flags = 0; return dreq; } @@ -299,7 +315,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = { * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) { struct file *file = dreq->filp; struct inode *inode = file->f_mapping->host; @@ -307,11 +323,13 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long file->private_data; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; + size_t count = dreq->user_count; + loff_t pos = dreq->pos; size_t rsize = NFS_SERVER(inode)->rsize; unsigned int curpage, pgbase; curpage = 0; - pgbase = user_addr & ~PAGE_MASK; + pgbase = dreq->user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; size_t bytes; @@ -373,6 +391,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size if (!dreq) return -ENOMEM; + dreq->user_addr = user_addr; + dreq->user_count = count; + dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; igrab(inode); @@ -383,13 +404,137 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - 
nfs_direct_read_schedule(dreq, user_addr, count, pos); + nfs_direct_read_schedule(dreq); result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; } +static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) +{ + list_splice_init(&dreq->rewrite_list, &dreq->list); + while (!list_empty(&dreq->list)) { + struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_release(data); + } +} + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) +{ + struct list_head *pos; + + list_splice_init(&dreq->rewrite_list, &dreq->list); + list_for_each(pos, &dreq->list) + dreq->outstanding++; + dreq->count = 0; + + nfs_direct_write_schedule(dreq, FLUSH_STABLE); +} + +static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + + /* Call the NFS version-specific code */ + if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) + return; + if (unlikely(task->tk_status < 0)) { + dreq->error = task->tk_status; + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } + if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { + dprintk("NFS: %5u commit verify failed\n", task->tk_pid); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } + + dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status); + nfs_direct_write_complete(dreq, data->inode); +} + +static const struct rpc_call_ops nfs_commit_direct_ops = { + .rpc_call_done = nfs_direct_commit_result, + .rpc_release = nfs_commit_release, +}; + +static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) +{ + struct file *file = dreq->filp; + struct nfs_open_context *ctx = (struct nfs_open_context *) + file->private_data; + struct nfs_write_data *data = dreq->commit_data; + struct rpc_task *task = &data->task; + + data->inode = dreq->inode; + data->cred = ctx->cred; + + data->args.fh = NFS_FH(data->inode); + data->args.offset = dreq->pos; + data->args.count = dreq->user_count; + data->res.count = 0; + data->res.fattr = &data->fattr; + data->res.verf = &data->verf; + + rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC, + &nfs_commit_direct_ops, data); + NFS_PROTO(data->inode)->commit_setup(data, 0); + + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long)data->inode; + /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ + dreq->commit_data = NULL; + + dprintk("NFS: %5u initiated commit call\n", task->tk_pid); + + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); +} + +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +{ + int flags = dreq->flags; + + dreq->flags = 0; + switch (flags) { + case NFS_ODIRECT_DO_COMMIT: + nfs_direct_commit_schedule(dreq); + break; + case NFS_ODIRECT_RESCHED_WRITES: + nfs_direct_write_reschedule(dreq); + break; + default: + nfs_end_data_update(inode); + if (dreq->commit_data != NULL) + nfs_commit_free(dreq->commit_data); + nfs_direct_free_writedata(dreq); + nfs_direct_complete(dreq); + } +} + +static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +{ + dreq->commit_data = nfs_commit_alloc(0); + if (dreq->commit_data != NULL) + dreq->commit_data->req = (struct nfs_page *) dreq; +} +#else +static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +{ + dreq->commit_data = NULL; +} + +static 
void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +{ + nfs_end_data_update(inode); + nfs_direct_free_writedata(dreq); + nfs_direct_complete(dreq); +} +#endif + static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) { struct list_head *list; @@ -424,14 +569,13 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize break; nbytes -= wsize; } + + nfs_alloc_commit_data(dreq); + kref_get(&dreq->kref); return dreq; } -/* - * NB: Return the value of the first error return code. Subsequent - * errors after the first one are ignored. - */ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -440,41 +584,62 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) if (nfs_writeback_done(task, data) != 0) return; - /* If the server fell back to an UNSTABLE write, it's an error. */ - if (unlikely(data->res.verf->committed != NFS_FILE_SYNC)) - status = -EIO; spin_lock(&dreq->lock); if (likely(status >= 0)) dreq->count += data->res.count; else - dreq->error = status; + dreq->error = task->tk_status; + if (data->res.verf->committed != NFS_FILE_SYNC) { + switch (dreq->flags) { + case 0: + memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf)); + dreq->flags = NFS_ODIRECT_DO_COMMIT; + break; + case NFS_ODIRECT_DO_COMMIT: + if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { + dprintk("NFS: %5u write verify failed\n", task->tk_pid); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } + } + } + /* In case we have to resend */ + data->args.stable = NFS_FILE_SYNC; + + spin_unlock(&dreq->lock); +} + +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. + */ +static void nfs_direct_write_release(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + + spin_lock(&dreq->lock); if (--dreq->outstanding) { spin_unlock(&dreq->lock); return; } - spin_unlock(&dreq->lock); - nfs_end_data_update(data->inode); - nfs_direct_complete(dreq); + nfs_direct_write_complete(dreq, data->inode); } static const struct rpc_call_ops nfs_write_direct_ops = { .rpc_call_done = nfs_direct_write_result, - .rpc_release = nfs_writedata_release, + .rpc_release = nfs_direct_write_release, }; /* * For each nfs_write_data struct that was allocated on the list, dispatch * an NFS WRITE operation - * - * XXX: For now, support only FILE_SYNC writes. Later we may add - * support for UNSTABLE + COMMIT. 
*/ -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) { struct file *file = dreq->filp; struct inode *inode = file->f_mapping->host; @@ -482,11 +647,13 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long file->private_data; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; + size_t count = dreq->user_count; + loff_t pos = dreq->pos; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int curpage, pgbase; curpage = 0; - pgbase = user_addr & ~PAGE_MASK; + pgbase = dreq->user_addr & ~PAGE_MASK; do { struct nfs_write_data *data; size_t bytes; @@ -496,7 +663,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long bytes = count; data = list_entry(list->next, struct nfs_write_data, pages); - list_del_init(&data->pages); + list_move_tail(&data->pages, &dreq->rewrite_list); data->inode = inode; data->cred = ctx->cred; @@ -512,7 +679,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs_write_direct_ops, data); - NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); + NFS_PROTO(inode)->write_setup(data, sync); data->task.tk_priority = RPC_PRIORITY_NORMAL; data->task.tk_cookie = (unsigned long) inode; @@ -544,11 +711,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; + size_t wsize = NFS_SERVER(inode)->wsize; + int sync = 0; - dreq = nfs_direct_write_alloc(count, NFS_SERVER(inode)->wsize); + dreq = nfs_direct_write_alloc(count, wsize); if (!dreq) return -ENOMEM; + if (dreq->commit_data == NULL || count < wsize) + sync = FLUSH_STABLE; + dreq->user_addr = user_addr; + dreq->user_count = count; + dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; igrab(inode); @@ -562,7 +736,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_write_schedule(dreq, user_addr, count, pos); + nfs_direct_write_schedule(dreq, sync); result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 423f202b881c..9f84c8a5ea43 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -422,6 +422,7 @@ void nfs_commit_free(struct nfs_write_data *p); extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); +extern void nfs_commit_release(void *wdata); #else static inline int nfs_commit_inode(struct inode *inode, int how) -- cgit v1.2.3 From a8881f5a5c723f82da84b786d3ca83a0df9e0c33 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:36 -0500 Subject: NFS: O_DIRECT async IO may lose context The struct nfs_direct_req currently keeps a pointer to the file descriptor without referencing it. This may cause problems if the parent process is killed. The nfs_open_context should normally have all the information that we're currently using the filp for, and unlike fput(), is safe to release from an rpciod process context. 
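[Editor's sketch, not part of the patch: the diff below stops caching a bare struct file pointer in the request and instead takes a counted nfs_open_context with get_nfs_open_context(), dropping it with put_nfs_open_context() when the request is released; unlike fput(), that release is safe from rpciod. A minimal illustration, with hypothetical sketch_* names:]

#include <linux/aio.h>
#include <linux/nfs_fs.h>

/* Keep a referenced open context instead of an unreferenced filp. */
struct sketch_req {
	struct nfs_open_context *ctx;	/* replaces "struct file *filp" */
};

static void sketch_setup(struct sketch_req *req, struct kiocb *iocb)
{
	/* reference that the async release path may safely drop */
	req->ctx = get_nfs_open_context(iocb->ki_filp->private_data);
}

static void sketch_release(struct sketch_req *req)
{
	if (req->ctx != NULL)
		put_nfs_open_context(req->ctx);	/* rpciod-safe, unlike fput() */
}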
Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f0f2053c7a61..e6585b9ff45a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -71,7 +71,7 @@ struct nfs_direct_req { /* I/O parameters */ struct list_head list, /* nfs_read/write_data structs */ rewrite_list; /* saved nfs_write_data structs */ - struct file * filp; /* file descriptor */ + struct nfs_open_context *ctx; /* file open context info */ struct kiocb * iocb; /* controlling i/o request */ wait_queue_head_t wait; /* wait for i/o completion */ struct inode * inode; /* target file of i/o */ @@ -176,6 +176,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) INIT_LIST_HEAD(&dreq->list); INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; + dreq->ctx = NULL; spin_lock_init(&dreq->lock); dreq->outstanding = 0; dreq->count = 0; @@ -188,6 +189,9 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) static void nfs_direct_req_release(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + + if (dreq->ctx != NULL) + put_nfs_open_context(dreq->ctx); kmem_cache_free(nfs_direct_cachep, dreq); } @@ -235,7 +239,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) } else wake_up(&dreq->wait); - iput(dreq->inode); kref_put(&dreq->kref, nfs_direct_req_release); } @@ -317,10 +320,8 @@ static const struct rpc_call_ops nfs_read_direct_ops = { */ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) { - struct file *file = dreq->filp; - struct inode *inode = file->f_mapping->host; - struct nfs_open_context *ctx = (struct nfs_open_context *) - file->private_data; + struct nfs_open_context *ctx = dreq->ctx; + struct inode *inode = ctx->dentry->d_inode; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; size_t count = dreq->user_count; @@ -396,9 +397,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; - igrab(inode); dreq->inode = inode; - dreq->filp = iocb->ki_filp; + dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -462,14 +462,11 @@ static const struct rpc_call_ops nfs_commit_direct_ops = { static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) { - struct file *file = dreq->filp; - struct nfs_open_context *ctx = (struct nfs_open_context *) - file->private_data; struct nfs_write_data *data = dreq->commit_data; struct rpc_task *task = &data->task; data->inode = dreq->inode; - data->cred = ctx->cred; + data->cred = dreq->ctx->cred; data->args.fh = NFS_FH(data->inode); data->args.offset = dreq->pos; @@ -641,10 +638,8 @@ static const struct rpc_call_ops nfs_write_direct_ops = { */ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) { - struct file *file = dreq->filp; - struct inode *inode = file->f_mapping->host; - struct nfs_open_context *ctx = (struct nfs_open_context *) - file->private_data; + struct nfs_open_context *ctx = dreq->ctx; + struct inode *inode = ctx->dentry->d_inode; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; size_t count = dreq->user_count; @@ -725,9 +720,8 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; - 
igrab(inode); dreq->inode = inode; - dreq->filp = iocb->ki_filp; + dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; -- cgit v1.2.3 From 5db3a7b2cabe8f0957683f798c4f8fa8605f9ebb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:37 -0500 Subject: NFS: Debugging code for nfs_direct_(read|write)_schedule() Make sure that we're doing our list accounting correctly. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e6585b9ff45a..1e3725e51df1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -339,6 +339,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) if (count < rsize) bytes = count; + BUG_ON(list_empty(list)); data = list_entry(list->next, struct nfs_read_data, pages); list_del_init(&data->pages); @@ -378,6 +379,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) count -= bytes; } while (count != 0); + BUG_ON(!list_empty(list)); } static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) @@ -657,6 +659,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) if (count < wsize) bytes = count; + BUG_ON(list_empty(list)); data = list_entry(list->next, struct nfs_write_data, pages); list_move_tail(&data->pages, &dreq->rewrite_list); @@ -697,6 +700,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) count -= bytes; } while (count != 0); + BUG_ON(!list_empty(list)); } static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) -- cgit v1.2.3 From 3feb2d49394b7874348a6e43c076b780c1d222c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:37 -0500 Subject: NFS: Uninline nfs_writedata_(alloc|free) and nfs_readdata_(alloc|free) Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 32 +++++++++++++++++++++++- fs/nfs/write.c | 32 +++++++++++++++++++++++- include/linux/nfs_fs.h | 67 +++----------------------------------------------- 3 files changed, 66 insertions(+), 65 deletions(-) (limited to 'fs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2da255f0247f..3961524fd4ab 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -40,10 +40,40 @@ static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; static kmem_cache_t *nfs_rdata_cachep; -mempool_t *nfs_rdata_mempool; +static mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) +struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) +{ + struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_rdata_mempool); + p = NULL; + } + } + } + return p; +} + +void nfs_readdata_free(struct nfs_read_data *p) +{ + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_rdata_mempool); +} + void nfs_readdata_release(void *data) { nfs_readdata_free(data); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f7c8be0beccf..647e3217c2e1 100644 --- 
a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -86,7 +86,7 @@ static const struct rpc_call_ops nfs_write_full_ops; static const struct rpc_call_ops nfs_commit_ops; static kmem_cache_t *nfs_wdata_cachep; -mempool_t *nfs_wdata_mempool; +static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); @@ -119,6 +119,36 @@ void nfs_commit_free(struct nfs_write_data *p) mempool_free(p, nfs_commit_mempool); } +struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) +{ + struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_wdata_mempool); + p = NULL; + } + } + } + return p; +} + +void nfs_writedata_free(struct nfs_write_data *p) +{ + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_wdata_mempool); +} + void nfs_writedata_release(void *wdata) { nfs_writedata_free(wdata); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9f84c8a5ea43..55de0770df4a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -462,37 +462,8 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page) /* * Allocate and free nfs_write_data structures */ -extern mempool_t *nfs_wdata_mempool; - -static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) -{ - struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); - - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; - else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { - mempool_free(p, nfs_wdata_mempool); - p = NULL; - } - } - } - return p; -} - -static inline void nfs_writedata_free(struct nfs_write_data *p) -{ - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); - mempool_free(p, nfs_wdata_mempool); -} +extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount); +extern void nfs_writedata_free(struct nfs_write_data *p); /* * linux/fs/nfs/read.c @@ -503,41 +474,11 @@ extern int nfs_readpages(struct file *, struct address_space *, extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); extern void nfs_readdata_release(void *data); - /* * Allocate and free nfs_read_data structures */ -extern mempool_t *nfs_rdata_mempool; - -static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) -{ - struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); - - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; - else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { - mempool_free(p, nfs_rdata_mempool); - p = NULL; - } - } - } - return p; -} - -static inline void nfs_readdata_free(struct nfs_read_data *p) -{ - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); - mempool_free(p, nfs_rdata_mempool); -} +extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount); +extern void 
nfs_readdata_free(struct nfs_read_data *p); /* * linux/fs/nfs3proc.c -- cgit v1.2.3 From 0996905f9301c2ff4c021982c42a15b35e74bf1c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:38 -0500 Subject: lockd: posix_test_lock() should not call locks_copy_lock() The caller of posix_test_lock() should never need to look at the lock private data, so do not copy that information. This also means that there is no need to call the fl_release_private methods. Signed-off-by: Trond Myklebust --- fs/locks.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 231b23c12c3d..c83b5dbe0ed9 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -239,17 +239,25 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) /* * Initialize a new lock from an existing file_lock structure. */ -void locks_copy_lock(struct file_lock *new, struct file_lock *fl) +static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) { - locks_release_private(new); - new->fl_owner = fl->fl_owner; new->fl_pid = fl->fl_pid; - new->fl_file = fl->fl_file; + new->fl_file = NULL; new->fl_flags = fl->fl_flags; new->fl_type = fl->fl_type; new->fl_start = fl->fl_start; new->fl_end = fl->fl_end; + new->fl_ops = NULL; + new->fl_lmops = NULL; +} + +void locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + locks_release_private(new); + + __locks_copy_lock(new, fl); + new->fl_file = fl->fl_file; new->fl_ops = fl->fl_ops; new->fl_lmops = fl->fl_lmops; @@ -686,7 +694,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl, break; } if (cfl) { - locks_copy_lock(conflock, cfl); + __locks_copy_lock(conflock, cfl); unlock_kernel(); return 1; } -- cgit v1.2.3 From 09c7938c5640a6f22bef074ca6b803dccfdb93e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:38 -0500 Subject: lockd: Fix server-side lock blocking code Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 73 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index d50946dcddd9..1d3a74df93f3 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -193,6 +193,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, goto failed_free; /* Set notifier function for VFS, and init args */ + block->b_call.a_args.lock.fl.fl_flags |= FL_SLEEP; block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; block->b_call.a_args.cookie = *cookie; /* see above */ @@ -228,19 +229,18 @@ failed: * can be closed hereafter. */ static int -nlmsvc_delete_block(struct nlm_block *block, int unlock) +nlmsvc_delete_block(struct nlm_block *block) { struct file_lock *fl = &block->b_call.a_args.lock.fl; struct nlm_file *file = block->b_file; struct nlm_block **bp; - int status = 0; + int status; dprintk("lockd: deleting block %p...\n", block); /* Remove block from list */ nlmsvc_remove_block(block); - if (unlock) - status = posix_unblock_lock(file->f_file, fl); + status = posix_unblock_lock(file->f_file, fl); /* If the block is in the middle of a GRANT callback, * don't kill it yet. 
*/ @@ -282,7 +282,7 @@ nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) block->b_host->h_inuse = 1; else if (action == NLM_ACT_UNLOCK) { if (host == NULL || host == block->b_host) - nlmsvc_delete_block(block, 1); + nlmsvc_delete_block(block); } } up(&file->f_sema); @@ -297,7 +297,7 @@ u32 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_lock *lock, int wait, struct nlm_cookie *cookie) { - struct nlm_block *block; + struct nlm_block *block, *newblock = NULL; int error; u32 ret; @@ -310,59 +310,65 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, wait); - /* Get existing block (in case client is busy-waiting) */ - block = nlmsvc_lookup_block(file, lock, 0); - + lock->fl.fl_flags &= ~FL_SLEEP; again: /* Lock file against concurrent access */ down(&file->f_sema); + /* Get existing block (in case client is busy-waiting) */ + block = nlmsvc_lookup_block(file, lock, 0); + if (block == NULL) { + if (newblock != NULL) + lock = &newblock->b_call.a_args.lock.fl; + } else + lock = &block->b_call.a_args.lock.fl; error = posix_lock_file(file->f_file, &lock->fl); + lock->fl.fl_flags &= ~FL_SLEEP; dprintk("lockd: posix_lock_file returned %d\n", error); - if (error != -EAGAIN) { - if (block) - nlmsvc_delete_block(block, 0); - up(&file->f_sema); - - switch(-error) { + switch(error) { case 0: ret = nlm_granted; goto out; - case EDEADLK: + case -EAGAIN: + break; + case -EDEADLK: ret = nlm_deadlock; goto out; default: /* includes ENOLCK */ ret = nlm_lck_denied_nolocks; goto out; - } } - if (!wait) { - ret = nlm_lck_denied; - goto out_unlock; - } + ret = nlm_lck_denied; + if (!wait) + goto out; + + ret = nlm_lck_blocked; + if (block != NULL) + goto out; /* If we don't have a block, create and initialize it. Then * retry because we may have slept in kmalloc. */ /* We have to release f_sema as nlmsvc_create_block may try to * to claim it while doing host garbage collection */ - if (block == NULL) { + if (newblock == NULL) { up(&file->f_sema); dprintk("lockd: blocking on this lock (allocating).\n"); - if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie))) + if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie))) return nlm_lck_denied_nolocks; goto again; } /* Append to list of blocked */ - nlmsvc_insert_block(block, NLM_NEVER); + nlmsvc_insert_block(newblock, NLM_NEVER); + newblock = NULL; - ret = nlm_lck_blocked; -out_unlock: - up(&file->f_sema); out: + up(&file->f_sema); + if (newblock != NULL) + nlmsvc_delete_block(newblock); dprintk("lockd: nlmsvc_lock returned %u\n", ret); return ret; } @@ -445,7 +451,7 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) down(&file->f_sema); if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) - status = nlmsvc_delete_block(block, 1); + status = nlmsvc_delete_block(block); up(&file->f_sema); return status ? nlm_lck_denied : nlm_granted; } @@ -519,7 +525,11 @@ nlmsvc_grant_blocked(struct nlm_block *block) } /* Try the lock operation again */ + posix_unblock_lock(file->f_file, &lock->fl); + lock->fl.fl_flags |= FL_SLEEP; error = posix_lock_file(file->f_file, &lock->fl); + lock->fl.fl_flags &= ~FL_SLEEP; + switch (error) { case 0: break; @@ -630,11 +640,8 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status } else { /* Lock is now held by client, or has been rejected. * In both cases, the block should be removed. 
*/ + nlmsvc_delete_block(block); up(&file->f_sema); - if (status == NLM_LCK_GRANTED) - nlmsvc_delete_block(block, 0); - else - nlmsvc_delete_block(block, 1); } } nlm_release_file(file); @@ -661,7 +668,7 @@ nlmsvc_retry_blocked(void) dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n", block, block->b_when, block->b_done); if (block->b_done) - nlmsvc_delete_block(block, 0); + nlmsvc_delete_block(block); else nlmsvc_grant_blocked(block); } -- cgit v1.2.3 From 6849c0cab69f5d1a0fc7b05fa5bfb3dec53f86df Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:39 -0500 Subject: lockd: Add refcounting to struct nlm_block Otherwise, the block may disappear from underneath us when in nlmsvc_retry_blocked. Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 94 ++++++++++++++++++++++----------------------- include/linux/lockd/lockd.h | 3 +- 2 files changed, 47 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 1d3a74df93f3..20caeceffb14 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -39,6 +39,7 @@ #define nlm_deadlock nlm_lck_denied #endif +static void nlmsvc_release_block(struct nlm_block *block); static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); static int nlmsvc_remove_block(struct nlm_block *block); @@ -58,6 +59,7 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when) struct nlm_block **bp, *b; dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when); + kref_get(&block->b_count); if (block->b_queued) nlmsvc_remove_block(block); bp = &nlm_blocked; @@ -90,6 +92,7 @@ nlmsvc_remove_block(struct nlm_block *block) if (b == block) { *bp = block->b_next; block->b_queued = 0; + nlmsvc_release_block(block); return 1; } } @@ -123,6 +126,7 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove) *head = block->b_next; block->b_queued = 0; } + kref_get(&block->b_count); return block; } } @@ -155,6 +159,8 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) break; } + if (block != NULL) + kref_get(&block->b_count); return block; } @@ -188,6 +194,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, memset(block, 0, sizeof(*block)); locks_init_lock(&block->b_call.a_args.lock.fl); locks_init_lock(&block->b_call.a_res.lock.fl); + kref_init(&block->b_count); if (!nlmclnt_setgrantargs(&block->b_call, lock)) goto failed_free; @@ -228,27 +235,24 @@ failed: * It is the caller's responsibility to check whether the file * can be closed hereafter. */ -static int -nlmsvc_delete_block(struct nlm_block *block) +static int nlmsvc_unlink_block(struct nlm_block *block) { - struct file_lock *fl = &block->b_call.a_args.lock.fl; - struct nlm_file *file = block->b_file; - struct nlm_block **bp; int status; - - dprintk("lockd: deleting block %p...\n", block); + dprintk("lockd: unlinking block %p...\n", block); /* Remove block from list */ + status = posix_unblock_lock(block->b_file->f_file, &block->b_call.a_args.lock.fl); nlmsvc_remove_block(block); - status = posix_unblock_lock(file->f_file, fl); + return status; +} - /* If the block is in the middle of a GRANT callback, - * don't kill it yet. 
*/ - if (block->b_incall) { - nlmsvc_insert_block(block, NLM_NEVER); - block->b_done = 1; - return status; - } +static void nlmsvc_free_block(struct kref *kref) +{ + struct nlm_block *block = container_of(kref, struct nlm_block, b_count); + struct nlm_file *file = block->b_file; + struct nlm_block **bp; + + dprintk("lockd: freeing block %p...\n", block); /* Remove block from file's list of blocks */ for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) { @@ -262,7 +266,12 @@ nlmsvc_delete_block(struct nlm_block *block) nlm_release_host(block->b_host); nlmclnt_freegrantargs(&block->b_call); kfree(block); - return status; +} + +static void nlmsvc_release_block(struct nlm_block *block) +{ + if (block != NULL) + kref_put(&block->b_count, nlmsvc_free_block); } /* @@ -282,7 +291,7 @@ nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) block->b_host->h_inuse = 1; else if (action == NLM_ACT_UNLOCK) { if (host == NULL || host == block->b_host) - nlmsvc_delete_block(block); + nlmsvc_unlink_block(block); } } up(&file->f_sema); @@ -318,9 +327,9 @@ again: block = nlmsvc_lookup_block(file, lock, 0); if (block == NULL) { if (newblock != NULL) - lock = &newblock->b_call.a_args.lock.fl; + lock = &newblock->b_call.a_args.lock; } else - lock = &block->b_call.a_args.lock.fl; + lock = &block->b_call.a_args.lock; error = posix_lock_file(file->f_file, &lock->fl); lock->fl.fl_flags &= ~FL_SLEEP; @@ -363,12 +372,10 @@ again: /* Append to list of blocked */ nlmsvc_insert_block(newblock, NLM_NEVER); - newblock = NULL; - out: up(&file->f_sema); - if (newblock != NULL) - nlmsvc_delete_block(newblock); + nlmsvc_release_block(newblock); + nlmsvc_release_block(block); dprintk("lockd: nlmsvc_lock returned %u\n", ret); return ret; } @@ -450,8 +457,10 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) (long long)lock->fl.fl_end); down(&file->f_sema); - if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) - status = nlmsvc_delete_block(block); + if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) { + status = nlmsvc_unlink_block(block); + nlmsvc_release_block(block); + } up(&file->f_sema); return status ? nlm_lck_denied : nlm_granted; } @@ -514,7 +523,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) down(&file->f_sema); /* Unlink block request from list */ - nlmsvc_remove_block(block); + nlmsvc_unlink_block(block); /* If b_granted is true this means we've been here before. * Just retry the grant callback, possibly refreshing the RPC @@ -525,7 +534,6 @@ nlmsvc_grant_blocked(struct nlm_block *block) } /* Try the lock operation again */ - posix_unblock_lock(file->f_file, &lock->fl); lock->fl.fl_flags |= FL_SLEEP; error = posix_lock_file(file->f_file, &lock->fl); lock->fl.fl_flags &= ~FL_SLEEP; @@ -548,16 +556,15 @@ callback: /* Lock was granted by VFS. 
*/ dprintk("lockd: GRANTing blocked lock.\n"); block->b_granted = 1; - block->b_incall = 1; /* Schedule next grant callback in 30 seconds */ nlmsvc_insert_block(block, 30 * HZ); /* Call the client */ - nlm_get_host(block->b_call.a_host); + kref_get(&block->b_count); if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops) < 0) - nlm_release_host(block->b_call.a_host); + nlmsvc_release_block(block); out_unlock: up(&file->f_sema); } @@ -573,20 +580,10 @@ out_unlock: static void nlmsvc_grant_callback(struct rpc_task *task, void *data) { struct nlm_rqst *call = data; - struct nlm_block *block; + struct nlm_block *block = container_of(call, struct nlm_block, b_call); unsigned long timeout; - struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client); dprintk("lockd: GRANT_MSG RPC callback\n"); - dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n", - nlmdbg_cookie2a(&call->a_args.cookie), - NIPQUAD(peer_addr->sin_addr.s_addr)); - if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) { - dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n", - nlmdbg_cookie2a(&call->a_args.cookie), - NIPQUAD(peer_addr->sin_addr.s_addr)); - return; - } /* Technically, we should down the file semaphore here. Since we * move the block towards the head of the queue only, no harm @@ -603,9 +600,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) } nlmsvc_insert_block(block, timeout); svc_wake_up(block->b_daemon); - block->b_incall = 0; - - nlm_release_host(call->a_host); + nlmsvc_release_block(block); } static const struct rpc_call_ops nlmsvc_grant_ops = { @@ -631,20 +626,19 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status file->f_count++; down(&file->f_sema); - block = nlmsvc_find_block(cookie, &rqstp->rq_addr); if (block) { if (status == NLM_LCK_DENIED_GRACE_PERIOD) { /* Try again in a couple of seconds */ nlmsvc_insert_block(block, 10 * HZ); - up(&file->f_sema); } else { /* Lock is now held by client, or has been rejected. * In both cases, the block should be removed. 
*/ - nlmsvc_delete_block(block); - up(&file->f_sema); + nlmsvc_unlink_block(block); } } + up(&file->f_sema); nlm_release_file(file); + nlmsvc_release_block(block); } /* @@ -667,10 +661,12 @@ nlmsvc_retry_blocked(void) break; dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n", block, block->b_when, block->b_done); + kref_get(&block->b_count); if (block->b_done) - nlmsvc_delete_block(block); + nlmsvc_unlink_block(block); else nlmsvc_grant_blocked(block); + nlmsvc_release_block(block); } if ((block = nlm_blocked) && block->b_when != NLM_NEVER) diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ef21ed296039..08ab9773f762 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,7 @@ struct nlm_file { */ #define NLM_NEVER (~(unsigned long) 0) struct nlm_block { + struct kref b_count; /* Reference count */ struct nlm_block * b_next; /* linked list (all blocks) */ struct nlm_block * b_fnext; /* linked list (per file) */ struct nlm_rqst b_call; /* RPC args & callback info */ @@ -119,7 +121,6 @@ struct nlm_block { unsigned int b_id; /* block id */ unsigned char b_queued; /* re-queued */ unsigned char b_granted; /* VFS granted lock */ - unsigned char b_incall; /* doing callback */ unsigned char b_done; /* callback complete */ struct nlm_file * b_file; /* file in question */ }; -- cgit v1.2.3 From 5e1abf8cb713a0b94f5a400c7b9b797990cd9dec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:39 -0500 Subject: lockd: Clean up of the server-side GRANTED code Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 43 ---------------------------------- fs/lockd/svclock.c | 56 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/lockd/lockd.h | 2 -- 3 files changed, 53 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index acc3eb13a02b..80ae3127699f 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -147,49 +147,6 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req) fl->fl_ops->fl_release_private(fl); } -/* - * Initialize arguments for GRANTED call. The nlm_rqst structure - * has been cleared already. - */ -int -nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) -{ - locks_copy_lock(&call->a_args.lock.fl, &lock->fl); - memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh)); - call->a_args.lock.caller = system_utsname.nodename; - call->a_args.lock.oh.len = lock->oh.len; - - /* set default data area */ - call->a_args.lock.oh.data = call->a_owner; - call->a_args.lock.svid = lock->fl.fl_pid; - - if (lock->oh.len > NLMCLNT_OHSIZE) { - void *data = kmalloc(lock->oh.len, GFP_KERNEL); - if (!data) { - nlmclnt_freegrantargs(call); - return 0; - } - call->a_args.lock.oh.data = (u8 *) data; - } - - memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len); - return 1; -} - -void -nlmclnt_freegrantargs(struct nlm_rqst *call) -{ - struct file_lock *fl = &call->a_args.lock.fl; - /* - * Check whether we allocated memory for the owner. - */ - if (call->a_args.lock.oh.data != (u8 *) call->a_owner) { - kfree(call->a_args.lock.oh.data); - } - if (fl->fl_ops && fl->fl_ops->fl_release_private) - fl->fl_ops->fl_release_private(fl); -} - /* * This is the main entry point for the NLM client. 
*/ diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 20caeceffb14..3c7dd956d9c1 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -43,6 +43,8 @@ static void nlmsvc_release_block(struct nlm_block *block); static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); static int nlmsvc_remove_block(struct nlm_block *block); +static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); +static void nlmsvc_freegrantargs(struct nlm_rqst *call); static const struct rpc_call_ops nlmsvc_grant_ops; /* @@ -196,7 +198,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, locks_init_lock(&block->b_call.a_res.lock.fl); kref_init(&block->b_count); - if (!nlmclnt_setgrantargs(&block->b_call, lock)) + if (!nlmsvc_setgrantargs(&block->b_call, lock)) goto failed_free; /* Set notifier function for VFS, and init args */ @@ -264,7 +266,7 @@ static void nlmsvc_free_block(struct kref *kref) if (block->b_host) nlm_release_host(block->b_host); - nlmclnt_freegrantargs(&block->b_call); + nlmsvc_freegrantargs(&block->b_call); kfree(block); } @@ -298,6 +300,49 @@ nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) return 0; } +/* + * Initialize arguments for GRANTED call. The nlm_rqst structure + * has been cleared already. + */ +static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) +{ + locks_copy_lock(&call->a_args.lock.fl, &lock->fl); + memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh)); + call->a_args.lock.caller = system_utsname.nodename; + call->a_args.lock.oh.len = lock->oh.len; + + /* set default data area */ + call->a_args.lock.oh.data = call->a_owner; + call->a_args.lock.svid = lock->fl.fl_pid; + + if (lock->oh.len > NLMCLNT_OHSIZE) { + void *data = kmalloc(lock->oh.len, GFP_KERNEL); + if (!data) { + nlmsvc_freegrantargs(call); + return 0; + } + call->a_args.lock.oh.data = (u8 *) data; + } + + memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len); + return 1; +} + +static void nlmsvc_freegrantargs(struct nlm_rqst *call) +{ + struct file_lock *fl = &call->a_args.lock.fl; + /* + * Check whether we allocated memory for the owner. + */ + if (call->a_args.lock.oh.data != (u8 *) call->a_owner) { + kfree(call->a_args.lock.oh.data); + } + if (fl->fl_ops && fl->fl_ops->fl_release_private) + fl->fl_ops->fl_release_private(fl); + if (fl->fl_lmops && fl->fl_lmops->fl_release_private) + fl->fl_lmops->fl_release_private(fl); +} + /* * Attempt to establish a lock, and if it can't be granted, block it * if required. 
@@ -600,11 +645,16 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) } nlmsvc_insert_block(block, timeout); svc_wake_up(block->b_daemon); - nlmsvc_release_block(block); +} + +void nlmsvc_grant_release(void *data) +{ + nlmsvc_release_block(data); } static const struct rpc_call_ops nlmsvc_grant_ops = { .rpc_call_done = nlmsvc_grant_callback, + .rpc_release = nlmsvc_grant_release, }; /* diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 08ab9773f762..860a93f6ce6d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -153,8 +153,6 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout); u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); void nlmclnt_recovery(struct nlm_host *, u32); int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); -int nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *); -void nlmclnt_freegrantargs(struct nlm_rqst *); /* * Host cache -- cgit v1.2.3 From 686517f1ad1630c11964d668b556aab79b8c942e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:39 -0500 Subject: lockd: Make nlmsvc_create_block() use nlmsvc_lookup_host() Currently it uses nlmclnt_lookup_host(), which puts the resulting host structure on a different list. Signed-off-by: Trond Myklebust --- fs/lockd/svc4proc.c | 3 +-- fs/lockd/svclock.c | 3 +-- fs/lockd/svcproc.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index b10f913aa06a..ac4a700af01a 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -483,8 +483,7 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) if (!(call = nlmclnt_alloc_call())) return rpc_system_err; - host = nlmclnt_lookup_host(&rqstp->rq_addr, - rqstp->rq_prot, rqstp->rq_vers); + host = nlmsvc_lookup_host(rqstp); if (!host) { kfree(call); return rpc_system_err; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 3c7dd956d9c1..a95d260b7137 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -185,8 +185,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_rqst *call; /* Create host handle for callback */ - host = nlmclnt_lookup_host(&rqstp->rq_addr, - rqstp->rq_prot, rqstp->rq_vers); + host = nlmsvc_lookup_host(rqstp); if (host == NULL) return NULL; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 35681d9cf1fc..4986fbe44540 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -508,8 +508,7 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) if (!(call = nlmclnt_alloc_call())) return rpc_system_err; - host = nlmclnt_lookup_host(&rqstp->rq_addr, - rqstp->rq_prot, rqstp->rq_vers); + host = nlmsvc_lookup_host(rqstp); if (!host) { kfree(call); return rpc_system_err; -- cgit v1.2.3 From 04266473ecf5cdca242201d9f1ed890afe070fb6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:40 -0500 Subject: lockd: Make lockd use rpc_new_client() instead of rpc_create_client When doing NLM_GRANTED requests, lockd may end up blocking if we use rpc_create_client() due to the synchronous call to rpc_ping(). Instead, use rpc_new_client(). 
Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 82f7a0b1d8ae..100e78229700 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -191,11 +191,12 @@ nlm_bind_host(struct nlm_host *host) xprt->resvport = 1; /* NLM requires a reserved port */ /* Existing NLM servers accept AUTH_UNIX only */ - clnt = rpc_create_client(xprt, host->h_name, &nlm_program, + clnt = rpc_new_client(xprt, host->h_name, &nlm_program, host->h_version, RPC_AUTH_UNIX); if (IS_ERR(clnt)) goto forgetit; clnt->cl_autobind = 1; /* turn on pmap queries */ + clnt->cl_softrtry = 1; /* All queries are soft */ host->h_rpcclnt = clnt; } -- cgit v1.2.3 From 26bcbf965f857c710adafd16cf424f043006b5dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Mar 2006 13:44:40 -0500 Subject: lockd: stop abusing file_lock_list Currently lockd directly access the file_lock_list from fs/locks.c. It does so to mark locks granted or reclaimable. This is very suboptimal, because a) lockd needs to poke into locks.c internals, and b) it needs to iterate over all locks in the system for marking locks granted or reclaimable. This patch adds lists for granted and reclaimable locks to the nlm_host structure instead, and adds locks to those. nlmclnt_lock: now adds the lock to h_granted instead of setting the NFS_LCK_GRANTED, still O(1) nlmclnt_mark_reclaim: goes away completely, replaced by a list_splice_init. Complexity reduced from O(locks in the system) to O(1) reclaimer: iterates over h_reclaim now, complexity reduced from O(locks in the system) to O(locks per nlm_host) Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 54 ++++++++------------------------------------- fs/lockd/clntproc.c | 11 ++++----- fs/lockd/host.c | 2 ++ fs/locks.c | 5 +---- include/linux/fs.h | 2 -- include/linux/lockd/lockd.h | 2 ++ include/linux/nfs_fs_i.h | 8 +------ 7 files changed, 21 insertions(+), 63 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 8ae79ae4b998..0fc0ee267b04 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -154,34 +154,6 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) * server crash. */ -/* - * Mark the locks for reclaiming. - * FIXME: In 2.5 we don't want to iterate through any global file_lock_list. - * Maintain NLM lock reclaiming lists in the nlm_host instead. - */ -static -void nlmclnt_mark_reclaim(struct nlm_host *host) -{ - struct file_lock *fl; - struct inode *inode; - struct list_head *tmp; - - list_for_each(tmp, &file_lock_list) { - fl = list_entry(tmp, struct file_lock, fl_link); - - inode = fl->fl_file->f_dentry->d_inode; - if (inode->i_sb->s_magic != NFS_SUPER_MAGIC) - continue; - if (fl->fl_u.nfs_fl.owner == NULL) - continue; - if (fl->fl_u.nfs_fl.owner->host != host) - continue; - if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) - continue; - fl->fl_u.nfs_fl.flags |= NFS_LCK_RECLAIM; - } -} - /* * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number, * that we mark locks for reclaiming, and that we bump the pseudo NSM state. @@ -194,7 +166,12 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate) host->h_state++; host->h_nextrebind = 0; nlm_rebind_host(host); - nlmclnt_mark_reclaim(host); + + /* + * Mark the locks for reclaiming. 
+ */ + list_splice_init(&host->h_granted, &host->h_reclaim); + dprintk("NLM: reclaiming locks for host %s", host->h_name); } @@ -223,9 +200,7 @@ reclaimer(void *ptr) { struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; - struct list_head *tmp; - struct file_lock *fl; - struct inode *inode; + struct file_lock *fl, *next; daemonize("%s-reclaim", host->h_name); allow_signal(SIGKILL); @@ -237,20 +212,9 @@ reclaimer(void *ptr) /* First, reclaim all locks that have been marked. */ restart: - list_for_each(tmp, &file_lock_list) { - fl = list_entry(tmp, struct file_lock, fl_link); - - inode = fl->fl_file->f_dentry->d_inode; - if (inode->i_sb->s_magic != NFS_SUPER_MAGIC) - continue; - if (fl->fl_u.nfs_fl.owner == NULL) - continue; - if (fl->fl_u.nfs_fl.owner->host != host) - continue; - if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM)) - continue; + list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { + list_del(&fl->fl_u.nfs_fl.list); - fl->fl_u.nfs_fl.flags &= ~NFS_LCK_RECLAIM; nlmclnt_reclaim(host, fl); if (signalled()) break; diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 80ae3127699f..cb469431bd1d 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -465,7 +465,6 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho { BUG_ON(fl->fl_ops != NULL); fl->fl_u.nfs_fl.state = 0; - fl->fl_u.nfs_fl.flags = 0; fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner); fl->fl_ops = &nlmclnt_lock_ops; } @@ -552,8 +551,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; - fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED; fl->fl_flags |= FL_SLEEP; + list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); do_vfs_lock(fl); } status = nlm_stat_to_errno(resp->status); @@ -619,9 +618,11 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) struct nlm_res *resp = &req->a_res; int status; - /* Clean the GRANTED flag now so the lock doesn't get - * reclaimed while we're stuck in the unlock call. */ - fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED; + /* + * Remove from the granted list now so the lock doesn't get + * reclaimed while we're stuck in the unlock call. 
+ */ + list_del(&fl->fl_u.nfs_fl.list); /* * Note: the server is supposed to either grant us the unlock diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 100e78229700..f456f8ed9acd 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -123,6 +123,8 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, nlm_hosts[hash] = host; INIT_LIST_HEAD(&host->h_lockowners); spin_lock_init(&host->h_lock); + INIT_LIST_HEAD(&host->h_granted); + INIT_LIST_HEAD(&host->h_reclaim); if (++nrhosts > NLM_HOST_MAX) next_gc = 0; diff --git a/fs/locks.c b/fs/locks.c index c83b5dbe0ed9..56f996e98bbc 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -139,10 +139,7 @@ int lease_break_time = 45; #define for_each_lock(inode, lockp) \ for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) -LIST_HEAD(file_lock_list); - -EXPORT_SYMBOL(file_lock_list); - +static LIST_HEAD(file_lock_list); static LIST_HEAD(blocked_list); static kmem_cache_t *filelock_cache; diff --git a/include/linux/fs.h b/include/linux/fs.h index d2cffee8fc11..5dc0fa288a4c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -730,8 +730,6 @@ struct file_lock { #define OFFT_OFFSET_MAX INT_LIMIT(off_t) #endif -extern struct list_head file_lock_list; - #include extern int fcntl_getlk(struct file *, struct flock __user *); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 860a93f6ce6d..b0f63b6ab0d4 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -59,6 +59,8 @@ struct nlm_host { unsigned long h_expires; /* eligible for GC */ struct list_head h_lockowners; /* Lockowners for the client */ spinlock_t h_lock; + struct list_head h_granted; /* Locks in GRANTED state */ + struct list_head h_reclaim; /* Locks in RECLAIM state */ }; /* diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h index e2c18dabff86..861730275ba0 100644 --- a/include/linux/nfs_fs_i.h +++ b/include/linux/nfs_fs_i.h @@ -12,8 +12,8 @@ struct nlm_lockowner; */ struct nfs_lock_info { u32 state; - u32 flags; struct nlm_lockowner *owner; + struct list_head list; }; struct nfs4_lock_state; @@ -21,10 +21,4 @@ struct nfs4_lock_info { struct nfs4_lock_state *owner; }; -/* - * Lock flag values - */ -#define NFS_LCK_GRANTED 0x0001 /* lock has been granted */ -#define NFS_LCK_RECLAIM 0x0002 /* lock marked for reclaiming */ - #endif -- cgit v1.2.3 From 4c060b531006e0711db32a132d6ac7661594b280 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:41 -0500 Subject: lockd: Fix Oopses due to list manipulation errors. The patch "stop abusing file_lock_list introduces a couple of bugs since the locks may be copied and need to be removed from the lists when they are destroyed. Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 7 ++++--- fs/lockd/clntproc.c | 15 ++++++--------- fs/lockd/host.c | 7 +++++-- 3 files changed, 15 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 0fc0ee267b04..7cf41c1e1a88 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -213,11 +213,12 @@ reclaimer(void *ptr) /* First, reclaim all locks that have been marked. 
*/ restart: list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { - list_del(&fl->fl_u.nfs_fl.list); + list_del_init(&fl->fl_u.nfs_fl.list); - nlmclnt_reclaim(host, fl); if (signalled()) - break; + continue; + if (nlmclnt_reclaim(host, fl) == 0) + list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); goto restart; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index cb469431bd1d..3e90356b4882 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -446,12 +446,14 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) { - memcpy(&new->fl_u.nfs_fl, &fl->fl_u.nfs_fl, sizeof(new->fl_u.nfs_fl)); - nlm_get_lockowner(new->fl_u.nfs_fl.owner); + new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state; + new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner); + list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted); } static void nlmclnt_locks_release_private(struct file_lock *fl) { + list_del(&fl->fl_u.nfs_fl.list); nlm_put_lockowner(fl->fl_u.nfs_fl.owner); fl->fl_ops = NULL; } @@ -466,6 +468,7 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho BUG_ON(fl->fl_ops != NULL); fl->fl_u.nfs_fl.state = 0; fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner); + INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list); fl->fl_ops = &nlmclnt_lock_ops; } @@ -552,7 +555,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; fl->fl_flags |= FL_SLEEP; - list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); + /* Ensure the resulting lock will get added to granted list */ do_vfs_lock(fl); } status = nlm_stat_to_errno(resp->status); @@ -618,12 +621,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) struct nlm_res *resp = &req->a_res; int status; - /* - * Remove from the granted list now so the lock doesn't get - * reclaimed while we're stuck in the unlock call. - */ - list_del(&fl->fl_u.nfs_fl.list); - /* * Note: the server is supposed to either grant us the unlock * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either diff --git a/fs/lockd/host.c b/fs/lockd/host.c index f456f8ed9acd..112ebf8b8dfe 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -245,8 +245,12 @@ void nlm_release_host(struct nlm_host *host) { if (host != NULL) { dprintk("lockd: release host %s\n", host->h_name); - atomic_dec(&host->h_count); BUG_ON(atomic_read(&host->h_count) < 0); + if (atomic_dec_and_test(&host->h_count)) { + BUG_ON(!list_empty(&host->h_lockowners)); + BUG_ON(!list_empty(&host->h_granted)); + BUG_ON(!list_empty(&host->h_reclaim)); + } } } @@ -334,7 +338,6 @@ nlm_gc_hosts(void) rpc_destroy_client(host->h_rpcclnt); } } - BUG_ON(!list_empty(&host->h_lockowners)); kfree(host); nrhosts--; } -- cgit v1.2.3 From 35576cba57f1c042b87d6586b3229d13067264c6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:41 -0500 Subject: NLM: nlmclnt_cancel_callback should accept NLM_LCK_DENIED errors NLM_LCK_DENIED is a valid error return for an NLM_CANCEL call by the client. 
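A hedged sketch of the resulting status handling (the helper is illustrative and not in the kernel; the constants are the ones used in the diff that follows). NLM_LCK_DENIED now joins the set of NLM_CANCEL replies treated as a normal server answer:

#include <linux/lockd/lockd.h>

/* NLM_CANCEL results the client-side callback accepts after this patch;
 * anything else is still handled as before (retry or rebind). */
static int sketch_cancel_status_ok(u32 status)
{
        switch (status) {
        case NLM_LCK_GRANTED:                   /* the cancel succeeded */
        case NLM_LCK_DENIED_GRACE_PERIOD:       /* already treated as good */
        case NLM_LCK_DENIED:                    /* newly accepted: valid reply to NLM_CANCEL */
                return 1;
        default:
                return 0;
        }
}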
Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 3e90356b4882..c25044f3b660 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -750,6 +750,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data) switch (req->a_res.status) { case NLM_LCK_GRANTED: case NLM_LCK_DENIED_GRACE_PERIOD: + case NLM_LCK_DENIED: /* Everything's good */ break; case NLM_LCK_DENIED_NOLOCKS: -- cgit v1.2.3 From 606bbba06b11ebcbdf3a4fcd8cce4507c5bd7a4b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:42 -0500 Subject: NFS: fix compiler warnings on 64-bit platforms Introduced by NFS aio+dio patches. Test plan: Compile kernel with CONFIG_NFS enabled on 64-bit hardware. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1e3725e51df1..b4bbf6d75923 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -365,7 +365,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) rpc_execute(&data->task); unlock_kernel(); - dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n", + dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), @@ -686,7 +686,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) rpc_execute(&data->task); unlock_kernel(); - dfprintk(VFS, "NFS: %4d initiated direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", + dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), -- cgit v1.2.3 From 6b45d858ed6821dd687efd3b68929de2e4954fec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:43 -0500 Subject: NFS: Clean up nfs_get_user_pages Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 59 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b4bbf6d75923..58830429e42a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -59,7 +59,6 @@ #define NFSDBG_FACILITY NFSDBG_VFS -static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty); static kmem_cache_t *nfs_direct_cachep; /* @@ -121,6 +120,18 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } +static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +{ + int i; + for (i = 0; i < npages; i++) { + struct page *page = pages[i]; + if (do_dirty && !PageCompound(page)) + set_page_dirty_lock(page); + page_cache_release(page); + } + kfree(pages); +} + static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) { int result = -ENOMEM; @@ -138,31 +149,23 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz page_count, (rw == READ), 0, *pages, NULL); up_read(¤t->mm->mmap_sem); - /* - * If we got fewer pages than expected from get_user_pages(), - * the user buffer runs off the end of a mapping; return EFAULT. 
- */ - if (result >= 0 && result < page_count) { - nfs_free_user_pages(*pages, result, 0); + if (result != page_count) { + /* + * If we got fewer pages than expected from + * get_user_pages(), the user buffer runs off the + * end of a mapping; return EFAULT. + */ + if (result >= 0) { + nfs_free_user_pages(*pages, result, 0); + result = -EFAULT; + } else + kfree(*pages); *pages = NULL; - result = -EFAULT; } } return result; } -static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) -{ - int i; - for (i = 0; i < npages; i++) { - struct page *page = pages[i]; - if (do_dirty && !PageCompound(page)) - set_page_dirty_lock(page); - page_cache_release(page); - } - kfree(pages); -} - static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; @@ -788,13 +791,11 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, if (retval) goto out; - page_count = nfs_get_user_pages(READ, (unsigned long) buf, + retval = nfs_get_user_pages(READ, (unsigned long) buf, count, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - retval = page_count; + if (retval < 0) goto out; - } + page_count = retval; retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, pages, page_count); @@ -862,13 +863,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t if (retval) goto out; - page_count = nfs_get_user_pages(WRITE, (unsigned long) buf, + retval = nfs_get_user_pages(WRITE, (unsigned long) buf, count, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - retval = page_count; + if (retval < 0) goto out; - } + page_count = retval; retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos, pages, page_count); -- cgit v1.2.3 From d72b7a6b26b9009b7a05117fe2e04b3a73ae4a5c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:43 -0500 Subject: NFS: O_DIRECT needs to use a completion Now that we have aio writes, it is possible for dreq->outstanding to be zero, but for the I/O not to have completed. Convert struct nfs_direct_req to use a completion to signal when the I/O is done. 
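The conversion pattern itself, as a hedged stand-alone sketch with illustrative demo_ names rather than the real nfs_direct_req fields: a struct completion records "the I/O has finished" independently of how many requests happen to be outstanding at any instant.

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/completion.h>

struct demo_req {
        struct completion done;         /* replaces wait_queue_head_t + counter test */
        ssize_t result;
};

static void demo_req_init(struct demo_req *req)
{
        init_completion(&req->done);
        req->result = 0;
}

/* Called once, when the last outstanding request has completed. */
static void demo_req_finish(struct demo_req *req, ssize_t result)
{
        req->result = result;
        complete_all(&req->done);       /* wakes current and future waiters */
}

static ssize_t demo_req_wait(struct demo_req *req)
{
        if (wait_for_completion_interruptible(&req->done))
                return -ERESTARTSYS;
        return req->result;
}

The point of the change is visible in demo_req_wait(): the waiter no longer tests a counter that can legitimately read zero before an aio write has finished; it waits for an explicit completion event.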
Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 58830429e42a..cbef57a16ffb 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -72,7 +72,6 @@ struct nfs_direct_req { rewrite_list; /* saved nfs_write_data structs */ struct nfs_open_context *ctx; /* file open context info */ struct kiocb * iocb; /* controlling i/o request */ - wait_queue_head_t wait; /* wait for i/o completion */ struct inode * inode; /* target file of i/o */ unsigned long user_addr; /* location of user's buffer */ size_t user_count; /* total bytes to move */ @@ -85,6 +84,7 @@ struct nfs_direct_req { int outstanding; /* i/os we're waiting for */ ssize_t count, /* bytes actually processed */ error; /* any reported error */ + struct completion completion; /* wait for i/o completion */ /* commit state */ struct nfs_write_data * commit_data; /* special write_data for commits */ @@ -175,7 +175,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) return NULL; kref_init(&dreq->kref); - init_waitqueue_head(&dreq->wait); + init_completion(&dreq->completion); INIT_LIST_HEAD(&dreq->list); INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; @@ -209,7 +209,7 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) if (dreq->iocb) goto out; - result = wait_event_interruptible(dreq->wait, (dreq->outstanding == 0)); + result = wait_for_completion_interruptible(&dreq->completion); if (!result) result = dreq->error; @@ -239,8 +239,8 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) if (!res) res = (long) dreq->count; aio_complete(dreq->iocb, res, 0); - } else - wake_up(&dreq->wait); + } + complete_all(&dreq->completion); kref_put(&dreq->kref, nfs_direct_req_release); } -- cgit v1.2.3 From 3a649b884637c4fdff50a6beebc3dc0e6082e048 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:44 -0500 Subject: NLM: Simplify client locks Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 43 ++++++++++++++++--------------------------- fs/lockd/clntproc.c | 39 ++++++++++++++++----------------------- include/linux/lockd/lockd.h | 7 +++---- 3 files changed, 35 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 7cf41c1e1a88..bce744468708 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -44,32 +44,25 @@ static LIST_HEAD(nlm_blocked); /* * Queue up a lock for blocking so that the GRANTED request can see it */ -int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl) +struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl) { struct nlm_wait *block; - BUG_ON(req->a_block != NULL); block = kmalloc(sizeof(*block), GFP_KERNEL); - if (block == NULL) - return -ENOMEM; - block->b_host = host; - block->b_lock = fl; - init_waitqueue_head(&block->b_wait); - block->b_status = NLM_LCK_BLOCKED; - - list_add(&block->b_list, &nlm_blocked); - req->a_block = block; - - return 0; + if (block != NULL) { + block->b_host = host; + block->b_lock = fl; + init_waitqueue_head(&block->b_wait); + block->b_status = NLM_LCK_BLOCKED; + list_add(&block->b_list, &nlm_blocked); + } + return block; } -void nlmclnt_finish_block(struct nlm_rqst *req) +void nlmclnt_finish_block(struct nlm_wait *block) { - struct nlm_wait *block = req->a_block; - if (block == NULL) return; - req->a_block = NULL; list_del(&block->b_list); kfree(block); } @@ -77,15 +70,14 @@ void 
nlmclnt_finish_block(struct nlm_rqst *req) /* * Block on a lock */ -long nlmclnt_block(struct nlm_rqst *req, long timeout) +int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) { - struct nlm_wait *block = req->a_block; long ret; /* A borken server might ask us to block even if we didn't * request it. Just say no! */ - if (!req->a_args.block) + if (block == NULL) return -EAGAIN; /* Go to sleep waiting for GRANT callback. Some servers seem @@ -99,13 +91,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout) ret = wait_event_interruptible_timeout(block->b_wait, block->b_status != NLM_LCK_BLOCKED, timeout); - - if (block->b_status != NLM_LCK_BLOCKED) { - req->a_res.status = block->b_status; - block->b_status = NLM_LCK_BLOCKED; - } - - return ret; + if (ret < 0) + return -ERESTARTSYS; + req->a_res.status = block->b_status; + return 0; } /* diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index c25044f3b660..8af017105854 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -136,15 +136,14 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) (unsigned int)fl->fl_u.nfs_fl.owner->pid, system_utsname.nodename); lock->svid = fl->fl_u.nfs_fl.owner->pid; - locks_copy_lock(&lock->fl, fl); + lock->fl.fl_start = fl->fl_start; + lock->fl.fl_end = fl->fl_end; + lock->fl.fl_type = fl->fl_type; } static void nlmclnt_release_lockargs(struct nlm_rqst *req) { - struct file_lock *fl = &req->a_args.lock.fl; - - if (fl->fl_ops && fl->fl_ops->fl_release_private) - fl->fl_ops->fl_release_private(fl); + BUG_ON(req->a_args.lock.fl.fl_ops != NULL); } /* @@ -455,7 +454,6 @@ static void nlmclnt_locks_release_private(struct file_lock *fl) { list_del(&fl->fl_u.nfs_fl.list); nlm_put_lockowner(fl->fl_u.nfs_fl.owner); - fl->fl_ops = NULL; } static struct file_lock_operations nlmclnt_lock_ops = { @@ -515,41 +513,36 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; - long timeout; - int status; + struct nlm_wait *block = NULL; + int status = -ENOLCK; if (!host->h_monitored && nsm_monitor(host) < 0) { printk(KERN_NOTICE "lockd: failed to monitor %s\n", host->h_name); - status = -ENOLCK; goto out; } - if (req->a_args.block) { - status = nlmclnt_prepare_block(req, host, fl); - if (status < 0) - goto out; - } + block = nlmclnt_prepare_block(host, fl); for(;;) { status = nlmclnt_call(req, NLMPROC_LOCK); if (status < 0) goto out_unblock; - if (resp->status != NLM_LCK_BLOCKED) + if (!req->a_args.block) break; - /* Wait on an NLM blocking lock */ - timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT); /* Did a reclaimer thread notify us of a server reboot? */ if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD) continue; if (resp->status != NLM_LCK_BLOCKED) break; - if (timeout >= 0) - continue; - /* We were interrupted. Send a CANCEL request to the server + /* Wait on an NLM blocking lock */ + status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); + /* if we were interrupted. 
Send a CANCEL request to the server * and exit */ - status = (int)timeout; - goto out_unblock; + if (status < 0) + goto out_unblock; + if (resp->status != NLM_LCK_BLOCKED) + break; } if (resp->status == NLM_LCK_GRANTED) { @@ -560,7 +553,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) } status = nlm_stat_to_errno(resp->status); out_unblock: - nlmclnt_finish_block(req); + nlmclnt_finish_block(block); /* Cancel the blocked request if it is still pending */ if (resp->status == NLM_LCK_BLOCKED) nlmclnt_cancel(host, req->a_args.block, fl); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index b0f63b6ab0d4..cb9933d04091 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -86,7 +86,6 @@ struct nlm_rqst { struct nlm_host * a_host; /* host handle */ struct nlm_args a_args; /* arguments */ struct nlm_res a_res; /* result */ - struct nlm_wait * a_block; unsigned int a_retries; /* Retry count */ char a_owner[NLMCLNT_OHSIZE]; }; @@ -149,9 +148,9 @@ extern unsigned long nlmsvc_timeout; * Lockd client functions */ struct nlm_rqst * nlmclnt_alloc_call(void); -int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl); -void nlmclnt_finish_block(struct nlm_rqst *req); -long nlmclnt_block(struct nlm_rqst *req, long timeout); +struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); +void nlmclnt_finish_block(struct nlm_wait *block); +int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); void nlmclnt_recovery(struct nlm_host *, u32); int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); -- cgit v1.2.3 From e4cd038a45a46ffbe06a1a72f3f15246e5b041ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:44 -0500 Subject: NLM: Fix nlmclnt_test to not copy private part of locks The struct file_lock does not carry a properly initialised lock, so don't copy it as if it were. Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 4 +++- fs/nfs/file.c | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 8af017105854..7a239864b8bf 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -434,7 +434,9 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) /* * Report the conflicting lock back to the application. */ - locks_copy_lock(fl, &req->a_res.lock.fl); + fl->fl_start = req->a_res.lock.fl.fl_start; + fl->fl_end = req->a_res.lock.fl.fl_start; + fl->fl_type = req->a_res.lock.fl.fl_type; fl->fl_pid = 0; } else { return nlm_stat_to_errno(req->a_res.status); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ee140c53dba6..6bcbc4d676c4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -399,7 +399,10 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) lock_kernel(); /* Try local locking first */ if (posix_test_lock(filp, fl, &cfl)) { - locks_copy_lock(fl, &cfl); + fl->fl_start = cfl.fl_start; + fl->fl_end = cfl.fl_end; + fl->fl_type = cfl.fl_type; + fl->fl_pid = cfl.fl_pid; goto out; } -- cgit v1.2.3 From 92737230dd3f1478033819d4bc20339f8da852da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:45 -0500 Subject: NLM: Add nlmclnt_release_call Add a helper function to simplify the freeing of NLM client requests. 
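A condensed, hedged sketch of the ownership rule the helper establishes (sketch_ names are illustrative; nlm_alloc_call(), nlm_release_call(), nlm_get_host() and nlm_async_call() are the interfaces added or used by the diff below): a request consumes one host reference at allocation time, and asynchronous requests free themselves through .rpc_release instead of each completion handler duplicating nlm_release_host() plus kfree().

#include <linux/errno.h>
#include <linux/sunrpc/clnt.h>
#include <linux/lockd/lockd.h>

static void sketch_rpc_release(void *data)
{
        nlm_release_call(data);         /* drops the host ref and frees the nlm_rqst */
}

static const struct rpc_call_ops sketch_async_ops = {
        /* .rpc_call_done: per-request completion handler, omitted here */
        .rpc_release    = sketch_rpc_release,
};

static int sketch_async_request(struct nlm_host *host, u32 proc)
{
        struct nlm_rqst *req;

        req = nlm_alloc_call(nlm_get_host(host));       /* consumes the extra ref */
        if (req == NULL)
                return -ENOMEM;
        req->a_flags = RPC_TASK_ASYNC;
        /* ... fill in req->a_args for the chosen procedure here ... */

        /* On success the request is freed later via .rpc_release; on
         * failure nlm_async_call() has already released it. */
        return nlm_async_call(req, proc, &sketch_async_ops);
}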
Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 195 +++++++++++++++++--------------------------- fs/lockd/svc4proc.c | 34 ++++---- fs/lockd/svclock.c | 66 +++++++-------- fs/lockd/svcproc.c | 33 +++----- include/linux/lockd/lockd.h | 10 ++- 5 files changed, 135 insertions(+), 203 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 7a239864b8bf..3f8ad7c54efa 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -152,9 +152,8 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req) int nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) { - struct nfs_server *nfssrv = NFS_SERVER(inode); struct nlm_host *host; - struct nlm_rqst reqst, *call = &reqst; + struct nlm_rqst *call; sigset_t oldset; unsigned long flags; int status, proto, vers; @@ -168,23 +167,17 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) /* Retrieve transport protocol from NFS client */ proto = NFS_CLIENT(inode)->cl_xprt->prot; - if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers))) + host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers); + if (host == NULL) return -ENOLCK; - /* Create RPC client handle if not there, and copy soft - * and intr flags from NFS client. */ - if (host->h_rpcclnt == NULL) { - struct rpc_clnt *clnt; + call = nlm_alloc_call(host); + if (call == NULL) + return -ENOMEM; - /* Bind an rpc client to this host handle (does not - * perform a portmapper lookup) */ - if (!(clnt = nlm_bind_host(host))) { - status = -ENOLCK; - goto done; - } - clnt->cl_softrtry = nfssrv->client->cl_softrtry; - clnt->cl_intr = nfssrv->client->cl_intr; - } + nlmclnt_locks_init_private(fl, host); + /* Set up the argument struct */ + nlmclnt_setlockargs(call, fl); /* Keep the old signal mask */ spin_lock_irqsave(¤t->sighand->siglock, flags); @@ -197,26 +190,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) && (current->flags & PF_EXITING)) { sigfillset(¤t->blocked); /* Mask all signals */ recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - call = nlmclnt_alloc_call(); - if (!call) { - status = -ENOMEM; - goto out_restore; - } call->a_flags = RPC_TASK_ASYNC; - } else { - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - memset(call, 0, sizeof(*call)); - locks_init_lock(&call->a_args.lock.fl); - locks_init_lock(&call->a_res.lock.fl); } - call->a_host = host; - - nlmclnt_locks_init_private(fl, host); - - /* Set up the argument struct */ - nlmclnt_setlockargs(call, fl); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { if (fl->fl_type != F_UNLCK) { @@ -229,24 +206,26 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) else status = -EINVAL; - out_restore: + fl->fl_ops->fl_release_private(fl); + fl->fl_ops = NULL; + spin_lock_irqsave(¤t->sighand->siglock, flags); current->blocked = oldset; recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, flags); -done: dprintk("lockd: clnt proc returns %d\n", status); - nlm_release_host(host); return status; } EXPORT_SYMBOL(nlmclnt_proc); /* * Allocate an NLM RPC call struct + * + * Note: the caller must hold a reference to host. In case of failure, + * this reference will be released. 
*/ -struct nlm_rqst * -nlmclnt_alloc_call(void) +struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) { struct nlm_rqst *call; @@ -255,16 +234,30 @@ nlmclnt_alloc_call(void) if (call != NULL) { locks_init_lock(&call->a_args.lock.fl); locks_init_lock(&call->a_res.lock.fl); + call->a_host = host; return call; } if (signalled()) break; - printk("nlmclnt_alloc_call: failed, waiting for memory\n"); + printk("nlm_alloc_call: failed, waiting for memory\n"); schedule_timeout_interruptible(5*HZ); } + nlm_release_host(host); return NULL; } +void nlm_release_call(struct nlm_rqst *call) +{ + nlm_release_host(call->a_host); + nlmclnt_release_lockargs(call); + kfree(call); +} + +static void nlmclnt_rpc_release(void *data) +{ + return nlm_release_call(data); +} + static int nlm_wait_on_grace(wait_queue_head_t *queue) { DEFINE_WAIT(wait); @@ -361,7 +354,7 @@ in_grace_period: /* * Generic NLM call, async version. */ -int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) +int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; @@ -369,48 +362,23 @@ int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops .rpc_argp = &req->a_args, .rpc_resp = &req->a_res, }; - int status; - - dprintk("lockd: call procedure %d on %s (async)\n", - (int)proc, host->h_name); - - /* If we have no RPC client yet, create one. */ - if ((clnt = nlm_bind_host(host)) == NULL) - return -ENOLCK; - msg.rpc_proc = &clnt->cl_procinfo[proc]; - - /* bootstrap and kick off the async RPC call */ - status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req); - - return status; -} - -static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) -{ - struct nlm_host *host = req->a_host; - struct rpc_clnt *clnt; - struct nlm_args *argp = &req->a_args; - struct nlm_res *resp = &req->a_res; - struct rpc_message msg = { - .rpc_argp = argp, - .rpc_resp = resp, - }; - int status; + int status = -ENOLCK; dprintk("lockd: call procedure %d on %s (async)\n", (int)proc, host->h_name); /* If we have no RPC client yet, create one. */ - if ((clnt = nlm_bind_host(host)) == NULL) - return -ENOLCK; + clnt = nlm_bind_host(host); + if (clnt == NULL) + goto out_err; msg.rpc_proc = &clnt->cl_procinfo[proc]; - /* Increment host refcount */ - nlm_get_host(host); /* bootstrap and kick off the async RPC call */ status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req); - if (status < 0) - nlm_release_host(host); + if (status == 0) + return 0; +out_err: + nlm_release_call(req); return status; } @@ -423,26 +391,28 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) int status; status = nlmclnt_call(req, NLMPROC_TEST); - nlmclnt_release_lockargs(req); if (status < 0) - return status; + goto out; - status = req->a_res.status; - if (status == NLM_LCK_GRANTED) { - fl->fl_type = F_UNLCK; - } if (status == NLM_LCK_DENIED) { - /* - * Report the conflicting lock back to the application. - */ - fl->fl_start = req->a_res.lock.fl.fl_start; - fl->fl_end = req->a_res.lock.fl.fl_start; - fl->fl_type = req->a_res.lock.fl.fl_type; - fl->fl_pid = 0; - } else { - return nlm_stat_to_errno(req->a_res.status); + switch (req->a_res.status) { + case NLM_LCK_GRANTED: + fl->fl_type = F_UNLCK; + break; + case NLM_LCK_DENIED: + /* + * Report the conflicting lock back to the application. 
+ */ + fl->fl_start = req->a_res.lock.fl.fl_start; + fl->fl_end = req->a_res.lock.fl.fl_start; + fl->fl_type = req->a_res.lock.fl.fl_type; + fl->fl_pid = 0; + break; + default: + status = nlm_stat_to_errno(req->a_res.status); } - - return 0; +out: + nlm_release_call(req); + return status; } static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) @@ -560,7 +530,7 @@ out_unblock: if (resp->status == NLM_LCK_BLOCKED) nlmclnt_cancel(host, req->a_args.block, fl); out: - nlmclnt_release_lockargs(req); + nlm_release_call(req); return status; } @@ -623,32 +593,24 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) */ do_vfs_lock(fl); - if (req->a_flags & RPC_TASK_ASYNC) { - status = nlmclnt_async_call(req, NLMPROC_UNLOCK, - &nlmclnt_unlock_ops); - /* Hrmf... Do the unlock early since locks_remove_posix() - * really expects us to free the lock synchronously */ - if (status < 0) { - nlmclnt_release_lockargs(req); - kfree(req); - } - return status; - } + if (req->a_flags & RPC_TASK_ASYNC) + return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops); status = nlmclnt_call(req, NLMPROC_UNLOCK); - nlmclnt_release_lockargs(req); if (status < 0) - return status; + goto out; + status = 0; if (resp->status == NLM_LCK_GRANTED) - return 0; + goto out; if (resp->status != NLM_LCK_DENIED_NOLOCKS) printk("lockd: unexpected unlock status: %d\n", resp->status); - /* What to do now? I'm out of my depth... */ - - return -ENOLCK; + status = -ENOLCK; +out: + nlm_release_call(req); + return status; } static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) @@ -670,9 +632,6 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) if (status != NLM_LCK_GRANTED) printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status); die: - nlm_release_host(req->a_host); - nlmclnt_release_lockargs(req); - kfree(req); return; retry_rebind: nlm_rebind_host(req->a_host); @@ -682,6 +641,7 @@ die: static const struct rpc_call_ops nlmclnt_unlock_ops = { .rpc_call_done = nlmclnt_unlock_callback, + .rpc_release = nlmclnt_rpc_release, }; /* @@ -703,20 +663,15 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, flags); - req = nlmclnt_alloc_call(); + req = nlm_alloc_call(nlm_get_host(host)); if (!req) return -ENOMEM; - req->a_host = host; req->a_flags = RPC_TASK_ASYNC; nlmclnt_setlockargs(req, fl); req->a_args.block = block; - status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops); - if (status < 0) { - nlmclnt_release_lockargs(req); - kfree(req); - } + status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops); spin_lock_irqsave(¤t->sighand->siglock, flags); current->blocked = oldset; @@ -757,9 +712,6 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data) } die: - nlm_release_host(req->a_host); - nlmclnt_release_lockargs(req); - kfree(req); return; retry_cancel: @@ -773,6 +725,7 @@ retry_cancel: static const struct rpc_call_ops nlmclnt_cancel_ops = { .rpc_call_done = nlmclnt_cancel_callback, + .rpc_release = nlmclnt_rpc_release, }; /* diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index ac4a700af01a..cb51c7025825 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -480,43 +480,37 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) struct nlm_host *host; struct nlm_rqst *call; - if (!(call = nlmclnt_alloc_call())) + host = nlmsvc_lookup_host(rqstp); + if (host == NULL) return 
rpc_system_err; - host = nlmsvc_lookup_host(rqstp); - if (!host) { - kfree(call); + call = nlm_alloc_call(host); + if (call == NULL) return rpc_system_err; - } + call->a_flags = RPC_TASK_ASYNC; - call->a_host = host; memcpy(&call->a_args, resp, sizeof(*resp)); - if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0) - goto error; - + if (nlm_async_call(call, proc, &nlm4svc_callback_ops) < 0) + return rpc_system_err; return rpc_success; - error: - kfree(call); - nlm_release_host(host); - return rpc_system_err; } static void nlm4svc_callback_exit(struct rpc_task *task, void *data) { - struct nlm_rqst *call = data; + dprintk("lockd: %4d callback returned %d\n", task->tk_pid, + -task->tk_status); +} - if (task->tk_status < 0) { - dprintk("lockd: %4d callback failed (errno = %d)\n", - task->tk_pid, -task->tk_status); - } - nlm_release_host(call->a_host); - kfree(call); +static void nlm4svc_callback_release(void *data) +{ + nlm_release_call(data); } static const struct rpc_call_ops nlm4svc_callback_ops = { .rpc_call_done = nlm4svc_callback_exit, + .rpc_release = nlm4svc_callback_release, }; /* diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index a95d260b7137..185bf7ea1c0c 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -117,12 +117,12 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove) (long long)lock->fl.fl_start, (long long)lock->fl.fl_end, lock->fl.fl_type); for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) { - fl = &block->b_call.a_args.lock.fl; + fl = &block->b_call->a_args.lock.fl; dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n", block->b_file, fl->fl_pid, (long long)fl->fl_start, (long long)fl->fl_end, fl->fl_type, - nlmdbg_cookie2a(&block->b_call.a_args.cookie)); + nlmdbg_cookie2a(&block->b_call->a_args.cookie)); if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) { if (remove) { *head = block->b_next; @@ -156,7 +156,7 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) for (block = nlm_blocked; block; block = block->b_next) { dprintk("cookie: head of blocked queue %p, block %p\n", nlm_blocked, block); - if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie) + if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie) && nlm_cmp_addr(sin, &block->b_host->h_addr)) break; } @@ -182,28 +182,30 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, { struct nlm_block *block; struct nlm_host *host; - struct nlm_rqst *call; + struct nlm_rqst *call = NULL; /* Create host handle for callback */ host = nlmsvc_lookup_host(rqstp); if (host == NULL) return NULL; + call = nlm_alloc_call(host); + if (call == NULL) + return NULL; + /* Allocate memory for block, and initialize arguments */ - if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL))) + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (block == NULL) goto failed; - memset(block, 0, sizeof(*block)); - locks_init_lock(&block->b_call.a_args.lock.fl); - locks_init_lock(&block->b_call.a_res.lock.fl); kref_init(&block->b_count); - if (!nlmsvc_setgrantargs(&block->b_call, lock)) + if (!nlmsvc_setgrantargs(call, lock)) goto failed_free; /* Set notifier function for VFS, and init args */ - block->b_call.a_args.lock.fl.fl_flags |= FL_SLEEP; - block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; - block->b_call.a_args.cookie = *cookie; /* see above */ + call->a_args.lock.fl.fl_flags |= FL_SLEEP; + call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; + call->a_args.cookie = *cookie; /* 
see above */ dprintk("lockd: created block %p...\n", block); @@ -217,16 +219,16 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, file->f_blocks = block; /* Set up RPC arguments for callback */ - call = &block->b_call; - call->a_host = host; + block->b_call = call; call->a_flags = RPC_TASK_ASYNC; + call->a_block = block; return block; failed_free: kfree(block); failed: - nlm_release_host(host); + nlm_release_call(call); return NULL; } @@ -242,7 +244,7 @@ static int nlmsvc_unlink_block(struct nlm_block *block) dprintk("lockd: unlinking block %p...\n", block); /* Remove block from list */ - status = posix_unblock_lock(block->b_file->f_file, &block->b_call.a_args.lock.fl); + status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl); nlmsvc_remove_block(block); return status; } @@ -263,9 +265,8 @@ static void nlmsvc_free_block(struct kref *kref) } } - if (block->b_host) - nlm_release_host(block->b_host); - nlmsvc_freegrantargs(&block->b_call); + nlmsvc_freegrantargs(block->b_call); + nlm_release_call(block->b_call); kfree(block); } @@ -316,10 +317,8 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) if (lock->oh.len > NLMCLNT_OHSIZE) { void *data = kmalloc(lock->oh.len, GFP_KERNEL); - if (!data) { - nlmsvc_freegrantargs(call); + if (!data) return 0; - } call->a_args.lock.oh.data = (u8 *) data; } @@ -329,17 +328,8 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) static void nlmsvc_freegrantargs(struct nlm_rqst *call) { - struct file_lock *fl = &call->a_args.lock.fl; - /* - * Check whether we allocated memory for the owner. - */ - if (call->a_args.lock.oh.data != (u8 *) call->a_owner) { + if (call->a_args.lock.oh.data != call->a_owner) kfree(call->a_args.lock.oh.data); - } - if (fl->fl_ops && fl->fl_ops->fl_release_private) - fl->fl_ops->fl_release_private(fl); - if (fl->fl_lmops && fl->fl_lmops->fl_release_private) - fl->fl_lmops->fl_release_private(fl); } /* @@ -371,9 +361,9 @@ again: block = nlmsvc_lookup_block(file, lock, 0); if (block == NULL) { if (newblock != NULL) - lock = &newblock->b_call.a_args.lock; + lock = &newblock->b_call->a_args.lock; } else - lock = &block->b_call.a_args.lock; + lock = &block->b_call->a_args.lock; error = posix_lock_file(file->f_file, &lock->fl); lock->fl.fl_flags &= ~FL_SLEEP; @@ -523,7 +513,7 @@ nlmsvc_notify_blocked(struct file_lock *fl) dprintk("lockd: VFS unblock notification for block %p\n", fl); for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) { - if (nlm_compare_locks(&block->b_call.a_args.lock.fl, fl)) { + if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { nlmsvc_insert_block(block, 0); svc_wake_up(block->b_daemon); return; @@ -558,7 +548,7 @@ static void nlmsvc_grant_blocked(struct nlm_block *block) { struct nlm_file *file = block->b_file; - struct nlm_lock *lock = &block->b_call.a_args.lock; + struct nlm_lock *lock = &block->b_call->a_args.lock; int error; dprintk("lockd: grant blocked lock %p\n", block); @@ -606,7 +596,7 @@ callback: /* Call the client */ kref_get(&block->b_count); - if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG, + if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops) < 0) nlmsvc_release_block(block); out_unlock: @@ -624,7 +614,7 @@ out_unlock: static void nlmsvc_grant_callback(struct rpc_task *task, void *data) { struct nlm_rqst *call = data; - struct nlm_block *block = container_of(call, struct nlm_block, b_call); + struct nlm_block *block = call->a_block; unsigned long 
timeout; dprintk("lockd: GRANT_MSG RPC callback\n"); diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 4986fbe44540..956d1d71e2af 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -505,43 +505,36 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) struct nlm_host *host; struct nlm_rqst *call; - if (!(call = nlmclnt_alloc_call())) + host = nlmsvc_lookup_host(rqstp); + if (host == NULL) return rpc_system_err; - host = nlmsvc_lookup_host(rqstp); - if (!host) { - kfree(call); + call = nlm_alloc_call(host); + if (call == NULL) return rpc_system_err; - } call->a_flags = RPC_TASK_ASYNC; - call->a_host = host; memcpy(&call->a_args, resp, sizeof(*resp)); - if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0) - goto error; - + if (nlm_async_call(call, proc, &nlmsvc_callback_ops) < 0) + return rpc_system_err; return rpc_success; - error: - nlm_release_host(host); - kfree(call); - return rpc_system_err; } static void nlmsvc_callback_exit(struct rpc_task *task, void *data) { - struct nlm_rqst *call = data; + dprintk("lockd: %4d callback returned %d\n", task->tk_pid, + -task->tk_status); +} - if (task->tk_status < 0) { - dprintk("lockd: %4d callback failed (errno = %d)\n", - task->tk_pid, -task->tk_status); - } - nlm_release_host(call->a_host); - kfree(call); +static void nlmsvc_callback_release(void *data) +{ + nlm_release_call(data); } static const struct rpc_call_ops nlmsvc_callback_ops = { .rpc_call_done = nlmsvc_callback_exit, + .rpc_release = nlmsvc_callback_release, }; /* diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index cb9933d04091..e7ba8110d579 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -86,8 +86,9 @@ struct nlm_rqst { struct nlm_host * a_host; /* host handle */ struct nlm_args a_args; /* arguments */ struct nlm_res a_res; /* result */ + struct nlm_block * a_block; unsigned int a_retries; /* Retry count */ - char a_owner[NLMCLNT_OHSIZE]; + u8 a_owner[NLMCLNT_OHSIZE]; }; /* @@ -115,7 +116,7 @@ struct nlm_block { struct kref b_count; /* Reference count */ struct nlm_block * b_next; /* linked list (all blocks) */ struct nlm_block * b_fnext; /* linked list (per file) */ - struct nlm_rqst b_call; /* RPC args & callback info */ + struct nlm_rqst * b_call; /* RPC args & callback info */ struct svc_serv * b_daemon; /* NLM service */ struct nlm_host * b_host; /* host handle for RPC clnt */ unsigned long b_when; /* next re-xmit */ @@ -147,7 +148,9 @@ extern unsigned long nlmsvc_timeout; /* * Lockd client functions */ -struct nlm_rqst * nlmclnt_alloc_call(void); +struct nlm_rqst * nlm_alloc_call(struct nlm_host *host); +void nlm_release_call(struct nlm_rqst *); +int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); void nlmclnt_finish_block(struct nlm_wait *block); int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); @@ -172,7 +175,6 @@ extern struct nlm_host *nlm_find_client(void); /* * Server-side lock handling */ -int nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_lock *, int, struct nlm_cookie *); u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); -- cgit v1.2.3 From d47166244860eb5dfdb12ee4703968beef8a0db2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:45 -0500 Subject: lockd: Add helper for *_RES callbacks Signed-off-by: Trond Myklebust 
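What the helper amounts to, as a condensed and hedged sketch (sketch_ names are illustrative; the real nlm4svc_callback()/nlmsvc_callback() are in the diff below): every NLMPROC_*_MSG procedure now runs its synchronous sibling and reports the result with an asynchronous NLMPROC_*_RES call, instead of open-coding that sequence with a struct nlm_res on the stack.

#include <linux/sunrpc/svc.h>
#include <linux/lockd/lockd.h>

typedef int (*sketch_nlm_proc_t)(struct svc_rqst *, struct nlm_args *,
                                 struct nlm_res *);

static void sketch_res_release(void *data)
{
        nlm_release_call(data);
}

static const struct rpc_call_ops sketch_res_ops = {
        .rpc_release    = sketch_res_release,
};

static int sketch_msg_callback(struct svc_rqst *rqstp, u32 res_proc,
                               struct nlm_args *argp, sketch_nlm_proc_t func)
{
        struct nlm_host *host;
        struct nlm_rqst *call;
        int stat;

        host = nlmsvc_lookup_host(rqstp);
        if (host == NULL)
                return rpc_system_err;

        call = nlm_alloc_call(host);            /* consumes the host reference */
        if (call == NULL)
                return rpc_system_err;

        stat = func(rqstp, argp, &call->a_res); /* the synchronous procedure */
        if (stat != 0) {
                nlm_release_call(call);
                return stat;
        }

        call->a_flags = RPC_TASK_ASYNC;
        if (nlm_async_reply(call, res_proc, &sketch_res_ops) < 0)
                return rpc_system_err;
        return rpc_success;
}

This also drops the old memcpy() of a struct nlm_res into call->a_args: the procedure now writes its result straight into call->a_res, which is what the *_RES reply encodes.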
--- fs/lockd/clntproc.c | 27 +++++--- fs/lockd/svc4proc.c | 150 ++++++++++++++++---------------------------- fs/lockd/svcproc.c | 143 ++++++++++++++++------------------------- include/linux/lockd/lockd.h | 1 + 4 files changed, 129 insertions(+), 192 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 3f8ad7c54efa..f96e38155b5c 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -354,14 +354,10 @@ in_grace_period: /* * Generic NLM call, async version. */ -int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) +static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; - struct rpc_message msg = { - .rpc_argp = &req->a_args, - .rpc_resp = &req->a_res, - }; int status = -ENOLCK; dprintk("lockd: call procedure %d on %s (async)\n", @@ -371,10 +367,10 @@ int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk clnt = nlm_bind_host(host); if (clnt == NULL) goto out_err; - msg.rpc_proc = &clnt->cl_procinfo[proc]; + msg->rpc_proc = &clnt->cl_procinfo[proc]; /* bootstrap and kick off the async RPC call */ - status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req); + status = rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req); if (status == 0) return 0; out_err: @@ -382,6 +378,23 @@ out_err: return status; } +int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) +{ + struct rpc_message msg = { + .rpc_argp = &req->a_args, + .rpc_resp = &req->a_res, + }; + return __nlm_async_call(req, proc, &msg, tk_ops); +} + +int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) +{ + struct rpc_message msg = { + .rpc_argp = &req->a_res, + }; + return __nlm_async_call(req, proc, &msg, tk_ops); +} + /* * TEST for the presence of a conflicting lock */ diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index cb51c7025825..a2dd9ccb9b32 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -21,10 +21,6 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT -static u32 nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *); - -static const struct rpc_call_ops nlm4svc_callback_ops; - /* * Obtain client and file from arguments */ @@ -233,84 +229,90 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, return rpc_success; } +/* + * This is the generic lockd callback for async RPC calls + */ +static void nlm4svc_callback_exit(struct rpc_task *task, void *data) +{ + dprintk("lockd: %4d callback returned %d\n", task->tk_pid, + -task->tk_status); +} + +static void nlm4svc_callback_release(void *data) +{ + nlm_release_call(data); +} + +static const struct rpc_call_ops nlm4svc_callback_ops = { + .rpc_call_done = nlm4svc_callback_exit, + .rpc_release = nlm4svc_callback_release, +}; + /* * `Async' versions of the above service routines. They aren't really, * because we send the callback before the reply proper. I hope this * doesn't break any clients. 
*/ -static int -nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, - void *resp) +static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp, + int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *)) { - struct nlm_res res; - u32 stat; + struct nlm_host *host; + struct nlm_rqst *call; + int stat; - dprintk("lockd: TEST_MSG called\n"); - memset(&res, 0, sizeof(res)); + host = nlmsvc_lookup_host(rqstp); + if (host == NULL) + return rpc_system_err; + + call = nlm_alloc_call(host); + if (call == NULL) + return rpc_system_err; + + stat = func(rqstp, argp, &call->a_res); + if (stat != 0) { + nlm_release_call(call); + return stat; + } - if ((stat = nlm4svc_proc_test(rqstp, argp, &res)) == 0) - stat = nlm4svc_callback(rqstp, NLMPROC_TEST_RES, &res); - return stat; + call->a_flags = RPC_TASK_ASYNC; + if (nlm_async_reply(call, proc, &nlm4svc_callback_ops) < 0) + return rpc_system_err; + return rpc_success; } -static int -nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { - struct nlm_res res; - u32 stat; + dprintk("lockd: TEST_MSG called\n"); + return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test); +} +static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, + void *resp) +{ dprintk("lockd: LOCK_MSG called\n"); - memset(&res, 0, sizeof(res)); - - if ((stat = nlm4svc_proc_lock(rqstp, argp, &res)) == 0) - stat = nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, &res); - return stat; + return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock); } -static int -nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { - struct nlm_res res; - u32 stat; - dprintk("lockd: CANCEL_MSG called\n"); - memset(&res, 0, sizeof(res)); - - if ((stat = nlm4svc_proc_cancel(rqstp, argp, &res)) == 0) - stat = nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, &res); - return stat; + return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel); } -static int -nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { - struct nlm_res res; - u32 stat; - dprintk("lockd: UNLOCK_MSG called\n"); - memset(&res, 0, sizeof(res)); - - if ((stat = nlm4svc_proc_unlock(rqstp, argp, &res)) == 0) - stat = nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, &res); - return stat; + return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock); } -static int -nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { - struct nlm_res res; - u32 stat; - dprintk("lockd: GRANTED_MSG called\n"); - memset(&res, 0, sizeof(res)); - - if ((stat = nlm4svc_proc_granted(rqstp, argp, &res)) == 0) - stat = nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, &res); - return stat; + return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted); } /* @@ -471,48 +473,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, } -/* - * This is the generic lockd callback for async RPC calls - */ -static u32 -nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) -{ - struct nlm_host *host; - struct nlm_rqst *call; - - host = nlmsvc_lookup_host(rqstp); - if 
(host == NULL) - return rpc_system_err; - - call = nlm_alloc_call(host); - if (call == NULL) - return rpc_system_err; - - - call->a_flags = RPC_TASK_ASYNC; - memcpy(&call->a_args, resp, sizeof(*resp)); - - if (nlm_async_call(call, proc, &nlm4svc_callback_ops) < 0) - return rpc_system_err; - return rpc_success; -} - -static void nlm4svc_callback_exit(struct rpc_task *task, void *data) -{ - dprintk("lockd: %4d callback returned %d\n", task->tk_pid, - -task->tk_status); -} - -static void nlm4svc_callback_release(void *data) -{ - nlm_release_call(data); -} - -static const struct rpc_call_ops nlm4svc_callback_ops = { - .rpc_call_done = nlm4svc_callback_exit, - .rpc_release = nlm4svc_callback_release, -}; - /* * NLM Server procedures. */ diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 956d1d71e2af..d210cf304e92 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -22,10 +22,6 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT -static u32 nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *); - -static const struct rpc_call_ops nlmsvc_callback_ops; - #ifdef CONFIG_LOCKD_V4 static u32 cast_to_nlm(u32 status, u32 vers) @@ -261,84 +257,92 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, return rpc_success; } +/* + * This is the generic lockd callback for async RPC calls + */ +static void nlmsvc_callback_exit(struct rpc_task *task, void *data) +{ + dprintk("lockd: %4d callback returned %d\n", task->tk_pid, + -task->tk_status); +} + +static void nlmsvc_callback_release(void *data) +{ + nlm_release_call(data); +} + +static const struct rpc_call_ops nlmsvc_callback_ops = { + .rpc_call_done = nlmsvc_callback_exit, + .rpc_release = nlmsvc_callback_release, +}; + /* * `Async' versions of the above service routines. They aren't really, * because we send the callback before the reply proper. I hope this * doesn't break any clients. 
*/ -static int -nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, - void *resp) +static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp, + int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *)) { - struct nlm_res res; - u32 stat; + struct nlm_host *host; + struct nlm_rqst *call; + int stat; - dprintk("lockd: TEST_MSG called\n"); - memset(&res, 0, sizeof(res)); + host = nlmsvc_lookup_host(rqstp); + if (host == NULL) + return rpc_system_err; + + call = nlm_alloc_call(host); + if (call == NULL) + return rpc_system_err; + + stat = func(rqstp, argp, &call->a_res); + if (stat != 0) { + nlm_release_call(call); + return stat; + } - if ((stat = nlmsvc_proc_test(rqstp, argp, &res)) == 0) - stat = nlmsvc_callback(rqstp, NLMPROC_TEST_RES, &res); - return stat; + call->a_flags = RPC_TASK_ASYNC; + if (nlm_async_reply(call, proc, &nlmsvc_callback_ops) < 0) + return rpc_system_err; + return rpc_success; } -static int -nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { - struct nlm_res res; - u32 stat; + dprintk("lockd: TEST_MSG called\n"); + return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test); +} +static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, + void *resp) +{ dprintk("lockd: LOCK_MSG called\n"); - memset(&res, 0, sizeof(res)); - - if ((stat = nlmsvc_proc_lock(rqstp, argp, &res)) == 0) - stat = nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, &res); - return stat; + return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock); } -static int -nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { - struct nlm_res res; - u32 stat; - dprintk("lockd: CANCEL_MSG called\n"); - memset(&res, 0, sizeof(res)); - - if ((stat = nlmsvc_proc_cancel(rqstp, argp, &res)) == 0) - stat = nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, &res); - return stat; + return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel); } static int nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { - struct nlm_res res; - u32 stat; - dprintk("lockd: UNLOCK_MSG called\n"); - memset(&res, 0, sizeof(res)); - - if ((stat = nlmsvc_proc_unlock(rqstp, argp, &res)) == 0) - stat = nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, &res); - return stat; + return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock); } static int nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { - struct nlm_res res; - u32 stat; - dprintk("lockd: GRANTED_MSG called\n"); - memset(&res, 0, sizeof(res)); - - if ((stat = nlmsvc_proc_granted(rqstp, argp, &res)) == 0) - stat = nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, &res); - return stat; + return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted); } /* @@ -496,47 +500,6 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, return rpc_success; } -/* - * This is the generic lockd callback for async RPC calls - */ -static u32 -nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) -{ - struct nlm_host *host; - struct nlm_rqst *call; - - host = nlmsvc_lookup_host(rqstp); - if (host == NULL) - return rpc_system_err; - - call = nlm_alloc_call(host); - if (call == NULL) - return rpc_system_err; - - call->a_flags = RPC_TASK_ASYNC; - memcpy(&call->a_args, 
resp, sizeof(*resp)); - - if (nlm_async_call(call, proc, &nlmsvc_callback_ops) < 0) - return rpc_system_err; - return rpc_success; -} - -static void nlmsvc_callback_exit(struct rpc_task *task, void *data) -{ - dprintk("lockd: %4d callback returned %d\n", task->tk_pid, - -task->tk_status); -} - -static void nlmsvc_callback_release(void *data) -{ - nlm_release_call(data); -} - -static const struct rpc_call_ops nlmsvc_callback_ops = { - .rpc_call_done = nlmsvc_callback_exit, - .rpc_release = nlmsvc_callback_release, -}; - /* * NLM Server procedures. */ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index e7ba8110d579..a04137d0c5de 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -151,6 +151,7 @@ extern unsigned long nlmsvc_timeout; struct nlm_rqst * nlm_alloc_call(struct nlm_host *host); void nlm_release_call(struct nlm_rqst *); int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); +int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *); struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); void nlmclnt_finish_block(struct nlm_wait *block); int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); -- cgit v1.2.3 From 6041b79192bdf0e7ab18ea6859effa5d8311391b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:45 -0500 Subject: lockd: Fix a typo in nlmsvc_grant_release() Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 185bf7ea1c0c..c16c94f5a856 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -638,7 +638,9 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) void nlmsvc_grant_release(void *data) { - nlmsvc_release_block(data); + struct nlm_rqst *call = data; + + nlmsvc_release_block(call->a_block); } static const struct rpc_call_ops nlmsvc_grant_ops = { -- cgit v1.2.3 From f25bc34967d76610d17bc70769d7c220976eeeb1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:46 -0500 Subject: NFSv4: Ensure nfs_callback_down() calls svc_destroy() Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index fcd97406a778..2c042f8d70b5 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -73,6 +73,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) svc_process(serv, rqstp); } + svc_exit_thread(rqstp); nfs_callback_info.pid = 0; complete(&nfs_callback_info.stopped); unlock_kernel(); -- cgit v1.2.3 From 3e4f6290ca4df7464ee066123f2bca4298c2dab4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:46 -0500 Subject: NFSv4: Send the delegation stateid for SETATTR calls In the case where we hold a delegation stateid, use that in for inside SETATTR calls. 
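The selection order the patch below implements is easiest to see in isolation: prefer a held delegation stateid, fall back to the open stateid, and use the zero stateid only when neither exists. A minimal userspace sketch of that ordering, using stand-in types rather than the real NFS structures:

/* Illustration only, with stand-in types; the real logic lives in _nfs4_do_setattr(). */
#include <stdio.h>
#include <string.h>

struct stateid { unsigned char data[16]; };

struct inode_model {
        int has_delegation;              /* stands in for "a delegation is held" */
        struct stateid deleg_stateid;
        int has_open_state;              /* stands in for "an open stateid exists" */
        struct stateid open_stateid;
};

/* Pick the stateid to send with SETATTR, in the order the patch below uses. */
static void choose_setattr_stateid(const struct inode_model *ino, struct stateid *dst)
{
        if (ino->has_delegation)
                memcpy(dst, &ino->deleg_stateid, sizeof(*dst));  /* delegation wins */
        else if (ino->has_open_state)
                memcpy(dst, &ino->open_stateid, sizeof(*dst));   /* open stateid */
        else
                memset(dst, 0, sizeof(*dst));                    /* zero stateid */
}

int main(void)
{
        struct inode_model ino = { .has_delegation = 1 };
        struct stateid chosen;

        memset(ino.deleg_stateid.data, 0xab, sizeof(ino.deleg_stateid.data));
        choose_setattr_stateid(&ino, &chosen);
        printf("first byte of chosen stateid: 0x%02x\n", chosen.data[0]);
        return 0;
}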
Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 19 +++++++++++++++++++ fs/nfs/delegation.h | 1 + fs/nfs/nfs4proc.c | 27 +++++++++++++-------------- 3 files changed, 33 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index c6f07c1c71e6..d3be923d4e43 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) nfs_free_delegation(delegation); } } + +int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) +{ + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + int res = 0; + + if (nfsi->delegation_state == 0) + return 0; + spin_lock(&clp->cl_lock); + delegation = nfsi->delegation; + if (delegation != NULL) { + memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); + res = 1; + } + spin_unlock(&clp->cl_lock); + return res; +} diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 7a0b2bfce771..3858694652fa 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); +int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); static inline int nfs_have_delegation(struct inode *inode, int flags) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bad1eae5608a..62aed077fc2a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1018,12 +1018,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, return res; } -static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, - struct nfs_fh *fhandle, struct iattr *sattr, - struct nfs4_state *state) +static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, + struct iattr *sattr, struct nfs4_state *state) { + struct nfs_server *server = NFS_SERVER(inode); struct nfs_setattrargs arg = { - .fh = fhandle, + .fh = NFS_FH(inode), .iap = sattr, .server = server, .bitmask = server->attr_bitmask, @@ -1042,7 +1042,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, nfs_fattr_init(fattr); - if (state != NULL) { + if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { + /* Use that stateid */ + } else if (state != NULL) { msg.rpc_cred = state->owner->so_cred; nfs4_copy_stateid(&arg.stateid, state, current->files); } else @@ -1054,16 +1056,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, return status; } -static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, - struct nfs_fh *fhandle, struct iattr *sattr, - struct nfs4_state *state) +static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, + struct iattr *sattr, struct nfs4_state *state) { + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs4_do_setattr(server, fattr, fhandle, sattr, - state), + _nfs4_do_setattr(inode, fattr, sattr, state), &exception); } while (exception.retry); return err; @@ -1504,8 +1505,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, if (ctx != NULL) state = ctx->state; - status = 
nfs4_do_setattr(NFS_SERVER(inode), fattr, - NFS_FH(inode), sattr, state); + status = nfs4_do_setattr(inode, fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); if (ctx != NULL) @@ -1824,8 +1824,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, d_instantiate(dentry, igrab(state->inode)); if (flags & O_EXCL) { struct nfs_fattr fattr; - status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, - NFS_FH(state->inode), sattr, state); + status = nfs4_do_setattr(state->inode, &fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(state->inode, sattr); } -- cgit v1.2.3 From 51581f3bf922512880f52a7777923fd6dcfc792b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:47 -0500 Subject: NFSv4: SETCLIENTID_CONFIRM should handle NFS4ERR_DELAY/NFS4ERR_RESOURCE Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 21 +++++++++++++++++++-- fs/nfs/nfs4state.c | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 62aed077fc2a..31000326aba4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2849,8 +2849,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p return status; } -int -nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) { struct nfs_fsinfo fsinfo; struct rpc_message msg = { @@ -2874,6 +2873,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) return status; } +int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +{ + long timeout; + int err; + do { + err = _nfs4_proc_setclientid_confirm(clp, cred); + switch (err) { + case 0: + return err; + case -NFS4ERR_RESOURCE: + /* The IBM lawyers misread another document! */ + case -NFS4ERR_DELAY: + err = nfs4_delay(clp->cl_rpcclient, &timeout); + } + } while (err == 0); + return err; +} + struct nfs4_delegreturndata { struct nfs4_delegreturnargs args; struct nfs4_delegreturnres res; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index afad0255e7db..96e5b82c153b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -977,6 +977,7 @@ out: out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", NIPQUAD(clp->cl_addr.s_addr), -status); + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } -- cgit v1.2.3 From d9f6eb75d4900782a095b98470decfe98971f920 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:47 -0500 Subject: lockd: blocks should hold a reference to the nlm_file Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 81 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index c16c94f5a856..ce754efe2841 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -103,11 +103,10 @@ nlmsvc_remove_block(struct nlm_block *block) } /* - * Find a block for a given lock and optionally remove it from - * the list. 
+ * Find a block for a given lock */ static struct nlm_block * -nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove) +nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock) { struct nlm_block **head, *block; struct file_lock *fl; @@ -124,10 +123,6 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove) (long long)fl->fl_end, fl->fl_type, nlmdbg_cookie2a(&block->b_call->a_args.cookie)); if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) { - if (remove) { - *head = block->b_next; - block->b_queued = 0; - } kref_get(&block->b_count); return block; } @@ -213,6 +208,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, block->b_daemon = rqstp->rq_server; block->b_host = host; block->b_file = file; + file->f_count++; /* Add to file's list of blocks */ block->b_fnext = file->f_blocks; @@ -257,6 +253,7 @@ static void nlmsvc_free_block(struct kref *kref) dprintk("lockd: freeing block %p...\n", block); + down(&file->f_sema); /* Remove block from file's list of blocks */ for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) { if (*bp == block) { @@ -264,9 +261,11 @@ static void nlmsvc_free_block(struct kref *kref) break; } } + up(&file->f_sema); nlmsvc_freegrantargs(block->b_call); nlm_release_call(block->b_call); + nlm_release_file(block->b_file); kfree(block); } @@ -276,6 +275,36 @@ static void nlmsvc_release_block(struct nlm_block *block) kref_put(&block->b_count, nlmsvc_free_block); } +static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file) +{ + struct nlm_block *block; + + down(&file->f_sema); + for (block = file->f_blocks; block != NULL; block = block->b_fnext) + block->b_host->h_inuse = 1; + up(&file->f_sema); +} + +static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file) +{ + struct nlm_block *block; + +restart: + down(&file->f_sema); + for (block = file->f_blocks; block != NULL; block = block->b_fnext) { + if (host != NULL && host != block->b_host) + continue; + if (!block->b_queued) + continue; + kref_get(&block->b_count); + up(&file->f_sema); + nlmsvc_unlink_block(block); + nlmsvc_release_block(block); + goto restart; + } + up(&file->f_sema); +} + /* * Loop over all blocks and perform the action specified. * (NLM_ACT_CHECK handled by nlmsvc_inspect_file). @@ -283,20 +312,10 @@ static void nlmsvc_release_block(struct nlm_block *block) int nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) { - struct nlm_block *block, *next; - /* XXX: Will everything get cleaned up if we don't unlock here? 
*/ - - down(&file->f_sema); - for (block = file->f_blocks; block; block = next) { - next = block->b_fnext; - if (action == NLM_ACT_MARK) - block->b_host->h_inuse = 1; - else if (action == NLM_ACT_UNLOCK) { - if (host == NULL || host == block->b_host) - nlmsvc_unlink_block(block); - } - } - up(&file->f_sema); + if (action == NLM_ACT_MARK) + nlmsvc_act_mark(host, file); + else + nlmsvc_act_unlock(host, file); return 0; } @@ -358,7 +377,7 @@ again: /* Lock file against concurrent access */ down(&file->f_sema); /* Get existing block (in case client is busy-waiting) */ - block = nlmsvc_lookup_block(file, lock, 0); + block = nlmsvc_lookup_block(file, lock); if (block == NULL) { if (newblock != NULL) lock = &newblock->b_call->a_args.lock; @@ -491,11 +510,12 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) (long long)lock->fl.fl_end); down(&file->f_sema); - if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) { + block = nlmsvc_lookup_block(file, lock); + up(&file->f_sema); + if (block != NULL) { status = nlmsvc_unlink_block(block); nlmsvc_release_block(block); } - up(&file->f_sema); return status ? nlm_lck_denied : nlm_granted; } @@ -553,9 +573,6 @@ nlmsvc_grant_blocked(struct nlm_block *block) dprintk("lockd: grant blocked lock %p\n", block); - /* First thing is lock the file */ - down(&file->f_sema); - /* Unlink block request from list */ nlmsvc_unlink_block(block); @@ -578,12 +595,12 @@ nlmsvc_grant_blocked(struct nlm_block *block) case -EAGAIN: dprintk("lockd: lock still blocked\n"); nlmsvc_insert_block(block, NLM_NEVER); - goto out_unlock; + return; default: printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", -error, __FUNCTION__); nlmsvc_insert_block(block, 10 * HZ); - goto out_unlock; + return; } callback: @@ -599,8 +616,6 @@ callback: if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops) < 0) nlmsvc_release_block(block); -out_unlock: - up(&file->f_sema); } /* @@ -665,8 +680,6 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status return; file = block->b_file; - file->f_count++; - down(&file->f_sema); if (block) { if (status == NLM_LCK_DENIED_GRACE_PERIOD) { /* Try again in a couple of seconds */ @@ -677,8 +690,6 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status nlmsvc_unlink_block(block); } } - up(&file->f_sema); - nlm_release_file(file); nlmsvc_release_block(block); } -- cgit v1.2.3 From 01d0ae8beaee75d954900109619b700fe68707d9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:48 -0500 Subject: NFSv4: Fix an oops in nfs4_fill_super The mount statistics patches introduced a call to nfs_free_iostats that is not only redundant, but actually causes an oops. Also fix a memory leak due to the lack of a call to nfs_free_iostats on unmount. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 13 ++++++------- fs/nfs/iostat.h | 3 ++- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b9f35ca266c2..17654bffc3c6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -281,6 +281,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) sb->s_magic = NFS_SUPER_MAGIC; + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + return -ENOMEM; + root_inode = nfs_get_root(sb, &server->fh, &fsinfo); /* Did getting the root inode fail? 
*/ if (IS_ERR(root_inode)) { @@ -294,12 +298,6 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) } sb->s_root->d_op = server->rpc_ops->dentry_ops; - server->io_stats = nfs_alloc_iostats(); - if (!server->io_stats) { - no_root_error = -ENOMEM; - goto out_no_root; - } - /* mount time stamp, in seconds */ server->mount_time = jiffies; @@ -1822,6 +1820,7 @@ static void nfs_kill_super(struct super_block *s) rpciod_down(); /* release rpciod */ + nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); } @@ -2122,7 +2121,6 @@ out_err: out_free: kfree(server->mnt_path); kfree(server->hostname); - nfs_free_iostats(server->io_stats); kfree(server); return s; } @@ -2143,6 +2141,7 @@ static void nfs4_kill_super(struct super_block *sb) rpciod_down(); + nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); } diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 7a7495153317..6350ecbde589 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -156,7 +156,8 @@ static inline struct nfs_iostats *nfs_alloc_iostats(void) static inline void nfs_free_iostats(struct nfs_iostats *stats) { - free_percpu(stats); + if (stats != NULL) + free_percpu(stats); } #endif -- cgit v1.2.3 From 03f28e3a2059fc466761d872122f30acb7be61ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:48 -0500 Subject: NFS: Make nfs_fhget() return appropriate error values Currently it returns NULL, which usually gets interpreted as ENOMEM. In fact it can mean a host of issues. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 10 +++++----- fs/nfs/inode.c | 15 +++++++-------- fs/nfs/nfs4proc.c | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 609185a15c99..06c48b385c94 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -901,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } - res = ERR_PTR(-EACCES); inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); - if (!inode) + res = (struct dentry *)inode; + if (IS_ERR(res)) goto out_unlock; no_entry: res = d_add_unique(dentry, inode); @@ -1096,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) return NULL; dentry->d_op = NFS_PROTO(dir)->dentry_ops; inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); - if (!inode) { + if (IS_ERR(inode)) { dput(dentry); return NULL; } @@ -1134,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, if (error < 0) goto out_err; } - error = -ENOMEM; inode = nfs_fhget(dentry->d_sb, fhandle, fattr); - if (inode == NULL) + error = PTR_ERR(inode); + if (IS_ERR(inode)) goto out_err; d_instantiate(dentry, inode); return 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 17654bffc3c6..a0cda53461b3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -241,7 +241,6 @@ static struct inode * nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) { struct nfs_server *server = NFS_SB(sb); - struct inode *rooti; int error; error = server->rpc_ops->getroot(server, rootfh, fsinfo); @@ -250,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f return ERR_PTR(error); } - rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); - if (!rooti) - return ERR_PTR(-ENOMEM); - return rooti; + return nfs_fhget(sb, rootfh, fsinfo->fattr); } /* @@ -853,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) .fh = 
fh, .fattr = fattr }; - struct inode *inode = NULL; + struct inode *inode = ERR_PTR(-ENOENT); unsigned long hash; if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -866,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) hash = nfs_fattr_to_ino_t(fattr); - if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc))) + inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc); + if (inode == NULL) { + inode = ERR_PTR(-ENOMEM); goto out_no_inode; + } if (inode->i_state & I_NEW) { struct nfs_inode *nfsi = NFS_I(inode); @@ -936,7 +935,7 @@ out: return inode; out_no_inode: - printk("nfs_fhget: iget failed\n"); + dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); goto out; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 31000326aba4..02c7d8c04c58 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -336,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data if (!(data->f_attr.valid & NFS_ATTR_FATTR)) goto out; inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); - if (inode == NULL) + if (IS_ERR(inode)) goto out; state = nfs4_get_open_state(inode, data->owner); if (state == NULL) -- cgit v1.2.3 From a9a801787a761616589a6526d7a29c13f4deb3d8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:48 -0500 Subject: NFS, NLM: Allow blocking locks to respect signals Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6bcbc4d676c4..5263b2864a44 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -443,10 +443,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; - sigset_t oldset; int status; - rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); /* * Flush all pending writes before doing anything * with locks.. @@ -464,17 +462,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) else status = do_vfs_lock(filp, fl); unlock_kernel(); - rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); return status; } static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; - sigset_t oldset; int status; - rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); /* * Flush all pending writes before doing anything * with locks.. @@ -507,7 +502,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) nfs_sync_mapping(filp->f_mapping); nfs_zap_caches(inode); out: - rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); return status; } -- cgit v1.2.3 From 1dd761e9070aa2e543df3db41bd75ed4b8f2fab9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:49 -0500 Subject: NFSv4: Ensure the callback daemon flushes signals If the callback daemon is signalled, but is unable to exit because it still has users, then we need to flush signals. If not, then svc_recv() can never sleep, and so we hang. If we flush signals, then we also have to be prepared to resend them when we want the thread to exit. 
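The shutdown protocol described above can be modelled outside the kernel: the worker may exit only once its user count reaches zero, must discard a stop request it cannot yet honour so it can keep sleeping for work, and the stopper keeps re-sending the request. A rough userspace sketch, with plain flags standing in for signals and for svc_recv():

/* Illustration only; flags stand in for signals, a counter for nfs_callback_info.users. */
#include <stdio.h>

static int stop_requested;      /* stands in for signalled() */
static int users = 2;           /* stands in for nfs_callback_info.users */

static void flush_stop_request(void)    /* stands in for flush_signals() */
{
        stop_requested = 0;
}

static void serve_one_request(void)     /* stands in for svc_recv()/svc_process() */
{
        if (users > 0)
                users--;                /* pretend a user eventually goes away */
}

static void callback_loop(void)
{
        for (;;) {
                if (stop_requested) {
                        if (users == 0)
                                break;          /* safe to exit now */
                        flush_stop_request();   /* cannot exit yet: keep serving */
                }
                serve_one_request();
                stop_requested = 1;     /* the stopper re-sends its request, as the
                                         * kill_proc loop in the patch below does */
        }
}

int main(void)
{
        stop_requested = 1;
        callback_loop();
        printf("worker exited with users=%d\n", users);
        return 0;
}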
Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 2c042f8d70b5..99d2cfbce863 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -55,7 +55,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) complete(&nfs_callback_info.started); - while (nfs_callback_info.users != 0 || !signalled()) { + for(;;) { + if (signalled()) { + if (nfs_callback_info.users == 0) + break; + flush_signals(current); + } /* * Listen for a request on the socket */ @@ -135,11 +140,13 @@ int nfs_callback_down(void) lock_kernel(); down(&nfs_callback_sema); - if (--nfs_callback_info.users || nfs_callback_info.pid == 0) - goto out; - kill_proc(nfs_callback_info.pid, SIGKILL, 1); - wait_for_completion(&nfs_callback_info.stopped); -out: + nfs_callback_info.users--; + do { + if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0) + break; + if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0) + break; + } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); up(&nfs_callback_sema); unlock_kernel(); return ret; -- cgit v1.2.3 From deb7d638262019cbac5d15ab74ffd1c29242c7cb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:50 -0500 Subject: NFS: Fix a race with PG_private and nfs_release_page() We don't need to set PG_private for readahead pages, since they never get unlocked while I/O is in progress. However there is a small race in nfs_readpage_release() whereby the page may be unlocked, and have PG_private set. Fix is to have PG_private set only for the case of writes... Also fix a bug in nfs_clear_page_writeback(): Don't attempt to clear the radix_tree tag if we've already deleted the radix tree entry. 
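The defensive check added below boils down to: only undo bookkeeping for requests that were actually entered into it. A loose userspace model of that guard, with an array standing in for the radix-tree tag and a boolean standing in for req->wb_page; this is an illustration of the principle, not the NFS code itself:

/* Illustration only; the array and boolean are stand-ins, not the real structures. */
#include <stdio.h>
#include <stdbool.h>

#define NSLOTS 8

struct request {
        int index;
        bool has_page;          /* stands in for req->wb_page != NULL */
};

static bool writeback_tag[NSLOTS];      /* stands in for the radix-tree tag */

/* In this model, only requests linked to a page get tagged... */
static void set_writeback(const struct request *req)
{
        if (req->has_page)
                writeback_tag[req->index] = true;
}

/* ...and clearing applies the same test, so it never touches slots it never tagged. */
static void clear_writeback(const struct request *req)
{
        if (req->has_page)
                writeback_tag[req->index] = false;
}

int main(void)
{
        struct request readahead = { .index = 1, .has_page = false };
        struct request write_req = { .index = 2, .has_page = true };

        set_writeback(&readahead);
        set_writeback(&write_req);
        clear_writeback(&readahead);    /* harmless no-op: slot 1 was never tagged */
        clear_writeback(&write_req);
        printf("tag[1]=%d tag[2]=%d\n", writeback_tag[1], writeback_tag[2]);
        return 0;
}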
Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 10 +++++----- fs/nfs/write.c | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d6e076c9dbe1..106aca388ebc 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -88,7 +88,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, BUG_ON(PagePrivate(page)); BUG_ON(!PageLocked(page)); BUG_ON(page->mapping->host != inode); - SetPagePrivate(page); req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; @@ -136,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); - spin_lock(&nfsi->req_lock); - radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); - spin_unlock(&nfsi->req_lock); + if (req->wb_page != NULL) { + spin_lock(&nfsi->req_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + spin_unlock(&nfsi->req_lock); + } nfs_unlock_request(req); } @@ -153,7 +154,6 @@ void nfs_clear_request(struct nfs_page *req) { struct page *page = req->wb_page; if (page != NULL) { - ClearPagePrivate(page); page_cache_release(page); req->wb_page = NULL; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 647e3217c2e1..d9e5ee594572 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -429,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } + SetPagePrivate(req->wb_page); nfsi->npages++; atomic_inc(&req->wb_count); return 0; @@ -445,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&nfsi->req_lock); + ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { -- cgit v1.2.3 From 7d46a49f512e8d10b23353781a8ba85edd4fa640 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:50 -0500 Subject: NFS: Clean up nfs_flush_list() Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 62 +++++++++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d9e5ee594572..2beb1268bb1b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -963,7 +963,7 @@ static void nfs_execute_write(struct nfs_write_data *data) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) +static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) { struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; @@ -1032,16 +1032,13 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. 
*/ -static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) +static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; unsigned int count; - if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(head, inode, how); - data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); if (!data) goto out_bad; @@ -1074,24 +1071,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) return -ENOMEM; } -static int -nfs_flush_list(struct list_head *head, int wpages, int how) +static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how) { LIST_HEAD(one_request); - struct nfs_page *req; - int error = 0; - unsigned int pages = 0; + int (*flush_one)(struct inode *, struct list_head *, int); + struct nfs_page *req; + int wpages = NFS_SERVER(inode)->wpages; + int wsize = NFS_SERVER(inode)->wsize; + int error; - while (!list_empty(head)) { - pages += nfs_coalesce_requests(head, &one_request, wpages); + flush_one = nfs_flush_one; + if (wsize < PAGE_CACHE_SIZE) + flush_one = nfs_flush_multi; + /* For single writes, FLUSH_STABLE is more efficient */ + if (npages <= wpages && npages == NFS_I(inode)->npages + && nfs_list_entry(head->next)->wb_bytes <= wsize) + how |= FLUSH_STABLE; + + do { + nfs_coalesce_requests(head, &one_request, wpages); req = nfs_list_entry(one_request.next); - error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how); + error = flush_one(inode, &one_request, how); if (error < 0) - break; - } - if (error >= 0) - return pages; - + goto out_err; + } while (!list_empty(head)); + return 0; +out_err: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); @@ -1423,24 +1428,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); - int res, - error = 0; + int res; spin_lock(&nfsi->req_lock); res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfsi->req_lock); if (res) { - struct nfs_server *server = NFS_SERVER(inode); - - /* For single writes, FLUSH_STABLE is more efficient */ - if (res == nfsi->npages && nfsi->npages <= server->wpages) { - if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize) - how |= FLUSH_STABLE; - } - error = nfs_flush_list(&head, server->wpages, how); + int error = nfs_flush_list(inode, &head, res, how); + if (error < 0) + return error; } - if (error < 0) - return error; return res; } @@ -1449,14 +1446,13 @@ int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); - int res, - error = 0; + int res; spin_lock(&nfsi->req_lock); res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&nfsi->req_lock); if (res) { - error = nfs_commit_list(inode, &head, how); + int error = nfs_commit_list(inode, &head, how); if (error < 0) return error; } -- cgit v1.2.3 From c42de9dd67250fe984e0e31c9b542d721af6454b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:51 -0500 Subject: NFS: Fix a race in nfs_sync_inode() Kudos to Neil Brown for spotting the problem: "in nfs_sync_inode, there is effectively the sequence: nfs_wait_on_requests nfs_flush_inode nfs_commit_inode This seems a bit racy to me as if the only requests are on the ->commit list, and nfs_commit_inode is called separately after nfs_wait_on_requests completes, and before nfs_commit_inode start (say: by 
nfs_write_inode) then none of these function will return >0, yet there will be some pending request that aren't waited for." The solution is to search for requests to wait upon, search for dirty requests, and search for uncommitted requests while holding the nfsi->req_lock The patch also cleans up nfs_sync_inode(), getting rid of the redundant FLUSH_WAIT flag. It turns out that we were always setting it. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 +-- fs/nfs/write.c | 72 +++++++++++++++++++++++++++++++++++--------------- include/linux/nfs_fs.h | 10 +++---- 3 files changed, 56 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a0cda53461b3..60aac58270a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -137,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) static int nfs_write_inode(struct inode *inode, int sync) { - int flags = sync ? FLUSH_WAIT : 0; + int flags = sync ? FLUSH_SYNC : 0; int ret; ret = nfs_commit_inode(inode, flags); @@ -1051,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int err; /* Flush out writes to the server in order to update c/mtime */ - nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); + nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT); /* * We may force a getattr if the user cares about atime. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2beb1268bb1b..3f5225404c97 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -539,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req) * * Interruptible by signals only if mounted with intr flag. */ -static int -nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; @@ -553,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int else idx_end = idx_start + npages - 1; - spin_lock(&nfsi->req_lock); next = idx_start; while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { if (req->wb_index > idx_end) @@ -566,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); + spin_lock(&nfsi->req_lock); if (error < 0) return error; - spin_lock(&nfsi->req_lock); res++; } - spin_unlock(&nfsi->req_lock); return res; } +static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int ret; + + spin_lock(&nfsi->req_lock); + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + spin_unlock(&nfsi->req_lock); + return ret; +} + /* * nfs_scan_dirty - Scan an inode for dirty requests * @inode: NFS inode to scan @@ -626,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st } return res; } +#else +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) +{ + return 0; +} #endif static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) @@ -1421,6 +1434,11 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; +#else +static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +{ + return 0; +} #endif 
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, @@ -1460,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how) } #endif -int nfs_sync_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) { + struct nfs_inode *nfsi = NFS_I(inode); + LIST_HEAD(head); int nocommit = how & FLUSH_NOCOMMIT; - int wait = how & FLUSH_WAIT; - int error; - - how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT); + int pages, ret; + how &= ~FLUSH_NOCOMMIT; + spin_lock(&nfsi->req_lock); do { - if (wait) { - error = nfs_wait_on_requests(inode, idx_start, npages); - if (error != 0) - continue; - } - error = nfs_flush_inode(inode, idx_start, npages, how); - if (error != 0) + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + if (ret != 0) continue; - if (!nocommit) - error = nfs_commit_inode(inode, how); - } while (error > 0); - return error; + pages = nfs_scan_dirty(inode, &head, idx_start, npages); + if (pages != 0) { + spin_unlock(&nfsi->req_lock); + ret = nfs_flush_list(inode, &head, pages, how); + spin_lock(&nfsi->req_lock); + continue; + } + if (nocommit) + break; + pages = nfs_scan_commit(inode, &head, 0, 0); + if (pages == 0) + break; + spin_unlock(&nfsi->req_lock); + ret = nfs_commit_list(inode, &head, how); + spin_lock(&nfsi->req_lock); + } while (ret >= 0); + spin_unlock(&nfsi->req_lock); + return ret; } int nfs_init_writepagecache(void) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 55de0770df4a..cbebd7d1b9e8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -56,9 +56,7 @@ * When flushing a cluster of dirty pages, there can be different * strategies: */ -#define FLUSH_AGING 0 /* only flush old buffers */ #define FLUSH_SYNC 1 /* file being synced, or contention */ -#define FLUSH_WAIT 2 /* wait for completion */ #define FLUSH_STABLE 4 /* commit to stable storage */ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ @@ -419,7 +417,7 @@ void nfs_commit_free(struct nfs_write_data *p); * Try to write back everything synchronously (but check the * return value!) */ -extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); +extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern void nfs_commit_release(void *wdata); @@ -440,7 +438,7 @@ nfs_have_writebacks(struct inode *inode) static inline int nfs_wb_all(struct inode *inode) { - int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT); + int error = nfs_sync_inode_wait(inode, 0, 0, 0); return (error < 0) ? error : 0; } @@ -449,8 +447,8 @@ nfs_wb_all(struct inode *inode) */ static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how) { - int error = nfs_sync_inode(inode, page->index, 1, - how | FLUSH_WAIT | FLUSH_STABLE); + int error = nfs_sync_inode_wait(inode, page->index, 1, + how | FLUSH_STABLE); return (error < 0) ? error : 0; } -- cgit v1.2.3 From f38aa94224c5517a40ba56d453779f70d3229803 Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 3 Nov 2005 15:57:06 +0000 Subject: [PATCH] Pass dentry, not just name, in fsnotify creation hooks. The audit hooks (to be added shortly) will want to see dentry->d_inode too, not just the name. 
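The interface change is simply to hand the hook the dentry instead of the bare name, so a callee can reach both the name and the inode. A small userspace sketch of the before and after shapes, with stand-in types rather than real kernel structures:

/* Illustration only; inode_model and dentry_model are stand-ins. */
#include <stdio.h>

struct inode_model  { unsigned long ino; };
struct dentry_model { const char *name; struct inode_model *inode; };

/* Old shape: the hook sees only the name. */
static void notify_create_by_name(const struct inode_model *dir, const char *name)
{
        printf("created '%s' in dir %lu\n", name, dir->ino);
}

/* New shape: the dentry lets the hook reach both the name and the child inode. */
static void notify_create_by_dentry(const struct inode_model *dir,
                                    const struct dentry_model *dentry)
{
        printf("created '%s' (ino %lu) in dir %lu\n",
               dentry->name, dentry->inode->ino, dir->ino);
}

int main(void)
{
        struct inode_model dir = { .ino = 100 }, child = { .ino = 101 };
        struct dentry_model d = { .name = "newfile", .inode = &child };

        notify_create_by_name(&dir, d.name);
        notify_create_by_dentry(&dir, &d);
        return 0;
}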
Signed-off-by: Amy Griffis Signed-off-by: David Woodhouse --- fs/namei.c | 10 +++++----- include/linux/fsnotify.h | 9 +++++---- kernel/auditsc.c | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 8dc2b038d5d9..f6619af9e957 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1472,7 +1472,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, DQUOT_INIT(dir); error = dir->i_op->create(dir, dentry, mode, nd); if (!error) - fsnotify_create(dir, dentry->d_name.name); + fsnotify_create(dir, dentry); return error; } @@ -1793,7 +1793,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) DQUOT_INIT(dir); error = dir->i_op->mknod(dir, dentry, mode, dev); if (!error) - fsnotify_create(dir, dentry->d_name.name); + fsnotify_create(dir, dentry); return error; } @@ -1870,7 +1870,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) DQUOT_INIT(dir); error = dir->i_op->mkdir(dir, dentry, mode); if (!error) - fsnotify_mkdir(dir, dentry->d_name.name); + fsnotify_mkdir(dir, dentry); return error; } @@ -2133,7 +2133,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i DQUOT_INIT(dir); error = dir->i_op->symlink(dir, dentry, oldname); if (!error) - fsnotify_create(dir, dentry->d_name.name); + fsnotify_create(dir, dentry); return error; } @@ -2210,7 +2210,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de error = dir->i_op->link(old_dentry, dir, new_dentry); mutex_unlock(&old_dentry->d_inode->i_mutex); if (!error) - fsnotify_create(dir, new_dentry->d_name.name); + fsnotify_create(dir, new_dentry); return error; } diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 03b8e7932b83..b5ff64d2f092 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -70,19 +70,20 @@ static inline void fsnotify_inoderemove(struct inode *inode) /* * fsnotify_create - 'name' was linked in */ -static inline void fsnotify_create(struct inode *inode, const char *name) +static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { inode_dir_notify(inode, DN_CREATE); - inotify_inode_queue_event(inode, IN_CREATE, 0, name); + inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name); } /* * fsnotify_mkdir - directory 'name' was created */ -static inline void fsnotify_mkdir(struct inode *inode, const char *name) +static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { inode_dir_notify(inode, DN_CREATE); - inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, name); + inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, + dentry->d_name.name); } /* diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 95076fa12202..55ba331757c5 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -515,7 +515,7 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_INODE: if (ctx) { for (j = 0; j < ctx->name_count; j++) { - if (audit_comparator(ctx->names[j].ino, op, value)) { + if ( audit_comparator(ctx->names[j].ino, op, value)) { ++result; break; } -- cgit v1.2.3 From 73241ccca0f7786933f1d31b3d86f2456549953a Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 3 Nov 2005 16:00:25 +0000 Subject: [PATCH] Collect more inode information during syscall processing. This patch augments the collection of inode info during syscall processing. It represents part of the functionality that was provided by the auditfs patch included in RHEL4. 
Specifically, it: - Collects information for target inodes created or removed during syscalls. Previous code only collects information for the target inode's parent. - Adds the audit_inode() hook to syscalls that operate on a file descriptor (e.g. fchown), enabling audit to do inode filtering for these calls. - Modifies filtering code to check audit context for either an inode # or a parent inode # matching a given rule. - Modifies logging to provide inode # for both parent and child. - Protect debug info from NULL audit_names.name. [AV: folded a later typo fix from the same author] Signed-off-by: Amy Griffis Signed-off-by: David Woodhouse Signed-off-by: Al Viro --- fs/namei.c | 1 + fs/open.c | 8 ++- fs/xattr.c | 11 +++- include/linux/audit.h | 18 +++++- include/linux/fsnotify.h | 5 ++ kernel/auditsc.c | 142 +++++++++++++++++++++++++++++++++++++++-------- 6 files changed, 157 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index f6619af9e957..51cfc9c3ed00 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1353,6 +1353,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) return -ENOENT; BUG_ON(victim->d_parent->d_inode != dir); + audit_inode_child(victim->d_name.name, victim->d_inode, dir->i_ino); error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); if (error) diff --git a/fs/open.c b/fs/open.c index 70e0230d8e77..70510004d06e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -626,6 +627,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) dentry = file->f_dentry; inode = dentry->d_inode; + audit_inode(NULL, inode, 0); + err = -EROFS; if (IS_RDONLY(inode)) goto out_putf; @@ -775,7 +778,10 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) file = fget(fd); if (file) { - error = chown_common(file->f_dentry, user, group); + struct dentry * dentry; + dentry = file->f_dentry; + audit_inode(NULL, dentry->d_inode, 0); + error = chown_common(dentry, user, group); fput(file); } return error; diff --git a/fs/xattr.c b/fs/xattr.c index 80eca7d3d69f..e416190f5e9c 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -234,12 +235,15 @@ sys_fsetxattr(int fd, char __user *name, void __user *value, size_t size, int flags) { struct file *f; + struct dentry *dentry; int error = -EBADF; f = fget(fd); if (!f) return error; - error = setxattr(f->f_dentry, name, value, size, flags); + dentry = f->f_dentry; + audit_inode(NULL, dentry->d_inode, 0); + error = setxattr(dentry, name, value, size, flags); fput(f); return error; } @@ -458,12 +462,15 @@ asmlinkage long sys_fremovexattr(int fd, char __user *name) { struct file *f; + struct dentry *dentry; int error = -EBADF; f = fget(fd); if (!f) return error; - error = removexattr(f->f_dentry, name); + dentry = f->f_dentry; + audit_inode(NULL, dentry->d_inode, 0); + error = removexattr(dentry, name); fput(f); return error; } diff --git a/include/linux/audit.h b/include/linux/audit.h index fd65078e794a..739b954cb242 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -260,7 +260,20 @@ extern void audit_syscall_entry(struct task_struct *task, int arch, extern void audit_syscall_exit(struct task_struct *task, int failed, long return_code); extern void audit_getname(const char *name); extern void audit_putname(const char *name); -extern void audit_inode(const char *name, const struct inode *inode, unsigned flags); +extern void __audit_inode(const char *name, const 
struct inode *inode, unsigned flags); +extern void __audit_inode_child(const char *dname, const struct inode *inode, + unsigned long pino); +static inline void audit_inode(const char *name, const struct inode *inode, + unsigned flags) { + if (unlikely(current->audit_context)) + __audit_inode(name, inode, flags); +} +static inline void audit_inode_child(const char *dname, + const struct inode *inode, + unsigned long pino) { + if (unlikely(current->audit_context)) + __audit_inode_child(dname, inode, pino); +} /* Private API (for audit.c only) */ extern int audit_receive_filter(int type, int pid, int uid, int seq, @@ -283,7 +296,10 @@ extern int audit_filter_user(struct netlink_skb_parms *cb, int type); #define audit_syscall_exit(t,f,r) do { ; } while (0) #define audit_getname(n) do { ; } while (0) #define audit_putname(n) do { ; } while (0) +#define __audit_inode(n,i,f) do { ; } while (0) +#define __audit_inode_child(d,i,p) do { ; } while (0) #define audit_inode(n,i,f) do { ; } while (0) +#define audit_inode_child(d,i,p) do { ; } while (0) #define audit_receive_filter(t,p,u,s,d,l) ({ -EOPNOTSUPP; }) #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index b5ff64d2f092..94919c376a72 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -15,6 +15,7 @@ #include #include +#include /* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir @@ -45,6 +46,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL); } + audit_inode_child(old_name, source, old_dir->i_ino); + audit_inode_child(new_name, target, new_dir->i_ino); } /* @@ -74,6 +77,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name); + audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino); } /* @@ -84,6 +88,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, dentry->d_name.name); + audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino); } /* diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 55ba331757c5..73f932b7deb6 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2,6 +2,7 @@ * Handles all system-call specific auditing features. * * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * Copyright 2005 Hewlett-Packard Development Company, L.P. * Copyright (C) 2005 IBM Corporation * All Rights Reserved. * @@ -31,11 +32,16 @@ * The support of additional filter rules compares (>, <, >=, <=) was * added by Dustin Kirkland , 2005. * + * Modified by Amy Griffis to collect additional + * filesystem information. 
*/ #include #include #include +#include +#include +#include #include #include #include @@ -97,12 +103,12 @@ enum audit_state { struct audit_names { const char *name; unsigned long ino; + unsigned long pino; dev_t dev; umode_t mode; uid_t uid; gid_t gid; dev_t rdev; - unsigned flags; }; struct audit_aux_data { @@ -515,7 +521,8 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_INODE: if (ctx) { for (j = 0; j < ctx->name_count; j++) { - if ( audit_comparator(ctx->names[j].ino, op, value)) { + if (audit_comparator(ctx->names[j].ino, op, value) || + audit_comparator(ctx->names[j].pino, op, value)) { ++result; break; } @@ -696,17 +703,17 @@ static inline void audit_free_names(struct audit_context *context) #if AUDIT_DEBUG == 2 if (context->auditable ||context->put_count + context->ino_count != context->name_count) { - printk(KERN_ERR "audit.c:%d(:%d): major=%d in_syscall=%d" + printk(KERN_ERR "%s:%d(:%d): major=%d in_syscall=%d" " name_count=%d put_count=%d" " ino_count=%d [NOT freeing]\n", - __LINE__, + __FILE__, __LINE__, context->serial, context->major, context->in_syscall, context->name_count, context->put_count, context->ino_count); for (i = 0; i < context->name_count; i++) printk(KERN_ERR "names[%d] = %p = %s\n", i, context->names[i].name, - context->names[i].name); + context->names[i].name ?: "(null)"); dump_stack(); return; } @@ -932,27 +939,34 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) } } for (i = 0; i < context->name_count; i++) { + unsigned long ino = context->names[i].ino; + unsigned long pino = context->names[i].pino; + ab = audit_log_start(context, gfp_mask, AUDIT_PATH); if (!ab) continue; /* audit_panic has been called */ audit_log_format(ab, "item=%d", i); - if (context->names[i].name) { - audit_log_format(ab, " name="); + + audit_log_format(ab, " name="); + if (context->names[i].name) audit_log_untrustedstring(ab, context->names[i].name); - } - audit_log_format(ab, " flags=%x\n", context->names[i].flags); - - if (context->names[i].ino != (unsigned long)-1) - audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#o" - " ouid=%u ogid=%u rdev=%02x:%02x", - context->names[i].ino, - MAJOR(context->names[i].dev), - MINOR(context->names[i].dev), - context->names[i].mode, - context->names[i].uid, - context->names[i].gid, - MAJOR(context->names[i].rdev), + else + audit_log_format(ab, "(null)"); + + if (pino != (unsigned long)-1) + audit_log_format(ab, " parent=%lu", pino); + if (ino != (unsigned long)-1) + audit_log_format(ab, " inode=%lu", ino); + if ((pino != (unsigned long)-1) || (ino != (unsigned long)-1)) + audit_log_format(ab, " dev=%02x:%02x mode=%#o" + " ouid=%u ogid=%u rdev=%02x:%02x", + MAJOR(context->names[i].dev), + MINOR(context->names[i].dev), + context->names[i].mode, + context->names[i].uid, + context->names[i].gid, + MAJOR(context->names[i].rdev), MINOR(context->names[i].rdev)); audit_log_end(ab); } @@ -1174,7 +1188,7 @@ void audit_putname(const char *name) for (i = 0; i < context->name_count; i++) printk(KERN_ERR "name[%d] = %p = %s\n", i, context->names[i].name, - context->names[i].name); + context->names[i].name ?: "(null)"); } #endif __putname(name); @@ -1204,7 +1218,7 @@ void audit_putname(const char *name) * * Called from fs/namei.c:path_lookup(). 
*/ -void audit_inode(const char *name, const struct inode *inode, unsigned flags) +void __audit_inode(const char *name, const struct inode *inode, unsigned flags) { int idx; struct audit_context *context = current->audit_context; @@ -1230,13 +1244,93 @@ void audit_inode(const char *name, const struct inode *inode, unsigned flags) ++context->ino_count; #endif } - context->names[idx].flags = flags; - context->names[idx].ino = inode->i_ino; context->names[idx].dev = inode->i_sb->s_dev; context->names[idx].mode = inode->i_mode; context->names[idx].uid = inode->i_uid; context->names[idx].gid = inode->i_gid; context->names[idx].rdev = inode->i_rdev; + if ((flags & LOOKUP_PARENT) && (strcmp(name, "/") != 0) && + (strcmp(name, ".") != 0)) { + context->names[idx].ino = (unsigned long)-1; + context->names[idx].pino = inode->i_ino; + } else { + context->names[idx].ino = inode->i_ino; + context->names[idx].pino = (unsigned long)-1; + } +} + +/** + * audit_inode_child - collect inode info for created/removed objects + * @dname: inode's dentry name + * @inode: inode being audited + * @pino: inode number of dentry parent + * + * For syscalls that create or remove filesystem objects, audit_inode + * can only collect information for the filesystem object's parent. + * This call updates the audit context with the child's information. + * Syscalls that create a new filesystem object must be hooked after + * the object is created. Syscalls that remove a filesystem object + * must be hooked prior, in order to capture the target inode during + * unsuccessful attempts. + */ +void __audit_inode_child(const char *dname, const struct inode *inode, + unsigned long pino) +{ + int idx; + struct audit_context *context = current->audit_context; + + if (!context->in_syscall) + return; + + /* determine matching parent */ + if (dname) + for (idx = 0; idx < context->name_count; idx++) + if (context->names[idx].pino == pino) { + const char *n; + const char *name = context->names[idx].name; + int dlen = strlen(dname); + int nlen = name ? strlen(name) : 0; + + if (nlen < dlen) + continue; + + /* disregard trailing slashes */ + n = name + nlen - 1; + while ((*n == '/') && (n > name)) + n--; + + /* find last path component */ + n = n - dlen + 1; + if (n < name) + continue; + else if (n > name) { + if (*--n != '/') + continue; + else + n++; + } + + if (strncmp(n, dname, dlen) == 0) + goto update_context; + } + + /* catch-all in case match not found */ + idx = context->name_count++; + context->names[idx].name = NULL; + context->names[idx].pino = pino; +#if AUDIT_DEBUG + context->ino_count++; +#endif + +update_context: + if (inode) { + context->names[idx].ino = inode->i_ino; + context->names[idx].dev = inode->i_sb->s_dev; + context->names[idx].mode = inode->i_mode; + context->names[idx].uid = inode->i_uid; + context->names[idx].gid = inode->i_gid; + context->names[idx].rdev = inode->i_rdev; + } } /** -- cgit v1.2.3 From 641e6f30a095f3752ed84fd9d279382f5d3ef4c1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 Mar 2006 15:44:26 -0800 Subject: [PATCH] sysfs: sysfs_remove_dir() needs to invalidate the dentry When calling sysfs_remove_dir() don't allow any further sysfs functions to work for this kobject anymore. This fixes a nasty USB cdc-acm oops on disconnect. Many thanks to Bob Copeland and Paul Fulghum for taking the time to track this down. 
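The pattern of the fix below is invalidate-then-check: clear the cached pointer as soon as the directory is torn down, and make every later entry point tolerate the cleared pointer. A minimal userspace sketch with stand-in types, not the sysfs code itself:

/* Illustration only; dir_model and kobj_model are stand-ins. */
#include <stdio.h>
#include <stdlib.h>

struct dir_model  { int placeholder; };
struct kobj_model { struct dir_model *dentry; };

/* Tear the directory down and clear the cached pointer in the same place. */
static void remove_dir(struct kobj_model *kobj)
{
        free(kobj->dentry);
        kobj->dentry = NULL;
}

/* Later entry points tolerate the cleared pointer instead of dereferencing it. */
static void remove_attribute(const struct dir_model *dir, const char *name)
{
        if (dir == NULL)
                return;         /* directory already gone: nothing to do */
        printf("removing attribute '%s'\n", name);
}

int main(void)
{
        struct kobj_model kobj = { .dentry = malloc(sizeof(struct dir_model)) };

        remove_dir(&kobj);
        remove_attribute(kobj.dentry, "stale-attribute");  /* safe no-op after removal */
        return 0;
}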
Cc: Bob Copeland Cc: Paul Fulghum Cc: Maneesh Soni Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 1 + fs/sysfs/inode.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 49bd219275db..cfd290d3d6b1 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -302,6 +302,7 @@ void sysfs_remove_dir(struct kobject * kobj) * Drop reference from dget() on entrance. */ dput(dentry); + kobj->dentry = NULL; } int sysfs_rename_dir(struct kobject * kobj, const char *new_name) diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 689f7bcfaf30..6beee6f6a674 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -227,12 +227,16 @@ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) void sysfs_hash_and_remove(struct dentry * dir, const char * name) { struct sysfs_dirent * sd; - struct sysfs_dirent * parent_sd = dir->d_fsdata; + struct sysfs_dirent * parent_sd; + + if (!dir) + return; if (dir->d_inode == NULL) /* no inode means this hasn't been made visible yet */ return; + parent_sd = dir->d_fsdata; mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (!sd->s_element) -- cgit v1.2.3 From 58383af629efb07e5a0694e445eda0c65b16e1de Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 6 Feb 2006 14:12:43 -0800 Subject: [PATCH] kobj_map semaphore to mutex conversion Convert the kobj_map code to use a mutex instead of a semaphore. It converts the single two users as well, genhd.c and char_dev.c. Signed-off-by: Jes Sorensen Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- block/genhd.c | 31 ++++++++++++++++--------------- drivers/base/map.c | 21 +++++++++++---------- fs/char_dev.c | 17 +++++++++-------- include/linux/kobj_map.h | 4 ++-- 4 files changed, 38 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/block/genhd.c b/block/genhd.c index db57546a709d..64510fd88621 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -15,12 +15,13 @@ #include #include #include +#include #define MAX_PROBE_HASH 255 /* random */ static struct subsystem block_subsys; -static DECLARE_MUTEX(block_subsys_sem); +static DEFINE_MUTEX(block_subsys_lock); /* * Can be deleted altogether. Later. @@ -46,7 +47,7 @@ struct blkdev_info { /* * iterate over a list of blkdev_info structures. allows * the major_names array to be iterated over from outside this file - * must be called with the block_subsys_sem held + * must be called with the block_subsys_lock held */ void *get_next_blkdev(void *dev) { @@ -85,20 +86,20 @@ out: void *acquire_blkdev_list(void) { - down(&block_subsys_sem); + mutex_lock(&block_subsys_lock); return get_next_blkdev(NULL); } void release_blkdev_list(void *dev) { - up(&block_subsys_sem); + mutex_unlock(&block_subsys_lock); kfree(dev); } /* * Count the number of records in the blkdev_list. - * must be called with the block_subsys_sem held + * must be called with the block_subsys_lock held */ int count_blkdev_list(void) { @@ -118,7 +119,7 @@ int count_blkdev_list(void) /* * extract the major and name values from a blkdev_info struct * passed in as a void to *dev. 
Must be called with - * block_subsys_sem held + * block_subsys_lock held */ int get_blkdev_info(void *dev, int *major, char **name) { @@ -138,7 +139,7 @@ int register_blkdev(unsigned int major, const char *name) struct blk_major_name **n, *p; int index, ret = 0; - down(&block_subsys_sem); + mutex_lock(&block_subsys_lock); /* temporary */ if (major == 0) { @@ -183,7 +184,7 @@ int register_blkdev(unsigned int major, const char *name) kfree(p); } out: - up(&block_subsys_sem); + mutex_unlock(&block_subsys_lock); return ret; } @@ -197,7 +198,7 @@ int unregister_blkdev(unsigned int major, const char *name) int index = major_to_index(major); int ret = 0; - down(&block_subsys_sem); + mutex_lock(&block_subsys_lock); for (n = &major_names[index]; *n; n = &(*n)->next) if ((*n)->major == major) break; @@ -207,7 +208,7 @@ int unregister_blkdev(unsigned int major, const char *name) p = *n; *n = p->next; } - up(&block_subsys_sem); + mutex_unlock(&block_subsys_lock); kfree(p); return ret; @@ -301,7 +302,7 @@ static void *part_start(struct seq_file *part, loff_t *pos) struct list_head *p; loff_t l = *pos; - down(&block_subsys_sem); + mutex_lock(&block_subsys_lock); list_for_each(p, &block_subsys.kset.list) if (!l--) return list_entry(p, struct gendisk, kobj.entry); @@ -318,7 +319,7 @@ static void *part_next(struct seq_file *part, void *v, loff_t *pos) static void part_stop(struct seq_file *part, void *v) { - up(&block_subsys_sem); + mutex_unlock(&block_subsys_lock); } static int show_partition(struct seq_file *part, void *v) @@ -377,7 +378,7 @@ static struct kobject *base_probe(dev_t dev, int *part, void *data) static int __init genhd_device_init(void) { - bdev_map = kobj_map_init(base_probe, &block_subsys_sem); + bdev_map = kobj_map_init(base_probe, &block_subsys_lock); blk_dev_init(); subsystem_register(&block_subsys); return 0; @@ -611,7 +612,7 @@ static void *diskstats_start(struct seq_file *part, loff_t *pos) loff_t k = *pos; struct list_head *p; - down(&block_subsys_sem); + mutex_lock(&block_subsys_lock); list_for_each(p, &block_subsys.kset.list) if (!k--) return list_entry(p, struct gendisk, kobj.entry); @@ -628,7 +629,7 @@ static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos) static void diskstats_stop(struct seq_file *part, void *v) { - up(&block_subsys_sem); + mutex_unlock(&block_subsys_lock); } static int diskstats_show(struct seq_file *s, void *v) diff --git a/drivers/base/map.c b/drivers/base/map.c index b449dae6f0d3..e87017f36853 100644 --- a/drivers/base/map.c +++ b/drivers/base/map.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -25,7 +26,7 @@ struct kobj_map { int (*lock)(dev_t, void *); void *data; } *probes[255]; - struct semaphore *sem; + struct mutex *lock; }; int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range, @@ -53,7 +54,7 @@ int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range, p->range = range; p->data = data; } - down(domain->sem); + mutex_lock(domain->lock); for (i = 0, p -= n; i < n; i++, p++, index++) { struct probe **s = &domain->probes[index % 255]; while (*s && (*s)->range < range) @@ -61,7 +62,7 @@ int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range, p->next = *s; *s = p; } - up(domain->sem); + mutex_unlock(domain->lock); return 0; } @@ -75,7 +76,7 @@ void kobj_unmap(struct kobj_map *domain, dev_t dev, unsigned long range) if (n > 255) n = 255; - down(domain->sem); + mutex_lock(domain->lock); for (i = 0; i < n; i++, index++) { struct probe **s; for (s = &domain->probes[index 
% 255]; *s; s = &(*s)->next) { @@ -88,7 +89,7 @@ void kobj_unmap(struct kobj_map *domain, dev_t dev, unsigned long range) } } } - up(domain->sem); + mutex_unlock(domain->lock); kfree(found); } @@ -99,7 +100,7 @@ struct kobject *kobj_lookup(struct kobj_map *domain, dev_t dev, int *index) unsigned long best = ~0UL; retry: - down(domain->sem); + mutex_lock(domain->lock); for (p = domain->probes[MAJOR(dev) % 255]; p; p = p->next) { struct kobject *(*probe)(dev_t, int *, void *); struct module *owner; @@ -120,7 +121,7 @@ retry: module_put(owner); continue; } - up(domain->sem); + mutex_unlock(domain->lock); kobj = probe(dev, index, data); /* Currently ->owner protects _only_ ->probe() itself. */ module_put(owner); @@ -128,11 +129,11 @@ retry: return kobj; goto retry; } - up(domain->sem); + mutex_unlock(domain->lock); return NULL; } -struct kobj_map *kobj_map_init(kobj_probe_t *base_probe, struct semaphore *sem) +struct kobj_map *kobj_map_init(kobj_probe_t *base_probe, struct mutex *lock) { struct kobj_map *p = kmalloc(sizeof(struct kobj_map), GFP_KERNEL); struct probe *base = kzalloc(sizeof(*base), GFP_KERNEL); @@ -149,6 +150,6 @@ struct kobj_map *kobj_map_init(kobj_probe_t *base_probe, struct semaphore *sem) base->get = base_probe; for (i = 0; i < 255; i++) p->probes[i] = base; - p->sem = sem; + p->lock = lock; return p; } diff --git a/fs/char_dev.c b/fs/char_dev.c index 21195c481637..5c36345c9bf7 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_KMOD #include @@ -28,7 +29,7 @@ static struct kobj_map *cdev_map; #define MAX_PROBE_HASH 255 /* random */ -static DECLARE_MUTEX(chrdevs_lock); +static DEFINE_MUTEX(chrdevs_lock); static struct char_device_struct { struct char_device_struct *next; @@ -88,13 +89,13 @@ out: void *acquire_chrdev_list(void) { - down(&chrdevs_lock); + mutex_lock(&chrdevs_lock); return get_next_chrdev(NULL); } void release_chrdev_list(void *dev) { - up(&chrdevs_lock); + mutex_unlock(&chrdevs_lock); kfree(dev); } @@ -151,7 +152,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, memset(cd, 0, sizeof(struct char_device_struct)); - down(&chrdevs_lock); + mutex_lock(&chrdevs_lock); /* temporary */ if (major == 0) { @@ -186,10 +187,10 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, } cd->next = *cp; *cp = cd; - up(&chrdevs_lock); + mutex_unlock(&chrdevs_lock); return cd; out: - up(&chrdevs_lock); + mutex_unlock(&chrdevs_lock); kfree(cd); return ERR_PTR(ret); } @@ -200,7 +201,7 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct) struct char_device_struct *cd = NULL, **cp; int i = major_to_index(major); - down(&chrdevs_lock); + mutex_lock(&chrdevs_lock); for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next) if ((*cp)->major == major && (*cp)->baseminor == baseminor && @@ -210,7 +211,7 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct) cd = *cp; *cp = cd->next; } - up(&chrdevs_lock); + mutex_unlock(&chrdevs_lock); return cd; } diff --git a/include/linux/kobj_map.h b/include/linux/kobj_map.h index cbe7d8008042..bafe178a381f 100644 --- a/include/linux/kobj_map.h +++ b/include/linux/kobj_map.h @@ -1,6 +1,6 @@ #ifdef __KERNEL__ -#include +#include typedef struct kobject *kobj_probe_t(dev_t, int *, void *); struct kobj_map; @@ -9,6 +9,6 @@ int kobj_map(struct kobj_map *, dev_t, unsigned long, struct module *, kobj_probe_t *, int (*)(dev_t, void *), void *); void kobj_unmap(struct kobj_map *, dev_t, unsigned long); struct kobject 
*kobj_lookup(struct kobj_map *, dev_t, int *); -struct kobj_map *kobj_map_init(kobj_probe_t *, struct semaphore *); +struct kobj_map *kobj_map_init(kobj_probe_t *, struct mutex *); #endif -- cgit v1.2.3 From 58d49283b87751f7af75e021a629dcddb027e8eb Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Wed, 22 Feb 2006 11:18:15 +0100 Subject: [PATCH] sysfs: kzalloc conversion this converts fs/sysfs to kzalloc() usage. compile tested with make allyesconfig Signed-off-by: Eric Sesterhenn Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 3 +-- fs/sysfs/inode.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index d0e3d8495165..e21f4022feb8 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -301,9 +301,8 @@ static int check_perm(struct inode * inode, struct file * file) /* No error? Great, allocate a buffer for the file, and store it * it in file->private_data for easy access. */ - buffer = kmalloc(sizeof(struct sysfs_buffer),GFP_KERNEL); + buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); if (buffer) { - memset(buffer,0,sizeof(struct sysfs_buffer)); init_MUTEX(&buffer->sem); buffer->needs_read_fill = 1; buffer->ops = ops; diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 6beee6f6a674..4c29ac41ac3e 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -54,11 +54,10 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) if (!sd_iattr) { /* setting attributes for the first time, allocate now */ - sd_iattr = kmalloc(sizeof(struct iattr), GFP_KERNEL); + sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); if (!sd_iattr) return -ENOMEM; /* assign default attributes */ - memset(sd_iattr, 0, sizeof(struct iattr)); sd_iattr->ia_mode = sd->s_mode; sd_iattr->ia_uid = 0; sd_iattr->ia_gid = 0; -- cgit v1.2.3 From c516865cfbac0d862d4888df91793ad1e74ffd58 Mon Sep 17 00:00:00 2001 From: Maneesh Soni Date: Thu, 9 Mar 2006 19:40:14 +0530 Subject: [PATCH] sysfs: fix problem with duplicate sysfs directories and files The following patch checks for existing sysfs_dirent before preparing new one while creating sysfs directories and files. Signed-off-by: Maneesh Soni Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 32 +++++++++++++++++++++++++++++++- fs/sysfs/file.c | 6 ++++-- fs/sysfs/symlink.c | 5 +++-- fs/sysfs/sysfs.h | 1 + 4 files changed, 39 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index cfd290d3d6b1..bea1f4c02b9b 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -50,6 +50,32 @@ static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd, return sd; } +/** + * + * Return -EEXIST if there is already a sysfs element with the same name for + * the same parent. 
+ * + * called with parent inode's i_mutex held + */ +int sysfs_dirent_exist(struct sysfs_dirent *parent_sd, + const unsigned char *new) +{ + struct sysfs_dirent * sd; + + list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { + if (sd->s_element) { + const unsigned char *existing = sysfs_get_name(sd); + if (strcmp(existing, new)) + continue; + else + return -EEXIST; + } + } + + return 0; +} + + int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, void * element, umode_t mode, int type) { @@ -102,7 +128,11 @@ static int create_dir(struct kobject * k, struct dentry * p, mutex_lock(&p->d_inode->i_mutex); *d = lookup_one_len(n, p, strlen(n)); if (!IS_ERR(*d)) { - error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR); + if (sysfs_dirent_exist(p->d_fsdata, n)) + error = -EEXIST; + else + error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, + SYSFS_DIR); if (!error) { error = sysfs_create(*d, mode, init_dir); if (!error) { diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index e21f4022feb8..5e83e7246788 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -361,10 +361,12 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type) { struct sysfs_dirent * parent_sd = dir->d_fsdata; umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; - int error = 0; + int error = -EEXIST; mutex_lock(&dir->d_inode->i_mutex); - error = sysfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); + if (!sysfs_dirent_exist(parent_sd, attr->name)) + error = sysfs_make_dirent(parent_sd, NULL, (void *)attr, + mode, type); mutex_unlock(&dir->d_inode->i_mutex); return error; diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index e38d6338a20d..fe23f47f6e43 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -82,12 +82,13 @@ exit1: int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) { struct dentry * dentry = kobj->dentry; - int error = 0; + int error = -EEXIST; BUG_ON(!kobj || !kobj->dentry || !name); mutex_lock(&dentry->d_inode->i_mutex); - error = sysfs_add_link(dentry, name, target); + if (!sysfs_dirent_exist(dentry->d_fsdata, name)) + error = sysfs_add_link(dentry, name, target); mutex_unlock(&dentry->d_inode->i_mutex); return error; } diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3f8953e0e5d0..cf11d5b789d9 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -5,6 +5,7 @@ extern kmem_cache_t *sysfs_dir_cachep; extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); +extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *); extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, umode_t, int); -- cgit v1.2.3 From dd308bc355a1aa4f202fe9a3133b6c676cb9606c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 7 Mar 2006 21:41:59 +1100 Subject: [PATCH] debugfs: Add debugfs_create_blob() helper for exporting binary data I wanted to export a binary blob via debugfs, and although it was pretty easy it seems like it'd be easier if there was a helper for it. It's a pity we need the wrapper struct but I can't see a cleaner way to do it. 
Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/debugfs.h | 15 +++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index d575452cd9f7..40c4fc973fad 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -251,3 +251,49 @@ struct dentry *debugfs_create_bool(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_bool); +static ssize_t read_file_blob(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct debugfs_blob_wrapper *blob = file->private_data; + return simple_read_from_buffer(user_buf, count, ppos, blob->data, + blob->size); +} + +static struct file_operations fops_blob = { + .read = read_file_blob, + .open = default_open, +}; + +/** + * debugfs_create_blob - create a file in the debugfs filesystem that is + * used to read and write a binary blob. + * + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this paramater is NULL, then the + * file will be created in the root of the debugfs filesystem. + * @blob: a pointer to a struct debugfs_blob_wrapper which contains a pointer + * to the blob data and the size of the data. + * + * This function creates a file in debugfs with the given name that exports + * @blob->data as a binary blob. If the @mode variable is so set it can be + * read from. Writing is not supported. + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the debugfs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) If an error occurs, NULL will be returned. + * + * If debugfs is not enabled in the kernel, the value -ENODEV will be + * returned. It is not wise to check for this value, but rather, check for + * NULL or !NULL instead as to eliminate the need for #ifdef in the calling + * code. 
+ */ +struct dentry *debugfs_create_blob(const char *name, mode_t mode, + struct dentry *parent, + struct debugfs_blob_wrapper *blob) +{ + return debugfs_create_file(name, mode, parent, blob, &fops_blob); +} +EXPORT_SYMBOL_GPL(debugfs_create_blob); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index a5fa6a6eede8..4b0428e335be 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -21,6 +21,11 @@ struct file_operations; +struct debugfs_blob_wrapper { + void *data; + unsigned long size; +}; + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_create_file(const char *name, mode_t mode, struct dentry *parent, void *data, @@ -39,6 +44,9 @@ struct dentry *debugfs_create_u32(const char *name, mode_t mode, struct dentry *debugfs_create_bool(const char *name, mode_t mode, struct dentry *parent, u32 *value); +struct dentry *debugfs_create_blob(const char *name, mode_t mode, + struct dentry *parent, + struct debugfs_blob_wrapper *blob); #else #include @@ -94,6 +102,13 @@ static inline struct dentry *debugfs_create_bool(const char *name, mode_t mode, return ERR_PTR(-ENODEV); } +static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode, + struct dentry *parent, + struct debugfs_blob_wrapper *blob) +{ + return ERR_PTR(-ENODEV); +} + #endif #endif -- cgit v1.2.3 From 832c57e9afa7a263bb2f8ee6d04d527ef6709aae Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 Mar 2006 11:23:21 -0700 Subject: [PATCH] sysfs: don't export dir symbols These functions should only be used by the kobject core, and if any driver tries to use them, bad things happen. Unexport them to try to prevent this from happening. Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index bea1f4c02b9b..9ee956864445 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -510,7 +510,3 @@ struct file_operations sysfs_dir_operations = { .read = generic_read_dir, .readdir = sysfs_readdir, }; - -EXPORT_SYMBOL_GPL(sysfs_create_dir); -EXPORT_SYMBOL_GPL(sysfs_remove_dir); -EXPORT_SYMBOL_GPL(sysfs_rename_dir); -- cgit v1.2.3 From b3229087c5e08589cea4f5040dab56f7dc11332a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 Mar 2006 15:44:26 -0800 Subject: [PATCH] sysfs: fix a kobject leak in sysfs_add_link on the error path As pointed out by Oliver Neukum. Cc: Maneesh Soni Cc: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/symlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index fe23f47f6e43..d2eac3ceed5f 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -66,6 +66,7 @@ static int sysfs_add_link(struct dentry * parent, const char * name, struct kobj if (!error) return 0; + kobject_put(target); kfree(sl->link_name); exit2: kfree(sl); -- cgit v1.2.3 From 7a1218a277c45cba1fb8d7089407a1769c645c43 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 18:11:10 -0500 Subject: SUNRPC: Ensure rpc_call_async() always calls tk_ops->rpc_release() Currently this will not happen if we exit before rpc_new_task() was called. Also fix up rpc_run_task() to do the same (for consistency). 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 32 +++++++++++++++----------------- net/sunrpc/clnt.c | 10 +++++++--- net/sunrpc/sched.c | 5 ++++- 3 files changed, 26 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 02c7d8c04c58..4aba15ad1d27 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -605,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) int status; atomic_inc(&data->count); + /* + * If rpc_run_task() ends up calling ->rpc_release(), we + * want to ensure that it takes the 'error' code path. + */ + data->rpc_status = -ENOMEM; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); - if (IS_ERR(task)) { - nfs4_opendata_free(data); + if (IS_ERR(task)) return PTR_ERR(task); - } status = nfs4_wait_for_completion_rpc_task(task); if (status != 0) { data->cancelled = 1; @@ -708,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) int status; atomic_inc(&data->count); + /* + * If rpc_run_task() ends up calling ->rpc_release(), we + * want to ensure that it takes the 'error' code path. + */ + data->rpc_status = -ENOMEM; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); - if (IS_ERR(task)) { - nfs4_opendata_free(data); + if (IS_ERR(task)) return PTR_ERR(task); - } status = nfs4_wait_for_completion_rpc_task(task); if (status != 0) { data->cancelled = 1; @@ -2959,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data->rpc_status = 0; task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data); - if (IS_ERR(task)) { - nfs4_delegreturn_release(data); + if (IS_ERR(task)) return PTR_ERR(task); - } status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) { status = data->rpc_status; @@ -3182,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, struct nfs_seqid *seqid) { struct nfs4_unlockdata *data; - struct rpc_task *task; data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); if (data == NULL) { @@ -3192,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, /* Unlock _before_ we do the RPC call */ do_vfs_lock(fl->fl_file, fl); - task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); - if (IS_ERR(task)) - nfs4_locku_release_calldata(data); - return task; + return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); } static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) @@ -3376,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.reclaim = 1; task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC, &nfs4_lock_ops, data); - if (IS_ERR(task)) { - nfs4_lock_release(data); + if (IS_ERR(task)) return PTR_ERR(task); - } ret = nfs4_wait_for_completion_rpc_task(task); if (ret == 0) { ret = data->rpc_status; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6e71d6ace5a3..aa8965e9d307 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -495,15 +495,16 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, int status; /* If this client is slain all further I/O fails */ + status = -EIO; if (clnt->cl_dead) - return -EIO; + goto out_release; flags |= RPC_TASK_ASYNC; /* Create/initialize a new RPC task */ status = -ENOMEM; if (!(task = rpc_new_task(clnt, flags, tk_ops, data))) - goto out; + goto out_release; /* Mask signals on GSS_AUTH 
upcalls */ rpc_task_sigmask(task, &oldset); @@ -518,7 +519,10 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, rpc_release_task(task); rpc_restore_sigmask(&oldset); -out: + return status; +out_release: + if (tk_ops->rpc_release != NULL) + tk_ops->rpc_release(data); return status; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cd51b5468332..3fc13bea302d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -921,8 +921,11 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, { struct rpc_task *task; task = rpc_new_task(clnt, flags, ops, data); - if (task == NULL) + if (task == NULL) { + if (ops->rpc_release != NULL) + ops->rpc_release(data); return ERR_PTR(-ENOMEM); + } atomic_inc(&task->tk_count); rpc_execute(task); return task; -- cgit v1.2.3 From 096455a22acac06fb6d0d75f276170ab72d55ba6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:23:42 -0500 Subject: NFSv4: Dont list system.nfs4_acl for filesystems that don't support it. Thanks to Frank Filz for pointing out that we list system.nfs4_acl extended attribute even on filesystems where we don't actually support nfs4_acl. This is inconsistent with the e.g. ext3 POSIX ACL behaviour, and seems to annoy cp. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4aba15ad1d27..47ece1dd3c67 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3559,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) { size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode))) + return 0; if (buf && buflen < len) return -ERANGE; if (buf) -- cgit v1.2.3 From f3ee439f43381e45b191cf721b4a51d41f33301f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:24:13 -0500 Subject: LOCKD: nlmsvc_traverse_blocks return is unused Note that we never return non-zero. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 3 +-- fs/lockd/svcsubs.c | 5 +++-- include/linux/lockd/lockd.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index ce754efe2841..d2b66bad7d50 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -309,14 +309,13 @@ restart: * Loop over all blocks and perform the action specified. * (NLM_ACT_CHECK handled by nlmsvc_inspect_file). 
*/ -int +void nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) { if (action == NLM_ACT_MARK) nlmsvc_act_mark(host, file); else nlmsvc_act_unlock(host, file); - return 0; } /* diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 601e5b3dfe20..2043011a1a2d 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -224,8 +224,9 @@ nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action) if (file->f_count || file->f_blocks || file->f_shares) return 1; } else { - if (nlmsvc_traverse_blocks(host, file, action) - || nlmsvc_traverse_shares(host, file, action)) + nlmsvc_traverse_blocks(host, file, action); + + if (nlmsvc_traverse_shares(host, file, action)) return 1; } return nlm_traverse_locks(host, file, action); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index a04137d0c5de..995f89dc8c04 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -183,7 +183,7 @@ u32 nlmsvc_testlock(struct nlm_file *, struct nlm_lock *, struct nlm_lock *); u32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *); unsigned long nlmsvc_retry_blocked(void); -int nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, +void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, int action); void nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32); -- cgit v1.2.3 From 5f12191bc000ea31970339a5f54c11087506711c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:24:25 -0500 Subject: LOCKD: Make nlmsvc_traverse_shares return void The nlmsvc_traverse_shares return value is always zero, hence useless. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/lockd/svcshare.c | 4 +--- fs/lockd/svcsubs.c | 4 +--- include/linux/lockd/share.h | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c index 4943fb7836ce..27288c83da96 100644 --- a/fs/lockd/svcshare.c +++ b/fs/lockd/svcshare.c @@ -88,7 +88,7 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file, * Traverse all shares for a given file (and host). * NLM_ACT_CHECK is handled by nlmsvc_inspect_file. 
*/ -int +void nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action) { struct nlm_share *share, **shpp; @@ -106,6 +106,4 @@ nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action) } shpp = &share->s_next; } - - return 0; } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 2043011a1a2d..c7a6e3ae44d6 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -225,9 +225,7 @@ nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action) return 1; } else { nlmsvc_traverse_blocks(host, file, action); - - if (nlmsvc_traverse_shares(host, file, action)) - return 1; + nlmsvc_traverse_shares(host, file, action); } return nlm_traverse_locks(host, file, action); } diff --git a/include/linux/lockd/share.h b/include/linux/lockd/share.h index 5d8aa325f140..c75a424ebe4c 100644 --- a/include/linux/lockd/share.h +++ b/include/linux/lockd/share.h @@ -25,6 +25,6 @@ u32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *, struct nlm_args *); u32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *, struct nlm_args *); -int nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int); +void nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int); #endif /* LINUX_LOCKD_SHARE_H */ -- cgit v1.2.3 From df6db302cb236ac3a683d535a3e2073d9f4b2833 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:25:10 -0500 Subject: SUNRPC,RPCSEC_GSS: spkm3--fix config dependencies Add default selection of CRYPTO_CAST5 when selecting RPCSEC_GSS_SPKM3. Signed-off-by: Kevin Coffman Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index e9749b0eecd8..9a4158f2a3ac 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1567,6 +1567,7 @@ config RPCSEC_GSS_SPKM3 select CRYPTO select CRYPTO_MD5 select CRYPTO_DES + select CRYPTO_CAST5 help Provides for secure RPC calls by means of a gss-api mechanism based on the SPKM3 public-key mechanism. -- cgit v1.2.3 From 4de151d8cd2553e7e89044ab5d72fcad4eb04afb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 22 Mar 2006 00:13:35 +0100 Subject: It's UTF-8 Fix some comments to "UTF-8". Signed-off-by: Alexey Dobriyan Signed-off-by: Adrian Bunk --- Documentation/filesystems/isofs.txt | 4 ++-- Documentation/filesystems/jfs.txt | 2 +- Documentation/filesystems/vfat.txt | 6 +++--- fs/befs/linuxvfs.c | 2 +- fs/cifs/CHANGES | 2 +- fs/fat/dir.c | 2 +- fs/fat/inode.c | 2 +- fs/isofs/joliet.c | 2 +- fs/nls/Kconfig | 2 +- include/asm-mips/termbits.h | 2 +- include/linux/msdos_fs.h | 2 +- 11 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt index 424585ff6ea1..758e50401c16 100644 --- a/Documentation/filesystems/isofs.txt +++ b/Documentation/filesystems/isofs.txt @@ -9,9 +9,9 @@ when using discs encoded using Microsoft's Joliet extensions. iocharset=name Character set to use for converting from Unicode to ASCII. Joliet filenames are stored in Unicode format, but Unix for the most part doesn't know how to deal with Unicode. - There is also an option of doing UTF8 translations with the + There is also an option of doing UTF-8 translations with the utf8 option. - utf8 Encode Unicode names in UTF8 format. Default is no. + utf8 Encode Unicode names in UTF-8 format. Default is no. Mount options unique to the isofs filesystem. 
block=512 Set the block size for the disk to 512 bytes diff --git a/Documentation/filesystems/jfs.txt b/Documentation/filesystems/jfs.txt index 3e992daf99ad..bae128663748 100644 --- a/Documentation/filesystems/jfs.txt +++ b/Documentation/filesystems/jfs.txt @@ -6,7 +6,7 @@ The following mount options are supported: iocharset=name Character set to use for converting from Unicode to ASCII. The default is to do no conversion. Use - iocharset=utf8 for UTF8 translations. This requires + iocharset=utf8 for UTF-8 translations. This requires CONFIG_NLS_UTF8 to be set in the kernel .config file. iocharset=none specifies the default behavior explicitly. diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index 5ead20c6c744..2001abbc60e6 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -28,16 +28,16 @@ iocharset=name -- Character set to use for converting between the know how to deal with Unicode. By default, FAT_DEFAULT_IOCHARSET setting is used. - There is also an option of doing UTF8 translations + There is also an option of doing UTF-8 translations with the utf8 option. NOTE: "iocharset=utf8" is not recommended. If unsure, you should consider the following option instead. -utf8= -- UTF8 is the filesystem safe version of Unicode that +utf8= -- UTF-8 is the filesystem safe version of Unicode that is used by the console. It can be be enabled for the filesystem with this option. If 'uni_xlate' gets set, - UTF8 gets disabled. + UTF-8 gets disabled. uni_xlate= -- Translate unhandled Unicode characters to special escaped sequences. This would let you backup and diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 2d365cb8eec6..dd6048ce0532 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -561,7 +561,7 @@ befs_utf2nls(struct super_block *sb, const char *in, * @sb: Superblock * @src: Input string buffer in NLS format * @srclen: Length of input string in bytes - * @dest: The output string in UTF8 format + * @dest: The output string in UTF-8 format * @destlen: Length of the output buffer * * Converts input string @src, which is in the format of the loaded NLS map, diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index d335015473a5..cb68efba35db 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -160,7 +160,7 @@ improperly zeroed buffer in CIFS Unix extensions set times call. Version 1.25 ------------ Fix internationalization problem in cifs readdir with filenames that map to -longer UTF8 strings than the string on the wire was in Unicode. Add workaround +longer UTF-8 strings than the string on the wire was in Unicode. Add workaround for readdir to netapp servers. Fix search rewind (seek into readdir to return non-consecutive entries). Do not do readdir when server negotiates buffer size to small to fit filename. Add support for reading POSIX ACLs from diff --git a/fs/fat/dir.c b/fs/fat/dir.c index db0de5c621c7..4095bc149eb1 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -114,7 +114,7 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos, } /* - * Convert Unicode 16 to UTF8, translated Unicode, or ASCII. + * Convert Unicode 16 to UTF-8, translated Unicode, or ASCII. * If uni_xlate is enabled and we can't get a 1:1 conversion, use a * colon as an escape character since it is normally invalid on the vfat * filesystem. 
The following four characters are the hexadecimal digits diff --git a/fs/fat/inode.c b/fs/fat/inode.c index e7f4aa7fc686..e78d7b4842cc 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1101,7 +1101,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, return -EINVAL; } } - /* UTF8 doesn't provide FAT semantics */ + /* UTF-8 doesn't provide FAT semantics */ if (!strcmp(opts->iocharset, "utf8")) { printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" " for FAT filesystems, filesystem will be case sensitive!\n"); diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index 2931de7f1a6a..81a90e170ac3 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c @@ -11,7 +11,7 @@ #include "isofs.h" /* - * Convert Unicode 16 to UTF8 or ASCII. + * Convert Unicode 16 to UTF-8 or ASCII. */ static int uni16_to_x8(unsigned char *ascii, u16 *uni, int len, struct nls_table *nls) diff --git a/fs/nls/Kconfig b/fs/nls/Kconfig index 0ab8f00bdbb2..976ecccd6f56 100644 --- a/fs/nls/Kconfig +++ b/fs/nls/Kconfig @@ -491,7 +491,7 @@ config NLS_KOI8_U (koi8-u) and Belarusian (koi8-ru) character sets. config NLS_UTF8 - tristate "NLS UTF8" + tristate "NLS UTF-8" depends on NLS help If you want to display filenames with native language characters diff --git a/include/asm-mips/termbits.h b/include/asm-mips/termbits.h index c29c65b7818e..fa6d04dac56b 100644 --- a/include/asm-mips/termbits.h +++ b/include/asm-mips/termbits.h @@ -77,7 +77,7 @@ struct termios { #define IXANY 0004000 /* Any character will restart after stop. */ #define IXOFF 0010000 /* Enable start/stop input control. */ #define IMAXBEL 0020000 /* Ring bell when input queue is full. */ -#define IUTF8 0040000 /* Input is UTF8 */ +#define IUTF8 0040000 /* Input is UTF-8 */ /* c_oflag bits */ #define OPOST 0000001 /* Perform output processing. */ diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index e933e2a355ad..8bcd9450d926 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -199,7 +199,7 @@ struct fat_mount_options { sys_immutable:1, /* set = system files are immutable */ dotsOK:1, /* set = hidden and system files are named '.filename' */ isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ - utf8:1, /* Use of UTF8 character set (Default) */ + utf8:1, /* Use of UTF-8 character set (Default) */ unicode_xlate:1, /* create escape sequences for unhandled Unicode */ numtail:1, /* Does first alias have a numeric '~1' type tail? */ atari:1, /* Use Atari GEMDOS variation of MS-DOS fs */ -- cgit v1.2.3 From f1fdc848aab7fb95b32e058b7f06cc07912b3734 Mon Sep 17 00:00:00 2001 From: Yingping Lu Date: Wed, 22 Mar 2006 12:44:15 +1100 Subject: [XFS] Fixing KDB's xrwtrc command, also added the current process id into the trace. 
SGI-PV: 948300 SGI-Modid: xfs-linux-melb:xfs-kern:208069a Signed-off-by: Yingping Lu Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 2 +- fs/xfs/linux-2.6/xfs_lrw.c | 4 ++-- fs/xfs/xfs_inode.c | 8 ++++---- fs/xfs/xfs_iomap.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index d9bf130c63ba..965757cf15e9 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -103,7 +103,7 @@ xfs_page_trace( (void *)((unsigned long)delalloc), (void *)((unsigned long)unmapped), (void *)((unsigned long)unwritten), - (void *)NULL, + (void *)((unsigned long)current_pid()), (void *)NULL); } #else diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 2a936bd38ce7..0169360475c4 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -83,7 +83,7 @@ xfs_rw_enter_trace( (void *)((unsigned long)ioflags), (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), (void *)((unsigned long)(io->io_new_size & 0xffffffff)), - (void *)NULL, + (void *)((unsigned long)current_pid()), (void *)NULL, (void *)NULL, (void *)NULL, @@ -113,7 +113,7 @@ xfs_inval_cached_trace( (void *)((unsigned long)(first & 0xffffffff)), (void *)((unsigned long)((last >> 32) & 0xffffffff)), (void *)((unsigned long)(last & 0xffffffff)), - (void *)NULL, + (void *)((unsigned long)current_pid()), (void *)NULL, (void *)NULL, (void *)NULL, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7d0ded05a467..2424a4777949 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1372,10 +1372,10 @@ xfs_itrunc_trace( (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff), (void*)(unsigned long)(toss_finish & 0xffffffff), (void*)(unsigned long)current_cpu(), - (void*)0, - (void*)0, - (void*)0, - (void*)0); + (void*)(unsigned long)current_pid(), + (void*)NULL, + (void*)NULL, + (void*)NULL); } #else #define xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 788917f355c4..d5dfedcb8922 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -76,7 +76,7 @@ xfs_iomap_enter_trace( (void *)((unsigned long)count), (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), (void *)((unsigned long)(io->io_new_size & 0xffffffff)), - (void *)NULL, + (void *)((unsigned long)current_pid()), (void *)NULL, (void *)NULL, (void *)NULL, -- cgit v1.2.3 From 9fa8046f50bcb88ab9183ee1f22de5adc42bf92a Mon Sep 17 00:00:00 2001 From: Yingping Lu Date: Wed, 22 Mar 2006 12:44:35 +1100 Subject: [XFS] Fixing the error caused by the conflict between DIO Write's conversion and concurrent truncate operations. Use vn_iowait to wait for the completion of any pending DIOs. Since the truncate requires exclusive IOLOCK, so this blocks any further DIO operations since DIO write also needs exclusive IOBLOCK. This serves as a barrier and prevent any potential starvation. 
SGI-PV: 947420 SGI-Modid: xfs-linux-melb:xfs-kern:208088a Signed-off-by: Yingping Lu Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 3 +++ fs/xfs/xfs_vnodeops.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2424a4777949..8d2b36879f06 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1420,6 +1420,9 @@ xfs_itruncate_start( mp = ip->i_mount; vp = XFS_ITOV(ip); + + vn_iowait(vp); /* wait for the completion of any pending DIOs */ + /* * Call VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES() to get rid of pages and buffers * overlapping the region being removed. We have to use diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 8b5a44fe2865..697bf22a84f3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -615,6 +615,7 @@ xfs_setattr( code = xfs_igrow_start(ip, vap->va_size, credp); } xfs_iunlock(ip, XFS_ILOCK_EXCL); + vn_iowait(vp); /* wait for the completion of any pending DIOs */ if (!code) code = xfs_itruncate_data(ip, vap->va_size); if (code) { @@ -4310,8 +4311,10 @@ xfs_free_file_space( ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1); if (attr_flags & ATTR_NOLOCK) need_iolock = 0; - if (need_iolock) + if (need_iolock) { xfs_ilock(ip, XFS_IOLOCK_EXCL); + vn_iowait(vp); /* wait for the completion of any pending DIOs */ + } rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog), (__uint8_t)NBPP); -- cgit v1.2.3 From 38e2299a641d93d029eb559e096648ab75a22be2 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Wed, 22 Mar 2006 12:47:15 +1100 Subject: [XFS] Explain the race closed by the addition of vn_iowait() to the start of xfs_itruncate_start(). SGI-PV: 947420 SGI-Modid: xfs-linux-melb:xfs-kern:25527a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/xfs_inode.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8d2b36879f06..88a517fad07b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1393,6 +1393,16 @@ xfs_itrunc_trace( * calling into the buffer/page cache code and we can't hold the * inode lock when we do so. * + * We need to wait for any direct I/Os in flight to complete before we + * proceed with the truncate. This is needed to prevent the extents + * being read or written by the direct I/Os from being removed while the + * I/O is in flight as there is no other method of synchronising + * direct I/O with the truncate operation. Also, because we hold + * the IOLOCK in exclusive mode, we prevent new direct I/Os from being + * started until the truncate completes and drops the lock. Essentially, + * the vn_iowait() call forms an I/O barrier that provides strict ordering + * between direct I/Os and the truncate operation. + * * The flags parameter can have either the value XFS_ITRUNC_DEFINITE * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used * in the case that the caller is locking things out of order and -- cgit v1.2.3 From 3758dee9f64fa9692063e7167128f9cec3f5fd33 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Wed, 22 Mar 2006 12:47:28 +1100 Subject: [XFS] Fixup naming inconsistencies found by Pekka Enberg and one from Jan Engelhardt. 
SGI-PV: 947038 SGI-Modid: xfs-linux-melb:xfs-kern:25529a Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_qm_bhv.c | 4 ++-- fs/xfs/support/ktrace.c | 4 ++-- fs/xfs/xfs_acl.h | 4 ++-- fs/xfs/xfs_vfsops.c | 28 ++++++++++++++-------------- 4 files changed, 20 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 90402a1c3983..6838b36d95a9 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -374,7 +374,7 @@ xfs_qm_exit(void) vfs_bhv_clr_custom(&xfs_qmops); xfs_qm_cleanup_procfs(); if (qm_dqzone) - kmem_cache_destroy(qm_dqzone); + kmem_zone_destroy(qm_dqzone); if (qm_dqtrxzone) - kmem_cache_destroy(qm_dqtrxzone); + kmem_zone_destroy(qm_dqtrxzone); } diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index 841aa4c15b8a..addf5a7ea06c 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c @@ -39,8 +39,8 @@ ktrace_init(int zentries) void ktrace_uninit(void) { - kmem_cache_destroy(ktrace_hdr_zone); - kmem_cache_destroy(ktrace_ent_zone); + kmem_zone_destroy(ktrace_hdr_zone); + kmem_zone_destroy(ktrace_ent_zone); } /* diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index f9315bc960cb..538d0d65b04c 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -55,8 +55,8 @@ struct xfs_inode; extern struct kmem_zone *xfs_acl_zone; #define xfs_acl_zone_init(zone, name) \ - (zone) = kmem_zone_init(sizeof(xfs_acl_t), name) -#define xfs_acl_zone_destroy(zone) kmem_cache_destroy(zone) + (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) +#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) extern int xfs_acl_inherit(struct vnode *, struct vattr *, xfs_acl_t *); extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 811a4261fa2c..c40e5883db43 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -190,18 +190,18 @@ xfs_cleanup(void) ktrace_free(xfs_alloc_trace_buf); #endif - kmem_cache_destroy(xfs_bmap_free_item_zone); - kmem_cache_destroy(xfs_btree_cur_zone); - kmem_cache_destroy(xfs_inode_zone); - kmem_cache_destroy(xfs_trans_zone); - kmem_cache_destroy(xfs_da_state_zone); - kmem_cache_destroy(xfs_dabuf_zone); - kmem_cache_destroy(xfs_buf_item_zone); - kmem_cache_destroy(xfs_efd_zone); - kmem_cache_destroy(xfs_efi_zone); - kmem_cache_destroy(xfs_ifork_zone); - kmem_cache_destroy(xfs_ili_zone); - kmem_cache_destroy(xfs_chashlist_zone); + kmem_zone_destroy(xfs_bmap_free_item_zone); + kmem_zone_destroy(xfs_btree_cur_zone); + kmem_zone_destroy(xfs_inode_zone); + kmem_zone_destroy(xfs_trans_zone); + kmem_zone_destroy(xfs_da_state_zone); + kmem_zone_destroy(xfs_dabuf_zone); + kmem_zone_destroy(xfs_buf_item_zone); + kmem_zone_destroy(xfs_efd_zone); + kmem_zone_destroy(xfs_efi_zone); + kmem_zone_destroy(xfs_ifork_zone); + kmem_zone_destroy(xfs_ili_zone); + kmem_zone_destroy(xfs_chashlist_zone); } /* @@ -632,7 +632,7 @@ xfs_quiesce_fs( xfs_mount_t *mp) { int count = 0, pincount; - + xfs_refcache_purge_mp(mp); xfs_flush_buftarg(mp->m_ddev_targp, 0); xfs_finish_reclaim_all(mp, 0); @@ -643,7 +643,7 @@ xfs_quiesce_fs( * meta data (typically directory updates). * Which then must be flushed and logged before * we can write the unmount record. 
- */ + */ do { xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); -- cgit v1.2.3 From 2ddee844eef48bf9240ebdfd6c5ffc4333c7d639 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Wed, 22 Mar 2006 12:47:40 +1100 Subject: [XFS] Check that a page has dirty buffers before finding it acceptable for rewrite clustering. This prevents writing excessive amounts of clean data when doing random rewrites of a cached file. SGI-PV: 951193 SGI-Modid: xfs-linux-melb:xfs-kern:25531a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 965757cf15e9..97fc056130eb 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -647,7 +647,7 @@ xfs_is_delayed_page( acceptable = (type == IOMAP_UNWRITTEN); else if (buffer_delay(bh)) acceptable = (type == IOMAP_DELAY); - else if (buffer_mapped(bh)) + else if (buffer_dirty(bh) && buffer_mapped(bh)) acceptable = (type == 0); else break; -- cgit v1.2.3 From e15f195cfb2fb1f2af0fdfc21277643deb26c0df Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Wed, 22 Mar 2006 12:47:52 +1100 Subject: [XFS] Reenable the noikeep (delete inode cluster space) option by default. SGI-PV: 951200 SGI-Modid: xfs-linux-melb:xfs-kern:25535a Signed-off-by: Nathan Scott --- fs/xfs/xfs_vfsops.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index c40e5883db43..d4ec4dfaf19c 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -1698,10 +1698,7 @@ xfs_parseargs( int iosize; args->flags2 |= XFSMNT2_COMPAT_IOSIZE; - -#if 0 /* XXX: off by default, until some remaining issues ironed out */ - args->flags |= XFSMNT_IDELETE; /* default to on */ -#endif + args->flags |= XFSMNT_IDELETE; if (!options) goto done; @@ -1911,7 +1908,6 @@ xfs_showargs( { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, - { XFS_MOUNT_IDELETE, "," MNTOPT_NOIKEEP }, { 0, NULL } }; struct proc_xfs_info *xfs_infop; @@ -1947,6 +1943,8 @@ xfs_showargs( seq_printf(m, "," MNTOPT_SWIDTH "=%d", (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); + if (!(mp->m_flags & XFS_MOUNT_IDELETE)) + seq_printf(m, "," MNTOPT_IKEEP); if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)) seq_printf(m, "," MNTOPT_LARGEIO); if (mp->m_flags & XFS_MOUNT_BARRIER) -- cgit v1.2.3 From bb19fba1937cb6ab2bb98ac893365f6ebf88ef1b Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Wed, 22 Mar 2006 14:12:12 +1100 Subject: [XFS] Sync up one/two other minor changes missed in previous merges. Signed-off-by: Nathan Scott --- fs/xfs/Makefile-linux-2.6 | 40 +++++++++++++--------------------------- fs/xfs/linux-2.6/xfs_super.c | 4 +--- fs/xfs/xfs_dmapi.h | 10 ---------- fs/xfs/xfs_rw.h | 1 + fs/xfs/xfs_vnodeops.c | 6 +++--- 5 files changed, 18 insertions(+), 43 deletions(-) (limited to 'fs') diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 index 97bd4743b461..5d73eaa1971f 100644 --- a/fs/xfs/Makefile-linux-2.6 +++ b/fs/xfs/Makefile-linux-2.6 @@ -1,33 +1,19 @@ # -# Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. +# Copyright (c) 2000-2005 Silicon Graphics, Inc. +# All Rights Reserved. 
# -# This program is free software; you can redistribute it and/or modify it -# under the terms of version 2 of the GNU General Public License as +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as # published by the Free Software Foundation. # -# This program is distributed in the hope that it would be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. # -# Further, this software is distributed without any warranty that it is -# free of the rightful claim of any third person regarding infringement -# or the like. Any license provided herein, whether implied or -# otherwise, applies only to this software file. Patent licenses, if -# any, provided herein do not apply to combinations of this program with -# other software, or any other product whatsoever. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write the Free Software Foundation, Inc., 59 -# Temple Place - Suite 330, Boston MA 02111-1307, USA. -# -# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, -# Mountain View, CA 94043, or: -# -# http://www.sgi.com -# -# For further information regarding this notice, see: -# -# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char @@ -36,7 +22,7 @@ XFS_LINUX := linux-2.6 ifeq ($(CONFIG_XFS_DEBUG),y) EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG - EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING + EXTRA_CFLAGS += -DXFS_BUF_LOCK_TRACKING endif ifeq ($(CONFIG_XFS_TRACE),y) EXTRA_CFLAGS += -DXFS_ALLOC_TRACE @@ -50,7 +36,7 @@ ifeq ($(CONFIG_XFS_TRACE),y) EXTRA_CFLAGS += -DXFS_ILOCK_TRACE EXTRA_CFLAGS += -DXFS_LOG_TRACE EXTRA_CFLAGS += -DXFS_RW_TRACE - EXTRA_CFLAGS += -DPAGEBUF_TRACE + EXTRA_CFLAGS += -DXFS_BUF_TRACE EXTRA_CFLAGS += -DXFS_VNODE_TRACE endif diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index d9d28a965bac..8355faf8ffde 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -545,7 +545,7 @@ xfs_flush_device( xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); } -#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR) +#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR|SYNC_REFCACHE) STATIC void vfs_sync_worker( vfs_t *vfsp, @@ -972,7 +972,6 @@ init_xfs_fs( void ) error = register_filesystem(&xfs_fs_type); if (error) goto undo_register; - XFS_DM_INIT(&xfs_fs_type); return 0; undo_register: @@ -989,7 +988,6 @@ STATIC void __exit exit_xfs_fs( void ) { vfs_exitquota(); - XFS_DM_EXIT(&xfs_fs_type); unregister_filesystem(&xfs_fs_type); xfs_cleanup(); xfs_buf_terminate(); diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index b4c7f2bc55a0..00b1540f8108 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -191,14 +191,4 @@ typedef enum { extern struct bhv_vfsops xfs_dmops; -#ifdef CONFIG_XFS_DMAPI -void xfs_dm_init(struct file_system_type *); -void xfs_dm_exit(struct file_system_type *); 
-#define XFS_DM_INIT(fstype) xfs_dm_init(fstype) -#define XFS_DM_EXIT(fstype) xfs_dm_exit(fstype) -#else -#define XFS_DM_INIT(fstype) -#define XFS_DM_EXIT(fstype) -#endif - #endif /* __XFS_DMAPI_H__ */ diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index de85eefb7966..e63795644478 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -89,6 +89,7 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, */ extern int xfs_rwlock(bhv_desc_t *bdp, vrwlock_t write_lock); extern void xfs_rwunlock(bhv_desc_t *bdp, vrwlock_t write_lock); +extern int xfs_setattr(bhv_desc_t *bdp, vattr_t *vap, int flags, cred_t *credp); extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf, xfs_off_t offset, cred_t *credp, int flags); extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 697bf22a84f3..a478f42e63ff 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -3187,7 +3187,7 @@ xfs_rmdir( /* Fall through to std_return with error = 0 or the errno * from xfs_trans_commit. */ -std_return: + std_return: if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dir_vp, DM_RIGHT_NULL, @@ -3197,12 +3197,12 @@ std_return: } return error; -error1: + error1: xfs_bmap_cancel(&free_list); cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ -error_return: + error_return: xfs_trans_cancel(tp, cancel_flags); goto std_return; } -- cgit v1.2.3 From 5e7a99ac452d7a4ce43b8bacb3495475e1f9fd71 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Wed, 22 Mar 2006 00:07:37 -0800 Subject: [PATCH] v9fs: assign dentry ops to negative dentries If a file is not found in v9fs_vfs_lookup, the function creates negative dentry, but doesn't assign any dentry ops. This leaves the negative entry in the cache (there is no d_delete to mark it for removal). If the file is created outside of the mounted v9fs filesystem, the file shows up in the directory with weird permissions. This patch assigns the default v9fs dentry ops to the negative dentry. Signed-off-by: Latchesar Ionkov Signed-off-by: Eric Van Hensbergen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/vfs_inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 3ad8455f8577..651a9e14d9a9 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -614,6 +614,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; v9ses = v9fs_inode2v9ses(dir); + dentry->d_op = &v9fs_dentry_operations; dirfid = v9fs_fid_lookup(dentry->d_parent); if (!dirfid) { @@ -681,8 +682,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, goto FreeFcall; fid->qid = fcall->params.rstat.stat.qid; - - dentry->d_op = &v9fs_dentry_operations; v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb); d_add(dentry, inode); -- cgit v1.2.3 From ac2b898ca6fb06196a26869c23b66afe7944e52e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 22 Mar 2006 00:08:15 -0800 Subject: [PATCH] slab: Remove SLAB_NO_REAP option SLAB_NO_REAP is documented as an option that will cause this slab not to be reaped under memory pressure. However, that is not what happens. The only thing that SLAB_NO_REAP controls at the moment is the reclaim of the unused slab elements that were allocated in batch in cache_reap(). 
Cache_reap() is run every few seconds independently of memory pressure. Could we remove the whole thing? Its only used by three slabs anyways and I cannot find a reason for having this option. There is an additional problem with SLAB_NO_REAP. If set then the recovery of objects from alien caches is switched off. Objects not freed on the same node where they were initially allocated will only be reused if a certain amount of objects accumulates from one alien node (not very likely) or if the cache is explicitly shrunk. (Strangely __cache_shrink does not check for SLAB_NO_REAP) Getting rid of SLAB_NO_REAP fixes the problems with alien cache freeing. Signed-off-by: Christoph Lameter Cc: Pekka Enberg Cc: Manfred Spraul Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/scsi/iscsi_tcp.c | 2 +- fs/ocfs2/super.c | 2 +- include/linux/slab.h | 1 - mm/slab.c | 13 ++----------- 4 files changed, 4 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index ff79e68b347c..7b82ff090d42 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -3639,7 +3639,7 @@ iscsi_tcp_init(void) taskcache = kmem_cache_create("iscsi_taskcache", sizeof(struct iscsi_data_task), 0, - SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL, NULL); if (!taskcache) return -ENOMEM; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8dd3aafec499..09e1c57a86a0 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -959,7 +959,7 @@ static int ocfs2_initialize_mem_caches(void) ocfs2_lock_cache = kmem_cache_create("ocfs2_lock", sizeof(struct ocfs2_journal_lock), 0, - SLAB_NO_REAP|SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN, NULL, NULL); if (!ocfs2_lock_cache) return -ENOMEM; diff --git a/include/linux/slab.h b/include/linux/slab.h index 38bed95dda7a..2b28c849d75a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -38,7 +38,6 @@ typedef struct kmem_cache kmem_cache_t; #define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */ #define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */ #define SLAB_POISON 0x00000800UL /* Poison objects */ -#define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ diff --git a/mm/slab.c b/mm/slab.c index 5c2574989834..24235506b2a0 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -170,12 +170,12 @@ #if DEBUG # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ SLAB_POISON | SLAB_HWCACHE_ALIGN | \ - SLAB_NO_REAP | SLAB_CACHE_DMA | \ + SLAB_CACHE_DMA | \ SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ SLAB_DESTROY_BY_RCU) #else -# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ +# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ SLAB_DESTROY_BY_RCU) @@ -662,7 +662,6 @@ static struct kmem_cache cache_cache = { .limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, .buffer_size = sizeof(struct kmem_cache), - .flags = SLAB_NO_REAP, .name = "kmem_cache", #if DEBUG .obj_size = sizeof(struct kmem_cache), @@ -1848,9 +1847,6 @@ static void setup_cpu_cache(struct kmem_cache *cachep) * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check * for buffer overruns. 
* - * %SLAB_NO_REAP - Don't automatically reap this cache when we're under - * memory pressure. - * * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware * cacheline. This can be beneficial if you're counting cycles as closely * as davem. @@ -3584,10 +3580,6 @@ static void cache_reap(void *unused) struct slab *slabp; searchp = list_entry(walk, struct kmem_cache, next); - - if (searchp->flags & SLAB_NO_REAP) - goto next; - check_irq_on(); l3 = searchp->nodelists[numa_node_id()]; @@ -3635,7 +3627,6 @@ static void cache_reap(void *unused) } while (--tofree > 0); next_unlock: spin_unlock_irq(&l3->list_lock); -next: cond_resched(); } check_irq_on(); -- cgit v1.2.3 From 84097518d1ecd2330f9488e4c2d09953a3340e74 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 22 Mar 2006 00:08:34 -0800 Subject: [PATCH] mm: nommu use compound pages Now that compound page handling is properly fixed in the VM, move nommu over to using compound pages rather than rolling their own refcounting. nommu vm page refcounting is broken anyway, but there is no need to have divergent code in the core VM now, nor when it gets fixed. Signed-off-by: Nick Piggin Cc: David Howells (Needs testing, please). Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ramfs/file-nommu.c | 3 +-- include/linux/mm.h | 4 ---- mm/internal.h | 12 ------------ mm/nommu.c | 4 ++-- mm/page_alloc.c | 7 ------- mm/slab.c | 9 ++++++++- 6 files changed, 11 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 3f810acd0bfa..b1ca234068f6 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -87,8 +87,7 @@ static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) xpages = 1UL << order; npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT; - for (loop = 0; loop < npages; loop++) - set_page_count(pages + loop, 1); + split_page(pages, order); /* trim off any pages we don't actually require */ for (loop = npages; loop < xpages; loop++) diff --git a/include/linux/mm.h b/include/linux/mm.h index 9b3cdfc8046d..3d84b7a35e0d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -327,11 +327,7 @@ static inline void get_page(struct page *page) void put_page(struct page *page); -#ifdef CONFIG_MMU void split_page(struct page *page, unsigned int order); -#else -static inline void split_page(struct page *page, unsigned int order) {} -#endif /* * Multiple processes may "see" the same page. E.g. for untouched diff --git a/mm/internal.h b/mm/internal.h index e3042db2a2d6..7bb339779818 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -15,19 +15,7 @@ static inline void set_page_refs(struct page *page, int order) { -#ifdef CONFIG_MMU set_page_count(page, 1); -#else - int i; - - /* - * We need to reference all the pages for this order, otherwise if - * anyone accesses one of the pages with (get/put) it will be freed. 
- * - eg: access_process_vm() - */ - for (i = 0; i < (1 << order); i++) - set_page_count(page + i, 1); -#endif /* CONFIG_MMU */ } static inline void __put_page(struct page *page) diff --git a/mm/nommu.c b/mm/nommu.c index 4951f4786f28..db45efac17cc 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -159,7 +159,7 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) /* * kmalloc doesn't like __GFP_HIGHMEM for some reason */ - return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM); + return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); } struct page * vmalloc_to_page(void *addr) @@ -623,7 +623,7 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) * - note that this may not return a page-aligned address if the object * we're allocating is smaller than a page */ - base = kmalloc(len, GFP_KERNEL); + base = kmalloc(len, GFP_KERNEL|__GFP_COMP); if (!base) goto enomem; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7aa0181287e1..e197818a7cf6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -422,11 +422,6 @@ static void __free_pages_ok(struct page *page, unsigned int order) mutex_debug_check_no_locks_freed(page_address(page), PAGE_SIZE<lru.next; } @@ -600,6 +602,8 @@ static inline void page_set_slab(struct page *page, struct slab *slab) static inline struct slab *page_get_slab(struct page *page) { + if (unlikely(PageCompound(page))) + page = (struct page *)page_private(page); return (struct slab *)page->lru.prev; } @@ -2412,8 +2416,11 @@ static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, struct page *page; /* Nasty!!!!!! I hope this is OK. */ - i = 1 << cachep->gfporder; page = virt_to_page(objp); + + i = 1; + if (likely(!PageCompound(page))) + i <<= cachep->gfporder; do { page_set_cache(page, cachep); page_set_slab(page, slabp); -- cgit v1.2.3 From b45b5bd65f668a665db40d093e4e1fe563533608 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 22 Mar 2006 00:08:55 -0800 Subject: [PATCH] hugepage: Strict page reservation for hugepage inodes These days, hugepages are demand-allocated at first fault time. There's a somewhat dubious (and racy) heuristic when making a new mmap() to check if there are enough available hugepages to fully satisfy that mapping. A particularly obvious case where the heuristic breaks down is where a process maps its hugepages not as a single chunk, but as a bunch of individually mmap()ed (or shmat()ed) blocks without touching and instantiating the pages in between allocations. In this case the size of each block is compared against the total number of available hugepages. It's thus easy for the process to become overcommitted, because each block mapping will succeed, although the total number of hugepages required by all blocks exceeds the number available. In particular, this defeats such a program which will detect a mapping failure and adjust its hugepage usage downward accordingly. The patch below addresses this problem, by strictly reserving a number of physical hugepages for hugepage inodes which have been mapped, but not instatiated. MAP_SHARED mappings are thus "safe" - they will fail on mmap(), not later with an OOM SIGKILL. MAP_PRIVATE mappings can still trigger an OOM. (Actually SHARED mappings can technically still OOM, but only if the sysadmin explicitly reduces the hugepage pool between mapping and instantiation) This patch appears to address the problem at hand - it allows DB2 to start correctly, for instance, which previously suffered the failure described above. 
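For concreteness, here is a hypothetical user-space sketch of the failure mode described above; the block count, the hugepage size and the "/mnt/huge" mount point are invented for illustration and are not part of the patch. With, say, a pool of 128 free hugepages, each of the four mmap() calls below covers only 64 hugepages, so the old per-mapping heuristic passes every time even though the four mappings together need 256; the shortfall only surfaces at fault time.

#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>

#define HPAGE_SIZE	(4UL << 20)	/* placeholder; the real size is arch specific */
#define BLOCK_PAGES	64		/* hugepages per mmap()ed block (example value) */
#define NBLOCKS		4

int main(void)
{
	size_t block = BLOCK_PAGES * HPAGE_SIZE;
	int fd = open("/mnt/huge/demo", O_CREAT | O_RDWR, 0600);
	int i;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (i = 0; i < NBLOCKS; i++) {
		void *p = mmap(NULL, block, PROT_READ | PROT_WRITE,
			       MAP_SHARED, fd, (off_t)i * block);
		/* Old behaviour: each call only checked that BLOCK_PAGES
		 * hugepages were free at that instant, so all NBLOCKS calls
		 * could succeed while the total exceeded the pool, and the
		 * process died later at fault time.  With the strict
		 * reservation, the mmap() that overruns the pool fails
		 * here with ENOMEM instead. */
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
	}
	return 0;
}
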
This patch causes no regressions on the libhugetblfs testsuite, and makes a test (designed to catch this problem) pass which previously failed (ppc64, POWER5). Signed-off-by: David Gibson Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 74 ++++++++------------------ include/linux/hugetlb.h | 8 ++- mm/hugetlb.c | 136 ++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 154 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b35195289945..1a1c2fcb7823 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -56,48 +56,10 @@ static void huge_pagevec_release(struct pagevec *pvec) pagevec_reinit(pvec); } -/* - * huge_pages_needed tries to determine the number of new huge pages that - * will be required to fully populate this VMA. This will be equal to - * the size of the VMA in huge pages minus the number of huge pages - * (covered by this VMA) that are found in the page cache. - * - * Result is in bytes to be compatible with is_hugepage_mem_enough() - */ -static unsigned long -huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma) -{ - int i; - struct pagevec pvec; - unsigned long start = vma->vm_start; - unsigned long end = vma->vm_end; - unsigned long hugepages = (end - start) >> HPAGE_SHIFT; - pgoff_t next = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); - pgoff_t endpg = next + hugepages; - - pagevec_init(&pvec, 0); - while (next < endpg) { - if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) - break; - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i]; - if (page->index > next) - next = page->index; - if (page->index >= endpg) - break; - next++; - hugepages--; - } - huge_pagevec_release(&pvec); - } - return hugepages << HPAGE_SHIFT; -} - static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file->f_dentry->d_inode; - struct address_space *mapping = inode->i_mapping; - unsigned long bytes; + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); loff_t len, vma_len; int ret; @@ -113,10 +75,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_end - vma->vm_start < HPAGE_SIZE) return -EINVAL; - bytes = huge_pages_needed(mapping, vma); - if (!is_hugepage_mem_enough(bytes)) - return -ENOMEM; - vma_len = (loff_t)(vma->vm_end - vma->vm_start); mutex_lock(&inode->i_mutex); @@ -129,6 +87,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) goto out; + if (vma->vm_flags & VM_MAYSHARE) + if (hugetlb_extend_reservation(info, len >> HPAGE_SHIFT) != 0) + goto out; + ret = 0; hugetlb_prefault_arch_hook(vma->vm_mm); if (inode->i_size < len) @@ -227,13 +189,18 @@ static void truncate_huge_page(struct page *page) put_page(page); } -static void truncate_hugepages(struct address_space *mapping, loff_t lstart) +static void truncate_hugepages(struct inode *inode, loff_t lstart) { + struct address_space *mapping = &inode->i_data; const pgoff_t start = lstart >> HPAGE_SHIFT; struct pagevec pvec; pgoff_t next; int i; + hugetlb_truncate_reservation(HUGETLBFS_I(inode), + lstart >> HPAGE_SHIFT); + if (!mapping->nrpages) + return; pagevec_init(&pvec, 0); next = start; while (1) { @@ -262,8 +229,7 @@ static void truncate_hugepages(struct address_space *mapping, loff_t lstart) static void hugetlbfs_delete_inode(struct inode *inode) { - if 
(inode->i_data.nrpages) - truncate_hugepages(&inode->i_data, 0); + truncate_hugepages(inode, 0); clear_inode(inode); } @@ -296,8 +262,7 @@ static void hugetlbfs_forget_inode(struct inode *inode) inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); - if (inode->i_data.nrpages) - truncate_hugepages(&inode->i_data, 0); + truncate_hugepages(inode, 0); clear_inode(inode); destroy_inode(inode); } @@ -356,7 +321,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) if (!prio_tree_empty(&mapping->i_mmap)) hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); spin_unlock(&mapping->i_mmap_lock); - truncate_hugepages(mapping, offset); + truncate_hugepages(inode, offset); return 0; } @@ -573,6 +538,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) hugetlbfs_inc_free_inodes(sbinfo); return NULL; } + p->prereserved_hpages = 0; return &p->vfs_inode; } @@ -805,9 +771,6 @@ struct file *hugetlb_zero_setup(size_t size) if (!can_do_hugetlb_shm()) return ERR_PTR(-EPERM); - if (!is_hugepage_mem_enough(size)) - return ERR_PTR(-ENOMEM); - if (!user_shm_lock(size, current->user)) return ERR_PTR(-ENOMEM); @@ -831,6 +794,11 @@ struct file *hugetlb_zero_setup(size_t size) if (!inode) goto out_file; + error = -ENOMEM; + if (hugetlb_extend_reservation(HUGETLBFS_I(inode), + size >> HPAGE_SHIFT) != 0) + goto out_inode; + d_instantiate(dentry, inode); inode->i_size = size; inode->i_nlink = 0; @@ -841,6 +809,8 @@ struct file *hugetlb_zero_setup(size_t size) file->f_mode = FMODE_WRITE | FMODE_READ; return file; +out_inode: + iput(inode); out_file: put_filp(file); out_dentry: diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index fa83836b63d2..cafe73eecb05 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -20,7 +20,6 @@ void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long) int hugetlb_prefault(struct address_space *, struct vm_area_struct *); int hugetlb_report_meminfo(char *); int hugetlb_report_node_meminfo(int, char *); -int is_hugepage_mem_enough(size_t); unsigned long hugetlb_total_pages(void); struct page *alloc_huge_page(struct vm_area_struct *, unsigned long); void free_huge_page(struct page *); @@ -89,7 +88,6 @@ static inline unsigned long hugetlb_total_pages(void) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) #define unmap_hugepage_range(vma, start, end) BUG() -#define is_hugepage_mem_enough(size) 0 #define hugetlb_report_meminfo(buf) 0 #define hugetlb_report_node_meminfo(n, buf) 0 #define follow_huge_pmd(mm, addr, pmd, write) NULL @@ -132,6 +130,8 @@ struct hugetlbfs_sb_info { struct hugetlbfs_inode_info { struct shared_policy policy; + /* Protected by the (global) hugetlb_lock */ + unsigned long prereserved_hpages; struct inode vfs_inode; }; @@ -148,6 +148,10 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern struct file_operations hugetlbfs_file_operations; extern struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_zero_setup(size_t); +int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info, + unsigned long atleast_hpages); +void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info, + unsigned long atmost_hpages); int hugetlb_get_quota(struct address_space *mapping); void hugetlb_put_quota(struct address_space *mapping); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d5987a87bbe5..27fad5d9bcf6 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -22,7 +22,7 
@@ #include "internal.h" const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; -static unsigned long nr_huge_pages, free_huge_pages; +static unsigned long nr_huge_pages, free_huge_pages, reserved_huge_pages; unsigned long max_huge_pages; static struct list_head hugepage_freelists[MAX_NUMNODES]; static unsigned int nr_huge_pages_node[MAX_NUMNODES]; @@ -120,17 +120,136 @@ void free_huge_page(struct page *page) struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr) { + struct inode *inode = vma->vm_file->f_dentry->d_inode; struct page *page; + int use_reserve = 0; + unsigned long idx; spin_lock(&hugetlb_lock); - page = dequeue_huge_page(vma, addr); - if (!page) { - spin_unlock(&hugetlb_lock); - return NULL; + + if (vma->vm_flags & VM_MAYSHARE) { + + /* idx = radix tree index, i.e. offset into file in + * HPAGE_SIZE units */ + idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) + + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); + + /* The hugetlbfs specific inode info stores the number + * of "guaranteed available" (huge) pages. That is, + * the first 'prereserved_hpages' pages of the inode + * are either already instantiated, or have been + * pre-reserved (by hugetlb_reserve_for_inode()). Here + * we're in the process of instantiating the page, so + * we use this to determine whether to draw from the + * pre-reserved pool or the truly free pool. */ + if (idx < HUGETLBFS_I(inode)->prereserved_hpages) + use_reserve = 1; + } + + if (!use_reserve) { + if (free_huge_pages <= reserved_huge_pages) + goto fail; + } else { + BUG_ON(reserved_huge_pages == 0); + reserved_huge_pages--; } + + page = dequeue_huge_page(vma, addr); + if (!page) + goto fail; + spin_unlock(&hugetlb_lock); set_page_refcounted(page); return page; + + fail: + WARN_ON(use_reserve); /* reserved allocations shouldn't fail */ + spin_unlock(&hugetlb_lock); + return NULL; +} + +/* hugetlb_extend_reservation() + * + * Ensure that at least 'atleast' hugepages are, and will remain, + * available to instantiate the first 'atleast' pages of the given + * inode. If the inode doesn't already have this many pages reserved + * or instantiated, set aside some hugepages in the reserved pool to + * satisfy later faults (or fail now if there aren't enough, rather + * than getting the SIGBUS later). + */ +int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info, + unsigned long atleast) +{ + struct inode *inode = &info->vfs_inode; + unsigned long change_in_reserve = 0; + int ret = 0; + + spin_lock(&hugetlb_lock); + read_lock_irq(&inode->i_mapping->tree_lock); + + if (info->prereserved_hpages >= atleast) + goto out; + + /* Because we always call this on shared mappings, none of the + * pages beyond info->prereserved_hpages can have been + * instantiated, so we need to reserve all of them now. */ + change_in_reserve = atleast - info->prereserved_hpages; + + if ((reserved_huge_pages + change_in_reserve) > free_huge_pages) { + ret = -ENOMEM; + goto out; + } + + reserved_huge_pages += change_in_reserve; + info->prereserved_hpages = atleast; + + out: + read_unlock_irq(&inode->i_mapping->tree_lock); + spin_unlock(&hugetlb_lock); + + return ret; +} + +/* hugetlb_truncate_reservation() + * + * This returns pages reserved for the given inode to the general free + * hugepage pool. If the inode has any pages prereserved, but not + * instantiated, beyond offset (atmost << HPAGE_SIZE), then release + * them. 
+ */ +void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info, + unsigned long atmost) +{ + struct inode *inode = &info->vfs_inode; + struct address_space *mapping = inode->i_mapping; + unsigned long idx; + unsigned long change_in_reserve = 0; + struct page *page; + + spin_lock(&hugetlb_lock); + read_lock_irq(&inode->i_mapping->tree_lock); + + if (info->prereserved_hpages <= atmost) + goto out; + + /* Count pages which were reserved, but not instantiated, and + * which we can now release. */ + for (idx = atmost; idx < info->prereserved_hpages; idx++) { + page = radix_tree_lookup(&mapping->page_tree, idx); + if (!page) + /* Pages which are already instantiated can't + * be unreserved (and in fact have already + * been removed from the reserved pool) */ + change_in_reserve++; + } + + BUG_ON(reserved_huge_pages < change_in_reserve); + reserved_huge_pages -= change_in_reserve; + info->prereserved_hpages = atmost; + + out: + read_unlock_irq(&inode->i_mapping->tree_lock); + spin_unlock(&hugetlb_lock); } static int __init hugetlb_init(void) @@ -238,9 +357,11 @@ int hugetlb_report_meminfo(char *buf) return sprintf(buf, "HugePages_Total: %5lu\n" "HugePages_Free: %5lu\n" + "HugePages_Rsvd: %5lu\n" "Hugepagesize: %5lu kB\n", nr_huge_pages, free_huge_pages, + reserved_huge_pages, HPAGE_SIZE/1024); } @@ -253,11 +374,6 @@ int hugetlb_report_node_meminfo(int nid, char *buf) nid, free_huge_pages_node[nid]); } -int is_hugepage_mem_enough(size_t size) -{ - return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages; -} - /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ unsigned long hugetlb_total_pages(void) { -- cgit v1.2.3 From bba1e9b2111b14625f670bd07e57fd7ed57ce804 Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Wed, 22 Mar 2006 00:09:02 -0800 Subject: [PATCH] convert hugetlbfs_counter to atomic Implementation of hugetlbfs_counter() is functionally equivalent to atomic_inc_return(). Use the simpler atomic form. 
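As a side note on the equivalence claimed above, a minimal stand-alone sketch (function names are hypothetical, include paths assumed for a 2.6-era kernel): both forms hand out a unique, increasing value per call, the atomic one just without a dedicated spinlock.

#include <linux/spinlock.h>
#include <asm/atomic.h>

/* Mirrors the old hugetlbfs_counter(): a counter guarded by its own lock. */
static unsigned long hypothetical_counter_locked(void)
{
	static DEFINE_SPINLOCK(lock);
	static unsigned long counter;
	unsigned long ret;

	spin_lock(&lock);
	ret = ++counter;		/* increment and sample under the lock */
	spin_unlock(&lock);
	return ret;
}

/* Mirrors the replacement: one atomic op gives the same increment-and-return. */
static unsigned int hypothetical_counter_atomic(void)
{
	static atomic_t counter = ATOMIC_INIT(0);

	return atomic_inc_return(&counter);
}
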
Signed-off-by: Ken Chen Cc: David Gibson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 1a1c2fcb7823..25fa8bba8cb5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -737,21 +737,6 @@ static struct file_system_type hugetlbfs_fs_type = { static struct vfsmount *hugetlbfs_vfsmount; -/* - * Return the next identifier for a shm file - */ -static unsigned long hugetlbfs_counter(void) -{ - static DEFINE_SPINLOCK(lock); - static unsigned long counter; - unsigned long ret; - - spin_lock(&lock); - ret = ++counter; - spin_unlock(&lock); - return ret; -} - static int can_do_hugetlb_shm(void) { return likely(capable(CAP_IPC_LOCK) || @@ -767,6 +752,7 @@ struct file *hugetlb_zero_setup(size_t size) struct dentry *dentry, *root; struct qstr quick_string; char buf[16]; + static atomic_t counter; if (!can_do_hugetlb_shm()) return ERR_PTR(-EPERM); @@ -775,7 +761,7 @@ struct file *hugetlb_zero_setup(size_t size) return ERR_PTR(-ENOMEM); root = hugetlbfs_vfsmount->mnt_root; - snprintf(buf, 16, "%lu", hugetlbfs_counter()); + snprintf(buf, 16, "%u", atomic_inc_return(&counter)); quick_string.name = buf; quick_string.len = strlen(quick_string.name); quick_string.hash = 0; -- cgit v1.2.3 From b20a35035f983f4ac7e29c4a68f30e43510007e0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 22 Mar 2006 00:09:12 -0800 Subject: [PATCH] page migration reorg Centralize the page migration functions in anticipation of additional tinkering. Creates a new file mm/migrate.c 1. Extract buffer_migrate_page() from fs/buffer.c 2. Extract central migration code from vmscan.c 3. Extract some components from mempolicy.c 4. Export pageout() and remove_from_swap() from vmscan.c 5. Make it possible to configure NUMA systems without page migration and non-NUMA systems with page migration. I had to so some #ifdeffing in mempolicy.c that may need a cleanup. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 62 ----- fs/xfs/linux-2.6/xfs_buf.c | 1 + include/linux/migrate.h | 36 +++ include/linux/swap.h | 34 ++- mm/Kconfig | 6 + mm/Makefile | 2 + mm/mempolicy.c | 113 ++------ mm/migrate.c | 655 +++++++++++++++++++++++++++++++++++++++++++++ mm/swap_state.c | 1 + mm/vmscan.c | 491 +-------------------------------- 10 files changed, 741 insertions(+), 660 deletions(-) create mode 100644 include/linux/migrate.h create mode 100644 mm/migrate.c (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index a9b399402007..1d3683d496f8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3050,68 +3050,6 @@ asmlinkage long sys_bdflush(int func, long data) return 0; } -/* - * Migration function for pages with buffers. This function can only be used - * if the underlying filesystem guarantees that no other references to "page" - * exist. 
- */ -#ifdef CONFIG_MIGRATION -int buffer_migrate_page(struct page *newpage, struct page *page) -{ - struct address_space *mapping = page->mapping; - struct buffer_head *bh, *head; - int rc; - - if (!mapping) - return -EAGAIN; - - if (!page_has_buffers(page)) - return migrate_page(newpage, page); - - head = page_buffers(page); - - rc = migrate_page_remove_references(newpage, page, 3); - if (rc) - return rc; - - bh = head; - do { - get_bh(bh); - lock_buffer(bh); - bh = bh->b_this_page; - - } while (bh != head); - - ClearPagePrivate(page); - set_page_private(newpage, page_private(page)); - set_page_private(page, 0); - put_page(page); - get_page(newpage); - - bh = head; - do { - set_bh_page(bh, newpage, bh_offset(bh)); - bh = bh->b_this_page; - - } while (bh != head); - - SetPagePrivate(newpage); - - migrate_page_copy(newpage, page); - - bh = head; - do { - unlock_buffer(bh); - put_bh(bh); - bh = bh->b_this_page; - - } while (bh != head); - - return 0; -} -EXPORT_SYMBOL(buffer_migrate_page); -#endif - /* * Buffer-head allocation */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index bfb4f2917bb6..8cdfa4151659 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "xfs_linux.h" STATIC kmem_zone_t *xfs_buf_zone; diff --git a/include/linux/migrate.h b/include/linux/migrate.h new file mode 100644 index 000000000000..7d09962c3c0b --- /dev/null +++ b/include/linux/migrate.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_MIGRATE_H +#define _LINUX_MIGRATE_H + +#include +#include + +#ifdef CONFIG_MIGRATION +extern int isolate_lru_page(struct page *p, struct list_head *pagelist); +extern int putback_lru_pages(struct list_head *l); +extern int migrate_page(struct page *, struct page *); +extern void migrate_page_copy(struct page *, struct page *); +extern int migrate_page_remove_references(struct page *, struct page *, int); +extern int migrate_pages(struct list_head *l, struct list_head *t, + struct list_head *moved, struct list_head *failed); +int migrate_pages_to(struct list_head *pagelist, + struct vm_area_struct *vma, int dest); +extern int fail_migrate_page(struct page *, struct page *); + +extern int migrate_prep(void); + +#else + +static inline int isolate_lru_page(struct page *p, struct list_head *list) + { return -ENOSYS; } +static inline int putback_lru_pages(struct list_head *l) { return 0; } +static inline int migrate_pages(struct list_head *l, struct list_head *t, + struct list_head *moved, struct list_head *failed) { return -ENOSYS; } + +static inline int migrate_prep(void) { return -ENOSYS; } + +/* Possible settings for the migrate_page() method in address_operations */ +#define migrate_page NULL +#define fail_migrate_page NULL + +#endif /* CONFIG_MIGRATION */ +#endif /* _LINUX_MIGRATE_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 3dc6c89c49b8..12415dd94451 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -175,6 +175,21 @@ extern void swap_setup(void); extern unsigned long try_to_free_pages(struct zone **, gfp_t); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; +extern int remove_mapping(struct address_space *mapping, struct page *page); + +/* possible outcome of pageout() */ +typedef enum { + /* failed to write page out, page is locked */ + PAGE_KEEP, + /* move page to the active list, page is locked */ + PAGE_ACTIVATE, + /* page has been sent to the disk successfully, page is unlocked */ + PAGE_SUCCESS, + /* page is 
clean and locked */ + PAGE_CLEAN, +} pageout_t; + +extern pageout_t pageout(struct page *page, struct address_space *mapping); #ifdef CONFIG_NUMA extern int zone_reclaim_mode; @@ -188,25 +203,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) } #endif -#ifdef CONFIG_MIGRATION -extern int isolate_lru_page(struct page *p); -extern unsigned long putback_lru_pages(struct list_head *l); -extern int migrate_page(struct page *, struct page *); -extern void migrate_page_copy(struct page *, struct page *); -extern int migrate_page_remove_references(struct page *, struct page *, int); -extern unsigned long migrate_pages(struct list_head *l, struct list_head *t, - struct list_head *moved, struct list_head *failed); -extern int fail_migrate_page(struct page *, struct page *); -#else -static inline int isolate_lru_page(struct page *p) { return -ENOSYS; } -static inline int putback_lru_pages(struct list_head *l) { return 0; } -static inline int migrate_pages(struct list_head *l, struct list_head *t, - struct list_head *moved, struct list_head *failed) { return -ENOSYS; } -/* Possible settings for the migrate_page() method in address_operations */ -#define migrate_page NULL -#define fail_migrate_page NULL -#endif - #ifdef CONFIG_MMU /* linux/mm/shmem.c */ extern int shmem_unuse(swp_entry_t entry, struct page *page); diff --git a/mm/Kconfig b/mm/Kconfig index a9cb80ae6409..bd80460360db 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -137,5 +137,11 @@ config SPLIT_PTLOCK_CPUS # support for page migration # config MIGRATION + bool "Page migration" def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM depends on SWAP + help + Allows the migration of the physical location of pages of processes + while the virtual addresses are not changed. This is useful for + example on NUMA systems to put pages nearer to the processors accessing + the page. diff --git a/mm/Makefile b/mm/Makefile index 9aa03fa1dcc3..f10c753dce6d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -22,3 +22,5 @@ obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o +obj-$(CONFIG_MIGRATION) += migrate.o + diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 96195dcb62e1..e93cc740c22b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include @@ -95,9 +96,6 @@ #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ #define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */ -/* The number of pages to migrate per call to migrate_pages() */ -#define MIGRATE_CHUNK_SIZE 256 - static struct kmem_cache *policy_cache; static struct kmem_cache *sn_cache; @@ -331,17 +329,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct vm_area_struct *first, *vma, *prev; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - /* Must have swap device for migration */ - if (nr_swap_pages <= 0) - return ERR_PTR(-ENODEV); - /* - * Clear the LRU lists so pages can be isolated. - * Note that pages may be moved off the LRU after we have - * drained them. Those pages will fail to migrate like other - * pages that may be busy. 
- */ - lru_add_drain_all(); + err = migrate_prep(); + if (err) + return ERR_PTR(err); } first = find_vma(mm, start); @@ -550,92 +541,18 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, return err; } +#ifdef CONFIG_MIGRATION /* * page migration */ - static void migrate_page_add(struct page *page, struct list_head *pagelist, unsigned long flags) { /* * Avoid migrating a page that is shared with others. */ - if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) { - if (isolate_lru_page(page)) - list_add_tail(&page->lru, pagelist); - } -} - -/* - * Migrate the list 'pagelist' of pages to a certain destination. - * - * Specify destination with either non-NULL vma or dest_node >= 0 - * Return the number of pages not migrated or error code - */ -static int migrate_pages_to(struct list_head *pagelist, - struct vm_area_struct *vma, int dest) -{ - LIST_HEAD(newlist); - LIST_HEAD(moved); - LIST_HEAD(failed); - int err = 0; - unsigned long offset = 0; - int nr_pages; - struct page *page; - struct list_head *p; - -redo: - nr_pages = 0; - list_for_each(p, pagelist) { - if (vma) { - /* - * The address passed to alloc_page_vma is used to - * generate the proper interleave behavior. We fake - * the address here by an increasing offset in order - * to get the proper distribution of pages. - * - * No decision has been made as to which page - * a certain old page is moved to so we cannot - * specify the correct address. - */ - page = alloc_page_vma(GFP_HIGHUSER, vma, - offset + vma->vm_start); - offset += PAGE_SIZE; - } - else - page = alloc_pages_node(dest, GFP_HIGHUSER, 0); - - if (!page) { - err = -ENOMEM; - goto out; - } - list_add_tail(&page->lru, &newlist); - nr_pages++; - if (nr_pages > MIGRATE_CHUNK_SIZE) - break; - } - err = migrate_pages(pagelist, &newlist, &moved, &failed); - - putback_lru_pages(&moved); /* Call release pages instead ?? */ - - if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist)) - goto redo; -out: - /* Return leftover allocated pages */ - while (!list_empty(&newlist)) { - page = list_entry(newlist.next, struct page, lru); - list_del(&page->lru); - __free_page(page); - } - list_splice(&failed, pagelist); - if (err < 0) - return err; - - /* Calculate number of leftover pages */ - nr_pages = 0; - list_for_each(p, pagelist) - nr_pages++; - return nr_pages; + if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) + isolate_lru_page(page, pagelist); } /* @@ -742,8 +659,23 @@ int do_migrate_pages(struct mm_struct *mm, if (err < 0) return err; return busy; + } +#else + +static void migrate_page_add(struct page *page, struct list_head *pagelist, + unsigned long flags) +{ +} + +int do_migrate_pages(struct mm_struct *mm, + const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) +{ + return -ENOSYS; +} +#endif + long do_mbind(unsigned long start, unsigned long len, unsigned long mode, nodemask_t *nmask, unsigned long flags) { @@ -808,6 +740,7 @@ long do_mbind(unsigned long start, unsigned long len, if (!err && nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; } + if (!list_empty(&pagelist)) putback_lru_pages(&pagelist); diff --git a/mm/migrate.c b/mm/migrate.c new file mode 100644 index 000000000000..09f6e4aa87fc --- /dev/null +++ b/mm/migrate.c @@ -0,0 +1,655 @@ +/* + * Memory Migration functionality - linux/mm/migration.c + * + * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter + * + * Page migration was first developed in the context of the memory hotplug + * project. 
The main authors of the migration code are: + * + * IWAMOTO Toshihiro + * Hirokazu Takahashi + * Dave Hansen + * Christoph Lameter + */ + +#include +#include +#include +#include +#include /* for try_to_release_page(), + buffer_heads_over_limit */ +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +#include "internal.h" + +/* The maximum number of pages to take off the LRU for migration */ +#define MIGRATE_CHUNK_SIZE 256 + +#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) + +/* + * Isolate one page from the LRU lists. If successful put it onto + * the indicated list with elevated page count. + * + * Result: + * -EBUSY: page not on LRU list + * 0: page removed from LRU list and added to the specified list. + */ +int isolate_lru_page(struct page *page, struct list_head *pagelist) +{ + int ret = -EBUSY; + + if (PageLRU(page)) { + struct zone *zone = page_zone(page); + + spin_lock_irq(&zone->lru_lock); + if (PageLRU(page)) { + ret = 0; + get_page(page); + ClearPageLRU(page); + if (PageActive(page)) + del_page_from_active_list(zone, page); + else + del_page_from_inactive_list(zone, page); + list_add_tail(&page->lru, pagelist); + } + spin_unlock_irq(&zone->lru_lock); + } + return ret; +} + +/* + * migrate_prep() needs to be called after we have compiled the list of pages + * to be migrated using isolate_lru_page() but before we begin a series of calls + * to migrate_pages(). + */ +int migrate_prep(void) +{ + /* Must have swap device for migration */ + if (nr_swap_pages <= 0) + return -ENODEV; + + /* + * Clear the LRU lists so pages can be isolated. + * Note that pages may be moved off the LRU after we have + * drained them. Those pages will fail to migrate like other + * pages that may be busy. + */ + lru_add_drain_all(); + + return 0; +} + +static inline void move_to_lru(struct page *page) +{ + list_del(&page->lru); + if (PageActive(page)) { + /* + * lru_cache_add_active checks that + * the PG_active bit is off. + */ + ClearPageActive(page); + lru_cache_add_active(page); + } else { + lru_cache_add(page); + } + put_page(page); +} + +/* + * Add isolated pages on the list back to the LRU. + * + * returns the number of pages put back. 
+ */ +int putback_lru_pages(struct list_head *l) +{ + struct page *page; + struct page *page2; + int count = 0; + + list_for_each_entry_safe(page, page2, l, lru) { + move_to_lru(page); + count++; + } + return count; +} + +/* + * Non migratable page + */ +int fail_migrate_page(struct page *newpage, struct page *page) +{ + return -EIO; +} +EXPORT_SYMBOL(fail_migrate_page); + +/* + * swapout a single page + * page is locked upon entry, unlocked on exit + */ +static int swap_page(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + + if (page_mapped(page) && mapping) + if (try_to_unmap(page, 1) != SWAP_SUCCESS) + goto unlock_retry; + + if (PageDirty(page)) { + /* Page is dirty, try to write it out here */ + switch(pageout(page, mapping)) { + case PAGE_KEEP: + case PAGE_ACTIVATE: + goto unlock_retry; + + case PAGE_SUCCESS: + goto retry; + + case PAGE_CLEAN: + ; /* try to free the page below */ + } + } + + if (PagePrivate(page)) { + if (!try_to_release_page(page, GFP_KERNEL) || + (!mapping && page_count(page) == 1)) + goto unlock_retry; + } + + if (remove_mapping(mapping, page)) { + /* Success */ + unlock_page(page); + return 0; + } + +unlock_retry: + unlock_page(page); + +retry: + return -EAGAIN; +} +EXPORT_SYMBOL(swap_page); + +/* + * Remove references for a page and establish the new page with the correct + * basic settings to be able to stop accesses to the page. + */ +int migrate_page_remove_references(struct page *newpage, + struct page *page, int nr_refs) +{ + struct address_space *mapping = page_mapping(page); + struct page **radix_pointer; + + /* + * Avoid doing any of the following work if the page count + * indicates that the page is in use or truncate has removed + * the page. + */ + if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) + return -EAGAIN; + + /* + * Establish swap ptes for anonymous pages or destroy pte + * maps for files. + * + * In order to reestablish file backed mappings the fault handlers + * will take the radix tree_lock which may then be used to stop + * processses from accessing this page until the new page is ready. + * + * A process accessing via a swap pte (an anonymous page) will take a + * page_lock on the old page which will block the process until the + * migration attempt is complete. At that time the PageSwapCache bit + * will be examined. If the page was migrated then the PageSwapCache + * bit will be clear and the operation to retrieve the page will be + * retried which will find the new page in the radix tree. Then a new + * direct mapping may be generated based on the radix tree contents. + * + * If the page was not migrated then the PageSwapCache bit + * is still set and the operation may continue. + */ + if (try_to_unmap(page, 1) == SWAP_FAIL) + /* A vma has VM_LOCKED set -> permanent failure */ + return -EPERM; + + /* + * Give up if we were unable to remove all mappings. + */ + if (page_mapcount(page)) + return -EAGAIN; + + write_lock_irq(&mapping->tree_lock); + + radix_pointer = (struct page **)radix_tree_lookup_slot( + &mapping->page_tree, + page_index(page)); + + if (!page_mapping(page) || page_count(page) != nr_refs || + *radix_pointer != page) { + write_unlock_irq(&mapping->tree_lock); + return 1; + } + + /* + * Now we know that no one else is looking at the page. + * + * Certain minimal information about a page must be available + * in order for other subsystems to properly handle the page if they + * find it through the radix tree update before we are finished + * copying the page. 
+ */ + get_page(newpage); + newpage->index = page->index; + newpage->mapping = page->mapping; + if (PageSwapCache(page)) { + SetPageSwapCache(newpage); + set_page_private(newpage, page_private(page)); + } + + *radix_pointer = newpage; + __put_page(page); + write_unlock_irq(&mapping->tree_lock); + + return 0; +} +EXPORT_SYMBOL(migrate_page_remove_references); + +/* + * Copy the page to its new location + */ +void migrate_page_copy(struct page *newpage, struct page *page) +{ + copy_highpage(newpage, page); + + if (PageError(page)) + SetPageError(newpage); + if (PageReferenced(page)) + SetPageReferenced(newpage); + if (PageUptodate(page)) + SetPageUptodate(newpage); + if (PageActive(page)) + SetPageActive(newpage); + if (PageChecked(page)) + SetPageChecked(newpage); + if (PageMappedToDisk(page)) + SetPageMappedToDisk(newpage); + + if (PageDirty(page)) { + clear_page_dirty_for_io(page); + set_page_dirty(newpage); + } + + ClearPageSwapCache(page); + ClearPageActive(page); + ClearPagePrivate(page); + set_page_private(page, 0); + page->mapping = NULL; + + /* + * If any waiters have accumulated on the new page then + * wake them up. + */ + if (PageWriteback(newpage)) + end_page_writeback(newpage); +} +EXPORT_SYMBOL(migrate_page_copy); + +/* + * Common logic to directly migrate a single page suitable for + * pages that do not use PagePrivate. + * + * Pages are locked upon entry and exit. + */ +int migrate_page(struct page *newpage, struct page *page) +{ + int rc; + + BUG_ON(PageWriteback(page)); /* Writeback must be complete */ + + rc = migrate_page_remove_references(newpage, page, 2); + + if (rc) + return rc; + + migrate_page_copy(newpage, page); + + /* + * Remove auxiliary swap entries and replace + * them with real ptes. + * + * Note that a real pte entry will allow processes that are not + * waiting on the page lock to use the new page via the page tables + * before the new page is unlocked. + */ + remove_from_swap(newpage); + return 0; +} +EXPORT_SYMBOL(migrate_page); + +/* + * migrate_pages + * + * Two lists are passed to this function. The first list + * contains the pages isolated from the LRU to be migrated. + * The second list contains new pages that the pages isolated + * can be moved to. If the second list is NULL then all + * pages are swapped out. + * + * The function returns after 10 attempts or if no pages + * are movable anymore because to has become empty + * or no retryable pages exist anymore. + * + * Return: Number of pages not migrated when "to" ran empty. + */ +int migrate_pages(struct list_head *from, struct list_head *to, + struct list_head *moved, struct list_head *failed) +{ + int retry; + int nr_failed = 0; + int pass = 0; + struct page *page; + struct page *page2; + int swapwrite = current->flags & PF_SWAPWRITE; + int rc; + + if (!swapwrite) + current->flags |= PF_SWAPWRITE; + +redo: + retry = 0; + + list_for_each_entry_safe(page, page2, from, lru) { + struct page *newpage = NULL; + struct address_space *mapping; + + cond_resched(); + + rc = 0; + if (page_count(page) == 1) + /* page was freed from under us. So we are done. */ + goto next; + + if (to && list_empty(to)) + break; + + /* + * Skip locked pages during the first two passes to give the + * functions holding the lock time to release the page. Later we + * use lock_page() to have a higher chance of acquiring the + * lock. 
+ */ + rc = -EAGAIN; + if (pass > 2) + lock_page(page); + else + if (TestSetPageLocked(page)) + goto next; + + /* + * Only wait on writeback if we have already done a pass where + * we we may have triggered writeouts for lots of pages. + */ + if (pass > 0) { + wait_on_page_writeback(page); + } else { + if (PageWriteback(page)) + goto unlock_page; + } + + /* + * Anonymous pages must have swap cache references otherwise + * the information contained in the page maps cannot be + * preserved. + */ + if (PageAnon(page) && !PageSwapCache(page)) { + if (!add_to_swap(page, GFP_KERNEL)) { + rc = -ENOMEM; + goto unlock_page; + } + } + + if (!to) { + rc = swap_page(page); + goto next; + } + + newpage = lru_to_page(to); + lock_page(newpage); + + /* + * Pages are properly locked and writeback is complete. + * Try to migrate the page. + */ + mapping = page_mapping(page); + if (!mapping) + goto unlock_both; + + if (mapping->a_ops->migratepage) { + /* + * Most pages have a mapping and most filesystems + * should provide a migration function. Anonymous + * pages are part of swap space which also has its + * own migration function. This is the most common + * path for page migration. + */ + rc = mapping->a_ops->migratepage(newpage, page); + goto unlock_both; + } + + /* + * Default handling if a filesystem does not provide + * a migration function. We can only migrate clean + * pages so try to write out any dirty pages first. + */ + if (PageDirty(page)) { + switch (pageout(page, mapping)) { + case PAGE_KEEP: + case PAGE_ACTIVATE: + goto unlock_both; + + case PAGE_SUCCESS: + unlock_page(newpage); + goto next; + + case PAGE_CLEAN: + ; /* try to migrate the page below */ + } + } + + /* + * Buffers are managed in a filesystem specific way. + * We must have no buffers or drop them. + */ + if (!page_has_buffers(page) || + try_to_release_page(page, GFP_KERNEL)) { + rc = migrate_page(newpage, page); + goto unlock_both; + } + + /* + * On early passes with mapped pages simply + * retry. There may be a lock held for some + * buffers that may go away. Later + * swap them out. + */ + if (pass > 4) { + /* + * Persistently unable to drop buffers..... As a + * measure of last resort we fall back to + * swap_page(). + */ + unlock_page(newpage); + newpage = NULL; + rc = swap_page(page); + goto next; + } + +unlock_both: + unlock_page(newpage); + +unlock_page: + unlock_page(page); + +next: + if (rc == -EAGAIN) { + retry++; + } else if (rc) { + /* Permanent failure */ + list_move(&page->lru, failed); + nr_failed++; + } else { + if (newpage) { + /* Successful migration. Return page to LRU */ + move_to_lru(newpage); + } + list_move(&page->lru, moved); + } + } + if (retry && pass++ < 10) + goto redo; + + if (!swapwrite) + current->flags &= ~PF_SWAPWRITE; + + return nr_failed + retry; +} + +/* + * Migration function for pages with buffers. This function can only be used + * if the underlying filesystem guarantees that no other references to "page" + * exist. 
+ */ +int buffer_migrate_page(struct page *newpage, struct page *page) +{ + struct address_space *mapping = page->mapping; + struct buffer_head *bh, *head; + int rc; + + if (!mapping) + return -EAGAIN; + + if (!page_has_buffers(page)) + return migrate_page(newpage, page); + + head = page_buffers(page); + + rc = migrate_page_remove_references(newpage, page, 3); + + if (rc) + return rc; + + bh = head; + do { + get_bh(bh); + lock_buffer(bh); + bh = bh->b_this_page; + + } while (bh != head); + + ClearPagePrivate(page); + set_page_private(newpage, page_private(page)); + set_page_private(page, 0); + put_page(page); + get_page(newpage); + + bh = head; + do { + set_bh_page(bh, newpage, bh_offset(bh)); + bh = bh->b_this_page; + + } while (bh != head); + + SetPagePrivate(newpage); + + migrate_page_copy(newpage, page); + + bh = head; + do { + unlock_buffer(bh); + put_bh(bh); + bh = bh->b_this_page; + + } while (bh != head); + + return 0; +} +EXPORT_SYMBOL(buffer_migrate_page); + +/* + * Migrate the list 'pagelist' of pages to a certain destination. + * + * Specify destination with either non-NULL vma or dest_node >= 0 + * Return the number of pages not migrated or error code + */ +int migrate_pages_to(struct list_head *pagelist, + struct vm_area_struct *vma, int dest) +{ + LIST_HEAD(newlist); + LIST_HEAD(moved); + LIST_HEAD(failed); + int err = 0; + unsigned long offset = 0; + int nr_pages; + struct page *page; + struct list_head *p; + +redo: + nr_pages = 0; + list_for_each(p, pagelist) { + if (vma) { + /* + * The address passed to alloc_page_vma is used to + * generate the proper interleave behavior. We fake + * the address here by an increasing offset in order + * to get the proper distribution of pages. + * + * No decision has been made as to which page + * a certain old page is moved to so we cannot + * specify the correct address. + */ + page = alloc_page_vma(GFP_HIGHUSER, vma, + offset + vma->vm_start); + offset += PAGE_SIZE; + } + else + page = alloc_pages_node(dest, GFP_HIGHUSER, 0); + + if (!page) { + err = -ENOMEM; + goto out; + } + list_add_tail(&page->lru, &newlist); + nr_pages++; + if (nr_pages > MIGRATE_CHUNK_SIZE) + break; + } + err = migrate_pages(pagelist, &newlist, &moved, &failed); + + putback_lru_pages(&moved); /* Call release pages instead ?? 
*/ + + if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist)) + goto redo; +out: + /* Return leftover allocated pages */ + while (!list_empty(&newlist)) { + page = list_entry(newlist.next, struct page, lru); + list_del(&page->lru); + __free_page(page); + } + list_splice(&failed, pagelist); + if (err < 0) + return err; + + /* Calculate number of leftover pages */ + nr_pages = 0; + list_for_each(p, pagelist) + nr_pages++; + return nr_pages; +} diff --git a/mm/swap_state.c b/mm/swap_state.c index db8a3d3e1636..d7af296833fc 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -15,6 +15,7 @@ #include #include #include +#include #include diff --git a/mm/vmscan.c b/mm/vmscan.c index 548e023c193b..fd572bbdc9f5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -42,18 +42,6 @@ #include "internal.h" -/* possible outcome of pageout() */ -typedef enum { - /* failed to write page out, page is locked */ - PAGE_KEEP, - /* move page to the active list, page is locked */ - PAGE_ACTIVATE, - /* page has been sent to the disk successfully, page is unlocked */ - PAGE_SUCCESS, - /* page is clean and locked */ - PAGE_CLEAN, -} pageout_t; - struct scan_control { /* Incremented by the number of inactive pages that were scanned */ unsigned long nr_scanned; @@ -304,7 +292,7 @@ static void handle_write_error(struct address_space *mapping, * pageout is called by shrink_page_list() for each dirty page. * Calls ->writepage(). */ -static pageout_t pageout(struct page *page, struct address_space *mapping) +pageout_t pageout(struct page *page, struct address_space *mapping) { /* * If the page is dirty, only perform writeback if that write @@ -372,7 +360,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) return PAGE_CLEAN; } -static int remove_mapping(struct address_space *mapping, struct page *page) +int remove_mapping(struct address_space *mapping, struct page *page) { if (!mapping) return 0; /* truncate got there first */ @@ -570,481 +558,6 @@ keep: return nr_reclaimed; } -#ifdef CONFIG_MIGRATION -static inline void move_to_lru(struct page *page) -{ - list_del(&page->lru); - if (PageActive(page)) { - /* - * lru_cache_add_active checks that - * the PG_active bit is off. - */ - ClearPageActive(page); - lru_cache_add_active(page); - } else { - lru_cache_add(page); - } - put_page(page); -} - -/* - * Add isolated pages on the list back to the LRU. - * - * returns the number of pages put back. 
- */ -unsigned long putback_lru_pages(struct list_head *l) -{ - struct page *page; - struct page *page2; - unsigned long count = 0; - - list_for_each_entry_safe(page, page2, l, lru) { - move_to_lru(page); - count++; - } - return count; -} - -/* - * Non migratable page - */ -int fail_migrate_page(struct page *newpage, struct page *page) -{ - return -EIO; -} -EXPORT_SYMBOL(fail_migrate_page); - -/* - * swapout a single page - * page is locked upon entry, unlocked on exit - */ -static int swap_page(struct page *page) -{ - struct address_space *mapping = page_mapping(page); - - if (page_mapped(page) && mapping) - if (try_to_unmap(page, 1) != SWAP_SUCCESS) - goto unlock_retry; - - if (PageDirty(page)) { - /* Page is dirty, try to write it out here */ - switch(pageout(page, mapping)) { - case PAGE_KEEP: - case PAGE_ACTIVATE: - goto unlock_retry; - - case PAGE_SUCCESS: - goto retry; - - case PAGE_CLEAN: - ; /* try to free the page below */ - } - } - - if (PagePrivate(page)) { - if (!try_to_release_page(page, GFP_KERNEL) || - (!mapping && page_count(page) == 1)) - goto unlock_retry; - } - - if (remove_mapping(mapping, page)) { - /* Success */ - unlock_page(page); - return 0; - } - -unlock_retry: - unlock_page(page); - -retry: - return -EAGAIN; -} -EXPORT_SYMBOL(swap_page); - -/* - * Page migration was first developed in the context of the memory hotplug - * project. The main authors of the migration code are: - * - * IWAMOTO Toshihiro - * Hirokazu Takahashi - * Dave Hansen - * Christoph Lameter - */ - -/* - * Remove references for a page and establish the new page with the correct - * basic settings to be able to stop accesses to the page. - */ -int migrate_page_remove_references(struct page *newpage, - struct page *page, int nr_refs) -{ - struct address_space *mapping = page_mapping(page); - struct page **radix_pointer; - - /* - * Avoid doing any of the following work if the page count - * indicates that the page is in use or truncate has removed - * the page. - */ - if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) - return -EAGAIN; - - /* - * Establish swap ptes for anonymous pages or destroy pte - * maps for files. - * - * In order to reestablish file backed mappings the fault handlers - * will take the radix tree_lock which may then be used to stop - * processses from accessing this page until the new page is ready. - * - * A process accessing via a swap pte (an anonymous page) will take a - * page_lock on the old page which will block the process until the - * migration attempt is complete. At that time the PageSwapCache bit - * will be examined. If the page was migrated then the PageSwapCache - * bit will be clear and the operation to retrieve the page will be - * retried which will find the new page in the radix tree. Then a new - * direct mapping may be generated based on the radix tree contents. - * - * If the page was not migrated then the PageSwapCache bit - * is still set and the operation may continue. - */ - if (try_to_unmap(page, 1) == SWAP_FAIL) - /* A vma has VM_LOCKED set -> Permanent failure */ - return -EPERM; - - /* - * Give up if we were unable to remove all mappings. 
- */ - if (page_mapcount(page)) - return -EAGAIN; - - write_lock_irq(&mapping->tree_lock); - - radix_pointer = (struct page **)radix_tree_lookup_slot( - &mapping->page_tree, - page_index(page)); - - if (!page_mapping(page) || page_count(page) != nr_refs || - *radix_pointer != page) { - write_unlock_irq(&mapping->tree_lock); - return -EAGAIN; - } - - /* - * Now we know that no one else is looking at the page. - * - * Certain minimal information about a page must be available - * in order for other subsystems to properly handle the page if they - * find it through the radix tree update before we are finished - * copying the page. - */ - get_page(newpage); - newpage->index = page->index; - newpage->mapping = page->mapping; - if (PageSwapCache(page)) { - SetPageSwapCache(newpage); - set_page_private(newpage, page_private(page)); - } - - *radix_pointer = newpage; - __put_page(page); - write_unlock_irq(&mapping->tree_lock); - - return 0; -} -EXPORT_SYMBOL(migrate_page_remove_references); - -/* - * Copy the page to its new location - */ -void migrate_page_copy(struct page *newpage, struct page *page) -{ - copy_highpage(newpage, page); - - if (PageError(page)) - SetPageError(newpage); - if (PageReferenced(page)) - SetPageReferenced(newpage); - if (PageUptodate(page)) - SetPageUptodate(newpage); - if (PageActive(page)) - SetPageActive(newpage); - if (PageChecked(page)) - SetPageChecked(newpage); - if (PageMappedToDisk(page)) - SetPageMappedToDisk(newpage); - - if (PageDirty(page)) { - clear_page_dirty_for_io(page); - set_page_dirty(newpage); - } - - ClearPageSwapCache(page); - ClearPageActive(page); - ClearPagePrivate(page); - set_page_private(page, 0); - page->mapping = NULL; - - /* - * If any waiters have accumulated on the new page then - * wake them up. - */ - if (PageWriteback(newpage)) - end_page_writeback(newpage); -} -EXPORT_SYMBOL(migrate_page_copy); - -/* - * Common logic to directly migrate a single page suitable for - * pages that do not use PagePrivate. - * - * Pages are locked upon entry and exit. - */ -int migrate_page(struct page *newpage, struct page *page) -{ - int rc; - - BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - - rc = migrate_page_remove_references(newpage, page, 2); - - if (rc) - return rc; - - migrate_page_copy(newpage, page); - - /* - * Remove auxiliary swap entries and replace - * them with real ptes. - * - * Note that a real pte entry will allow processes that are not - * waiting on the page lock to use the new page via the page tables - * before the new page is unlocked. - */ - remove_from_swap(newpage); - return 0; -} -EXPORT_SYMBOL(migrate_page); - -/* - * migrate_pages - * - * Two lists are passed to this function. The first list - * contains the pages isolated from the LRU to be migrated. - * The second list contains new pages that the pages isolated - * can be moved to. If the second list is NULL then all - * pages are swapped out. - * - * The function returns after 10 attempts or if no pages - * are movable anymore because to has become empty - * or no retryable pages exist anymore. - * - * Return: Number of pages not migrated when "to" ran empty. 
- */ -unsigned long migrate_pages(struct list_head *from, struct list_head *to, - struct list_head *moved, struct list_head *failed) -{ - unsigned long retry; - unsigned long nr_failed = 0; - int pass = 0; - struct page *page; - struct page *page2; - int swapwrite = current->flags & PF_SWAPWRITE; - int rc; - - if (!swapwrite) - current->flags |= PF_SWAPWRITE; - -redo: - retry = 0; - - list_for_each_entry_safe(page, page2, from, lru) { - struct page *newpage = NULL; - struct address_space *mapping; - - cond_resched(); - - rc = 0; - if (page_count(page) == 1) - /* page was freed from under us. So we are done. */ - goto next; - - if (to && list_empty(to)) - break; - - /* - * Skip locked pages during the first two passes to give the - * functions holding the lock time to release the page. Later we - * use lock_page() to have a higher chance of acquiring the - * lock. - */ - rc = -EAGAIN; - if (pass > 2) - lock_page(page); - else - if (TestSetPageLocked(page)) - goto next; - - /* - * Only wait on writeback if we have already done a pass where - * we we may have triggered writeouts for lots of pages. - */ - if (pass > 0) { - wait_on_page_writeback(page); - } else { - if (PageWriteback(page)) - goto unlock_page; - } - - /* - * Anonymous pages must have swap cache references otherwise - * the information contained in the page maps cannot be - * preserved. - */ - if (PageAnon(page) && !PageSwapCache(page)) { - if (!add_to_swap(page, GFP_KERNEL)) { - rc = -ENOMEM; - goto unlock_page; - } - } - - if (!to) { - rc = swap_page(page); - goto next; - } - - newpage = lru_to_page(to); - lock_page(newpage); - - /* - * Pages are properly locked and writeback is complete. - * Try to migrate the page. - */ - mapping = page_mapping(page); - if (!mapping) - goto unlock_both; - - if (mapping->a_ops->migratepage) { - /* - * Most pages have a mapping and most filesystems - * should provide a migration function. Anonymous - * pages are part of swap space which also has its - * own migration function. This is the most common - * path for page migration. - */ - rc = mapping->a_ops->migratepage(newpage, page); - goto unlock_both; - } - - /* - * Default handling if a filesystem does not provide - * a migration function. We can only migrate clean - * pages so try to write out any dirty pages first. - */ - if (PageDirty(page)) { - switch (pageout(page, mapping)) { - case PAGE_KEEP: - case PAGE_ACTIVATE: - goto unlock_both; - - case PAGE_SUCCESS: - unlock_page(newpage); - goto next; - - case PAGE_CLEAN: - ; /* try to migrate the page below */ - } - } - - /* - * Buffers are managed in a filesystem specific way. - * We must have no buffers or drop them. - */ - if (!page_has_buffers(page) || - try_to_release_page(page, GFP_KERNEL)) { - rc = migrate_page(newpage, page); - goto unlock_both; - } - - /* - * On early passes with mapped pages simply - * retry. There may be a lock held for some - * buffers that may go away. Later - * swap them out. - */ - if (pass > 4) { - /* - * Persistently unable to drop buffers..... As a - * measure of last resort we fall back to - * swap_page(). - */ - unlock_page(newpage); - newpage = NULL; - rc = swap_page(page); - goto next; - } - -unlock_both: - unlock_page(newpage); - -unlock_page: - unlock_page(page); - -next: - if (rc == -EAGAIN) { - retry++; - } else if (rc) { - /* Permanent failure */ - list_move(&page->lru, failed); - nr_failed++; - } else { - if (newpage) { - /* Successful migration. 
Return page to LRU */ - move_to_lru(newpage); - } - list_move(&page->lru, moved); - } - } - if (retry && pass++ < 10) - goto redo; - - if (!swapwrite) - current->flags &= ~PF_SWAPWRITE; - - return nr_failed + retry; -} - -/* - * Isolate one page from the LRU lists and put it on the - * indicated list with elevated refcount. - * - * Result: - * 0 = page not on LRU list - * 1 = page removed from LRU list and added to the specified list. - */ -int isolate_lru_page(struct page *page) -{ - int ret = 0; - - if (PageLRU(page)) { - struct zone *zone = page_zone(page); - spin_lock_irq(&zone->lru_lock); - if (PageLRU(page)) { - ret = 1; - get_page(page); - ClearPageLRU(page); - if (PageActive(page)) - del_page_from_active_list(zone, page); - else - del_page_from_inactive_list(zone, page); - } - spin_unlock_irq(&zone->lru_lock); - } - - return ret; -} -#endif - /* * zone->lru_lock is heavily contended. Some of the functions that * shrink the lists perform better by taking out a batch of pages -- cgit v1.2.3 From 67b1dfe77a2eb2a88b37cd77b8979cbdb7695bd6 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 14:57:43 +0000 Subject: NTFS: Fix an (innocent) off-by-one error in the runlist code. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 6 ++++++ fs/ntfs/Makefile | 2 +- fs/ntfs/namei.c | 2 +- fs/ntfs/runlist.c | 12 ++++++++---- fs/ntfs/super.c | 2 +- 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 9d8ffa89e2c2..3d8d60be48de 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -19,6 +19,12 @@ ToDo/Notes: - Enable the code for setting the NT4 compatibility flag when we start making NTFS 1.2 specific modifications. +2.1.27 - Various bug fixes. + + - Fix two compiler warnings on Alpha. Thanks to Andrew Morton for + reporting them. + - Fix an (innocent) off-by-one error in the runlist code. + 2.1.26 - Minor bug fixes and updates. - Fix a potential overflow in file.c where a cast to s64 was missing in diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index d95fac7fdeb6..e27b4eacffbf 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.26\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.27\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 5ea9eb93af62..78e0cf738e24 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -2,7 +2,7 @@ * namei.c - NTFS kernel directory inode operations. Part of the Linux-NTFS * project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2006 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index 061b5ff6b73c..eb52b801512b 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -381,6 +381,7 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst, static inline runlist_element *ntfs_rl_replace(runlist_element *dst, int dsize, runlist_element *src, int ssize, int loc) { + signed delta; BOOL left = FALSE; /* Left end of @src needs merging. */ BOOL right = FALSE; /* Right end of @src needs merging. */ int tail; /* Start of tail of @dst. 
*/ @@ -396,11 +397,14 @@ static inline runlist_element *ntfs_rl_replace(runlist_element *dst, left = ntfs_are_rl_mergeable(dst + loc - 1, src); /* * Allocate some space. We will need less if the left, right, or both - * ends get merged. + * ends get merged. The -1 accounts for the run being replaced. */ - dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right); - if (IS_ERR(dst)) - return dst; + delta = ssize - 1 - left - right; + if (delta > 0) { + dst = ntfs_rl_realloc(dst, dsize, dsize + delta); + if (IS_ERR(dst)) + return dst; + } /* * We are guaranteed to succeed from here so can start modifying the * original runlists. diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 368a8ec10668..71c58eca580e 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3234,7 +3234,7 @@ static void __exit exit_ntfs_fs(void) } MODULE_AUTHOR("Anton Altaparmakov "); -MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2005 Anton Altaparmakov"); +MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2006 Anton Altaparmakov"); MODULE_VERSION(NTFS_VERSION); MODULE_LICENSE("GPL"); #ifdef DEBUG -- cgit v1.2.3 From 3ccc7384db3d762e834dfdae13c1d6434b2fdeab Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 15:03:11 +0000 Subject: NTFS: Fix a buggette in an "should be impossible" case handling where we continued the attribute lookup loop instead of aborting it. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 2 ++ fs/ntfs/attrib.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 3d8d60be48de..d35a5c8e3da9 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -24,6 +24,8 @@ ToDo/Notes: - Fix two compiler warnings on Alpha. Thanks to Andrew Morton for reporting them. - Fix an (innocent) off-by-one error in the runlist code. + - Fix a buggette in an "should be impossible" case handling where we + continued the attribute lookup loop instead of aborting it. 2.1.26 - Minor bug fixes and updates. diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 9480a0526cd3..a92b9e9db91d 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1,7 +1,7 @@ /** * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2006 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -1048,7 +1048,7 @@ do_next_attr_loop: le32_to_cpu(ctx->mrec->bytes_allocated)) break; if (a->type == AT_END) - continue; + break; if (!a->length) break; if (al_entry->instance != a->instance) -- cgit v1.2.3 From 78264bd9c239237fe356c32d08abf8e52a2d8737 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 15:06:18 +0000 Subject: NTFS: Use buffer_migrate_page() for the ->migratepage function of all ntfs address space operations. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 4 +++- fs/ntfs/aops.c | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index d35a5c8e3da9..8df10700a930 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -19,13 +19,15 @@ ToDo/Notes: - Enable the code for setting the NT4 compatibility flag when we start making NTFS 1.2 specific modifications. -2.1.27 - Various bug fixes. +2.1.27 - Various bug fixes and cleanups. - Fix two compiler warnings on Alpha. Thanks to Andrew Morton for reporting them. 
- Fix an (innocent) off-by-one error in the runlist code. - Fix a buggette in an "should be impossible" case handling where we continued the attribute lookup loop instead of aborting it. + - Use buffer_migrate_page() for the ->migratepage function of all ntfs + address space operations. 2.1.26 - Minor bug fixes and updates. diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 7e361da770b3..7c7e313620fa 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -22,6 +22,7 @@ */ #include +#include #include #include #include @@ -1551,6 +1552,9 @@ struct address_space_operations ntfs_aops = { #ifdef NTFS_RW .writepage = ntfs_writepage, /* Write dirty page to disk. */ #endif /* NTFS_RW */ + .migratepage = buffer_migrate_page, /* Move a page cache page from + one physical page to an + other. */ }; /** @@ -1567,6 +1571,9 @@ struct address_space_operations ntfs_mst_aops = { without touching the buffers belonging to the page. */ #endif /* NTFS_RW */ + .migratepage = buffer_migrate_page, /* Move a page cache page from + one physical page to an + other. */ }; #ifdef NTFS_RW -- cgit v1.2.3 From 949763b2b8822c6dc6da0d0e1d4af092152546c2 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 15:34:13 +0000 Subject: NTFS: Fix comparison of $MFT and $MFTMirr to not bail out when there are unused, invalid mft records which are the same in both $MFT and $MFTMirr. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 3 +++ fs/ntfs/super.c | 38 +++++++++++++++++++++++++------------- 2 files changed, 28 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 8df10700a930..548d9059a697 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -28,6 +28,9 @@ ToDo/Notes: continued the attribute lookup loop instead of aborting it. - Use buffer_migrate_page() for the ->migratepage function of all ntfs address space operations. + - Fix comparison of $MFT and $MFTMirr to not bail out when there are + unused, invalid mft records which are the same in both $MFT and + $MFTMirr. 2.1.26 - Minor bug fixes and updates. diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 71c58eca580e..fd4aecc5548e 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1099,26 +1099,38 @@ static BOOL check_mft_mirror(ntfs_volume *vol) kmirr = page_address(mirr_page); ++index; } - /* Make sure the record is ok. */ - if (ntfs_is_baad_recordp((le32*)kmft)) { - ntfs_error(sb, "Incomplete multi sector transfer " - "detected in mft record %i.", i); + /* Do not check the record if it is not in use. */ + if (((MFT_RECORD*)kmft)->flags & MFT_RECORD_IN_USE) { + /* Make sure the record is ok. */ + if (ntfs_is_baad_recordp((le32*)kmft)) { + ntfs_error(sb, "Incomplete multi sector " + "transfer detected in mft " + "record %i.", i); mm_unmap_out: - ntfs_unmap_page(mirr_page); + ntfs_unmap_page(mirr_page); mft_unmap_out: - ntfs_unmap_page(mft_page); - return FALSE; + ntfs_unmap_page(mft_page); + return FALSE; + } } - if (ntfs_is_baad_recordp((le32*)kmirr)) { - ntfs_error(sb, "Incomplete multi sector transfer " - "detected in mft mirror record %i.", i); - goto mm_unmap_out; + /* Do not check the mirror record if it is not in use. */ + if (((MFT_RECORD*)kmirr)->flags & MFT_RECORD_IN_USE) { + if (ntfs_is_baad_recordp((le32*)kmirr)) { + ntfs_error(sb, "Incomplete multi sector " + "transfer detected in mft " + "mirror record %i.", i); + goto mm_unmap_out; + } } /* Get the amount of data in the current record. 
*/ bytes = le32_to_cpu(((MFT_RECORD*)kmft)->bytes_in_use); - if (!bytes || bytes > vol->mft_record_size) { + if (bytes < sizeof(MFT_RECORD_OLD) || + bytes > vol->mft_record_size || + ntfs_is_baad_recordp((le32*)kmft)) { bytes = le32_to_cpu(((MFT_RECORD*)kmirr)->bytes_in_use); - if (!bytes || bytes > vol->mft_record_size) + if (bytes < sizeof(MFT_RECORD_OLD) || + bytes > vol->mft_record_size || + ntfs_is_baad_recordp((le32*)kmirr)) bytes = vol->mft_record_size; } /* Compare the two records. */ -- cgit v1.2.3 From 5be0e9511990dc307670dc66a42073db96b20f26 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 23 Mar 2006 02:59:19 -0800 Subject: [PATCH] proc: fix duplicate line in /proc/devices Fix a duplicate block device line printed after the "Block device" header in /proc/devices. Signed-off-by: Neil Horman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 1d24fead51a6..826c131994c3 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -312,7 +312,7 @@ static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) case BLK_HDR: info->state = BLK_LIST; (*pos)++; - break; + /*fallthrough*/ case BLK_LIST: if (get_blkdev_info(info->blkdev,&idummy,&ndummy)) { /* -- cgit v1.2.3 From d8733c2956968a01394a4d2a9e97a8b431a78776 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 23 Mar 2006 03:00:11 -0800 Subject: [PATCH] ext3_readdir: use generic readahead Linus points out that ext3_readdir's readahead only cuts in when ext3_readdir() is operating at the very start of the directory. So for large directories we end up performing no readahead at all and we suck. So take it all out and use the core VM's page_cache_readahead(). This means that ext3 directory reads will use all of readahead's dynamic sizing goop. Note that we're using the directory's filp->f_ra to hold the readahead state, but readahead is actually being performed against the underlying blockdev's address_space. Fortunately the readahead code is all set up to handle this. Tested with printk. It works. I was struggling to find a real workload which actually cared. (The patch also exports page_cache_readahead() to GPL modules) Cc: "Stephen C. 
Tweedie" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/dir.c | 52 +++++++++++++++++++++++-------------------------- fs/ext3/inode.c | 2 +- include/linux/ext3_fs.h | 9 ++++++--- mm/readahead.c | 1 + 4 files changed, 32 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 832867aef3dc..773459164bb2 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -95,11 +95,10 @@ static int ext3_readdir(struct file * filp, void * dirent, filldir_t filldir) { int error = 0; - unsigned long offset, blk; - int i, num, stored; - struct buffer_head * bh, * tmp, * bha[16]; - struct ext3_dir_entry_2 * de; - struct super_block * sb; + unsigned long offset; + int i, stored; + struct ext3_dir_entry_2 *de; + struct super_block *sb; int err; struct inode *inode = filp->f_dentry->d_inode; int ret = 0; @@ -124,12 +123,29 @@ static int ext3_readdir(struct file * filp, } #endif stored = 0; - bh = NULL; offset = filp->f_pos & (sb->s_blocksize - 1); while (!error && !stored && filp->f_pos < inode->i_size) { - blk = (filp->f_pos) >> EXT3_BLOCK_SIZE_BITS(sb); - bh = ext3_bread(NULL, inode, blk, 0, &err); + unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb); + struct buffer_head map_bh; + struct buffer_head *bh = NULL; + + map_bh.b_state = 0; + err = ext3_get_block_handle(NULL, inode, blk, &map_bh, 0, 0); + if (!err) { + page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, + &filp->f_ra, + filp, + map_bh.b_blocknr >> + (PAGE_CACHE_SHIFT - inode->i_blkbits), + 1); + bh = ext3_bread(NULL, inode, blk, 0, &err); + } + + /* + * We ignore I/O errors on directories so users have a chance + * of recovering data when there's a bad sector + */ if (!bh) { ext3_error (sb, "ext3_readdir", "directory #%lu contains a hole at offset %lu", @@ -138,26 +154,6 @@ static int ext3_readdir(struct file * filp, continue; } - /* - * Do the readahead - */ - if (!offset) { - for (i = 16 >> (EXT3_BLOCK_SIZE_BITS(sb) - 9), num = 0; - i > 0; i--) { - tmp = ext3_getblk (NULL, inode, ++blk, 0, &err); - if (tmp && !buffer_uptodate(tmp) && - !buffer_locked(tmp)) - bha[num++] = tmp; - else - brelse (tmp); - } - if (num) { - ll_rw_block (READA, num, bha); - for (i = 0; i < num; i++) - brelse (bha[i]); - } - } - revalidate: /* If the dir block has changed since the last call to * readdir(2), then we might be pointing to an invalid diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 0384e539b88f..d59d5a667b0b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -671,7 +671,7 @@ err_out: * The BKL may not be held on entry here. Be sure to take it early. 
*/ -static int +int ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create, int extend_disksize) { diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index c0272d73ab20..e7239f2f97a1 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -772,9 +772,12 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned); /* inode.c */ -extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); -extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); -extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); +int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); +struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); +struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); +int ext3_get_block_handle(handle_t *handle, struct inode *inode, + sector_t iblock, struct buffer_head *bh_result, int create, + int extend_disksize); extern void ext3_read_inode (struct inode *); extern int ext3_write_inode (struct inode *, int); diff --git a/mm/readahead.c b/mm/readahead.c index 301b36c4a0ce..0f142a40984b 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -555,6 +555,7 @@ recheck: out: return ra->prev_page + 1; } +EXPORT_SYMBOL_GPL(page_cache_readahead); /* * handle_ra_miss() is called when it is known that a page which should have -- cgit v1.2.3 From 0c9e63fd38a2fb2181668a0cdd622a3c23cfd567 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Mar 2006 03:00:12 -0800 Subject: [PATCH] Shrinks sizeof(files_struct) and better layout 1) Reduce the size of (struct fdtable) to exactly 64 bytes on 32bits platforms, lowering kmalloc() allocated space by 50%. 2) Reduce the size of (files_struct), using a special 32 bits (or 64bits) embedded_fd_set, instead of a 1024 bits fd_set for the close_on_exec_init and open_fds_init fields. This save some ram (248 bytes per task) as most tasks dont open more than 32 files. D-Cache footprint for such tasks is also reduced to the minimum. 3) Reduce size of allocated fdset. Currently two full pages are allocated, that is 32768 bits on x86 for example, and way too much. The minimum is now L1_CACHE_BYTES. UP and SMP should benefit from this patch, because most tasks will touch only one cache line when open()/close() stdin/stdout/stderr (0/1/2), (next_fd, close_on_exec_init, open_fds_init, fd_array[0 .. 2] being in the same cache line) Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fcntl.c | 9 ++++----- fs/file.c | 34 ++++++++++++++-------------------- fs/open.c | 8 ++++---- include/linux/file.h | 28 ++++++++++++++++++++++++---- include/linux/init_task.h | 10 +++++----- kernel/fork.c | 8 ++++---- 6 files changed, 55 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index dc4a7007f4e7..03c789560fb8 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -73,8 +73,8 @@ repeat: * orig_start..fdt->next_fd */ start = orig_start; - if (start < fdt->next_fd) - start = fdt->next_fd; + if (start < files->next_fd) + start = files->next_fd; newfd = start; if (start < fdt->max_fdset) { @@ -102,9 +102,8 @@ repeat: * we reacquire the fdtable pointer and use it while holding * the lock, no one can free it during that time. 
*/ - fdt = files_fdtable(files); - if (start <= fdt->next_fd) - fdt->next_fd = newfd + 1; + if (start <= files->next_fd) + files->next_fd = newfd + 1; error = newfd; diff --git a/fs/file.c b/fs/file.c index cea7cbea11d0..bbc743314730 100644 --- a/fs/file.c +++ b/fs/file.c @@ -125,7 +125,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu) kmem_cache_free(files_cachep, fdt->free_files); return; } - if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) { + if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE && + fdt->max_fds <= NR_OPEN_DEFAULT) { /* * The fdtable was embedded */ @@ -155,8 +156,9 @@ static void free_fdtable_rcu(struct rcu_head *rcu) void free_fdtable(struct fdtable *fdt) { - if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE || - fdt->max_fds > NR_OPEN_DEFAULT) + if (fdt->free_files || + fdt->max_fdset > EMBEDDED_FD_SET_SIZE || + fdt->max_fds > NR_OPEN_DEFAULT) call_rcu(&fdt->rcu, free_fdtable_rcu); } @@ -199,7 +201,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) (nfdt->max_fds - fdt->max_fds) * sizeof(struct file *)); } - nfdt->next_fd = fdt->next_fd; } /* @@ -220,11 +221,9 @@ fd_set * alloc_fdset(int num) void free_fdset(fd_set *array, int num) { - int size = num / 8; - - if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */ + if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */ return; - else if (size <= PAGE_SIZE) + else if (num <= 8 * PAGE_SIZE) kfree(array); else vfree(array); @@ -237,22 +236,17 @@ static struct fdtable *alloc_fdtable(int nr) fd_set *new_openset = NULL, *new_execset = NULL; struct file **new_fds; - fdt = kmalloc(sizeof(*fdt), GFP_KERNEL); + fdt = kzalloc(sizeof(*fdt), GFP_KERNEL); if (!fdt) goto out; - memset(fdt, 0, sizeof(*fdt)); - nfds = __FD_SETSIZE; + nfds = 8 * L1_CACHE_BYTES; /* Expand to the max in easy steps */ - do { - if (nfds < (PAGE_SIZE * 8)) - nfds = PAGE_SIZE * 8; - else { - nfds = nfds * 2; - if (nfds > NR_OPEN) - nfds = NR_OPEN; - } - } while (nfds <= nr); + while (nfds <= nr) { + nfds = nfds * 2; + if (nfds > NR_OPEN) + nfds = NR_OPEN; + } new_openset = alloc_fdset(nfds); new_execset = alloc_fdset(nfds); diff --git a/fs/open.c b/fs/open.c index 70e0230d8e77..1091dadd6c38 100644 --- a/fs/open.c +++ b/fs/open.c @@ -973,7 +973,7 @@ repeat: fdt = files_fdtable(files); fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fdset, - fdt->next_fd); + files->next_fd); /* * N.B. 
For clone tasks sharing a files structure, this test @@ -998,7 +998,7 @@ repeat: FD_SET(fd, fdt->open_fds); FD_CLR(fd, fdt->close_on_exec); - fdt->next_fd = fd + 1; + files->next_fd = fd + 1; #if 1 /* Sanity check */ if (fdt->fd[fd] != NULL) { @@ -1019,8 +1019,8 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); __FD_CLR(fd, fdt->open_fds); - if (fd < fdt->next_fd) - fdt->next_fd = fd; + if (fd < files->next_fd) + files->next_fd = fd; } void fastcall put_unused_fd(unsigned int fd) diff --git a/include/linux/file.h b/include/linux/file.h index 9901b850f2e4..9f7c2513866f 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * The default fd array needs to be at least BITS_PER_LONG, @@ -17,10 +18,22 @@ */ #define NR_OPEN_DEFAULT BITS_PER_LONG +/* + * The embedded_fd_set is a small fd_set, + * suitable for most tasks (which open <= BITS_PER_LONG files) + */ +struct embedded_fd_set { + unsigned long fds_bits[1]; +}; + +/* + * More than this number of fds: we use a separately allocated fd_set + */ +#define EMBEDDED_FD_SET_SIZE (BITS_PER_BYTE * sizeof(struct embedded_fd_set)) + struct fdtable { unsigned int max_fds; int max_fdset; - int next_fd; struct file ** fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; @@ -33,13 +46,20 @@ struct fdtable { * Open file table structure */ struct files_struct { + /* + * read mostly part + */ atomic_t count; struct fdtable *fdt; struct fdtable fdtab; - fd_set close_on_exec_init; - fd_set open_fds_init; + /* + * written part on a separate cache line in SMP + */ + spinlock_t file_lock ____cacheline_aligned_in_smp; + int next_fd; + struct embedded_fd_set close_on_exec_init; + struct embedded_fd_set open_fds_init; struct file * fd_array[NR_OPEN_DEFAULT]; - spinlock_t file_lock; /* Protects concurrent writers. 
Nests inside tsk->alloc_lock */ }; #define files_fdtable(files) (rcu_dereference((files)->fdt)) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dcfd2ecccb5d..92146f3b7423 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -7,11 +7,10 @@ #define INIT_FDTABLE \ { \ .max_fds = NR_OPEN_DEFAULT, \ - .max_fdset = __FD_SETSIZE, \ - .next_fd = 0, \ + .max_fdset = EMBEDDED_FD_SET_SIZE, \ .fd = &init_files.fd_array[0], \ - .close_on_exec = &init_files.close_on_exec_init, \ - .open_fds = &init_files.open_fds_init, \ + .close_on_exec = (fd_set *)&init_files.close_on_exec_init, \ + .open_fds = (fd_set *)&init_files.open_fds_init, \ .rcu = RCU_HEAD_INIT, \ .free_files = NULL, \ .next = NULL, \ @@ -20,9 +19,10 @@ #define INIT_FILES \ { \ .count = ATOMIC_INIT(1), \ - .file_lock = SPIN_LOCK_UNLOCKED, \ .fdt = &init_files.fdtab, \ .fdtab = INIT_FDTABLE, \ + .file_lock = SPIN_LOCK_UNLOCKED, \ + .next_fd = 0, \ .close_on_exec_init = { { 0, } }, \ .open_fds_init = { { 0, } }, \ .fd_array = { NULL, } \ diff --git a/kernel/fork.c b/kernel/fork.c index 9bd7b65ee418..c79ae0b19a49 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -607,12 +607,12 @@ static struct files_struct *alloc_files(void) atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); + newf->next_fd = 0; fdt = &newf->fdtab; - fdt->next_fd = 0; fdt->max_fds = NR_OPEN_DEFAULT; - fdt->max_fdset = __FD_SETSIZE; - fdt->close_on_exec = &newf->close_on_exec_init; - fdt->open_fds = &newf->open_fds_init; + fdt->max_fdset = EMBEDDED_FD_SET_SIZE; + fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + fdt->open_fds = (fd_set *)&newf->open_fds_init; fdt->fd = &newf->fd_array[0]; INIT_RCU_HEAD(&fdt->rcu); fdt->free_files = NULL; -- cgit v1.2.3 From 6362e4d4eda61efb04ac1cdae32e48ac6d90b701 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Mar 2006 03:00:17 -0800 Subject: [PATCH] Fix oops in invalidate_dquots() When quota is being turned off we assumed that all the references to dquots were already dropped. That need not be true as inodes being deleted are not on superblock's inodes list and hence we need not reach it when removing quota references from inodes. So invalidate_dquots() has to wait for all the users of dquots (as quota is already marked as turned off, no new references can be acquired and so this is bound to happen rather early). When we do this, we can also remove the iprune_sem locking as it was protecting us against exactly the same problem when freeing inodes icache memory. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dquot.c | 52 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/dquot.c b/fs/dquot.c index 1966c890b48d..9376a4378988 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -118,8 +118,7 @@ * spinlock to internal buffers before writing. * * Lock ordering (including related VFS locks) is the following: - * i_mutex > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem > - * > dquot->dq_lock > dqio_sem + * i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > dqio_sem * i_mutex on quota files is special (it's below dqio_sem) */ @@ -407,23 +406,49 @@ out_dqlock: /* Invalidate all dquots on the list. Note that this function is called after * quota is disabled and pointers from inodes removed so there cannot be new - * quota users. Also because we hold dqonoff_sem there can be no quota users - * for this sb+type at all. */ + * quota users. 
There can still be some users of quotas due to inodes being + * just deleted or pruned by prune_icache() (those are not attached to any + * list). We have to wait for such users. + */ static void invalidate_dquots(struct super_block *sb, int type) { struct dquot *dquot, *tmp; +restart: spin_lock(&dq_list_lock); list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { if (dquot->dq_sb != sb) continue; if (dquot->dq_type != type) continue; -#ifdef __DQUOT_PARANOIA - if (atomic_read(&dquot->dq_count)) - BUG(); -#endif - /* Quota now has no users and it has been written on last dqput() */ + /* Wait for dquot users */ + if (atomic_read(&dquot->dq_count)) { + DEFINE_WAIT(wait); + + atomic_inc(&dquot->dq_count); + prepare_to_wait(&dquot->dq_wait_unused, &wait, + TASK_UNINTERRUPTIBLE); + spin_unlock(&dq_list_lock); + /* Once dqput() wakes us up, we know it's time to free + * the dquot. + * IMPORTANT: we rely on the fact that there is always + * at most one process waiting for dquot to free. + * Otherwise dq_count would be > 1 and we would never + * wake up. + */ + if (atomic_read(&dquot->dq_count) > 1) + schedule(); + finish_wait(&dquot->dq_wait_unused, &wait); + dqput(dquot); + /* At this moment dquot() need not exist (it could be + * reclaimed by prune_dqcache(). Hence we must + * restart. */ + goto restart; + } + /* + * Quota now has no users and it has been written on last + * dqput() + */ remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); @@ -540,6 +565,10 @@ we_slept: if (atomic_read(&dquot->dq_count) > 1) { /* We have more than one user... nothing to do */ atomic_dec(&dquot->dq_count); + /* Releasing dquot during quotaoff phase? */ + if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) && + atomic_read(&dquot->dq_count) == 1) + wake_up(&dquot->dq_wait_unused); spin_unlock(&dq_list_lock); return; } @@ -581,6 +610,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) INIT_LIST_HEAD(&dquot->dq_inuse); INIT_HLIST_NODE(&dquot->dq_hash); INIT_LIST_HEAD(&dquot->dq_dirty); + init_waitqueue_head(&dquot->dq_wait_unused); dquot->dq_sb = sb; dquot->dq_type = type; atomic_set(&dquot->dq_count, 1); @@ -732,13 +762,9 @@ static void drop_dquot_ref(struct super_block *sb, int type) { LIST_HEAD(tofree_head); - /* We need to be guarded against prune_icache to reach all the - * inodes - otherwise some can be on the local list of prune_icache */ - down(&iprune_sem); down_write(&sb_dqopt(sb)->dqptr_sem); remove_dquot_ref(sb, type, &tofree_head); up_write(&sb_dqopt(sb)->dqptr_sem); - up(&iprune_sem); put_dquot_list(&tofree_head); } -- cgit v1.2.3 From 4f7a07b88726781e37c4c5f63db3a6ee3f8500d3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:19 -0800 Subject: [PATCH] convert fs/9p/ to mutexes, fix locking bugs Convert fs/9p/mux.c from semaphore to mutex. NOTE: fixed locking bugs in the process - the code was using semaphores the other way around. 
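The locking bug fixed here is easiest to see in miniature. As a minimal sketch, with hypothetical names and not taken from the v9fs code itself: the old code called up() where it meant to take the lock and down() where it meant to drop it, so the critical section was never actually serialized; the mutex version acquires on entry and releases on exit as expected.

    #include <linux/mutex.h>

    /* Hypothetical example of the inverted-locking pattern being fixed. */
    static DEFINE_MUTEX(example_lock);      /* protects example_count */
    static int example_count;

    static void example_inc(void)
    {
            mutex_lock(&example_lock);      /* old buggy code did up(&sem) here */
            example_count++;
            mutex_unlock(&example_lock);    /* old buggy code did down(&sem) here */
    }

The hunks below show the same shape: v9fs_mux_poll_start() and v9fs_mux_poll_stop() previously released v9fs_mux_task_lock at entry and acquired it at exit.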
Signed-off-by: Ingo Molnar Cc: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/mux.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/9p/mux.c b/fs/9p/mux.c index ea1134eb47c8..8e8356c1c229 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "debug.h" #include "v9fs.h" @@ -110,7 +111,7 @@ static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address, static u16 v9fs_mux_get_tag(struct v9fs_mux_data *); static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16); -static DECLARE_MUTEX(v9fs_mux_task_lock); +static DEFINE_MUTEX(v9fs_mux_task_lock); static struct workqueue_struct *v9fs_mux_wq; static int v9fs_mux_num; @@ -166,7 +167,7 @@ static int v9fs_mux_poll_start(struct v9fs_mux_data *m) dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num, v9fs_mux_poll_task_num); - up(&v9fs_mux_task_lock); + mutex_lock(&v9fs_mux_task_lock); n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1); if (n > v9fs_mux_poll_task_num) { @@ -225,7 +226,7 @@ static int v9fs_mux_poll_start(struct v9fs_mux_data *m) } v9fs_mux_num++; - down(&v9fs_mux_task_lock); + mutex_unlock(&v9fs_mux_task_lock); return 0; } @@ -235,7 +236,7 @@ static void v9fs_mux_poll_stop(struct v9fs_mux_data *m) int i; struct v9fs_mux_poll_task *vpt; - up(&v9fs_mux_task_lock); + mutex_lock(&v9fs_mux_task_lock); vpt = m->poll_task; list_del(&m->mux_list); for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { @@ -252,7 +253,7 @@ static void v9fs_mux_poll_stop(struct v9fs_mux_data *m) v9fs_mux_poll_task_num--; } v9fs_mux_num--; - down(&v9fs_mux_task_lock); + mutex_unlock(&v9fs_mux_task_lock); } /** -- cgit v1.2.3 From c039e3134ae62863bbc8e8429b29e3c43cf21b2a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 23 Mar 2006 03:00:28 -0800 Subject: [PATCH] sem2mutex: blockdev #2 Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Acked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ioctl.c | 22 +++++++++++----------- drivers/block/rd.c | 4 ++-- drivers/s390/block/dasd_ioctl.c | 8 ++++---- fs/block_dev.c | 28 ++++++++++++++-------------- fs/buffer.c | 6 +++--- fs/super.c | 4 ++-- include/linux/fs.h | 4 ++-- 7 files changed, 38 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/block/ioctl.c b/block/ioctl.c index e1109491c234..35fdb7dc6512 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -42,9 +42,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user return -EINVAL; } /* partition number in use? */ - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); if (disk->part[part - 1]) { - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return -EBUSY; } /* overlap? 
*/ @@ -55,13 +55,13 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user continue; if (!(start+length <= s->start_sect || start >= s->start_sect + s->nr_sects)) { - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return -EBUSY; } } /* all seems OK */ add_partition(disk, part, start, length); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return 0; case BLKPG_DEL_PARTITION: if (!disk->part[part-1]) @@ -71,9 +71,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user bdevp = bdget_disk(disk, part); if (!bdevp) return -ENOMEM; - down(&bdevp->bd_sem); + mutex_lock(&bdevp->bd_mutex); if (bdevp->bd_openers) { - up(&bdevp->bd_sem); + mutex_unlock(&bdevp->bd_mutex); bdput(bdevp); return -EBUSY; } @@ -81,10 +81,10 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user fsync_bdev(bdevp); invalidate_bdev(bdevp, 0); - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); delete_partition(disk, part); - up(&bdev->bd_sem); - up(&bdevp->bd_sem); + mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdevp->bd_mutex); bdput(bdevp); return 0; @@ -102,10 +102,10 @@ static int blkdev_reread_part(struct block_device *bdev) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (down_trylock(&bdev->bd_sem)) + if (!mutex_trylock(&bdev->bd_mutex)) return -EBUSY; res = rescan_partitions(disk, bdev); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return res; } diff --git a/drivers/block/rd.c b/drivers/block/rd.c index ffd6abd6d5a0..1c54f46d3f70 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -310,12 +310,12 @@ static int rd_ioctl(struct inode *inode, struct file *file, * cache */ error = -EBUSY; - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); if (bdev->bd_openers <= 2) { truncate_inode_pages(bdev->bd_inode->i_mapping, 0); error = 0; } - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return error; } diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index fafeeae52675..f9930552ab54 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -151,9 +151,9 @@ dasd_ioctl_enable(struct block_device *bdev, int no, long args) return -ENODEV; dasd_enable_device(device); /* Formatting the dasd device can change the capacity. */ - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); i_size_write(bdev->bd_inode, (loff_t)get_capacity(device->gdp) << 9); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return 0; } @@ -184,9 +184,9 @@ dasd_ioctl_disable(struct block_device *bdev, int no, long args) * Set i_size to zero, since read, write, etc. check against this * value. 
*/ - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); i_size_write(bdev->bd_inode, 0); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 6e50346fb1ee..44d05e6e34db 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -265,8 +265,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) SLAB_CTOR_CONSTRUCTOR) { memset(bdev, 0, sizeof(*bdev)); - sema_init(&bdev->bd_sem, 1); - sema_init(&bdev->bd_mount_sem, 1); + mutex_init(&bdev->bd_mutex); + mutex_init(&bdev->bd_mount_mutex); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); inode_init_once(&ei->vfs_inode); @@ -574,7 +574,7 @@ static int do_open(struct block_device *bdev, struct file *file) } owner = disk->fops->owner; - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); if (!bdev->bd_openers) { bdev->bd_disk = disk; bdev->bd_contains = bdev; @@ -605,21 +605,21 @@ static int do_open(struct block_device *bdev, struct file *file) if (ret) goto out_first; bdev->bd_contains = whole; - down(&whole->bd_sem); + mutex_lock(&whole->bd_mutex); whole->bd_part_count++; p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { whole->bd_part_count--; - up(&whole->bd_sem); + mutex_unlock(&whole->bd_mutex); ret = -ENXIO; goto out_first; } kobject_get(&p->kobj); bdev->bd_part = p; bd_set_size(bdev, (loff_t) p->nr_sects << 9); - up(&whole->bd_sem); + mutex_unlock(&whole->bd_mutex); } } else { put_disk(disk); @@ -633,13 +633,13 @@ static int do_open(struct block_device *bdev, struct file *file) if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); } else { - down(&bdev->bd_contains->bd_sem); + mutex_lock(&bdev->bd_contains->bd_mutex); bdev->bd_contains->bd_part_count++; - up(&bdev->bd_contains->bd_sem); + mutex_unlock(&bdev->bd_contains->bd_mutex); } } bdev->bd_openers++; - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); unlock_kernel(); return 0; @@ -652,7 +652,7 @@ out_first: put_disk(disk); module_put(owner); out: - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); unlock_kernel(); if (ret) bdput(bdev); @@ -714,7 +714,7 @@ int blkdev_put(struct block_device *bdev) struct inode *bd_inode = bdev->bd_inode; struct gendisk *disk = bdev->bd_disk; - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); lock_kernel(); if (!--bdev->bd_openers) { sync_blockdev(bdev); @@ -724,9 +724,9 @@ int blkdev_put(struct block_device *bdev) if (disk->fops->release) ret = disk->fops->release(bd_inode, NULL); } else { - down(&bdev->bd_contains->bd_sem); + mutex_lock(&bdev->bd_contains->bd_mutex); bdev->bd_contains->bd_part_count--; - up(&bdev->bd_contains->bd_sem); + mutex_unlock(&bdev->bd_contains->bd_mutex); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; @@ -746,7 +746,7 @@ int blkdev_put(struct block_device *bdev) bdev->bd_contains = NULL; } unlock_kernel(); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); bdput(bdev); return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index 1d3683d496f8..0d6ca7bac6c8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -201,7 +201,7 @@ int fsync_bdev(struct block_device *bdev) * freeze_bdev -- lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock * - * This takes the block device bd_mount_sem to make sure no new mounts + * This takes the block device bd_mount_mutex to make sure no new mounts * happen on bdev until thaw_bdev() is called. 
* If a superblock is found on this device, we take the s_umount semaphore * on it to make sure nobody unmounts until the snapshot creation is done. @@ -210,7 +210,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) { struct super_block *sb; - down(&bdev->bd_mount_sem); + mutex_lock(&bdev->bd_mount_mutex); sb = get_super(bdev); if (sb && !(sb->s_flags & MS_RDONLY)) { sb->s_frozen = SB_FREEZE_WRITE; @@ -264,7 +264,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb) drop_super(sb); } - up(&bdev->bd_mount_sem); + mutex_unlock(&bdev->bd_mount_mutex); } EXPORT_SYMBOL(thaw_bdev); diff --git a/fs/super.c b/fs/super.c index e20b5580afd5..8f9c9b3af70c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -693,9 +693,9 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, * will protect the lockfs code from trying to start a snapshot * while we are mounting */ - down(&bdev->bd_mount_sem); + mutex_lock(&bdev->bd_mount_mutex); s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); - up(&bdev->bd_mount_sem); + mutex_unlock(&bdev->bd_mount_mutex); if (IS_ERR(s)) goto out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 128d0082522c..009ac96053fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -397,8 +397,8 @@ struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ struct inode * bd_inode; /* will die */ int bd_openers; - struct semaphore bd_sem; /* open/close mutex */ - struct semaphore bd_mount_sem; /* mount mutex */ + struct mutex bd_mutex; /* open/close mutex */ + struct mutex bd_mount_mutex; /* mount mutex */ struct list_head bd_inodes; void * bd_holder; int bd_holders; -- cgit v1.2.3 From d3be915fc5e7d19a2283ad9b0fe0782a74675d0a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:29 -0800 Subject: [PATCH] sem2mutex: quota Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Ingo Molnar Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dquot.c | 117 +++++++++++++++++++++++++------------------------- fs/ext3/super.c | 4 +- fs/quota.c | 6 +-- fs/quota_v2.c | 2 +- fs/super.c | 4 +- include/linux/quota.h | 7 +-- 6 files changed, 71 insertions(+), 69 deletions(-) (limited to 'fs') diff --git a/fs/dquot.c b/fs/dquot.c index 9376a4378988..acf07e581f8c 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -103,12 +103,12 @@ * (these locking rules also apply for S_NOQUOTA flag in the inode - note that * for altering the flag i_mutex is also needed). If operation is holding * reference to dquot in other way (e.g. quotactl ops) it must be guarded by - * dqonoff_sem. + * dqonoff_mutex. * This locking assures that: * a) update/access to dquot pointers in inode is serialized * b) everyone is guarded against invalidate_dquots() * - * Each dquot has its dq_lock semaphore. Locked dquots might not be referenced + * Each dquot has its dq_lock mutex. Locked dquots might not be referenced * from inodes (dquot_alloc_space() and such don't check the dq_lock). * Currently dquot is locked only when it is being read to memory (or space for * it is being allocated) on the first dqget() and when it is being released on @@ -118,8 +118,9 @@ * spinlock to internal buffers before writing. 
* * Lock ordering (including related VFS locks) is the following: - * i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > dqio_sem - * i_mutex on quota files is special (it's below dqio_sem) + * i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > + * dqio_mutex + * i_mutex on quota files is special (it's below dqio_mutex) */ static DEFINE_SPINLOCK(dq_list_lock); @@ -280,8 +281,8 @@ static inline void remove_inuse(struct dquot *dquot) static void wait_on_dquot(struct dquot *dquot) { - down(&dquot->dq_lock); - up(&dquot->dq_lock); + mutex_lock(&dquot->dq_lock); + mutex_unlock(&dquot->dq_lock); } #define mark_dquot_dirty(dquot) ((dquot)->dq_sb->dq_op->mark_dirty(dquot)) @@ -320,8 +321,8 @@ int dquot_acquire(struct dquot *dquot) int ret = 0, ret2 = 0; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); - down(&dquot->dq_lock); - down(&dqopt->dqio_sem); + mutex_lock(&dquot->dq_lock); + mutex_lock(&dqopt->dqio_mutex); if (!test_bit(DQ_READ_B, &dquot->dq_flags)) ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot); if (ret < 0) @@ -342,8 +343,8 @@ int dquot_acquire(struct dquot *dquot) } set_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_iolock: - up(&dqopt->dqio_sem); - up(&dquot->dq_lock); + mutex_unlock(&dqopt->dqio_mutex); + mutex_unlock(&dquot->dq_lock); return ret; } @@ -355,7 +356,7 @@ int dquot_commit(struct dquot *dquot) int ret = 0, ret2 = 0; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); - down(&dqopt->dqio_sem); + mutex_lock(&dqopt->dqio_mutex); spin_lock(&dq_list_lock); if (!clear_dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); @@ -372,7 +373,7 @@ int dquot_commit(struct dquot *dquot) ret = ret2; } out_sem: - up(&dqopt->dqio_sem); + mutex_unlock(&dqopt->dqio_mutex); return ret; } @@ -384,11 +385,11 @@ int dquot_release(struct dquot *dquot) int ret = 0, ret2 = 0; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); - down(&dquot->dq_lock); + mutex_lock(&dquot->dq_lock); /* Check whether we are not racing with some other dqget() */ if (atomic_read(&dquot->dq_count) > 1) goto out_dqlock; - down(&dqopt->dqio_sem); + mutex_lock(&dqopt->dqio_mutex); if (dqopt->ops[dquot->dq_type]->release_dqblk) { ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot); /* Write the info */ @@ -398,9 +399,9 @@ int dquot_release(struct dquot *dquot) ret = ret2; } clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); - up(&dqopt->dqio_sem); + mutex_unlock(&dqopt->dqio_mutex); out_dqlock: - up(&dquot->dq_lock); + mutex_unlock(&dquot->dq_lock); return ret; } @@ -464,7 +465,7 @@ int vfs_quota_sync(struct super_block *sb, int type) struct quota_info *dqopt = sb_dqopt(sb); int cnt; - down(&dqopt->dqonoff_sem); + mutex_lock(&dqopt->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; @@ -499,7 +500,7 @@ int vfs_quota_sync(struct super_block *sb, int type) spin_lock(&dq_list_lock); dqstats.syncs++; spin_unlock(&dq_list_lock); - up(&dqopt->dqonoff_sem); + mutex_unlock(&dqopt->dqonoff_mutex); return 0; } @@ -540,7 +541,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) /* * Put reference to dquot * NOTE: If you change this function please check whether dqput_blocks() works right... 
- * MUST be called with either dqptr_sem or dqonoff_sem held + * MUST be called with either dqptr_sem or dqonoff_mutex held */ static void dqput(struct dquot *dquot) { @@ -605,7 +606,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) return NODQUOT; memset((caddr_t)dquot, 0, sizeof(struct dquot)); - sema_init(&dquot->dq_lock, 1); + mutex_init(&dquot->dq_lock); INIT_LIST_HEAD(&dquot->dq_free); INIT_LIST_HEAD(&dquot->dq_inuse); INIT_HLIST_NODE(&dquot->dq_hash); @@ -620,7 +621,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) /* * Get reference to dquot - * MUST be called with either dqptr_sem or dqonoff_sem held + * MUST be called with either dqptr_sem or dqonoff_mutex held */ static struct dquot *dqget(struct super_block *sb, unsigned int id, int type) { @@ -686,7 +687,7 @@ static int dqinit_needed(struct inode *inode, int type) return 0; } -/* This routine is guarded by dqonoff_sem semaphore */ +/* This routine is guarded by dqonoff_mutex mutex */ static void add_dquot_ref(struct super_block *sb, int type) { struct list_head *p; @@ -964,8 +965,8 @@ int dquot_initialize(struct inode *inode, int type) unsigned int id = 0; int cnt, ret = 0; - /* First test before acquiring semaphore - solves deadlocks when we - * re-enter the quota code and are already holding the semaphore */ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return 0; down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1028,8 +1029,8 @@ int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) int cnt, ret = NO_QUOTA; char warntype[MAXQUOTAS]; - /* First test before acquiring semaphore - solves deadlocks when we - * re-enter the quota code and are already holding the semaphore */ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) { out_add: inode_add_bytes(inode, number); @@ -1077,8 +1078,8 @@ int dquot_alloc_inode(const struct inode *inode, unsigned long number) int cnt, ret = NO_QUOTA; char warntype[MAXQUOTAS]; - /* First test before acquiring semaphore - solves deadlocks when we - * re-enter the quota code and are already holding the semaphore */ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return QUOTA_OK; for (cnt = 0; cnt < MAXQUOTAS; cnt++) @@ -1121,8 +1122,8 @@ int dquot_free_space(struct inode *inode, qsize_t number) { unsigned int cnt; - /* First test before acquiring semaphore - solves deadlocks when we - * re-enter the quota code and are already holding the semaphore */ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) { out_sub: inode_sub_bytes(inode, number); @@ -1157,8 +1158,8 @@ int dquot_free_inode(const struct inode *inode, unsigned long number) { unsigned int cnt; - /* First test before acquiring semaphore - solves deadlocks when we - * re-enter the quota code and are already holding the semaphore */ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return QUOTA_OK; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1197,8 +1198,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) chgid = (iattr->ia_valid & ATTR_GID) && 
inode->i_gid != iattr->ia_gid; char warntype[MAXQUOTAS]; - /* First test before acquiring semaphore - solves deadlocks when we - * re-enter the quota code and are already holding the semaphore */ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return QUOTA_OK; /* Clear the arrays */ @@ -1292,9 +1293,9 @@ int dquot_commit_info(struct super_block *sb, int type) int ret; struct quota_info *dqopt = sb_dqopt(sb); - down(&dqopt->dqio_sem); + mutex_lock(&dqopt->dqio_mutex); ret = dqopt->ops[type]->write_file_info(sb, type); - up(&dqopt->dqio_sem); + mutex_unlock(&dqopt->dqio_mutex); return ret; } @@ -1350,7 +1351,7 @@ int vfs_quota_off(struct super_block *sb, int type) struct inode *toputinode[MAXQUOTAS]; /* We need to serialize quota_off() for device */ - down(&dqopt->dqonoff_sem); + mutex_lock(&dqopt->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { toputinode[cnt] = NULL; if (type != -1 && cnt != type) @@ -1379,7 +1380,7 @@ int vfs_quota_off(struct super_block *sb, int type) dqopt->info[cnt].dqi_bgrace = 0; dqopt->ops[cnt] = NULL; } - up(&dqopt->dqonoff_sem); + mutex_unlock(&dqopt->dqonoff_mutex); /* Sync the superblock so that buffers with quota data are written to * disk (and so userspace sees correct data afterwards). */ if (sb->s_op->sync_fs) @@ -1392,7 +1393,7 @@ int vfs_quota_off(struct super_block *sb, int type) * changes done by userspace on the next quotaon() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (toputinode[cnt]) { - down(&dqopt->dqonoff_sem); + mutex_lock(&dqopt->dqonoff_mutex); /* If quota was reenabled in the meantime, we have * nothing to do */ if (!sb_has_quota_enabled(sb, cnt)) { @@ -1404,7 +1405,7 @@ int vfs_quota_off(struct super_block *sb, int type) mark_inode_dirty(toputinode[cnt]); iput(toputinode[cnt]); } - up(&dqopt->dqonoff_sem); + mutex_unlock(&dqopt->dqonoff_mutex); } if (sb->s_bdev) invalidate_bdev(sb->s_bdev, 0); @@ -1445,7 +1446,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id) /* And now flush the block cache so that kernel sees the changes */ invalidate_bdev(sb->s_bdev, 0); mutex_lock(&inode->i_mutex); - down(&dqopt->dqonoff_sem); + mutex_lock(&dqopt->dqonoff_mutex); if (sb_has_quota_enabled(sb, type)) { error = -EBUSY; goto out_lock; @@ -1470,17 +1471,17 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id) dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); - down(&dqopt->dqio_sem); + mutex_lock(&dqopt->dqio_mutex); if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) { - up(&dqopt->dqio_sem); + mutex_unlock(&dqopt->dqio_mutex); goto out_file_init; } - up(&dqopt->dqio_sem); + mutex_unlock(&dqopt->dqio_mutex); mutex_unlock(&inode->i_mutex); set_enable_flags(dqopt, type); add_dquot_ref(sb, type); - up(&dqopt->dqonoff_sem); + mutex_unlock(&dqopt->dqonoff_mutex); return 0; @@ -1488,7 +1489,7 @@ out_file_init: dqopt->files[type] = NULL; iput(inode); out_lock: - up(&dqopt->dqonoff_sem); + mutex_unlock(&dqopt->dqonoff_mutex); if (oldflags != -1) { down_write(&dqopt->dqptr_sem); /* Set the flags back (in the case of accidental quotaon() @@ -1576,14 +1577,14 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d { struct dquot *dquot; - down(&sb_dqopt(sb)->dqonoff_sem); + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); if (!(dquot = dqget(sb, id, type))) { - up(&sb_dqopt(sb)->dqonoff_sem); + 
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return -ESRCH; } do_get_dqblk(dquot, di); dqput(dquot); - up(&sb_dqopt(sb)->dqonoff_sem); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return 0; } @@ -1645,14 +1646,14 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d { struct dquot *dquot; - down(&sb_dqopt(sb)->dqonoff_sem); + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); if (!(dquot = dqget(sb, id, type))) { - up(&sb_dqopt(sb)->dqonoff_sem); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return -ESRCH; } do_set_dqblk(dquot, di); dqput(dquot); - up(&sb_dqopt(sb)->dqonoff_sem); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return 0; } @@ -1661,9 +1662,9 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) { struct mem_dqinfo *mi; - down(&sb_dqopt(sb)->dqonoff_sem); + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); if (!sb_has_quota_enabled(sb, type)) { - up(&sb_dqopt(sb)->dqonoff_sem); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return -ESRCH; } mi = sb_dqopt(sb)->info + type; @@ -1673,7 +1674,7 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) ii->dqi_flags = mi->dqi_flags & DQF_MASK; ii->dqi_valid = IIF_ALL; spin_unlock(&dq_data_lock); - up(&sb_dqopt(sb)->dqonoff_sem); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return 0; } @@ -1682,9 +1683,9 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) { struct mem_dqinfo *mi; - down(&sb_dqopt(sb)->dqonoff_sem); + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); if (!sb_has_quota_enabled(sb, type)) { - up(&sb_dqopt(sb)->dqonoff_sem); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return -ESRCH; } mi = sb_dqopt(sb)->info + type; @@ -1699,7 +1700,7 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) mark_info_dirty(sb, type); /* Force write to disk */ sb->dq_op->write_info(sb, type); - up(&sb_dqopt(sb)->dqonoff_sem); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return 0; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 56bf76586019..efa832059143 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2382,8 +2382,8 @@ static int ext3_statfs (struct super_block * sb, struct kstatfs * buf) * Process 1 Process 2 * ext3_create() quota_sync() * journal_start() write_dquot() - * DQUOT_INIT() down(dqio_sem) - * down(dqio_sem) journal_start() + * DQUOT_INIT() down(dqio_mutex) + * down(dqio_mutex) journal_start() * */ diff --git a/fs/quota.c b/fs/quota.c index ba9e0bf32f67..d6a2be826e29 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -170,10 +170,10 @@ static void quota_sync_sb(struct super_block *sb, int type) /* Now when everything is written we can discard the pagecache so * that userspace sees the changes. We need i_mutex and so we could - * not do it inside dqonoff_sem. Moreover we need to be carefull + * not do it inside dqonoff_mutex. Moreover we need to be carefull * about races with quotaoff() (that is the reason why we have own * reference to inode). 
*/ - down(&sb_dqopt(sb)->dqonoff_sem); + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { discard[cnt] = NULL; if (type != -1 && cnt != type) @@ -182,7 +182,7 @@ static void quota_sync_sb(struct super_block *sb, int type) continue; discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]); } - up(&sb_dqopt(sb)->dqonoff_sem); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (discard[cnt]) { mutex_lock(&discard[cnt]->i_mutex); diff --git a/fs/quota_v2.c b/fs/quota_v2.c index b4199ec3ece4..c519a583e681 100644 --- a/fs/quota_v2.c +++ b/fs/quota_v2.c @@ -394,7 +394,7 @@ static int v2_write_dquot(struct dquot *dquot) ssize_t ret; struct v2_disk_dqblk ddquot, empty; - /* dq_off is guarded by dqio_sem */ + /* dq_off is guarded by dqio_mutex */ if (!dquot->dq_off) if ((ret = dq_insert_tree(dquot)) < 0) { printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret); diff --git a/fs/super.c b/fs/super.c index 8f9c9b3af70c..9cc6545dfa4c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -77,8 +77,8 @@ static struct super_block *alloc_super(void) s->s_count = S_BIAS; atomic_set(&s->s_active, 1); sema_init(&s->s_vfs_rename_sem,1); - sema_init(&s->s_dquot.dqio_sem, 1); - sema_init(&s->s_dquot.dqonoff_sem, 1); + mutex_init(&s->s_dquot.dqio_mutex); + mutex_init(&s->s_dquot.dqonoff_mutex); init_rwsem(&s->s_dquot.dqptr_sem); init_waitqueue_head(&s->s_wait_unfrozen); s->s_maxbytes = MAX_NON_LFS; diff --git a/include/linux/quota.h b/include/linux/quota.h index f33aeb22c26a..8dc2d04a103f 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -38,6 +38,7 @@ #include #include #include +#include #define __DQUOT_VERSION__ "dquot_6.5.1" #define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 @@ -215,7 +216,7 @@ struct dquot { struct list_head dq_inuse; /* List of all quotas */ struct list_head dq_free; /* Free list element */ struct list_head dq_dirty; /* List of dirty dquots */ - struct semaphore dq_lock; /* dquot IO lock */ + struct mutex dq_lock; /* dquot IO lock */ atomic_t dq_count; /* Use count */ wait_queue_head_t dq_wait_unused; /* Wait queue for dquot to become unused */ struct super_block *dq_sb; /* superblock this applies to */ @@ -285,8 +286,8 @@ struct quota_format_type { struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ - struct semaphore dqio_sem; /* lock device while I/O in progress */ - struct semaphore dqonoff_sem; /* Serialize quotaon & quotaoff */ + struct mutex dqio_mutex; /* lock device while I/O in progress */ + struct mutex dqonoff_mutex; /* Serialize quotaon & quotaoff */ struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */ struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */ struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ -- cgit v1.2.3 From d4f9af9dac4ecb75818f909168f87b441cc95653 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:30 -0800 Subject: [PATCH] sem2mutex: inotify Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
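The scripted conversion follows the same mechanical pattern in each of these commits. As a minimal sketch with hypothetical names (not taken from any file touched above): a semaphore declared with DECLARE_MUTEX() and used via down()/up() becomes a struct mutex declared with DEFINE_MUTEX() and used via mutex_lock()/mutex_unlock(). The one polarity trap is trylock: down_trylock() returns 0 on success, while mutex_trylock() returns nonzero on success, which is why the blkdev_reread_part() hunk earlier in this series rewrites "if (down_trylock(&bdev->bd_sem))" as "if (!mutex_trylock(&bdev->bd_mutex))".

    #include <linux/errno.h>
    #include <linux/mutex.h>
    #include <asm/semaphore.h>

    /* Before: a counting semaphore, initialized to 1, used purely as a mutex. */
    static DECLARE_MUTEX(demo_sem);

    static int demo_old(void)
    {
            if (down_trylock(&demo_sem))        /* nonzero: failed to acquire */
                    return -EBUSY;
            /* ... critical section ... */
            up(&demo_sem);
            return 0;
    }

    /* After: a real mutex; note the flipped trylock test. */
    static DEFINE_MUTEX(demo_mutex);

    static int demo_new(void)
    {
            if (!mutex_trylock(&demo_mutex))    /* zero: failed to acquire */
                    return -EBUSY;
            /* ... critical section ... */
            mutex_unlock(&demo_mutex);
            return 0;
    }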
Signed-off-by: Ingo Molnar Cc: John McCutchan Signed-off-by: Andrew Morton Acked-by: Robert Love Signed-off-by: Linus Torvalds --- fs/inode.c | 2 +- fs/inotify.c | 110 ++++++++++++++++++++++++++--------------------------- include/linux/fs.h | 2 +- 3 files changed, 57 insertions(+), 57 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index d0be6159eb7f..603e93ef0c6f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -206,7 +206,7 @@ void inode_init_once(struct inode *inode) i_size_ordered_init(inode); #ifdef CONFIG_INOTIFY INIT_LIST_HEAD(&inode->inotify_watches); - sema_init(&inode->inotify_sem, 1); + mutex_init(&inode->inotify_mutex); #endif } diff --git a/fs/inotify.c b/fs/inotify.c index 3041503bde02..60d9653d55b7 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -56,8 +56,8 @@ int inotify_max_queued_events; * dentry->d_lock (used to keep d_move() away from dentry->d_parent) * iprune_sem (synchronize shrink_icache_memory()) * inode_lock (protects the super_block->s_inodes list) - * inode->inotify_sem (protects inode->inotify_watches and watches->i_list) - * inotify_dev->sem (protects inotify_device and watches->d_list) + * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list) + * inotify_dev->mutex (protects inotify_device and watches->d_list) */ /* @@ -79,12 +79,12 @@ int inotify_max_queued_events; /* * struct inotify_device - represents an inotify instance * - * This structure is protected by the semaphore 'sem'. + * This structure is protected by the mutex 'mutex'. */ struct inotify_device { wait_queue_head_t wq; /* wait queue for i/o */ struct idr idr; /* idr mapping wd -> watch */ - struct semaphore sem; /* protects this bad boy */ + struct mutex mutex; /* protects this bad boy */ struct list_head events; /* list of queued events */ struct list_head watches; /* list of watches */ atomic_t count; /* reference count */ @@ -101,7 +101,7 @@ struct inotify_device { * device. In read(), this list is walked and all events that can fit in the * buffer are returned. * - * Protected by dev->sem of the device in which we are queued. + * Protected by dev->mutex of the device in which we are queued. */ struct inotify_kernel_event { struct inotify_event event; /* the user-space event */ @@ -112,8 +112,8 @@ struct inotify_kernel_event { /* * struct inotify_watch - represents a watch request on a specific inode * - * d_list is protected by dev->sem of the associated watch->dev. - * i_list and mask are protected by inode->inotify_sem of the associated inode. + * d_list is protected by dev->mutex of the associated watch->dev. + * i_list and mask are protected by inode->inotify_mutex of the associated inode. * dev, inode, and wd are never written to once the watch is created. */ struct inotify_watch { @@ -261,7 +261,7 @@ static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, /* * inotify_dev_get_event - return the next event in the given dev's queue * - * Caller must hold dev->sem. + * Caller must hold dev->mutex. */ static inline struct inotify_kernel_event * inotify_dev_get_event(struct inotify_device *dev) @@ -272,7 +272,7 @@ inotify_dev_get_event(struct inotify_device *dev) /* * inotify_dev_queue_event - add a new event to the given device * - * Caller must hold dev->sem. Can sleep (calls kernel_event()). + * Caller must hold dev->mutex. Can sleep (calls kernel_event()). 
*/ static void inotify_dev_queue_event(struct inotify_device *dev, struct inotify_watch *watch, u32 mask, @@ -315,7 +315,7 @@ static void inotify_dev_queue_event(struct inotify_device *dev, /* * remove_kevent - cleans up and ultimately frees the given kevent * - * Caller must hold dev->sem. + * Caller must hold dev->mutex. */ static void remove_kevent(struct inotify_device *dev, struct inotify_kernel_event *kevent) @@ -332,7 +332,7 @@ static void remove_kevent(struct inotify_device *dev, /* * inotify_dev_event_dequeue - destroy an event on the given device * - * Caller must hold dev->sem. + * Caller must hold dev->mutex. */ static void inotify_dev_event_dequeue(struct inotify_device *dev) { @@ -346,7 +346,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev) /* * inotify_dev_get_wd - returns the next WD for use by the given dev * - * Callers must hold dev->sem. This function can sleep. + * Callers must hold dev->mutex. This function can sleep. */ static int inotify_dev_get_wd(struct inotify_device *dev, struct inotify_watch *watch) @@ -383,7 +383,7 @@ static int find_inode(const char __user *dirname, struct nameidata *nd, /* * create_watch - creates a watch on the given device. * - * Callers must hold dev->sem. Calls inotify_dev_get_wd() so may sleep. + * Callers must hold dev->mutex. Calls inotify_dev_get_wd() so may sleep. * Both 'dev' and 'inode' (by way of nameidata) need to be pinned. */ static struct inotify_watch *create_watch(struct inotify_device *dev, @@ -434,7 +434,7 @@ static struct inotify_watch *create_watch(struct inotify_device *dev, /* * inotify_find_dev - find the watch associated with the given inode and dev * - * Callers must hold inode->inotify_sem. + * Callers must hold inode->inotify_mutex. */ static struct inotify_watch *inode_find_dev(struct inode *inode, struct inotify_device *dev) @@ -469,7 +469,7 @@ static void remove_watch_no_event(struct inotify_watch *watch, * the IN_IGNORED event to the given device signifying that the inode is no * longer watched. * - * Callers must hold both inode->inotify_sem and dev->sem. We drop a + * Callers must hold both inode->inotify_mutex and dev->mutex. We drop a * reference to the inode before returning. * * The inode is not iput() so as to remain atomic. 
If the inode needs to be @@ -507,21 +507,21 @@ void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie, if (!inotify_inode_watched(inode)) return; - down(&inode->inotify_sem); + mutex_lock(&inode->inotify_mutex); list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { u32 watch_mask = watch->mask; if (watch_mask & mask) { struct inotify_device *dev = watch->dev; get_inotify_watch(watch); - down(&dev->sem); + mutex_lock(&dev->mutex); inotify_dev_queue_event(dev, watch, mask, cookie, name); if (watch_mask & IN_ONESHOT) remove_watch_no_event(watch, dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); put_inotify_watch(watch); } } - up(&inode->inotify_sem); + mutex_unlock(&inode->inotify_mutex); } EXPORT_SYMBOL_GPL(inotify_inode_queue_event); @@ -626,16 +626,16 @@ void inotify_unmount_inodes(struct list_head *list) iput(need_iput_tmp); /* for each watch, send IN_UNMOUNT and then remove it */ - down(&inode->inotify_sem); + mutex_lock(&inode->inotify_mutex); watches = &inode->inotify_watches; list_for_each_entry_safe(watch, next_w, watches, i_list) { struct inotify_device *dev = watch->dev; - down(&dev->sem); + mutex_lock(&dev->mutex); inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL); remove_watch(watch, dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); } - up(&inode->inotify_sem); + mutex_unlock(&inode->inotify_mutex); iput(inode); spin_lock(&inode_lock); @@ -651,14 +651,14 @@ void inotify_inode_is_dead(struct inode *inode) { struct inotify_watch *watch, *next; - down(&inode->inotify_sem); + mutex_lock(&inode->inotify_mutex); list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { struct inotify_device *dev = watch->dev; - down(&dev->sem); + mutex_lock(&dev->mutex); remove_watch(watch, dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); } - up(&inode->inotify_sem); + mutex_unlock(&inode->inotify_mutex); } EXPORT_SYMBOL_GPL(inotify_inode_is_dead); @@ -670,10 +670,10 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait) int ret = 0; poll_wait(file, &dev->wq, wait); - down(&dev->sem); + mutex_lock(&dev->mutex); if (!list_empty(&dev->events)) ret = POLLIN | POLLRDNORM; - up(&dev->sem); + mutex_unlock(&dev->mutex); return ret; } @@ -695,9 +695,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf, prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); - down(&dev->sem); + mutex_lock(&dev->mutex); events = !list_empty(&dev->events); - up(&dev->sem); + mutex_unlock(&dev->mutex); if (events) { ret = 0; break; @@ -720,7 +720,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf, if (ret) return ret; - down(&dev->sem); + mutex_lock(&dev->mutex); while (1) { struct inotify_kernel_event *kevent; @@ -750,7 +750,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf, remove_kevent(dev, kevent); } - up(&dev->sem); + mutex_unlock(&dev->mutex); return ret; } @@ -763,37 +763,37 @@ static int inotify_release(struct inode *ignored, struct file *file) * Destroy all of the watches on this device. Unfortunately, not very * pretty. We cannot do a simple iteration over the list, because we * do not know the inode until we iterate to the watch. But we need to - * hold inode->inotify_sem before dev->sem. The following works. + * hold inode->inotify_mutex before dev->mutex. The following works. 
*/ while (1) { struct inotify_watch *watch; struct list_head *watches; struct inode *inode; - down(&dev->sem); + mutex_lock(&dev->mutex); watches = &dev->watches; if (list_empty(watches)) { - up(&dev->sem); + mutex_unlock(&dev->mutex); break; } watch = list_entry(watches->next, struct inotify_watch, d_list); get_inotify_watch(watch); - up(&dev->sem); + mutex_unlock(&dev->mutex); inode = watch->inode; - down(&inode->inotify_sem); - down(&dev->sem); + mutex_lock(&inode->inotify_mutex); + mutex_lock(&dev->mutex); remove_watch_no_event(watch, dev); - up(&dev->sem); - up(&inode->inotify_sem); + mutex_unlock(&dev->mutex); + mutex_unlock(&inode->inotify_mutex); put_inotify_watch(watch); } /* destroy all of the events on this device */ - down(&dev->sem); + mutex_lock(&dev->mutex); while (!list_empty(&dev->events)) inotify_dev_event_dequeue(dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); /* free this device: the put matching the get in inotify_init() */ put_inotify_dev(dev); @@ -811,26 +811,26 @@ static int inotify_ignore(struct inotify_device *dev, s32 wd) struct inotify_watch *watch; struct inode *inode; - down(&dev->sem); + mutex_lock(&dev->mutex); watch = idr_find(&dev->idr, wd); if (unlikely(!watch)) { - up(&dev->sem); + mutex_unlock(&dev->mutex); return -EINVAL; } get_inotify_watch(watch); inode = watch->inode; - up(&dev->sem); + mutex_unlock(&dev->mutex); - down(&inode->inotify_sem); - down(&dev->sem); + mutex_lock(&inode->inotify_mutex); + mutex_lock(&dev->mutex); /* make sure that we did not race */ watch = idr_find(&dev->idr, wd); if (likely(watch)) remove_watch(watch, dev); - up(&dev->sem); - up(&inode->inotify_sem); + mutex_unlock(&dev->mutex); + mutex_unlock(&inode->inotify_mutex); put_inotify_watch(watch); return 0; @@ -905,7 +905,7 @@ asmlinkage long sys_inotify_init(void) INIT_LIST_HEAD(&dev->events); INIT_LIST_HEAD(&dev->watches); init_waitqueue_head(&dev->wq); - sema_init(&dev->sem, 1); + mutex_init(&dev->mutex); dev->event_count = 0; dev->queue_size = 0; dev->max_events = inotify_max_queued_events; @@ -960,8 +960,8 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) inode = nd.dentry->d_inode; dev = filp->private_data; - down(&inode->inotify_sem); - down(&dev->sem); + mutex_lock(&inode->inotify_mutex); + mutex_lock(&dev->mutex); if (mask & IN_MASK_ADD) mask_add = 1; @@ -998,8 +998,8 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) list_add(&watch->i_list, &inode->inotify_watches); ret = watch->wd; out: - up(&dev->sem); - up(&inode->inotify_sem); + mutex_unlock(&dev->mutex); + mutex_unlock(&inode->inotify_mutex); path_release(&nd); fput_and_out: fput_light(filp, fput_needed); diff --git a/include/linux/fs.h b/include/linux/fs.h index 009ac96053fe..9ed1f36b6d54 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -509,7 +509,7 @@ struct inode { #ifdef CONFIG_INOTIFY struct list_head inotify_watches; /* watches on this inode */ - struct semaphore inotify_sem; /* protects the watches list */ + struct mutex inotify_mutex; /* protects the watches list */ #endif unsigned long i_state; -- cgit v1.2.3 From 144efe3e3e5ad57af549bf800fa4560d7c74e9fe Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 23 Mar 2006 03:00:32 -0800 Subject: [PATCH] sem2mutex: eventpoll Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Cc: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4284cd31eba6..f5d69f46ba9b 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,7 @@ * LOCKING: * There are three level of locking required by epoll : * - * 1) epsem (semaphore) + * 1) epmutex (mutex) * 2) ep->sem (rw_semaphore) * 3) ep->lock (rw_lock) * @@ -67,9 +68,9 @@ * if a file has been pushed inside an epoll set and it is then * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). * It is possible to drop the "ep->sem" and to use the global - * semaphore "epsem" (together with "ep->lock") to have it working, + * semaphore "epmutex" (together with "ep->lock") to have it working, * but having "ep->sem" will make the interface more scalable. - * Events that require holding "epsem" are very rare, while for + * Events that require holding "epmutex" are very rare, while for * normal operations the epoll private "ep->sem" will guarantee * a greater scalability. */ @@ -274,7 +275,7 @@ static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type, /* * This semaphore is used to serialize ep_free() and eventpoll_release_file(). */ -static struct semaphore epsem; +static struct mutex epmutex; /* Safe wake up implementation */ static struct poll_safewake psw; @@ -477,10 +478,10 @@ void eventpoll_release_file(struct file *file) * cleanup path, and this means that noone is using this file anymore. * The only hit might come from ep_free() but by holding the semaphore * will correctly serialize the operation. We do need to acquire - * "ep->sem" after "epsem" because ep_remove() requires it when called + * "ep->sem" after "epmutex" because ep_remove() requires it when called * from anywhere but ep_free(). */ - down(&epsem); + mutex_lock(&epmutex); while (!list_empty(lsthead)) { epi = list_entry(lsthead->next, struct epitem, fllink); @@ -492,7 +493,7 @@ void eventpoll_release_file(struct file *file) up_write(&ep->sem); } - up(&epsem); + mutex_unlock(&epmutex); } @@ -819,9 +820,9 @@ static void ep_free(struct eventpoll *ep) * We do not need to hold "ep->sem" here because the epoll file * is on the way to be removed and no one has references to it * anymore. The only hit might come from eventpoll_release_file() but - * holding "epsem" is sufficent here. + * holding "epmutex" is sufficent here. */ - down(&epsem); + mutex_lock(&epmutex); /* * Walks through the whole tree by unregistering poll callbacks. @@ -843,7 +844,7 @@ static void ep_free(struct eventpoll *ep) ep_remove(ep, epi); } - up(&epsem); + mutex_unlock(&epmutex); } @@ -1615,7 +1616,7 @@ static int __init eventpoll_init(void) { int error; - init_MUTEX(&epsem); + mutex_init(&epmutex); /* Initialize the structure used to perform safe poll wait head wake ups */ ep_poll_safewake_init(&psw); -- cgit v1.2.3 From a11f3a0574a5734db3e5de38922430d005d35118 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 23 Mar 2006 03:00:33 -0800 Subject: [PATCH] sem2mutex: vfs_rename_mutex Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/dir.c | 8 ++++---- fs/cifs/fcntl.c | 4 ++-- fs/cifs/file.c | 4 ++-- fs/cifs/inode.c | 16 ++++++++-------- fs/cifs/link.c | 16 ++++++++-------- fs/cifs/readdir.c | 4 ++-- fs/cifs/xattr.c | 16 ++++++++-------- fs/namei.c | 12 ++++++------ fs/super.c | 2 +- include/linux/fs.h | 2 +- 10 files changed, 42 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index fed55e3c53df..632561dd9c50 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -138,9 +138,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - down(&direntry->d_sb->s_vfs_rename_sem); + mutex_lock(&direntry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&direntry->d_sb->s_vfs_rename_sem); + mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -317,9 +317,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - down(&direntry->d_sb->s_vfs_rename_sem); + mutex_lock(&direntry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&direntry->d_sb->s_vfs_rename_sem); + mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex); if(full_path == NULL) rc = -ENOMEM; else if (pTcon->ses->capabilities & CAP_UNIX) { diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c index a7a47bb36bf3..ec4dfe9bf5ef 100644 --- a/fs/cifs/fcntl.c +++ b/fs/cifs/fcntl.c @@ -86,9 +86,9 @@ int cifs_dir_notify(struct file * file, unsigned long arg) cifs_sb = CIFS_SB(file->f_dentry->d_sb); pTcon = cifs_sb->tcon; - down(&file->f_dentry->d_sb->s_vfs_rename_sem); + mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(file->f_dentry); - up(&file->f_dentry->d_sb->s_vfs_rename_sem); + mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex); if(full_path == NULL) { rc = -ENOMEM; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 675bd2568297..165d67426381 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -203,9 +203,9 @@ int cifs_open(struct inode *inode, struct file *file) } } - down(&inode->i_sb->s_vfs_rename_sem); + mutex_lock(&inode->i_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(file->f_dentry); - up(&inode->i_sb->s_vfs_rename_sem); + mutex_unlock(&inode->i_sb->s_vfs_rename_mutex); if (full_path == NULL) { FreeXid(xid); return -ENOMEM; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 59359911f481..ff93a9f81d1c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -574,9 +574,9 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) /* Unlink can be called from rename so we can not grab the sem here since we deadlock otherwise */ -/* down(&direntry->d_sb->s_vfs_rename_sem);*/ +/* mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);*/ full_path = build_path_from_dentry(direntry); -/* up(&direntry->d_sb->s_vfs_rename_sem);*/ +/* mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);*/ if (full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -718,9 +718,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - down(&inode->i_sb->s_vfs_rename_sem); + mutex_lock(&inode->i_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&inode->i_sb->s_vfs_rename_sem); + 
mutex_unlock(&inode->i_sb->s_vfs_rename_mutex); if (full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -803,9 +803,9 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - down(&inode->i_sb->s_vfs_rename_sem); + mutex_lock(&inode->i_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&inode->i_sb->s_vfs_rename_sem); + mutex_unlock(&inode->i_sb->s_vfs_rename_mutex); if (full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -1137,9 +1137,9 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) rc = 0; } - down(&direntry->d_sb->s_vfs_rename_sem); + mutex_lock(&direntry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&direntry->d_sb->s_vfs_rename_sem); + mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex); if (full_path == NULL) { FreeXid(xid); return -ENOMEM; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 0f99aae33162..8d0da7c87c7b 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -48,10 +48,10 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, /* No need to check for cross device links since server will do that BB note DFS case in future though (when we may have to check) */ - down(&inode->i_sb->s_vfs_rename_sem); + mutex_lock(&inode->i_sb->s_vfs_rename_mutex); fromName = build_path_from_dentry(old_file); toName = build_path_from_dentry(direntry); - up(&inode->i_sb->s_vfs_rename_sem); + mutex_unlock(&inode->i_sb->s_vfs_rename_mutex); if((fromName == NULL) || (toName == NULL)) { rc = -ENOMEM; goto cifs_hl_exit; @@ -103,9 +103,9 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) xid = GetXid(); - down(&direntry->d_sb->s_vfs_rename_sem); + mutex_lock(&direntry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&direntry->d_sb->s_vfs_rename_sem); + mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex); if (!full_path) goto out_no_free; @@ -164,9 +164,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - down(&inode->i_sb->s_vfs_rename_sem); + mutex_lock(&inode->i_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&inode->i_sb->s_vfs_rename_sem); + mutex_unlock(&inode->i_sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); @@ -232,9 +232,9 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) /* BB would it be safe against deadlock to grab this sem even though rename itself grabs the sem and calls lookup? 
*/ -/* down(&inode->i_sb->s_vfs_rename_sem);*/ +/* mutex_lock(&inode->i_sb->s_vfs_rename_mutex);*/ full_path = build_path_from_dentry(direntry); -/* up(&inode->i_sb->s_vfs_rename_sem);*/ +/* mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);*/ if(full_path == NULL) { FreeXid(xid); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 288cc048d37f..edb3b6eb34bc 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -404,9 +404,9 @@ static int initiate_cifs_search(const int xid, struct file *file) if(pTcon == NULL) return -EINVAL; - down(&file->f_dentry->d_sb->s_vfs_rename_sem); + mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(file->f_dentry); - up(&file->f_dentry->d_sb->s_vfs_rename_sem); + mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex); if(full_path == NULL) { return -ENOMEM; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 777e3363c2a4..3938444d87b2 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -62,9 +62,9 @@ int cifs_removexattr(struct dentry * direntry, const char * ea_name) cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; - down(&sb->s_vfs_rename_sem); + mutex_lock(&sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&sb->s_vfs_rename_sem); + mutex_unlock(&sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -116,9 +116,9 @@ int cifs_setxattr(struct dentry * direntry, const char * ea_name, cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; - down(&sb->s_vfs_rename_sem); + mutex_lock(&sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&sb->s_vfs_rename_sem); + mutex_unlock(&sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -223,9 +223,9 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name, cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; - down(&sb->s_vfs_rename_sem); + mutex_lock(&sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&sb->s_vfs_rename_sem); + mutex_unlock(&sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); return -ENOMEM; @@ -341,9 +341,9 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size) cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; - down(&sb->s_vfs_rename_sem); + mutex_lock(&sb->s_vfs_rename_mutex); full_path = build_path_from_dentry(direntry); - up(&sb->s_vfs_rename_sem); + mutex_unlock(&sb->s_vfs_rename_mutex); if(full_path == NULL) { FreeXid(xid); return -ENOMEM; diff --git a/fs/namei.c b/fs/namei.c index 8dc2b038d5d9..c72b940797fc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -104,7 +104,7 @@ */ /* * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) - * implemented. Let's see if raised priority of ->s_vfs_rename_sem gives + * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives * any extra contention... 
*/ @@ -1422,7 +1422,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) return NULL; } - down(&p1->d_inode->i_sb->s_vfs_rename_sem); + mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); for (p = p1; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p2) { @@ -1450,7 +1450,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) mutex_unlock(&p1->d_inode->i_mutex); if (p1 != p2) { mutex_unlock(&p2->d_inode->i_mutex); - up(&p1->d_inode->i_sb->s_vfs_rename_sem); + mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex); } } @@ -2277,17 +2277,17 @@ asmlinkage long sys_link(const char __user *oldname, const char __user *newname) * a) we can get into loop creation. Check is done in is_subdir(). * b) race potential - two innocent renames can create a loop together. * That's where 4.4 screws up. Current fix: serialization on - * sb->s_vfs_rename_sem. We might be more accurate, but that's another + * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. * c) we have to lock _three_ objects - parents and victim (if it exists). * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change - * only under ->s_vfs_rename_sem _and_ that parent of the object we + * only under ->s_vfs_rename_mutex _and_ that parent of the object we * move will be locked. Thus we can rank directories by the tree * (ancestors first) and rank all non-directories after them. * That works since everybody except rename does "lock parent, lookup, - * lock child" and rename is under ->s_vfs_rename_sem. + * lock child" and rename is under ->s_vfs_rename_mutex. * HOWEVER, it relies on the assumption that any object with ->lookup() * has no more than 1 dentry. If "hybrid" objects will ever appear, * we'd better make sure that there's no link(2) for them. diff --git a/fs/super.c b/fs/super.c index 9cc6545dfa4c..425861cb1caa 100644 --- a/fs/super.c +++ b/fs/super.c @@ -76,7 +76,7 @@ static struct super_block *alloc_super(void) down_write(&s->s_umount); s->s_count = S_BIAS; atomic_set(&s->s_active, 1); - sema_init(&s->s_vfs_rename_sem,1); + mutex_init(&s->s_vfs_rename_mutex); mutex_init(&s->s_dquot.dqio_mutex); mutex_init(&s->s_dquot.dqonoff_mutex); init_rwsem(&s->s_dquot.dqptr_sem); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ed1f36b6d54..0f71ee730127 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -847,7 +847,7 @@ struct super_block { * The next field is for VFS *only*. No filesystems have any business * even looking at it. You had been warned. */ - struct semaphore s_vfs_rename_sem; /* Kludge */ + struct mutex s_vfs_rename_mutex; /* Kludge */ /* Granuality of c/m/atime in ns. Cannot be worse than a second */ -- cgit v1.2.3 From f24075bd0c1cd1cc2cf86d394f960aa0401de573 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:34 -0800 Subject: [PATCH] sem2mutex: iprune Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 16 ++++++++-------- fs/inotify.c | 6 +++--- include/linux/fs.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 603e93ef0c6f..25967b67903d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -84,14 +84,14 @@ static struct hlist_head *inode_hashtable; DEFINE_SPINLOCK(inode_lock); /* - * iprune_sem provides exclusion between the kswapd or try_to_free_pages + * iprune_mutex provides exclusion between the kswapd or try_to_free_pages * icache shrinking path, and the umount path. Without this exclusion, * by the time prune_icache calls iput for the inode whose pages it has * been invalidating, or by the time it calls clear_inode & destroy_inode * from its final dispose_list, the struct super_block they refer to * (for inode->i_sb->s_op) may already have been freed and reused. */ -DECLARE_MUTEX(iprune_sem); +DEFINE_MUTEX(iprune_mutex); /* * Statistics gathering.. @@ -319,7 +319,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) /* * We can reschedule here without worrying about the list's * consistency because the per-sb list of inodes must not - * change during umount anymore, and because iprune_sem keeps + * change during umount anymore, and because iprune_mutex keeps * shrink_icache_memory() away. */ cond_resched_lock(&inode_lock); @@ -355,14 +355,14 @@ int invalidate_inodes(struct super_block * sb) int busy; LIST_HEAD(throw_away); - down(&iprune_sem); + mutex_lock(&iprune_mutex); spin_lock(&inode_lock); inotify_unmount_inodes(&sb->s_inodes); busy = invalidate_list(&sb->s_inodes, &throw_away); spin_unlock(&inode_lock); dispose_list(&throw_away); - up(&iprune_sem); + mutex_unlock(&iprune_mutex); return busy; } @@ -377,7 +377,7 @@ int __invalidate_device(struct block_device *bdev) if (sb) { /* * no need to lock the super, get_super holds the - * read semaphore so the filesystem cannot go away + * read mutex so the filesystem cannot go away * under us (->put_super runs with the write lock * hold). */ @@ -423,7 +423,7 @@ static void prune_icache(int nr_to_scan) int nr_scanned; unsigned long reap = 0; - down(&iprune_sem); + mutex_lock(&iprune_mutex); spin_lock(&inode_lock); for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; @@ -459,7 +459,7 @@ static void prune_icache(int nr_to_scan) spin_unlock(&inode_lock); dispose_list(&freeable); - up(&iprune_sem); + mutex_unlock(&iprune_mutex); if (current_is_kswapd()) mod_page_state(kswapd_inodesteal, reap); diff --git a/fs/inotify.c b/fs/inotify.c index 60d9653d55b7..0ee39ef591c6 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -54,7 +54,7 @@ int inotify_max_queued_events; * Lock ordering: * * dentry->d_lock (used to keep d_move() away from dentry->d_parent) - * iprune_sem (synchronize shrink_icache_memory()) + * iprune_mutex (synchronize shrink_icache_memory()) * inode_lock (protects the super_block->s_inodes list) * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list) * inotify_dev->mutex (protects inotify_device and watches->d_list) @@ -569,7 +569,7 @@ EXPORT_SYMBOL_GPL(inotify_get_cookie); * @list: list of inodes being unmounted (sb->s_inodes) * * Called with inode_lock held, protecting the unmounting super block's list - * of inodes, and with iprune_sem held, keeping shrink_icache_memory() at bay. + * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. 
* We temporarily drop inode_lock, however, and CAN block. */ void inotify_unmount_inodes(struct list_head *list) @@ -618,7 +618,7 @@ void inotify_unmount_inodes(struct list_head *list) * We can safely drop inode_lock here because we hold * references on both inode and next_i. Also no new inodes * will be added since the umount has begun. Finally, - * iprune_sem keeps shrink_icache_memory() away. + * iprune_mutex keeps shrink_icache_memory() away. */ spin_unlock(&inode_lock); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0f71ee730127..8fd8d9b90b00 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1534,7 +1534,7 @@ extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern int remove_suid(struct dentry *); extern void remove_dquot_ref(struct super_block *, int, struct list_head *); -extern struct semaphore iprune_sem; +extern struct mutex iprune_mutex; extern void __insert_inode_hash(struct inode *, unsigned long hashval); extern void remove_inode_hash(struct inode *); -- cgit v1.2.3 From 2c68ee754c40099c59828e59618a54726f76126a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 23 Mar 2006 03:00:35 -0800 Subject: [PATCH] sem2mutex: jbd, j_checkpoint_mutex Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/checkpoint.c | 4 ++-- fs/jbd/journal.c | 4 ++-- fs/jbd/transaction.c | 4 ++-- include/linux/jbd.h | 7 ++++--- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 543ed543d1e5..3f5102b069db 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -85,7 +85,7 @@ void __log_wait_for_space(journal_t *journal) if (journal->j_flags & JFS_ABORT) return; spin_unlock(&journal->j_state_lock); - down(&journal->j_checkpoint_sem); + mutex_lock(&journal->j_checkpoint_mutex); /* * Test again, another process may have checkpointed while we @@ -98,7 +98,7 @@ void __log_wait_for_space(journal_t *journal) log_do_checkpoint(journal); spin_lock(&journal->j_state_lock); } - up(&journal->j_checkpoint_sem); + mutex_unlock(&journal->j_checkpoint_mutex); } } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index e4b516ac4989..95a628d8cac8 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -659,8 +659,8 @@ static journal_t * journal_init_common (void) init_waitqueue_head(&journal->j_wait_checkpoint); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); - init_MUTEX(&journal->j_barrier); - init_MUTEX(&journal->j_checkpoint_sem); + mutex_init(&journal->j_barrier); + mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_list_lock); spin_lock_init(&journal->j_state_lock); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index ca917973c2c0..5fc40888f4cf 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -455,7 +455,7 @@ void journal_lock_updates(journal_t *journal) * to make sure that we serialise special journal-locked operations * too. 
*/ - down(&journal->j_barrier); + mutex_lock(&journal->j_barrier); } /** @@ -470,7 +470,7 @@ void journal_unlock_updates (journal_t *journal) { J_ASSERT(journal->j_barrier_count != 0); - up(&journal->j_barrier); + mutex_unlock(&journal->j_barrier); spin_lock(&journal->j_state_lock); --journal->j_barrier_count; spin_unlock(&journal->j_state_lock); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 41ee79962bb2..2ccbfb6340ba 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #endif @@ -575,7 +576,7 @@ struct transaction_s * @j_wait_checkpoint: Wait queue to trigger checkpointing * @j_wait_commit: Wait queue to trigger commit * @j_wait_updates: Wait queue to wait for updates to complete - * @j_checkpoint_sem: Semaphore for locking against concurrent checkpoints + * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints * @j_head: Journal head - identifies the first unused block in the journal * @j_tail: Journal tail - identifies the oldest still-used block in the * journal. @@ -645,7 +646,7 @@ struct journal_s int j_barrier_count; /* The barrier lock itself */ - struct semaphore j_barrier; + struct mutex j_barrier; /* * Transactions: The current running transaction... @@ -687,7 +688,7 @@ struct journal_s wait_queue_head_t j_wait_updates; /* Semaphore for locking against concurrent checkpoints */ - struct semaphore j_checkpoint_sem; + struct mutex j_checkpoint_mutex; /* * Journal head: identifies the first unused block in the journal. -- cgit v1.2.3 From 7cf34c761db8827818a27e23c50756f8b821a9b0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:36 -0800 Subject: [PATCH] sem2mutex: fs/libfs.c Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/libfs.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/libfs.c b/fs/libfs.c index 71fd08fa4103..4fdeaceb892c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -7,6 +7,8 @@ #include #include #include +#include + #include int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, @@ -530,7 +532,7 @@ struct simple_attr { char set_buf[24]; void *data; const char *fmt; /* format for read operation */ - struct semaphore sem; /* protects access to these buffers */ + struct mutex mutex; /* protects access to these buffers */ }; /* simple_attr_open is called by an actual attribute open file operation @@ -549,7 +551,7 @@ int simple_attr_open(struct inode *inode, struct file *file, attr->set = set; attr->data = inode->u.generic_ip; attr->fmt = fmt; - init_MUTEX(&attr->sem); + mutex_init(&attr->mutex); file->private_data = attr; @@ -575,7 +577,7 @@ ssize_t simple_attr_read(struct file *file, char __user *buf, if (!attr->get) return -EACCES; - down(&attr->sem); + mutex_lock(&attr->mutex); if (*ppos) /* continued read */ size = strlen(attr->get_buf); else /* first read */ @@ -584,7 +586,7 @@ ssize_t simple_attr_read(struct file *file, char __user *buf, (unsigned long long)attr->get(attr->data)); ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); - up(&attr->sem); + mutex_unlock(&attr->mutex); return ret; } @@ -602,7 +604,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, if (!attr->set) return -EACCES; - down(&attr->sem); + mutex_lock(&attr->mutex); ret = -EFAULT; size = min(sizeof(attr->set_buf) - 1, len); if (copy_from_user(attr->set_buf, buf, size)) @@ -613,7 +615,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, val = simple_strtol(attr->set_buf, NULL, 0); attr->set(attr->data, val); out: - up(&attr->sem); + mutex_unlock(&attr->mutex); return ret; } -- cgit v1.2.3 From 0ac1759abc69fb62438c30a7e422f628a1120d67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:37 -0800 Subject: [PATCH] sem2mutex: fs/seq_file.c Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/seq_file.c | 10 +++++----- include/linux/seq_file.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/seq_file.c b/fs/seq_file.c index 7c40570b71dc..555b9ac04c25 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -37,7 +37,7 @@ int seq_open(struct file *file, struct seq_operations *op) file->private_data = p; } memset(p, 0, sizeof(*p)); - sema_init(&p->sem, 1); + mutex_init(&p->lock); p->op = op; /* @@ -71,7 +71,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) void *p; int err = 0; - down(&m->sem); + mutex_lock(&m->lock); /* * seq_file->op->..m_start/m_stop/m_next may do special actions * or optimisations based on the file->f_version, so we want to @@ -164,7 +164,7 @@ Done: else *ppos += copied; file->f_version = m->version; - up(&m->sem); + mutex_unlock(&m->lock); return copied; Enomem: err = -ENOMEM; @@ -237,7 +237,7 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin) struct seq_file *m = (struct seq_file *)file->private_data; long long retval = -EINVAL; - down(&m->sem); + mutex_lock(&m->lock); m->version = file->f_version; switch (origin) { case 1: @@ -260,7 +260,7 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin) } } } - up(&m->sem); + mutex_unlock(&m->lock); file->f_version = m->version; return retval; } diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 850a974ee505..b95f6eb7254c 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -4,7 +4,7 @@ #include #include -#include +#include struct seq_operations; struct file; @@ -19,7 +19,7 @@ struct seq_file { size_t count; loff_t index; loff_t version; - struct semaphore sem; + struct mutex lock; struct seq_operations *op; void *private; }; -- cgit v1.2.3 From 1eb0d67007e75697a7b87e6b611be935a991395c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:40 -0800 Subject: [PATCH] sem2mutex: JFFS Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
Signed-off-by: Ingo Molnar Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jffs/inode-v23.c | 86 ++++++++++++++++++++++++++--------------------------- fs/jffs/intrep.c | 6 ++-- fs/jffs/jffs_fm.c | 2 +- fs/jffs/jffs_fm.h | 5 ++-- 4 files changed, 50 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index fc3855a1aef3..890d7ff7456d 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include @@ -203,7 +203,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr) fmc = c->fmc; D3(printk (KERN_NOTICE "notify_change(): down biglock\n")); - down(&fmc->biglock); + mutex_lock(&fmc->biglock); f = jffs_find_file(c, inode->i_ino); @@ -211,7 +211,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr) printk("jffs_setattr(): Invalid inode number: %lu\n", inode->i_ino); D3(printk (KERN_NOTICE "notify_change(): up biglock\n")); - up(&fmc->biglock); + mutex_unlock(&fmc->biglock); res = -EINVAL; goto out; }); @@ -232,7 +232,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr) if (!(new_node = jffs_alloc_node())) { D(printk("jffs_setattr(): Allocation failed!\n")); D3(printk (KERN_NOTICE "notify_change(): up biglock\n")); - up(&fmc->biglock); + mutex_unlock(&fmc->biglock); res = -ENOMEM; goto out; } @@ -319,7 +319,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr) D(printk("jffs_notify_change(): The write failed!\n")); jffs_free_node(new_node); D3(printk (KERN_NOTICE "n_c(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); goto out; } @@ -327,7 +327,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr) mark_inode_dirty(inode); D3(printk (KERN_NOTICE "n_c(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); out: unlock_kernel(); return res; @@ -461,7 +461,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry, goto jffs_rename_end; } D3(printk (KERN_NOTICE "rename(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); /* Create a node and initialize as much as needed. 
*/ result = -ENOMEM; if (!(node = jffs_alloc_node())) { @@ -555,7 +555,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry, jffs_rename_end: D3(printk (KERN_NOTICE "rename(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return result; } /* jffs_rename() */ @@ -574,14 +574,14 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir) int ddino; lock_kernel(); D3(printk (KERN_NOTICE "readdir(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); D2(printk("jffs_readdir(): inode: 0x%p, filp: 0x%p\n", inode, filp)); if (filp->f_pos == 0) { D3(printk("jffs_readdir(): \".\" %lu\n", inode->i_ino)); if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino, DT_DIR) < 0) { D3(printk (KERN_NOTICE "readdir(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return 0; } @@ -598,7 +598,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir) D3(printk("jffs_readdir(): \"..\" %u\n", ddino)); if (filldir(dirent, "..", 2, filp->f_pos, ddino, DT_DIR) < 0) { D3(printk (KERN_NOTICE "readdir(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return 0; } @@ -617,7 +617,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (filldir(dirent, f->name, f->nsize, filp->f_pos , f->ino, DT_UNKNOWN) < 0) { D3(printk (KERN_NOTICE "readdir(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return 0; } @@ -627,7 +627,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir) } while(f && f->deleted); } D3(printk (KERN_NOTICE "readdir(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return filp->f_pos; } /* jffs_readdir() */ @@ -660,7 +660,7 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) }); D3(printk (KERN_NOTICE "lookup(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); r = -ENAMETOOLONG; if (len > JFFS_MAX_NAME_LEN) { @@ -683,31 +683,31 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if ((len == 1) && (name[0] == '.')) { D3(printk (KERN_NOTICE "lookup(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); if (!(inode = iget(dir->i_sb, d->ino))) { D(printk("jffs_lookup(): . iget() ==> NULL\n")); goto jffs_lookup_end_no_biglock; } D3(printk (KERN_NOTICE "lookup(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); } else if ((len == 2) && (name[0] == '.') && (name[1] == '.')) { D3(printk (KERN_NOTICE "lookup(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); if (!(inode = iget(dir->i_sb, d->pino))) { D(printk("jffs_lookup(): .. iget() ==> NULL\n")); goto jffs_lookup_end_no_biglock; } D3(printk (KERN_NOTICE "lookup(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); } else if ((f = jffs_find_child(d, name, len))) { D3(printk (KERN_NOTICE "lookup(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); if (!(inode = iget(dir->i_sb, f->ino))) { D(printk("jffs_lookup(): iget() ==> NULL\n")); goto jffs_lookup_end_no_biglock; } D3(printk (KERN_NOTICE "lookup(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); } else { D3(printk("jffs_lookup(): Couldn't find the file. 
" "f = 0x%p, name = \"%s\", d = 0x%p, d->ino = %u\n", @@ -717,13 +717,13 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) d_add(dentry, inode); D3(printk (KERN_NOTICE "lookup(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return NULL; jffs_lookup_end: D3(printk (KERN_NOTICE "lookup(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); jffs_lookup_end_no_biglock: unlock_kernel(); @@ -753,7 +753,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page) ClearPageError(page); D3(printk (KERN_NOTICE "readpage(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); read_len = 0; result = 0; @@ -782,7 +782,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page) kunmap(page); D3(printk (KERN_NOTICE "readpage(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); if (result) { SetPageError(page); @@ -839,7 +839,7 @@ jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode) c = dir_f->c; D3(printk (KERN_NOTICE "mkdir(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); dir_mode = S_IFDIR | (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask); @@ -906,7 +906,7 @@ jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode) result = 0; jffs_mkdir_end: D3(printk (KERN_NOTICE "mkdir(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return result; } /* jffs_mkdir() */ @@ -921,10 +921,10 @@ jffs_rmdir(struct inode *dir, struct dentry *dentry) D3(printk("***jffs_rmdir()\n")); D3(printk (KERN_NOTICE "rmdir(): down biglock\n")); lock_kernel(); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); ret = jffs_remove(dir, dentry, S_IFDIR); D3(printk (KERN_NOTICE "rmdir(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return ret; } @@ -940,10 +940,10 @@ jffs_unlink(struct inode *dir, struct dentry *dentry) lock_kernel(); D3(printk("***jffs_unlink()\n")); D3(printk (KERN_NOTICE "unlink(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); ret = jffs_remove(dir, dentry, 0); D3(printk (KERN_NOTICE "unlink(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return ret; } @@ -1086,7 +1086,7 @@ jffs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) c = dir_f->c; D3(printk (KERN_NOTICE "mknod(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); /* Create and initialize a new node. 
*/ if (!(node = jffs_alloc_node())) { @@ -1152,7 +1152,7 @@ jffs_mknod_err: jffs_mknod_end: D3(printk (KERN_NOTICE "mknod(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return result; } /* jffs_mknod() */ @@ -1203,7 +1203,7 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) return -ENOMEM; } D3(printk (KERN_NOTICE "symlink(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); node->data_offset = 0; node->removed_size = 0; @@ -1253,7 +1253,7 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) d_instantiate(dentry, inode); jffs_symlink_end: D3(printk (KERN_NOTICE "symlink(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return err; } /* jffs_symlink() */ @@ -1306,7 +1306,7 @@ jffs_create(struct inode *dir, struct dentry *dentry, int mode, return -ENOMEM; } D3(printk (KERN_NOTICE "create(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); node->data_offset = 0; node->removed_size = 0; @@ -1359,7 +1359,7 @@ jffs_create(struct inode *dir, struct dentry *dentry, int mode, d_instantiate(dentry, inode); jffs_create_end: D3(printk (KERN_NOTICE "create(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); unlock_kernel(); return err; } /* jffs_create() */ @@ -1423,7 +1423,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count, thiscount = min(c->fmc->max_chunk_size - sizeof(struct jffs_raw_inode), count); D3(printk (KERN_NOTICE "file_write(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); /* Urgh. POSIX says we can do short writes if we feel like it. * In practice, we can't. Nothing will cope. So we loop until @@ -1511,7 +1511,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count, } out: D3(printk (KERN_NOTICE "file_write(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); /* Fix things in the real inode. 
*/ if (pos > inode->i_size) { @@ -1567,7 +1567,7 @@ jffs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, return -EIO; } D3(printk (KERN_NOTICE "ioctl(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); switch (cmd) { case JFFS_PRINT_HASH: @@ -1609,7 +1609,7 @@ jffs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, ret = -ENOTTY; } D3(printk (KERN_NOTICE "ioctl(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); return ret; } /* jffs_ioctl() */ @@ -1685,12 +1685,12 @@ jffs_read_inode(struct inode *inode) } c = (struct jffs_control *)inode->i_sb->s_fs_info; D3(printk (KERN_NOTICE "read_inode(): down biglock\n")); - down(&c->fmc->biglock); + mutex_lock(&c->fmc->biglock); if (!(f = jffs_find_file(c, inode->i_ino))) { D(printk("jffs_read_inode(): No such inode (%lu).\n", inode->i_ino)); D3(printk (KERN_NOTICE "read_inode(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); return; } inode->u.generic_ip = (void *)f; @@ -1732,7 +1732,7 @@ jffs_read_inode(struct inode *inode) } D3(printk (KERN_NOTICE "read_inode(): up biglock\n")); - up(&c->fmc->biglock); + mutex_unlock(&c->fmc->biglock); } diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index ce7b54b0b2b7..0ef207dfaf6f 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -62,7 +62,7 @@ #include #include #include -#include +#include #include #include #include @@ -3416,7 +3416,7 @@ jffs_garbage_collect_thread(void *ptr) D1(printk (KERN_NOTICE "jffs_garbage_collect_thread(): collecting.\n")); D3(printk (KERN_NOTICE "g_c_thread(): down biglock\n")); - down(&fmc->biglock); + mutex_lock(&fmc->biglock); D1(printk("***jffs_garbage_collect_thread(): round #%u, " "fmc->dirty_size = %u\n", i++, fmc->dirty_size)); @@ -3447,6 +3447,6 @@ jffs_garbage_collect_thread(void *ptr) gc_end: D3(printk (KERN_NOTICE "g_c_thread(): up biglock\n")); - up(&fmc->biglock); + mutex_unlock(&fmc->biglock); } /* for (;;) */ } /* jffs_garbage_collect_thread() */ diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c index 6da13b309bd1..7d8ca1aeace2 100644 --- a/fs/jffs/jffs_fm.c +++ b/fs/jffs/jffs_fm.c @@ -139,7 +139,7 @@ jffs_build_begin(struct jffs_control *c, int unit) fmc->tail = NULL; fmc->head_extra = NULL; fmc->tail_extra = NULL; - init_MUTEX(&fmc->biglock); + mutex_init(&fmc->biglock); return fmc; } diff --git a/fs/jffs/jffs_fm.h b/fs/jffs/jffs_fm.h index f64151e74122..c794d923df2a 100644 --- a/fs/jffs/jffs_fm.h +++ b/fs/jffs/jffs_fm.h @@ -20,10 +20,11 @@ #ifndef __LINUX_JFFS_FM_H__ #define __LINUX_JFFS_FM_H__ +#include #include #include #include -#include +#include /* The alignment between two nodes in the flash memory. */ #define JFFS_ALIGN_SIZE 4 @@ -97,7 +98,7 @@ struct jffs_fmcontrol struct jffs_fm *tail; struct jffs_fm *head_extra; struct jffs_fm *tail_extra; - struct semaphore biglock; + struct mutex biglock; }; /* Notice the two members head_extra and tail_extra in the jffs_control -- cgit v1.2.3 From 1d5599e397dcc7c2300d200e66dad326d7dbac38 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:41 -0800 Subject: [PATCH] sem2mutex: autofs4 wq_sem Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
Signed-off-by: Ingo Molnar Acked-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 3 ++- fs/autofs4/inode.c | 2 +- fs/autofs4/waitq.c | 16 ++++++++-------- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 385bed09b0d8..f54c5b21f876 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -13,6 +13,7 @@ /* Internal header file for autofs */ #include +#include #include /* This is the range of ioctl() numbers we claim as ours */ @@ -102,7 +103,7 @@ struct autofs_sb_info { int reghost_enabled; int needs_reghost; struct super_block *sb; - struct semaphore wq_sem; + struct mutex wq_mutex; spinlock_t fs_lock; struct autofs_wait_queue *queues; /* Wait queue pointer */ }; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 2d3082854a29..1ad98d48e550 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -269,7 +269,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; - init_MUTEX(&sbi->wq_sem); + mutex_init(&sbi->wq_mutex); spin_lock_init(&sbi->fs_lock); sbi->queues = NULL; s->s_blocksize = 1024; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 394ff36ef8f1..be78e9378c03 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -178,7 +178,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, return -ENOENT; } - if (down_interruptible(&sbi->wq_sem)) { + if (mutex_lock_interruptible(&sbi->wq_mutex)) { kfree(name); return -EINTR; } @@ -194,7 +194,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, /* Can't wait for an expire if there's no mount */ if (notify == NFY_NONE && !d_mountpoint(dentry)) { kfree(name); - up(&sbi->wq_sem); + mutex_unlock(&sbi->wq_mutex); return -ENOENT; } @@ -202,7 +202,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); if ( !wq ) { kfree(name); - up(&sbi->wq_sem); + mutex_unlock(&sbi->wq_mutex); return -ENOMEM; } @@ -218,10 +218,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->status = -EINTR; /* Status return if interrupted */ atomic_set(&wq->wait_ctr, 2); atomic_set(&wq->notified, 1); - up(&sbi->wq_sem); + mutex_unlock(&sbi->wq_mutex); } else { atomic_inc(&wq->wait_ctr); - up(&sbi->wq_sem); + mutex_unlock(&sbi->wq_mutex); kfree(name); DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); @@ -282,19 +282,19 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok { struct autofs_wait_queue *wq, **wql; - down(&sbi->wq_sem); + mutex_lock(&sbi->wq_mutex); for ( wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next ) { if ( wq->wait_queue_token == wait_queue_token ) break; } if ( !wq ) { - up(&sbi->wq_sem); + mutex_unlock(&sbi->wq_mutex); return -EINVAL; } *wql = wq->next; /* Unlink from chain */ - up(&sbi->wq_sem); + mutex_unlock(&sbi->wq_mutex); kfree(wq->name); wq->name = NULL; /* Do not wait on this queue */ -- cgit v1.2.3 From 7bf6d78dd93ccc52cd2cac5066c4b84834e4f1f2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:42 -0800 Subject: [PATCH] sem2mutex: HPFS Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hpfs/hpfs_fn.h | 5 +++-- fs/hpfs/inode.c | 10 +++++----- fs/hpfs/namei.c | 60 +++++++++++++++++++++++++++---------------------------- fs/hpfs/super.c | 4 ++-- 4 files changed, 40 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 6628c3b352cb..4c6473ab3b34 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -9,6 +9,7 @@ //#define DBG //#define DEBUG_LOCKS +#include #include #include #include @@ -57,8 +58,8 @@ struct hpfs_inode_info { unsigned i_ea_uid : 1; /* file's uid is stored in ea */ unsigned i_ea_gid : 1; /* file's gid is stored in ea */ unsigned i_dirty : 1; - struct semaphore i_sem; - struct semaphore i_parent; + struct mutex i_mutex; + struct mutex i_parent_mutex; loff_t **i_rddir_off; struct inode vfs_inode; }; diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index e3d17e9ea6c1..56f2c338c4d9 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -186,9 +186,9 @@ void hpfs_write_inode(struct inode *i) kfree(hpfs_inode->i_rddir_off); hpfs_inode->i_rddir_off = NULL; } - down(&hpfs_inode->i_parent); + mutex_lock(&hpfs_inode->i_parent_mutex); if (!i->i_nlink) { - up(&hpfs_inode->i_parent); + mutex_unlock(&hpfs_inode->i_parent_mutex); return; } parent = iget_locked(i->i_sb, hpfs_inode->i_parent_dir); @@ -199,14 +199,14 @@ void hpfs_write_inode(struct inode *i) hpfs_read_inode(parent); unlock_new_inode(parent); } - down(&hpfs_inode->i_sem); + mutex_lock(&hpfs_inode->i_mutex); hpfs_write_inode_nolock(i); - up(&hpfs_inode->i_sem); + mutex_unlock(&hpfs_inode->i_mutex); iput(parent); } else { mark_inode_dirty(i); } - up(&hpfs_inode->i_parent); + mutex_unlock(&hpfs_inode->i_parent_mutex); } void hpfs_write_inode_nolock(struct inode *i) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 8ff8fc433fc1..a03abb12c610 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -60,7 +60,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (dee.read_only) result->i_mode &= ~0222; - down(&hpfs_i(dir)->i_sem); + mutex_lock(&hpfs_i(dir)->i_mutex); r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0); if (r == 1) goto bail3; @@ -101,11 +101,11 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) hpfs_write_inode_nolock(result); } d_instantiate(dentry, result); - up(&hpfs_i(dir)->i_sem); + mutex_unlock(&hpfs_i(dir)->i_mutex); unlock_kernel(); return 0; bail3: - up(&hpfs_i(dir)->i_sem); + mutex_unlock(&hpfs_i(dir)->i_mutex); iput(result); bail2: hpfs_brelse4(&qbh0); @@ -168,7 +168,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc result->i_data.a_ops = &hpfs_aops; hpfs_i(result)->mmu_private = 0; - down(&hpfs_i(dir)->i_sem); + mutex_lock(&hpfs_i(dir)->i_mutex); r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0); if (r == 1) goto bail2; @@ -193,12 +193,12 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc hpfs_write_inode_nolock(result); } d_instantiate(dentry, result); - up(&hpfs_i(dir)->i_sem); + mutex_unlock(&hpfs_i(dir)->i_mutex); unlock_kernel(); return 0; bail2: - up(&hpfs_i(dir)->i_sem); + mutex_unlock(&hpfs_i(dir)->i_mutex); iput(result); bail1: brelse(bh); @@ -254,7 +254,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t result->i_blocks = 1; init_special_inode(result, mode, rdev); - down(&hpfs_i(dir)->i_sem); + mutex_lock(&hpfs_i(dir)->i_mutex); r = hpfs_add_dirent(dir, (char *)name, len, &dee, 
0); if (r == 1) goto bail2; @@ -271,12 +271,12 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t hpfs_write_inode_nolock(result); d_instantiate(dentry, result); - up(&hpfs_i(dir)->i_sem); + mutex_unlock(&hpfs_i(dir)->i_mutex); brelse(bh); unlock_kernel(); return 0; bail2: - up(&hpfs_i(dir)->i_sem); + mutex_unlock(&hpfs_i(dir)->i_mutex); iput(result); bail1: brelse(bh); @@ -333,7 +333,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy result->i_op = &page_symlink_inode_operations; result->i_data.a_ops = &hpfs_symlink_aops; - down(&hpfs_i(dir)->i_sem); + mutex_lock(&hpfs_i(dir)->i_mutex); r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0); if (r == 1) goto bail2; @@ -352,11 +352,11 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy hpfs_write_inode_nolock(result); d_instantiate(dentry, result); - up(&hpfs_i(dir)->i_sem); + mutex_unlock(&hpfs_i(dir)->i_mutex); unlock_kernel(); return 0; bail2: - up(&hpfs_i(dir)->i_sem); + mutex_unlock(&hpfs_i(dir)->i_mutex); iput(result); bail1: brelse(bh); @@ -382,8 +382,8 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) lock_kernel(); hpfs_adjust_length((char *)name, &len); again: - down(&hpfs_i(inode)->i_parent); - down(&hpfs_i(dir)->i_sem); + mutex_lock(&hpfs_i(inode)->i_parent_mutex); + mutex_lock(&hpfs_i(dir)->i_mutex); err = -ENOENT; de = map_dirent(dir, hpfs_i(dir)->i_dno, (char *)name, len, &dno, &qbh); if (!de) @@ -410,8 +410,8 @@ again: if (rep++) break; - up(&hpfs_i(dir)->i_sem); - up(&hpfs_i(inode)->i_parent); + mutex_unlock(&hpfs_i(dir)->i_mutex); + mutex_unlock(&hpfs_i(inode)->i_parent_mutex); d_drop(dentry); spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) > 1 || @@ -442,8 +442,8 @@ again: out1: hpfs_brelse4(&qbh); out: - up(&hpfs_i(dir)->i_sem); - up(&hpfs_i(inode)->i_parent); + mutex_unlock(&hpfs_i(dir)->i_mutex); + mutex_unlock(&hpfs_i(inode)->i_parent_mutex); unlock_kernel(); return err; } @@ -463,8 +463,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) hpfs_adjust_length((char *)name, &len); lock_kernel(); - down(&hpfs_i(inode)->i_parent); - down(&hpfs_i(dir)->i_sem); + mutex_lock(&hpfs_i(inode)->i_parent_mutex); + mutex_lock(&hpfs_i(dir)->i_mutex); err = -ENOENT; de = map_dirent(dir, hpfs_i(dir)->i_dno, (char *)name, len, &dno, &qbh); if (!de) @@ -502,8 +502,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) out1: hpfs_brelse4(&qbh); out: - up(&hpfs_i(dir)->i_sem); - up(&hpfs_i(inode)->i_parent); + mutex_unlock(&hpfs_i(dir)->i_mutex); + mutex_unlock(&hpfs_i(inode)->i_parent_mutex); unlock_kernel(); return err; } @@ -565,12 +565,12 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, lock_kernel(); /* order doesn't matter, due to VFS exclusion */ - down(&hpfs_i(i)->i_parent); + mutex_lock(&hpfs_i(i)->i_parent_mutex); if (new_inode) - down(&hpfs_i(new_inode)->i_parent); - down(&hpfs_i(old_dir)->i_sem); + mutex_lock(&hpfs_i(new_inode)->i_parent_mutex); + mutex_lock(&hpfs_i(old_dir)->i_mutex); if (new_dir != old_dir) - down(&hpfs_i(new_dir)->i_sem); + mutex_lock(&hpfs_i(new_dir)->i_mutex); /* Erm? 
Moving over the empty non-busy directory is perfectly legal */ if (new_inode && S_ISDIR(new_inode->i_mode)) { @@ -650,11 +650,11 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, hpfs_decide_conv(i, (char *)new_name, new_len); end1: if (old_dir != new_dir) - up(&hpfs_i(new_dir)->i_sem); - up(&hpfs_i(old_dir)->i_sem); - up(&hpfs_i(i)->i_parent); + mutex_unlock(&hpfs_i(new_dir)->i_mutex); + mutex_unlock(&hpfs_i(old_dir)->i_mutex); + mutex_unlock(&hpfs_i(i)->i_parent_mutex); if (new_inode) - up(&hpfs_i(new_inode)->i_parent); + mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex); unlock_kernel(); return err; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 63e88d7e2c3b..9488a794076e 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -181,8 +181,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { - init_MUTEX(&ei->i_sem); - init_MUTEX(&ei->i_parent); + mutex_init(&ei->i_mutex); + mutex_init(&ei->i_parent_mutex); inode_init_once(&ei->vfs_inode); } } -- cgit v1.2.3 From 97461518610fb1679f67333bb699bb81136e49fe Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 23 Mar 2006 03:00:42 -0800 Subject: [PATCH] convert ext3's truncate_sem to a mutex ext3's truncate_sem is always released in the same function it's taken and it otherwise is a mutex as well.. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/file.c | 4 ++-- fs/ext3/inode.c | 14 +++++++------- fs/ext3/ioctl.c | 4 ++-- fs/ext3/super.c | 2 +- include/linux/ext3_fs_i.h | 7 ++++--- 5 files changed, 16 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 98e78345ead9..59098ea56711 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -37,9 +37,9 @@ static int ext3_release_file (struct inode * inode, struct file * filp) if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1)) { - down(&EXT3_I(inode)->truncate_sem); + mutex_lock(&EXT3_I(inode)->truncate_mutex); ext3_discard_reservation(inode); - up(&EXT3_I(inode)->truncate_sem); + mutex_unlock(&EXT3_I(inode)->truncate_mutex); } if (is_dx(inode) && filp->private_data) ext3_htree_free_dir_info(filp->private_data); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index d59d5a667b0b..2c361377e0a5 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -702,7 +702,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, if (!create || err == -EIO) goto cleanup; - down(&ei->truncate_sem); + mutex_lock(&ei->truncate_mutex); /* * If the indirect block is missing while we are reading @@ -723,7 +723,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, } partial = ext3_get_branch(inode, depth, offsets, chain, &err); if (!partial) { - up(&ei->truncate_sem); + mutex_unlock(&ei->truncate_mutex); if (err) goto cleanup; clear_buffer_new(bh_result); @@ -759,13 +759,13 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, err = ext3_splice_branch(handle, inode, iblock, chain, partial, left); /* - * i_disksize growing is protected by truncate_sem. Don't forget to + * i_disksize growing is protected by truncate_mutex. 
Don't forget to * protect it if you're about to implement concurrent * ext3_get_block() -bzzz */ if (!err && extend_disksize && inode->i_size > ei->i_disksize) ei->i_disksize = inode->i_size; - up(&ei->truncate_sem); + mutex_unlock(&ei->truncate_mutex); if (err) goto cleanup; @@ -1227,7 +1227,7 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ... * * Same applies to ext3_get_block(). We will deadlock on various things like - * lock_journal and i_truncate_sem. + * lock_journal and i_truncate_mutex. * * Setting PF_MEMALLOC here doesn't work - too many internal memory * allocations fail. @@ -2161,7 +2161,7 @@ void ext3_truncate(struct inode * inode) * From here we block out all ext3_get_block() callers who want to * modify the block allocation tree. */ - down(&ei->truncate_sem); + mutex_lock(&ei->truncate_mutex); if (n == 1) { /* direct blocks */ ext3_free_data(handle, inode, NULL, i_data+offsets[0], @@ -2228,7 +2228,7 @@ do_indirects: ext3_discard_reservation(inode); - up(&ei->truncate_sem); + mutex_unlock(&ei->truncate_mutex); inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, inode); diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 556cd5510078..aaf1da17b6d4 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -182,7 +182,7 @@ flags_err: * need to allocate reservation structure for this inode * before set the window size */ - down(&ei->truncate_sem); + mutex_lock(&ei->truncate_mutex); if (!ei->i_block_alloc_info) ext3_init_block_alloc_info(inode); @@ -190,7 +190,7 @@ flags_err: struct ext3_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; rsv->rsv_goal_size = rsv_window_size; } - up(&ei->truncate_sem); + mutex_unlock(&ei->truncate_mutex); return 0; } case EXT3_IOC_GROUP_EXTEND: { diff --git a/fs/ext3/super.c b/fs/ext3/super.c index efa832059143..efe5b20d7a5a 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -472,7 +472,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) #ifdef CONFIG_EXT3_FS_XATTR init_rwsem(&ei->xattr_sem); #endif - init_MUTEX(&ei->truncate_sem); + mutex_init(&ei->truncate_mutex); inode_init_once(&ei->vfs_inode); } } diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h index e71dd98dbcae..7abf90147180 100644 --- a/include/linux/ext3_fs_i.h +++ b/include/linux/ext3_fs_i.h @@ -19,6 +19,7 @@ #include #include #include +#include struct ext3_reserve_window { __u32 _rsv_start; /* First byte reserved */ @@ -122,16 +123,16 @@ struct ext3_inode_info { __u16 i_extra_isize; /* - * truncate_sem is for serialising ext3_truncate() against + * truncate_mutex is for serialising ext3_truncate() against * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's * data tree are chopped off during truncate. We can't do that in * ext3 because whenever we perform intermediate commits during * truncate, the inode and all the metadata blocks *must* be in a * consistent state which allows truncation of the orphans to restart * during recovery. Hence we must fix the get_block-vs-truncate race - * by other means, so we have truncate_sem. + * by other means, so we have truncate_mutex. */ - struct semaphore truncate_sem; + struct mutex truncate_mutex; struct inode vfs_inode; }; -- cgit v1.2.3 From 8e3f90459b7052c31a9669417b837fb14aa6d313 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:43 -0800 Subject: [PATCH] sem2mutex: NCPFS Semaphore to mutex conversion. 
The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ncpfs/file.c | 4 ++-- fs/ncpfs/inode.c | 6 +++--- fs/ncpfs/ncplib_kernel.c | 4 ++-- fs/ncpfs/sock.c | 34 +++++++++++++++++----------------- include/linux/ncp_fs_i.h | 2 +- include/linux/ncp_fs_sb.h | 5 +++-- 6 files changed, 28 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 973b444d6914..ebdad8f6398f 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -46,7 +46,7 @@ int ncp_make_open(struct inode *inode, int right) NCP_FINFO(inode)->volNumber, NCP_FINFO(inode)->dirEntNum); error = -EACCES; - down(&NCP_FINFO(inode)->open_sem); + mutex_lock(&NCP_FINFO(inode)->open_mutex); if (!atomic_read(&NCP_FINFO(inode)->opened)) { struct ncp_entry_info finfo; int result; @@ -93,7 +93,7 @@ int ncp_make_open(struct inode *inode, int right) } out_unlock: - up(&NCP_FINFO(inode)->open_sem); + mutex_unlock(&NCP_FINFO(inode)->open_mutex); out: return error; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index d277a58bd128..0b521d3d97ce 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -63,7 +63,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { - init_MUTEX(&ei->open_sem); + mutex_init(&ei->open_mutex); inode_init_once(&ei->vfs_inode); } } @@ -520,7 +520,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) } /* server->lock = 0; */ - init_MUTEX(&server->sem); + mutex_init(&server->mutex); server->packet = NULL; /* server->buffer_size = 0; */ /* server->conn_status = 0; */ @@ -557,7 +557,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) server->dentry_ttl = 0; /* no caching */ INIT_LIST_HEAD(&server->tx.requests); - init_MUTEX(&server->rcv.creq_sem); + mutex_init(&server->rcv.creq_mutex); server->tx.creq = NULL; server->rcv.creq = NULL; server->data_ready = sock->sk->sk_data_ready; diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index c755e1848a42..d9ebf6439f59 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -291,7 +291,7 @@ ncp_make_closed(struct inode *inode) int err; err = 0; - down(&NCP_FINFO(inode)->open_sem); + mutex_lock(&NCP_FINFO(inode)->open_mutex); if (atomic_read(&NCP_FINFO(inode)->opened) == 1) { atomic_set(&NCP_FINFO(inode)->opened, 0); err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle); @@ -301,7 +301,7 @@ ncp_make_closed(struct inode *inode) NCP_FINFO(inode)->volNumber, NCP_FINFO(inode)->dirEntNum, err); } - up(&NCP_FINFO(inode)->open_sem); + mutex_unlock(&NCP_FINFO(inode)->open_mutex); return err; } diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index 6593a5ca88ba..8783eb7ec641 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -171,9 +171,9 @@ static inline void __ncp_abort_request(struct ncp_server *server, struct ncp_req static inline void ncp_abort_request(struct ncp_server *server, struct ncp_request_reply *req, int err) { - down(&server->rcv.creq_sem); + mutex_lock(&server->rcv.creq_mutex); __ncp_abort_request(server, req, err); - up(&server->rcv.creq_sem); + mutex_unlock(&server->rcv.creq_mutex); } static inline void __ncptcp_abort(struct ncp_server *server) @@ -303,20 +303,20 @@ static inline void __ncp_start_request(struct ncp_server *server, struct ncp_req static int 
ncp_add_request(struct ncp_server *server, struct ncp_request_reply *req) { - down(&server->rcv.creq_sem); + mutex_lock(&server->rcv.creq_mutex); if (!ncp_conn_valid(server)) { - up(&server->rcv.creq_sem); + mutex_unlock(&server->rcv.creq_mutex); printk(KERN_ERR "ncpfs: tcp: Server died\n"); return -EIO; } if (server->tx.creq || server->rcv.creq) { req->status = RQ_QUEUED; list_add_tail(&req->req, &server->tx.requests); - up(&server->rcv.creq_sem); + mutex_unlock(&server->rcv.creq_mutex); return 0; } __ncp_start_request(server, req); - up(&server->rcv.creq_sem); + mutex_unlock(&server->rcv.creq_mutex); return 0; } @@ -400,7 +400,7 @@ void ncpdgram_rcv_proc(void *s) info_server(server, 0, server->unexpected_packet.data, result); continue; } - down(&server->rcv.creq_sem); + mutex_lock(&server->rcv.creq_mutex); req = server->rcv.creq; if (req && (req->tx_type == NCP_ALLOC_SLOT_REQUEST || (server->sequence == reply.sequence && server->connection == get_conn_number(&reply)))) { @@ -430,11 +430,11 @@ void ncpdgram_rcv_proc(void *s) server->rcv.creq = NULL; ncp_finish_request(req, result); __ncp_next_request(server); - up(&server->rcv.creq_sem); + mutex_unlock(&server->rcv.creq_mutex); continue; } } - up(&server->rcv.creq_sem); + mutex_unlock(&server->rcv.creq_mutex); } drop:; _recv(sock, &reply, sizeof(reply), MSG_DONTWAIT); @@ -472,9 +472,9 @@ static void __ncpdgram_timeout_proc(struct ncp_server *server) void ncpdgram_timeout_proc(void *s) { struct ncp_server *server = s; - down(&server->rcv.creq_sem); + mutex_lock(&server->rcv.creq_mutex); __ncpdgram_timeout_proc(server); - up(&server->rcv.creq_sem); + mutex_unlock(&server->rcv.creq_mutex); } static inline void ncp_init_req(struct ncp_request_reply* req) @@ -657,18 +657,18 @@ void ncp_tcp_rcv_proc(void *s) { struct ncp_server *server = s; - down(&server->rcv.creq_sem); + mutex_lock(&server->rcv.creq_mutex); __ncptcp_rcv_proc(server); - up(&server->rcv.creq_sem); + mutex_unlock(&server->rcv.creq_mutex); } void ncp_tcp_tx_proc(void *s) { struct ncp_server *server = s; - down(&server->rcv.creq_sem); + mutex_lock(&server->rcv.creq_mutex); __ncptcp_try_send(server); - up(&server->rcv.creq_sem); + mutex_unlock(&server->rcv.creq_mutex); } static int do_ncp_rpc_call(struct ncp_server *server, int size, @@ -833,7 +833,7 @@ int ncp_disconnect(struct ncp_server *server) void ncp_lock_server(struct ncp_server *server) { - down(&server->sem); + mutex_lock(&server->mutex); if (server->lock) printk(KERN_WARNING "ncp_lock_server: was locked!\n"); server->lock = 1; @@ -846,5 +846,5 @@ void ncp_unlock_server(struct ncp_server *server) return; } server->lock = 0; - up(&server->sem); + mutex_unlock(&server->mutex); } diff --git a/include/linux/ncp_fs_i.h b/include/linux/ncp_fs_i.h index 415be1ec6f98..bdb4c8ae6924 100644 --- a/include/linux/ncp_fs_i.h +++ b/include/linux/ncp_fs_i.h @@ -19,7 +19,7 @@ struct ncp_inode_info { __le32 DosDirNum; __u8 volNumber; __le32 nwattr; - struct semaphore open_sem; + struct mutex open_mutex; atomic_t opened; int access; int flags; diff --git a/include/linux/ncp_fs_sb.h b/include/linux/ncp_fs_sb.h index cf858eb80f0b..b089d9506283 100644 --- a/include/linux/ncp_fs_sb.h +++ b/include/linux/ncp_fs_sb.h @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef __KERNEL__ @@ -51,7 +52,7 @@ struct ncp_server { receive replies */ int lock; /* To prevent mismatch in protocols. 
*/ - struct semaphore sem; + struct mutex mutex; int current_size; /* for packet preparation */ int has_subfunction; @@ -96,7 +97,7 @@ struct ncp_server { struct { struct work_struct tq; /* STREAM/DGRAM: data/error ready */ struct ncp_request_reply* creq; /* STREAM/DGRAM: awaiting reply from this request */ - struct semaphore creq_sem; /* DGRAM only: lock accesses to rcv.creq */ + struct mutex creq_mutex; /* DGRAM only: lock accesses to rcv.creq */ unsigned int state; /* STREAM only: receiver state */ struct { -- cgit v1.2.3 From 1e7933defd0fce79b2d8ecdbc7ca37fed0c188ed Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:44 -0800 Subject: [PATCH] sem2mutex: UDF Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/udf/balloc.c | 36 ++++++++++++++++++------------------ fs/udf/ialloc.c | 8 ++++---- fs/udf/super.c | 2 +- include/linux/udf_fs_sb.h | 4 ++-- 4 files changed, 25 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 201049ac8a96..ea521f846d97 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -152,7 +152,7 @@ static void udf_bitmap_free_blocks(struct super_block * sb, int bitmap_nr; unsigned long overflow; - down(&sbi->s_alloc_sem); + mutex_lock(&sbi->s_alloc_mutex); if (bloc.logicalBlockNum < 0 || (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) { @@ -211,7 +211,7 @@ error_return: sb->s_dirt = 1; if (UDF_SB_LVIDBH(sb)) mark_buffer_dirty(UDF_SB_LVIDBH(sb)); - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); return; } @@ -226,7 +226,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block * sb, int nr_groups, bitmap_nr; struct buffer_head *bh; - down(&sbi->s_alloc_sem); + mutex_lock(&sbi->s_alloc_mutex); if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition)) goto out; @@ -275,7 +275,7 @@ out: mark_buffer_dirty(UDF_SB_LVIDBH(sb)); } sb->s_dirt = 1; - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); return alloc_count; } @@ -291,7 +291,7 @@ static int udf_bitmap_new_block(struct super_block * sb, int newblock = 0; *err = -ENOSPC; - down(&sbi->s_alloc_sem); + mutex_lock(&sbi->s_alloc_mutex); repeat: if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition)) @@ -364,7 +364,7 @@ repeat: } if (i >= (nr_groups*2)) { - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); return newblock; } if (bit < sb->s_blocksize << 3) @@ -373,7 +373,7 @@ repeat: bit = udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, group_start << 3); if (bit >= sb->s_blocksize << 3) { - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); return 0; } @@ -387,7 +387,7 @@ got_block: */ if (inode && DQUOT_ALLOC_BLOCK(inode, 1)) { - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); *err = -EDQUOT; return 0; } @@ -410,13 +410,13 @@ got_block: mark_buffer_dirty(UDF_SB_LVIDBH(sb)); } sb->s_dirt = 1; - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); *err = 0; return newblock; error_return: *err = -EIO; - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); return 0; } @@ -433,7 +433,7 @@ static void udf_table_free_blocks(struct super_block * sb, int8_t etype; int i; - down(&sbi->s_alloc_sem); + mutex_lock(&sbi->s_alloc_mutex); if (bloc.logicalBlockNum < 0 || (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) { @@ -666,7 +666,7 @@ static 
void udf_table_free_blocks(struct super_block * sb, error_return: sb->s_dirt = 1; - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); return; } @@ -692,7 +692,7 @@ static int udf_table_prealloc_blocks(struct super_block * sb, else return 0; - down(&sbi->s_alloc_sem); + mutex_lock(&sbi->s_alloc_mutex); extoffset = sizeof(struct unallocSpaceEntry); bloc = UDF_I_LOCATION(table); @@ -736,7 +736,7 @@ static int udf_table_prealloc_blocks(struct super_block * sb, mark_buffer_dirty(UDF_SB_LVIDBH(sb)); sb->s_dirt = 1; } - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); return alloc_count; } @@ -761,7 +761,7 @@ static int udf_table_new_block(struct super_block * sb, else return newblock; - down(&sbi->s_alloc_sem); + mutex_lock(&sbi->s_alloc_mutex); if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition)) goal = 0; @@ -811,7 +811,7 @@ static int udf_table_new_block(struct super_block * sb, if (spread == 0xFFFFFFFF) { udf_release_data(goal_bh); - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); return 0; } @@ -827,7 +827,7 @@ static int udf_table_new_block(struct super_block * sb, if (inode && DQUOT_ALLOC_BLOCK(inode, 1)) { udf_release_data(goal_bh); - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); *err = -EDQUOT; return 0; } @@ -846,7 +846,7 @@ static int udf_table_new_block(struct super_block * sb, } sb->s_dirt = 1; - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); *err = 0; return newblock; } diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index c9b707b470ca..3873c672cb4c 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -42,7 +42,7 @@ void udf_free_inode(struct inode * inode) clear_inode(inode); - down(&sbi->s_alloc_sem); + mutex_lock(&sbi->s_alloc_mutex); if (sbi->s_lvidbh) { if (S_ISDIR(inode->i_mode)) UDF_SB_LVIDIU(sb)->numDirs = @@ -53,7 +53,7 @@ void udf_free_inode(struct inode * inode) mark_buffer_dirty(sbi->s_lvidbh); } - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); udf_free_blocks(sb, NULL, UDF_I_LOCATION(inode), 0, 1); } @@ -83,7 +83,7 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) return NULL; } - down(&sbi->s_alloc_sem); + mutex_lock(&sbi->s_alloc_mutex); UDF_I_UNIQUE(inode) = 0; UDF_I_LENEXTENTS(inode) = 0; UDF_I_NEXT_ALLOC_BLOCK(inode) = 0; @@ -148,7 +148,7 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) UDF_I_CRTIME(inode) = current_fs_time(inode->i_sb); insert_inode_hash(inode); mark_inode_dirty(inode); - up(&sbi->s_alloc_sem); + mutex_unlock(&sbi->s_alloc_mutex); if (DQUOT_ALLOC_INODE(inode)) { diff --git a/fs/udf/super.c b/fs/udf/super.c index 368d8f81fe54..9303c50c5d55 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1515,7 +1515,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) sb->s_fs_info = sbi; memset(UDF_SB(sb), 0x00, sizeof(struct udf_sb_info)); - init_MUTEX(&sbi->s_alloc_sem); + mutex_init(&sbi->s_alloc_mutex); if (!udf_parse_options((char *)options, &uopt)) goto error_out; diff --git a/include/linux/udf_fs_sb.h b/include/linux/udf_fs_sb.h index b15ff2e99c91..80ae9ef940dc 100644 --- a/include/linux/udf_fs_sb.h +++ b/include/linux/udf_fs_sb.h @@ -13,7 +13,7 @@ #ifndef _UDF_FS_SB_H #define _UDF_FS_SB_H 1 -#include +#include #pragma pack(1) @@ -111,7 +111,7 @@ struct udf_sb_info /* VAT inode */ struct inode *s_vat; - struct semaphore s_alloc_sem; + struct mutex s_alloc_mutex; }; #endif /* _UDF_FS_SB_H */ -- cgit v1.2.3 From 6b9438e1323a2be10dcc039f6321e7ca18b9459e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: 
Thu, 23 Mar 2006 03:00:48 -0800 Subject: [PATCH] fat_lock is used as a mutex, convert it to using the new mutex primitive The fat code uses the fat_lock always in a mutex way (taking and releasing the lock in the same function), the patch below converts it into the new mutex primitive. Please consider this patch for the code. Signed-off-by: Arjan van de Ven Acked-by: OGAWA Hirofumi Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fatent.c | 6 +++--- include/linux/msdos_fs.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index a1a9e0451217..ab171ea8e869 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -267,19 +267,19 @@ static struct fatent_operations fat32_ops = { static inline void lock_fat(struct msdos_sb_info *sbi) { - down(&sbi->fat_lock); + mutex_lock(&sbi->fat_lock); } static inline void unlock_fat(struct msdos_sb_info *sbi) { - up(&sbi->fat_lock); + mutex_unlock(&sbi->fat_lock); } void fat_ent_access_init(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); - init_MUTEX(&sbi->fat_lock); + mutex_init(&sbi->fat_lock); switch (sbi->fat_bits) { case 32: diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 8bcd9450d926..779e6a5744c7 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -184,6 +184,7 @@ struct fat_slot_info { #include #include #include +#include struct fat_mount_options { uid_t fs_uid; @@ -226,7 +227,7 @@ struct msdos_sb_info { unsigned long max_cluster; /* maximum cluster number */ unsigned long root_cluster; /* first cluster of the root directory */ unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ - struct semaphore fat_lock; + struct mutex fat_lock; unsigned int prev_free; /* previously allocated cluster number */ unsigned int free_clusters; /* -1 if undefined */ struct fat_mount_options options; -- cgit v1.2.3 From a7ccf007189aa4401695e3105b00d9429f836b46 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 23 Mar 2006 03:00:50 -0800 Subject: [PATCH] fs/ufs/file.c: drop insane header dependencies Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/file.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'fs') diff --git a/fs/ufs/file.c b/fs/ufs/file.c index ed69d7fe1b5d..62ad481810ef 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -23,18 +23,8 @@ * ext2 fs regular file handling primitives */ -#include -#include - -#include #include #include -#include -#include -#include -#include -#include -#include /* * We have mostly NULL's here: the current defaults are ok for -- cgit v1.2.3 From 78ec7b6917a938910d5ed6049c0e4ee6e6852e4e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 23 Mar 2006 03:00:51 -0800 Subject: [PATCH] minix: switch to inode_inc_link_count, inode_dec_link_count Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/minix/namei.c | 48 ++++++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/minix/namei.c b/fs/minix/namei.c index b25bca5bdb57..5b6a4540a05b 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -6,18 +6,6 @@ #include "minix.h" -static inline void inc_count(struct inode *inode) -{ - inode->i_nlink++; - mark_inode_dirty(inode); -} - -static inline void dec_count(struct inode *inode) -{ - inode->i_nlink--; - mark_inode_dirty(inode); -} - static int add_nondir(struct dentry 
*dentry, struct inode *inode) { int err = minix_add_link(dentry, inode); @@ -25,7 +13,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) d_instantiate(dentry, inode); return 0; } - dec_count(inode); + inode_dec_link_count(inode); iput(inode); return err; } @@ -125,7 +113,7 @@ out: return err; out_fail: - dec_count(inode); + inode_dec_link_count(inode); iput(inode); goto out; } @@ -139,7 +127,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir, return -EMLINK; inode->i_ctime = CURRENT_TIME_SEC; - inc_count(inode); + inode_inc_link_count(inode); atomic_inc(&inode->i_count); return add_nondir(dentry, inode); } @@ -152,7 +140,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode) if (dir->i_nlink >= minix_sb(dir->i_sb)->s_link_max) goto out; - inc_count(dir); + inode_inc_link_count(dir); inode = minix_new_inode(dir, &err); if (!inode) @@ -163,7 +151,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode) inode->i_mode |= S_ISGID; minix_set_inode(inode, 0); - inc_count(inode); + inode_inc_link_count(inode); err = minix_make_empty(inode, dir); if (err) @@ -178,11 +166,11 @@ out: return err; out_fail: - dec_count(inode); - dec_count(inode); + inode_dec_link_count(inode); + inode_dec_link_count(inode); iput(inode); out_dir: - dec_count(dir); + inode_dec_link_count(dir); goto out; } @@ -202,7 +190,7 @@ static int minix_unlink(struct inode * dir, struct dentry *dentry) goto end_unlink; inode->i_ctime = dir->i_ctime; - dec_count(inode); + inode_dec_link_count(inode); end_unlink: return err; } @@ -215,8 +203,8 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry) if (minix_empty_dir(inode)) { err = minix_unlink(dir, dentry); if (!err) { - dec_count(dir); - dec_count(inode); + inode_dec_link_count(dir); + inode_dec_link_count(inode); } } return err; @@ -257,34 +245,34 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, new_de = minix_find_entry(new_dentry, &new_page); if (!new_de) goto out_dir; - inc_count(old_inode); + inode_inc_link_count(old_inode); minix_set_link(new_de, new_page, old_inode); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) new_inode->i_nlink--; - dec_count(new_inode); + inode_dec_link_count(new_inode); } else { if (dir_de) { err = -EMLINK; if (new_dir->i_nlink >= info->s_link_max) goto out_dir; } - inc_count(old_inode); + inode_inc_link_count(old_inode); err = minix_add_link(new_dentry, old_inode); if (err) { - dec_count(old_inode); + inode_dec_link_count(old_inode); goto out_dir; } if (dir_de) - inc_count(new_dir); + inode_inc_link_count(new_dir); } minix_delete_entry(old_de, old_page); - dec_count(old_inode); + inode_dec_link_count(old_inode); if (dir_de) { minix_set_link(dir_de, dir_page, new_dir); - dec_count(old_dir); + inode_dec_link_count(old_dir); } return 0; -- cgit v1.2.3 From 4e907c3d45d10dc5162d283d109be425c23aeb69 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 23 Mar 2006 03:00:52 -0800 Subject: [PATCH] sysv: switch to inode_inc_count, inode_dec_count Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/sysv/namei.c | 48 ++++++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 7f0e4b53085e..b8a73f716fbe 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -16,18 +16,6 @@ #include #include "sysv.h" -static inline void inc_count(struct inode *inode) -{ - 
inode->i_nlink++; - mark_inode_dirty(inode); -} - -static inline void dec_count(struct inode *inode) -{ - inode->i_nlink--; - mark_inode_dirty(inode); -} - static int add_nondir(struct dentry *dentry, struct inode *inode) { int err = sysv_add_link(dentry, inode); @@ -35,7 +23,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) d_instantiate(dentry, inode); return 0; } - dec_count(inode); + inode_dec_link_count(inode); iput(inode); return err; } @@ -124,7 +112,7 @@ out: return err; out_fail: - dec_count(inode); + inode_dec_link_count(inode); iput(inode); goto out; } @@ -138,7 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir, return -EMLINK; inode->i_ctime = CURRENT_TIME_SEC; - inc_count(inode); + inode_inc_link_count(inode); atomic_inc(&inode->i_count); return add_nondir(dentry, inode); @@ -151,7 +139,7 @@ static int sysv_mkdir(struct inode * dir, struct dentry *dentry, int mode) if (dir->i_nlink >= SYSV_SB(dir->i_sb)->s_link_max) goto out; - inc_count(dir); + inode_inc_link_count(dir); inode = sysv_new_inode(dir, S_IFDIR|mode); err = PTR_ERR(inode); @@ -160,7 +148,7 @@ static int sysv_mkdir(struct inode * dir, struct dentry *dentry, int mode) sysv_set_inode(inode, 0); - inc_count(inode); + inode_inc_link_count(inode); err = sysv_make_empty(inode, dir); if (err) @@ -175,11 +163,11 @@ out: return err; out_fail: - dec_count(inode); - dec_count(inode); + inode_dec_link_count(inode); + inode_dec_link_count(inode); iput(inode); out_dir: - dec_count(dir); + inode_dec_link_count(dir); goto out; } @@ -199,7 +187,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry) goto out; inode->i_ctime = dir->i_ctime; - dec_count(inode); + inode_dec_link_count(inode); out: return err; } @@ -213,8 +201,8 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry) err = sysv_unlink(dir, dentry); if (!err) { inode->i_size = 0; - dec_count(inode); - dec_count(dir); + inode_dec_link_count(inode); + inode_dec_link_count(dir); } } return err; @@ -258,34 +246,34 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry, new_de = sysv_find_entry(new_dentry, &new_page); if (!new_de) goto out_dir; - inc_count(old_inode); + inode_inc_link_count(old_inode); sysv_set_link(new_de, new_page, old_inode); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) new_inode->i_nlink--; - dec_count(new_inode); + inode_dec_link_count(new_inode); } else { if (dir_de) { err = -EMLINK; if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max) goto out_dir; } - inc_count(old_inode); + inode_inc_link_count(old_inode); err = sysv_add_link(new_dentry, old_inode); if (err) { - dec_count(old_inode); + inode_dec_link_count(old_inode); goto out_dir; } if (dir_de) - inc_count(new_dir); + inode_inc_link_count(new_dir); } sysv_delete_entry(old_de, old_page); - dec_count(old_inode); + inode_dec_link_count(old_inode); if (dir_de) { sysv_set_link(dir_de, dir_page, new_dir); - dec_count(old_dir); + inode_dec_link_count(old_dir); } return 0; -- cgit v1.2.3 From a513b035eadf80bb9ce0be4c5dd9f242cfb9eecc Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 23 Mar 2006 03:00:53 -0800 Subject: [PATCH] ext2: switch to inode_inc_count, inode_dec_count Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/namei.c | 54 +++++++++++++++++++----------------------------------- 1 file changed, 19 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index ad1432a2a62e..4ca824985321 
100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -36,22 +36,6 @@ #include "acl.h" #include "xip.h" -/* - * Couple of helper functions - make the code slightly cleaner. - */ - -static inline void ext2_inc_count(struct inode *inode) -{ - inode->i_nlink++; - mark_inode_dirty(inode); -} - -static inline void ext2_dec_count(struct inode *inode) -{ - inode->i_nlink--; - mark_inode_dirty(inode); -} - static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ext2_add_link(dentry, inode); @@ -59,7 +43,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) d_instantiate(dentry, inode); return 0; } - ext2_dec_count(inode); + inode_dec_link_count(inode); iput(inode); return err; } @@ -201,7 +185,7 @@ out: return err; out_fail: - ext2_dec_count(inode); + inode_dec_link_count(inode); iput (inode); goto out; } @@ -215,7 +199,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, return -EMLINK; inode->i_ctime = CURRENT_TIME_SEC; - ext2_inc_count(inode); + inode_inc_link_count(inode); atomic_inc(&inode->i_count); return ext2_add_nondir(dentry, inode); @@ -229,7 +213,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= EXT2_LINK_MAX) goto out; - ext2_inc_count(dir); + inode_inc_link_count(dir); inode = ext2_new_inode (dir, S_IFDIR | mode); err = PTR_ERR(inode); @@ -243,7 +227,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) else inode->i_mapping->a_ops = &ext2_aops; - ext2_inc_count(inode); + inode_inc_link_count(inode); err = ext2_make_empty(inode, dir); if (err) @@ -258,11 +242,11 @@ out: return err; out_fail: - ext2_dec_count(inode); - ext2_dec_count(inode); + inode_dec_link_count(inode); + inode_dec_link_count(inode); iput(inode); out_dir: - ext2_dec_count(dir); + inode_dec_link_count(dir); goto out; } @@ -282,7 +266,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) goto out; inode->i_ctime = dir->i_ctime; - ext2_dec_count(inode); + inode_dec_link_count(inode); err = 0; out: return err; @@ -297,8 +281,8 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry) err = ext2_unlink(dir, dentry); if (!err) { inode->i_size = 0; - ext2_dec_count(inode); - ext2_dec_count(dir); + inode_dec_link_count(inode); + inode_dec_link_count(dir); } } return err; @@ -338,41 +322,41 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, new_de = ext2_find_entry (new_dir, new_dentry, &new_page); if (!new_de) goto out_dir; - ext2_inc_count(old_inode); + inode_inc_link_count(old_inode); ext2_set_link(new_dir, new_de, new_page, old_inode); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) new_inode->i_nlink--; - ext2_dec_count(new_inode); + inode_dec_link_count(new_inode); } else { if (dir_de) { err = -EMLINK; if (new_dir->i_nlink >= EXT2_LINK_MAX) goto out_dir; } - ext2_inc_count(old_inode); + inode_inc_link_count(old_inode); err = ext2_add_link(new_dentry, old_inode); if (err) { - ext2_dec_count(old_inode); + inode_dec_link_count(old_inode); goto out_dir; } if (dir_de) - ext2_inc_count(new_dir); + inode_inc_link_count(new_dir); } /* * Like most other Unix systems, set the ctime for inodes on a * rename. - * ext2_dec_count() will mark the inode dirty. + * inode_dec_link_count() will mark the inode dirty. 
*/ old_inode->i_ctime = CURRENT_TIME_SEC; ext2_delete_entry (old_de, old_page); - ext2_dec_count(old_inode); + inode_dec_link_count(old_inode); if (dir_de) { ext2_set_link(old_inode, dir_de, dir_page, new_dir); - ext2_dec_count(old_dir); + inode_dec_link_count(old_dir); } return 0; -- cgit v1.2.3 From 3257545e40a769cbef98cf13eabe50f00712991e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 23 Mar 2006 03:00:53 -0800 Subject: [PATCH] ufs: switch to inode_inc_count, inode_dec_count Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/namei.c | 48 ++++++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 2958cde7d3d6..8d5f98a01c74 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -43,18 +43,6 @@ #define UFSD(x) #endif -static inline void ufs_inc_count(struct inode *inode) -{ - inode->i_nlink++; - mark_inode_dirty(inode); -} - -static inline void ufs_dec_count(struct inode *inode) -{ - inode->i_nlink--; - mark_inode_dirty(inode); -} - static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ufs_add_link(dentry, inode); @@ -62,7 +50,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) d_instantiate(dentry, inode); return 0; } - ufs_dec_count(inode); + inode_dec_link_count(inode); iput(inode); return err; } @@ -173,7 +161,7 @@ out: return err; out_fail: - ufs_dec_count(inode); + inode_dec_link_count(inode); iput(inode); goto out; } @@ -191,7 +179,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, } inode->i_ctime = CURRENT_TIME_SEC; - ufs_inc_count(inode); + inode_inc_link_count(inode); atomic_inc(&inode->i_count); error = ufs_add_nondir(dentry, inode); @@ -208,7 +196,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) goto out; lock_kernel(); - ufs_inc_count(dir); + inode_inc_link_count(dir); inode = ufs_new_inode(dir, S_IFDIR|mode); err = PTR_ERR(inode); @@ -218,7 +206,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) inode->i_op = &ufs_dir_inode_operations; inode->i_fop = &ufs_dir_operations; - ufs_inc_count(inode); + inode_inc_link_count(inode); err = ufs_make_empty(inode, dir); if (err) @@ -234,11 +222,11 @@ out: return err; out_fail: - ufs_dec_count(inode); - ufs_dec_count(inode); + inode_dec_link_count(inode); + inode_dec_link_count(inode); iput (inode); out_dir: - ufs_dec_count(dir); + inode_dec_link_count(dir); unlock_kernel(); goto out; } @@ -260,7 +248,7 @@ static int ufs_unlink(struct inode * dir, struct dentry *dentry) goto out; inode->i_ctime = dir->i_ctime; - ufs_dec_count(inode); + inode_dec_link_count(inode); err = 0; out: unlock_kernel(); @@ -277,8 +265,8 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) err = ufs_unlink(dir, dentry); if (!err) { inode->i_size = 0; - ufs_dec_count(inode); - ufs_dec_count(dir); + inode_dec_link_count(inode); + inode_dec_link_count(dir); } } unlock_kernel(); @@ -319,35 +307,35 @@ static int ufs_rename (struct inode * old_dir, struct dentry * old_dentry, new_de = ufs_find_entry (new_dentry, &new_bh); if (!new_de) goto out_dir; - ufs_inc_count(old_inode); + inode_inc_link_count(old_inode); ufs_set_link(new_dir, new_de, new_bh, old_inode); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) new_inode->i_nlink--; - ufs_dec_count(new_inode); + inode_dec_link_count(new_inode); } else { if (dir_de) { err = -EMLINK; if 
(new_dir->i_nlink >= UFS_LINK_MAX) goto out_dir; } - ufs_inc_count(old_inode); + inode_inc_link_count(old_inode); err = ufs_add_link(new_dentry, old_inode); if (err) { - ufs_dec_count(old_inode); + inode_dec_link_count(old_inode); goto out_dir; } if (dir_de) - ufs_inc_count(new_dir); + inode_inc_link_count(new_dir); } ufs_delete_entry (old_dir, old_de, old_bh); - ufs_dec_count(old_inode); + inode_dec_link_count(old_inode); if (dir_de) { ufs_set_link(old_inode, dir_de, dir_bh, new_dir); - ufs_dec_count(old_dir); + inode_dec_link_count(old_dir); } unlock_kernel(); return 0; -- cgit v1.2.3 From 7a673c6b8fff4b2888ef1d47462e4be79936ac5a Mon Sep 17 00:00:00 2001 From: Domen Puncer Date: Thu, 23 Mar 2006 03:00:59 -0800 Subject: [PATCH] devpts: use lib/parser.c for parsing mount options Item from "2.6 should fix" list. Signed-off-by: Domen Puncer Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/devpts/inode.c | 76 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index bfb8a230bac9..14c5620b5cab 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -18,6 +18,7 @@ #include #include #include +#include #define DEVPTS_SUPER_MAGIC 0x1cd1 @@ -32,39 +33,60 @@ static struct { umode_t mode; } config = {.mode = 0600}; +enum { + Opt_uid, Opt_gid, Opt_mode, + Opt_err +}; + +static match_table_t tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_mode, "mode=%o"}, + {Opt_err, NULL} +}; + static int devpts_remount(struct super_block *sb, int *flags, char *data) { - int setuid = 0; - int setgid = 0; - uid_t uid = 0; - gid_t gid = 0; - umode_t mode = 0600; - char *this_char; - - this_char = NULL; - while ((this_char = strsep(&data, ",")) != NULL) { - int n; - char dummy; - if (!*this_char) + char *p; + + config.setuid = 0; + config.setgid = 0; + config.uid = 0; + config.gid = 0; + config.mode = 0600; + + while ((p = strsep(&data, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + int token; + int option; + + if (!*p) continue; - if (sscanf(this_char, "uid=%i%c", &n, &dummy) == 1) { - setuid = 1; - uid = n; - } else if (sscanf(this_char, "gid=%i%c", &n, &dummy) == 1) { - setgid = 1; - gid = n; - } else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1) - mode = n & ~S_IFMT; - else { - printk("devpts: called with bogus options\n"); + + token = match_token(p, tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return -EINVAL; + config.uid = option; + config.setuid = 1; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return -EINVAL; + config.gid = option; + config.setgid = 1; + break; + case Opt_mode: + if (match_octal(&args[0], &option)) + return -EINVAL; + config.mode = option & ~S_IFMT; + break; + default: + printk(KERN_ERR "devpts: called with bogus options\n"); return -EINVAL; } } - config.setuid = setuid; - config.setgid = setgid; - config.uid = uid; - config.gid = gid; - config.mode = mode; return 0; } -- cgit v1.2.3 From 713729e8b993cb880225e2ced50a3f5ac05c2b3f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 23 Mar 2006 03:01:01 -0800 Subject: [PATCH] fs/*/file.c: drop insane header dependencies Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/adfs/file.c | 4 ---- fs/qnx4/file.c | 3 --- fs/ramfs/file-mmu.c | 11 ----------- 3 files changed, 18 deletions(-) (limited to 'fs') diff --git a/fs/adfs/file.c 
b/fs/adfs/file.c index afebbfde6968..6af10885f9d6 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -19,11 +19,7 @@ * * adfs regular file handling primitives */ -#include #include -#include -#include -#include #include /* for file_fsync() */ #include diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index b471315e24ef..c33963fded9e 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c @@ -12,10 +12,7 @@ * 27-06-1998 by Frank Denis : file overwriting. */ -#include -#include #include -#include #include /* diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 2115383dcc8d..6ada2095b9ac 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -24,18 +24,7 @@ * caches is sufficient. */ -#include #include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "internal.h" struct address_space_operations ramfs_aops = { .readpage = simple_readpage, -- cgit v1.2.3 From 5a6b7951bfcca7f45f44269ea87417c74558daf8 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Thu, 23 Mar 2006 03:01:03 -0800 Subject: [PATCH] get_empty_filp tweaks, inline epoll_init_file() Eliminate a handful of cache references by keeping current in a register instead of reloading (helps x86) and avoiding the overhead of a function call. Inlining eventpoll_init_file() saves 24 bytes. Also reorder file initialization to make writes occur more sequentially. Signed-off-by: Benjamin LaHaise Cc: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 9 --------- fs/file_table.c | 10 ++++++---- include/linux/eventpoll.h | 8 ++++++-- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index f5d69f46ba9b..1c2b16fda13a 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -452,15 +452,6 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) } -/* Used to initialize the epoll bits inside the "struct file" */ -void eventpoll_init_file(struct file *file) -{ - - INIT_LIST_HEAD(&file->f_ep_links); - spin_lock_init(&file->f_ep_lock); -} - - /* * This is called from eventpoll_release() to unlink files from the eventpoll * interface. 
We need to have this facility to cleanup correctly files that are diff --git a/fs/file_table.c b/fs/file_table.c index 44fabeaa9415..bcea1998b4de 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -88,6 +88,7 @@ int proc_nr_files(ctl_table *table, int write, struct file *filp, */ struct file *get_empty_filp(void) { + struct task_struct *tsk; static int old_max; struct file * f; @@ -112,13 +113,14 @@ struct file *get_empty_filp(void) if (security_file_alloc(f)) goto fail_sec; - eventpoll_init_file(f); + tsk = current; + INIT_LIST_HEAD(&f->f_u.fu_list); atomic_set(&f->f_count, 1); - f->f_uid = current->fsuid; - f->f_gid = current->fsgid; rwlock_init(&f->f_owner.lock); + f->f_uid = tsk->fsuid; + f->f_gid = tsk->fsgid; + eventpoll_init_file(f); /* f->f_version: 0 */ - INIT_LIST_HEAD(&f->f_u.fu_list); return f; over: diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 1289f0ec4c00..1e4bdfcf83a2 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -52,7 +52,12 @@ struct file; #ifdef CONFIG_EPOLL /* Used to initialize the epoll bits inside the "struct file" */ -void eventpoll_init_file(struct file *file); +static inline void eventpoll_init_file(struct file *file) +{ + INIT_LIST_HEAD(&file->f_ep_links); + spin_lock_init(&file->f_ep_lock); +} + /* Used to release the epoll bits inside the "struct file" */ void eventpoll_release_file(struct file *file); @@ -85,7 +90,6 @@ static inline void eventpoll_release(struct file *file) eventpoll_release_file(file); } - #else static inline void eventpoll_init_file(struct file *file) {} -- cgit v1.2.3 From 394e3902c55e667945f6f1c2bdbc59842cce70f7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 23 Mar 2006 03:01:05 -0800 Subject: [PATCH] more for_each_cpu() conversions When we stop allocating percpu memory for not-possible CPUs we must not touch the percpu data for not-possible CPUs at all. The correct way of doing this is to test cpu_possible() or to use for_each_cpu(). This patch is a kernel-wide sweep of all instances of NR_CPUS. I found very few instances of this bug, if any. But the patch converts lots of open-coded test to use the preferred helper macros. Cc: Mikael Starvik Cc: David Howells Acked-by: Kyle McMartin Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: "David S. 
Miller" Cc: William Lee Irwin III Cc: Andi Kleen Cc: Christian Zankel Cc: Philippe Elie Cc: Nathan Scott Cc: Jens Axboe Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/kernel/irq.c | 10 ++++------ arch/frv/kernel/irq.c | 10 ++++------ arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 4 +--- arch/i386/kernel/io_apic.c | 22 +++++++++------------- arch/i386/kernel/nmi.c | 4 ++-- arch/i386/oprofile/nmi_int.c | 7 ++----- arch/m32r/kernel/irq.c | 10 ++++------ arch/mips/kernel/irq.c | 10 ++++------ arch/mips/kernel/smp.c | 4 ++-- arch/mips/sgi-ip27/ip27-irq.c | 5 +---- arch/parisc/kernel/smp.c | 25 ++++++++++--------------- arch/powerpc/kernel/irq.c | 5 ++--- arch/powerpc/kernel/setup-common.c | 5 ++--- arch/powerpc/kernel/setup_32.c | 5 ++--- arch/powerpc/platforms/powermac/smp.c | 4 +--- arch/ppc/kernel/setup.c | 10 ++++------ arch/s390/kernel/smp.c | 4 +--- arch/sh/kernel/irq.c | 5 ++--- arch/sh/kernel/setup.c | 5 ++--- arch/sh64/kernel/irq.c | 5 ++--- arch/sparc/kernel/irq.c | 5 ++--- arch/sparc/kernel/smp.c | 24 ++++++++++-------------- arch/sparc/kernel/sun4d_irq.c | 8 +++----- arch/sparc/kernel/sun4d_smp.c | 8 +++----- arch/sparc/kernel/sun4m_smp.c | 6 ++---- arch/sparc64/kernel/irq.c | 4 +--- arch/sparc64/kernel/smp.c | 30 ++++++++++++------------------ arch/x86_64/kernel/irq.c | 21 ++++++++------------- arch/x86_64/kernel/nmi.c | 4 +--- arch/xtensa/kernel/irq.c | 15 ++++++--------- drivers/net/loopback.c | 4 +--- drivers/oprofile/cpu_buffer.c | 3 +-- fs/xfs/linux-2.6/xfs_stats.c | 7 ++----- fs/xfs/linux-2.6/xfs_sysctl.c | 3 +-- include/asm-alpha/mmu_context.h | 5 ++--- include/asm-alpha/topology.h | 4 ++-- include/asm-generic/percpu.h | 7 +++---- include/asm-powerpc/percpu.h | 7 +++---- include/asm-s390/percpu.h | 7 +++---- include/asm-sparc64/percpu.h | 7 +++---- include/asm-x86_64/percpu.h | 7 +++---- include/linux/genhd.h | 14 ++++---------- 42 files changed, 137 insertions(+), 222 deletions(-) (limited to 'fs') diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index 30deaf1b728a..b504def3e346 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -52,9 +52,8 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; jtypename); seq_printf(p, " %s", action->name); diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index 27ab4c30aac6..11fa326a8f62 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -75,9 +75,8 @@ int show_interrupts(struct seq_file *p, void *v) switch (i) { case 0: seq_printf(p, " "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "CPU%d ",j); + for_each_online_cpu(j) + seq_printf(p, "CPU%d ",j); seq_putc(p, '\n'); break; @@ -100,9 +99,8 @@ int show_interrupts(struct seq_file *p, void *v) #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]); + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]); #endif level = group->sources[ix]->level - frv_irq_levels; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index e11a09207ec8..3d5110b65cc3 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -1145,9 +1145,7 @@ static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; - for (i=0; i CPU_IRQ(i)) { @@ -441,9 +439,7 @@ tryanothercpu: */ tmp_cpu_irq = 0; tmp_loaded = -1; - for 
(i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) @@ -619,9 +615,7 @@ static int __init balanced_irq_init(void) if (smp_num_siblings > 1 && !cpus_empty(tmp)) physical_balance = 1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { @@ -638,9 +632,11 @@ static int __init balanced_irq_init(void) else printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); failed: - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { kfree(irq_cpu_data[i].irq_delta); + irq_cpu_data[i].irq_delta = NULL; kfree(irq_cpu_data[i].last_irq); + irq_cpu_data[i].last_irq = NULL; } return 0; } diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 1db34effdd8d..9074818b9473 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -143,7 +143,7 @@ static int __init check_nmi_watchdog(void) local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_cpu(cpu) { #ifdef CONFIG_SMP /* Check cpu_callin_map here because that is set after the timer is started. */ @@ -510,7 +510,7 @@ void touch_nmi_watchdog (void) * Just reset the alert counters, (other CPUs might be * spinning on locks we hold): */ - for (i = 0; i < NR_CPUS; i++) + for_each_cpu(i) alert_counter[i] = 0; /* diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 0493e8b8ec49..1accce50c2c7 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -122,7 +122,7 @@ static void nmi_save_registers(void * dummy) static void free_msrs(void) { int i; - for (i = 0; i < NR_CPUS; ++i) { + for_each_cpu(i) { kfree(cpu_msrs[i].counters); cpu_msrs[i].counters = NULL; kfree(cpu_msrs[i].controls); @@ -138,10 +138,7 @@ static int allocate_msrs(void) size_t counters_size = sizeof(struct op_msr) * model->num_counters; int i; - for (i = 0; i < NR_CPUS; ++i) { - if (!cpu_online(i)) - continue; - + for_each_online_cpu(i) { cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL); if (!cpu_msrs[i].counters) { success = 0; diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c index 1ce63926a3c0..a4634b06f675 100644 --- a/arch/m32r/kernel/irq.c +++ b/arch/m32r/kernel/irq.c @@ -37,9 +37,8 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; jtypename); seq_printf(p, " %s", action->name); diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 7d93992e462c..3dd76b3d2967 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -68,9 +68,8 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; jtypename); seq_printf(p, " %s", action->name); diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 06ed90752424..78d171bfa331 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -167,8 +167,8 @@ int smp_call_function (void (*func) (void *info), void *info, int retry, mb(); /* Send a message to all other CPUs and wait for them to respond */ - for (i = 0; i < NR_CPUS; i++) - if (cpu_online(i) && i != cpu) + for_each_online_cpu(i) + if (i != cpu) core_send_ipi(i, SMP_CALL_FUNCTION); /* Wait for response */ diff --git 
a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 73e5e52781d8..2854ac4c9be1 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -88,12 +88,9 @@ static inline int find_level(cpuid_t *cpunum, int irq) { int cpu, i; - for (cpu = 0; cpu <= NR_CPUS; cpu++) { + for_each_online_cpu(cpu) { struct slice_data *si = cpu_data[cpu].data; - if (!cpu_online(cpu)) - continue; - for (i = BASE_PCI_IRQ; i < LEVELS_PER_SLICE; i++) if (si->level_to_irq[i] == irq) { *cpunum = cpu; diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 25564b7ca6bb..d6ac1c60a471 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -298,8 +298,8 @@ send_IPI_allbutself(enum ipi_message_type op) { int i; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_online(i) && i != smp_processor_id()) + for_each_online_cpu(i) { + if (i != smp_processor_id()) send_IPI_single(i, op); } } @@ -643,14 +643,13 @@ int sys_cpus(int argc, char **argv) if ( argc == 1 ){ #ifdef DUMP_MORE_STATE - for(i=0; iflags & SA_INTERRUPT) ? '+' : ' ', diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 4219dd2ce3a2..41bb9596be48 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -249,11 +249,9 @@ void __init smp4d_boot_cpus(void) } else { unsigned long bogosum = 0; - for(i = 0; i < NR_CPUS; i++) { - if (cpu_isset(i, cpu_present_map)) { - bogosum += cpu_data(i).udelay_val; - smp_highest_cpu = i; - } + for_each_present_cpu(i) { + bogosum += cpu_data(i).udelay_val; + smp_highest_cpu = i; } SMP_PRINTK(("Total of %d Processors activated (%lu.%02lu BogoMIPS).\n", cpucount + 1, bogosum/(500000/HZ), (bogosum/(5000/HZ))%100)); printk("Total of %d Processors activated (%lu.%02lu BogoMIPS).\n", diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index fbbd8a474c4c..1dde312eebda 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -218,10 +218,8 @@ void __init smp4m_boot_cpus(void) cpu_present_map = cpumask_of_cpu(smp_processor_id()); } else { unsigned long bogosum = 0; - for(i = 0; i < NR_CPUS; i++) { - if (cpu_isset(i, cpu_present_map)) - bogosum += cpu_data(i).udelay_val; - } + for_each_present_cpu(i) + bogosum += cpu_data(i).udelay_val; printk("Total of %d Processors activated (%lu.%02lu BogoMIPS).\n", cpucount + 1, bogosum/(500000/HZ), diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 8c93ba655b33..e505a4125e35 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -117,9 +117,7 @@ int show_interrupts(struct seq_file *p, void *v) #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else - for (j = 0; j < NR_CPUS; j++) { - if (!cpu_online(j)) - continue; + for_each_online_cpu(j) { seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 373a701c90a5..1b6e2ade1008 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -57,25 +57,21 @@ void smp_info(struct seq_file *m) int i; seq_printf(m, "State:\n"); - for (i = 0; i < NR_CPUS; i++) { - if (cpu_online(i)) - seq_printf(m, - "CPU%d:\t\tonline\n", i); - } + for_each_online_cpu(i) + seq_printf(m, "CPU%d:\t\tonline\n", i); } void smp_bogo(struct seq_file *m) { int i; - for (i = 0; i < NR_CPUS; i++) - if (cpu_online(i)) - seq_printf(m, - "Cpu%dBogo\t: %lu.%02lu\n" - "Cpu%dClkTck\t: %016lx\n", - i, cpu_data(i).udelay_val / (500000/HZ), - (cpu_data(i).udelay_val / (5000/HZ)) % 100, - i, cpu_data(i).clock_tick); + 
for_each_online_cpu(i) + seq_printf(m, + "Cpu%dBogo\t: %lu.%02lu\n" + "Cpu%dClkTck\t: %016lx\n", + i, cpu_data(i).udelay_val / (500000/HZ), + (cpu_data(i).udelay_val / (5000/HZ)) % 100, + i, cpu_data(i).clock_tick); } void __init smp_store_cpu_info(int id) @@ -1282,7 +1278,7 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; spin_lock_irqsave(&prof_setup_lock, flags); - for (i = 0; i < NR_CPUS; i++) + for_each_cpu(i) prof_multiplier(i) = multiplier; current_tick_offset = (timer_tick_offset / multiplier); spin_unlock_irqrestore(&prof_setup_lock, flags); @@ -1384,10 +1380,8 @@ void __init smp_cpus_done(unsigned int max_cpus) unsigned long bogosum = 0; int i; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_online(i)) - bogosum += cpu_data(i).udelay_val; - } + for_each_online_cpu(i) + bogosum += cpu_data(i).udelay_val; printk("Total of %ld processors activated " "(%lu.%02lu BogoMIPS).\n", (long) num_online_cpus(), diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 30d2a1e545fe..d8bd0b345b1e 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -38,9 +38,8 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; jtypename); @@ -68,15 +65,13 @@ skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); seq_putc(p, '\n'); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); seq_putc(p, '\n'); #endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 5bf17e41cd2d..66c009e10bac 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -162,9 +162,7 @@ int __init check_nmi_watchdog (void) local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu)) - continue; + for_each_online_cpu(cpu) { if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { endflag = 1; printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index 4cbf6d91571f..51f9bed455fa 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -83,9 +83,8 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; jtypename); seq_printf(p, " %s", action->name); @@ -113,9 +111,8 @@ skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", nmi_count(j)); + for_each_online_cpu(j) + seq_printf(p, "%10u ", nmi_count(j)); seq_putc(p, '\n'); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); } diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 690a1aae0b34..0c13795dca38 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -172,11 +172,9 @@ static struct net_device_stats *get_stats(struct net_device *dev) memset(stats, 0, sizeof(struct net_device_stats)); - for (i=0; i < NR_CPUS; i++) { + for_each_cpu(i) { struct net_device_stats *lb_stats; - if (!cpu_possible(i)) - continue; lb_stats = &per_cpu(loopback_stats, i); 
stats->rx_bytes += lb_stats->rx_bytes; stats->tx_bytes += lb_stats->tx_bytes; diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 78193e4bbdb5..330d3869b41e 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -38,9 +38,8 @@ void free_cpu_buffers(void) { int i; - for_each_online_cpu(i) { + for_each_online_cpu(i) vfree(cpu_buffer[i].buffer); - } } int alloc_cpu_buffers(void) diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index 8955720a2c6b..713e6a7505d0 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -62,18 +62,15 @@ xfs_read_xfsstats( while (j < xstats[i].endpoint) { val = 0; /* sum over all cpus */ - for (c = 0; c < NR_CPUS; c++) { - if (!cpu_possible(c)) continue; + for_each_cpu(c) val += *(((__u32*)&per_cpu(xfsstats, c) + j)); - } len += sprintf(buffer + len, " %u", val); j++; } buffer[len++] = '\n'; } /* extra precision counters */ - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) continue; + for_each_cpu(i) { xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index a02564972420..7079cc837210 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -38,8 +38,7 @@ xfs_stats_clear_proc_handler( if (!ret && write && *valp) { printk("XFS Clearing xfsstats\n"); - for (c = 0; c < NR_CPUS; c++) { - if (!cpu_possible(c)) continue; + for_each_cpu(c) { preempt_disable(); /* save vn_active, it's a universal truth! */ vn_active = per_cpu(xfsstats, c).vn_active; diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h index 6f92482cc96c..0c017fc181c1 100644 --- a/include/asm-alpha/mmu_context.h +++ b/include/asm-alpha/mmu_context.h @@ -231,9 +231,8 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) { int i; - for (i = 0; i < NR_CPUS; i++) - if (cpu_online(i)) - mm->context[i] = 0; + for_each_online_cpu(i) + mm->context[i] = 0; if (tsk != current) task_thread_info(tsk)->pcb.ptbr = ((unsigned long)mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; diff --git a/include/asm-alpha/topology.h b/include/asm-alpha/topology.h index eb740e280d9c..420ccde6b916 100644 --- a/include/asm-alpha/topology.h +++ b/include/asm-alpha/topology.h @@ -27,8 +27,8 @@ static inline cpumask_t node_to_cpumask(int node) cpumask_t node_cpu_mask = CPU_MASK_NONE; int cpu; - for(cpu = 0; cpu < NR_CPUS; cpu++) { - if (cpu_online(cpu) && (cpu_to_node(cpu) == node)) + for_each_online_cpu(cpu) { + if (cpu_to_node(cpu) == node) cpu_set(cpu, node_cpu_mask); } diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 9044aeb37828..78cf45547e31 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -19,10 +19,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define percpu_modcopy(pcpudst, src, size) \ do { \ unsigned int __i; \ - for (__i = 0; __i < NR_CPUS; __i++) \ - if (cpu_possible(__i)) \ - memcpy((pcpudst)+__per_cpu_offset[__i], \ - (src), (size)); \ + for_each_cpu(__i) \ + memcpy((pcpudst)+__per_cpu_offset[__i], \ + (src), (size)); \ } while (0) #else /* ! 
SMP */ diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h index e31922c50e53..464301cd0d03 100644 --- a/include/asm-powerpc/percpu.h +++ b/include/asm-powerpc/percpu.h @@ -27,10 +27,9 @@ #define percpu_modcopy(pcpudst, src, size) \ do { \ unsigned int __i; \ - for (__i = 0; __i < NR_CPUS; __i++) \ - if (cpu_possible(__i)) \ - memcpy((pcpudst)+__per_cpu_offset(__i), \ - (src), (size)); \ + for_each_cpu(__i) \ + memcpy((pcpudst)+__per_cpu_offset(__i), \ + (src), (size)); \ } while (0) extern void setup_per_cpu_areas(void); diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h index 123fcaca295e..e10ed87094f0 100644 --- a/include/asm-s390/percpu.h +++ b/include/asm-s390/percpu.h @@ -46,10 +46,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define percpu_modcopy(pcpudst, src, size) \ do { \ unsigned int __i; \ - for (__i = 0; __i < NR_CPUS; __i++) \ - if (cpu_possible(__i)) \ - memcpy((pcpudst)+__per_cpu_offset[__i], \ - (src), (size)); \ + for_each_cpu(__i) \ + memcpy((pcpudst)+__per_cpu_offset[__i], \ + (src), (size)); \ } while (0) #else /* ! SMP */ diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index aea4e51e7cd1..82032e159a76 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -26,10 +26,9 @@ register unsigned long __local_per_cpu_offset asm("g5"); #define percpu_modcopy(pcpudst, src, size) \ do { \ unsigned int __i; \ - for (__i = 0; __i < NR_CPUS; __i++) \ - if (cpu_possible(__i)) \ - memcpy((pcpudst)+__per_cpu_offset(__i), \ - (src), (size)); \ + for_each_cpu(__i) \ + memcpy((pcpudst)+__per_cpu_offset(__i), \ + (src), (size)); \ } while (0) #else /* ! SMP */ diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 29a6b0408f75..4405b4adeaba 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -26,10 +26,9 @@ #define percpu_modcopy(pcpudst, src, size) \ do { \ unsigned int __i; \ - for (__i = 0; __i < NR_CPUS; __i++) \ - if (cpu_possible(__i)) \ - memcpy((pcpudst)+__per_cpu_offset(__i), \ - (src), (size)); \ + for_each_cpu(__i) \ + memcpy((pcpudst)+__per_cpu_offset(__i), \ + (src), (size)); \ } while (0) extern void setup_per_cpu_areas(void); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index eef5ccdcd731..fd647fde5ec1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -149,22 +149,16 @@ struct disk_attribute { ({ \ typeof(gendiskp->dkstats->field) res = 0; \ int i; \ - for (i=0; i < NR_CPUS; i++) { \ - if (!cpu_possible(i)) \ - continue; \ + for_each_cpu(i) \ res += per_cpu_ptr(gendiskp->dkstats, i)->field; \ - } \ res; \ }) static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { int i; - for (i=0; i < NR_CPUS; i++) { - if (cpu_possible(i)) { - memset(per_cpu_ptr(gendiskp->dkstats, i), value, - sizeof (struct disk_stats)); - } - } + for_each_cpu(i) + memset(per_cpu_ptr(gendiskp->dkstats, i), value, + sizeof (struct disk_stats)); } #else -- cgit v1.2.3 From b0e6e962992b76580f4900b166a337bad7c1e81b Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Thu, 23 Mar 2006 03:01:08 -0800 Subject: [PATCH] reduce size of bio mempools The biovec default mempool limit of 256 entries results in over 3MB of RAM being permanently pinned, even on systems with only 128MB of RAM. Since mempool tries to allocate from the system pool first, it makes sense to reduce the size of the mempool fallbacks to a more reasonable limit of 1-5 entries -- enough for the system to be able to make progress even under load. 
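To illustrate the behaviour the message relies on (this sketch is not part of the patch): mempool_alloc() always tries the underlying allocator first and only dips into the small pre-allocated reserve when the system is out of memory, so a reserve of one or two elements is enough to guarantee forward progress. The cache name, element size and reserve count below are made up, and the slab/mempool calls are written as they existed around this kernel release.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/slab.h>

static struct kmem_cache *example_cache;	/* hypothetical element cache */
static mempool_t *example_pool;

static int __init example_pool_init(void)
{
	/* 2.6-era kmem_cache_create(): name, size, align, flags, ctor, dtor */
	example_cache = kmem_cache_create("example_elem", 256, 0, 0, NULL, NULL);
	if (!example_cache)
		return -ENOMEM;

	/* Reserve only two elements; normal allocations come from the slab. */
	example_pool = mempool_create(2, mempool_alloc_slab,
				      mempool_free_slab, example_cache);
	if (!example_pool) {
		kmem_cache_destroy(example_cache);
		return -ENOMEM;
	}
	return 0;
}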
Signed-off-by: Benjamin LaHaise Acked-by: Jens Axboe Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/bio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 1f3bb501c262..8f1d2e815c96 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1243,11 +1243,11 @@ static int __init init_bio(void) scale = 4; /* - * scale number of entries + * Limit number of entries reserved -- mempools are only used when + * the system is completely unable to allocate memory, so we only + * need enough to make progress. */ - bvec_pool_entries = megabytes * 2; - if (bvec_pool_entries > 256) - bvec_pool_entries = 256; + bvec_pool_entries = 1 + scale; fs_bio_set = bioset_create(BIO_POOL_SIZE, bvec_pool_entries, scale); if (!fs_bio_set) -- cgit v1.2.3 From a0646a1f04f1ec4c7514e5b00496b54e054a2c99 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 15:53:03 +0000 Subject: NTFS: Add support for sparse files which have a compression unit of 0. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 1 + fs/ntfs/attrib.c | 25 +++++++++++++------- fs/ntfs/inode.c | 68 ++++++++++++++++++++++++++++++++++++------------------- fs/ntfs/layout.h | 19 ++++++++++------ 4 files changed, 75 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 548d9059a697..b5774233ef1d 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -31,6 +31,7 @@ ToDo/Notes: - Fix comparison of $MFT and $MFTMirr to not bail out when there are unused, invalid mft records which are the same in both $MFT and $MFTMirr. + - Add support for sparse files which have a compression unit of 0. 2.1.26 - Minor bug fixes and updates. diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index a92b9e9db91d..7a568eb7d80f 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1695,7 +1695,9 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) a->data.non_resident.initialized_size = cpu_to_sle64(attr_size); if (NInoSparse(ni) || NInoCompressed(ni)) { - a->data.non_resident.compression_unit = 4; + a->data.non_resident.compression_unit = 0; + if (NInoCompressed(ni) || vol->major_ver < 3) + a->data.non_resident.compression_unit = 4; a->data.non_resident.compressed_size = a->data.non_resident.allocated_size; } else @@ -1714,13 +1716,20 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) ni->allocated_size = new_size; if (NInoSparse(ni) || NInoCompressed(ni)) { ni->itype.compressed.size = ni->allocated_size; - ni->itype.compressed.block_size = 1U << - (a->data.non_resident.compression_unit + - vol->cluster_size_bits); - ni->itype.compressed.block_size_bits = - ffs(ni->itype.compressed.block_size) - 1; - ni->itype.compressed.block_clusters = 1U << - a->data.non_resident.compression_unit; + if (a->data.non_resident.compression_unit) { + ni->itype.compressed.block_size = 1U << (a->data. 
+ non_resident.compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = + ffs(ni->itype.compressed.block_size) - + 1; + ni->itype.compressed.block_clusters = 1U << + a->data.non_resident.compression_unit; + } else { + ni->itype.compressed.block_size = 0; + ni->itype.compressed.block_size_bits = 0; + ni->itype.compressed.block_clusters = 0; + } vi->i_blocks = ni->itype.compressed.size >> 9; } else vi->i_blocks = ni->allocated_size >> 9; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 55263b7de9c0..ae341922f423 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1,7 +1,7 @@ /** * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2006 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -24,8 +24,10 @@ #include #include #include +#include #include "aops.h" +#include "attrib.h" #include "dir.h" #include "debug.h" #include "inode.h" @@ -1064,10 +1066,10 @@ skip_large_dir_stuff: if (a->non_resident) { NInoSetNonResident(ni); if (NInoCompressed(ni) || NInoSparse(ni)) { - if (a->data.non_resident.compression_unit != - 4) { + if (NInoCompressed(ni) && a->data.non_resident. + compression_unit != 4) { ntfs_error(vi->i_sb, "Found " - "nonstandard " + "non-standard " "compression unit (%u " "instead of 4). " "Cannot handle this.", @@ -1076,16 +1078,26 @@ skip_large_dir_stuff: err = -EOPNOTSUPP; goto unm_err_out; } - ni->itype.compressed.block_clusters = 1U << - a->data.non_resident. - compression_unit; - ni->itype.compressed.block_size = 1U << ( - a->data.non_resident. - compression_unit + - vol->cluster_size_bits); - ni->itype.compressed.block_size_bits = ffs( - ni->itype.compressed. - block_size) - 1; + if (a->data.non_resident.compression_unit) { + ni->itype.compressed.block_size = 1U << + (a->data.non_resident. + compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = + ffs(ni->itype. + compressed. + block_size) - 1; + ni->itype.compressed.block_clusters = + 1U << a->data. + non_resident. + compression_unit; + } else { + ni->itype.compressed.block_size = 0; + ni->itype.compressed.block_size_bits = + 0; + ni->itype.compressed.block_clusters = + 0; + } ni->itype.compressed.size = sle64_to_cpu( a->data.non_resident. compressed_size); @@ -1338,8 +1350,9 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) goto unm_err_out; } if (NInoCompressed(ni) || NInoSparse(ni)) { - if (a->data.non_resident.compression_unit != 4) { - ntfs_error(vi->i_sb, "Found nonstandard " + if (NInoCompressed(ni) && a->data.non_resident. + compression_unit != 4) { + ntfs_error(vi->i_sb, "Found non-standard " "compression unit (%u instead " "of 4). Cannot handle this.", a->data.non_resident. @@ -1347,13 +1360,22 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) err = -EOPNOTSUPP; goto unm_err_out; } - ni->itype.compressed.block_clusters = 1U << - a->data.non_resident.compression_unit; - ni->itype.compressed.block_size = 1U << ( - a->data.non_resident.compression_unit + - vol->cluster_size_bits); - ni->itype.compressed.block_size_bits = ffs( - ni->itype.compressed.block_size) - 1; + if (a->data.non_resident.compression_unit) { + ni->itype.compressed.block_size = 1U << + (a->data.non_resident. 
+ compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = + ffs(ni->itype.compressed. + block_size) - 1; + ni->itype.compressed.block_clusters = 1U << + a->data.non_resident. + compression_unit; + } else { + ni->itype.compressed.block_size = 0; + ni->itype.compressed.block_size_bits = 0; + ni->itype.compressed.block_clusters = 0; + } ni->itype.compressed.size = sle64_to_cpu( a->data.non_resident.compressed_size); } diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index bb408d4dcbb0..f4283e120709 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -769,7 +769,7 @@ typedef struct { compressed. (This effectively limits the compression unit size to be a power of two clusters.) WinNT4 only uses a value of 4. - Sparse files also have this set to 4. */ + Sparse files have this set to 0 on XPSP2. */ /* 35*/ u8 reserved[5]; /* Align to 8-byte boundary. */ /* The sizes below are only used when lowest_vcn is zero, as otherwise it would be difficult to keep them up-to-date.*/ @@ -1076,16 +1076,21 @@ typedef struct { /* 20*/ sle64 last_access_time; /* Time this mft record was last accessed. */ /* 28*/ sle64 allocated_size; /* Byte size of on-disk allocated space - for the data attribute. So for - normal $DATA, this is the + for the unnamed data attribute. So + for normal $DATA, this is the allocated_size from the unnamed $DATA attribute and for compressed and/or sparse $DATA, this is the compressed_size from the unnamed - $DATA attribute. NOTE: This is a - multiple of the cluster size. */ -/* 30*/ sle64 data_size; /* Byte size of actual data in data - attribute. */ + $DATA attribute. For a directory or + other inode without an unnamed $DATA + attribute, this is always 0. NOTE: + This is a multiple of the cluster + size. */ +/* 30*/ sle64 data_size; /* Byte size of actual data in unnamed + data attribute. For a directory or + other inode without an unnamed $DATA + attribute, this is always 0. */ /* 38*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */ /* 3c*/ union { /* 3c*/ struct { -- cgit v1.2.3 From f95c4018fd4b0bdef9b1bcb4eac7056e2a07282a Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 15:59:32 +0000 Subject: NTFS: Remove all the make_bad_inode() calls. This should only be called from read inode and new inode code paths. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 2 ++ fs/ntfs/aops.c | 1 - fs/ntfs/attrib.c | 6 ------ fs/ntfs/file.c | 13 +------------ fs/ntfs/mft.c | 1 + fs/ntfs/mft.h | 5 +---- 6 files changed, 5 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index b5774233ef1d..13e70d4e2fdb 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -32,6 +32,8 @@ ToDo/Notes: unused, invalid mft records which are the same in both $MFT and $MFTMirr. - Add support for sparse files which have a compression unit of 0. + - Remove all the make_bad_inode() calls. This should only be called + from read inode and new inode code paths. 2.1.26 - Minor bug fixes and updates. diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 7c7e313620fa..1cf105b99202 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1530,7 +1530,6 @@ err_out: "error %i.", err); SetPageError(page); NVolSetErrors(ni->vol); - make_bad_inode(vi); } unlock_page(page); if (ctx) diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 7a568eb7d80f..1663f5c3c6aa 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -2438,16 +2438,12 @@ undo_alloc: "chkdsk to recover.", IS_ERR(m) ? 
"restore attribute search context" : "truncate attribute runlist"); - make_bad_inode(vi); - make_bad_inode(VFS_I(base_ni)); NVolSetErrors(vol); } else if (mp_rebuilt) { if (ntfs_attr_record_resize(m, a, attr_len)) { ntfs_error(vol->sb, "Failed to restore attribute " "record in error code path. Run " "chkdsk to recover."); - make_bad_inode(vi); - make_bad_inode(VFS_I(base_ni)); NVolSetErrors(vol); } else /* if (success) */ { if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( @@ -2460,8 +2456,6 @@ undo_alloc: "mapping pairs array in error " "code path. Run chkdsk to " "recover."); - make_bad_inode(vi); - make_bad_inode(VFS_I(base_ni)); NVolSetErrors(vol); } flush_dcache_mft_record_page(ctx->ntfs_ino); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 2e5ba0c535d9..f5d057e4acc2 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1207,8 +1207,6 @@ rl_not_mapped_enoent: "attribute runlist in error code " "path. Run chkdsk to recover the " "lost cluster."); - make_bad_inode(vi); - make_bad_inode(VFS_I(base_ni)); NVolSetErrors(vol); } else /* if (success) */ { status.runlist_merged = 0; @@ -1239,8 +1237,6 @@ rl_not_mapped_enoent: ntfs_error(vol->sb, "Failed to restore attribute " "record in error code path. Run " "chkdsk to recover."); - make_bad_inode(vi); - make_bad_inode(VFS_I(base_ni)); NVolSetErrors(vol); } else /* if (success) */ { if (ntfs_mapping_pairs_build(vol, (u8*)a + @@ -1253,8 +1249,6 @@ rl_not_mapped_enoent: "mapping pairs array in error " "code path. Run chkdsk to " "recover."); - make_bad_inode(vi); - make_bad_inode(VFS_I(base_ni)); NVolSetErrors(vol); } flush_dcache_mft_record_page(ctx->ntfs_ino); @@ -1623,11 +1617,8 @@ err_out: unmap_mft_record(base_ni); ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error " "code %i).", err); - if (err != -ENOMEM) { + if (err != -ENOMEM) NVolSetErrors(ni->vol); - make_bad_inode(VFS_I(base_ni)); - make_bad_inode(vi); - } return err; } @@ -1802,8 +1793,6 @@ err_out: ntfs_error(vi->i_sb, "Resident attribute commit write failed " "with error %i.", err); NVolSetErrors(ni->vol); - make_bad_inode(VFS_I(base_ni)); - make_bad_inode(vi); } if (ctx) ntfs_attr_put_search_ctx(ctx); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 6499aafc2258..7254391b0e50 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -93,6 +93,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) "Run chkdsk.", ni->mft_no); ntfs_unmap_page(page); page = ERR_PTR(-EIO); + NVolSetErrors(vol); } err_out: ni->page = NULL; diff --git a/fs/ntfs/mft.h b/fs/ntfs/mft.h index 407de2cef1d6..639cd1bab08b 100644 --- a/fs/ntfs/mft.h +++ b/fs/ntfs/mft.h @@ -97,10 +97,7 @@ extern int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync); * uptodate. * * On success, clean the mft record and return 0. On error, leave the mft - * record dirty and return -errno. The caller should call make_bad_inode() on - * the base inode to ensure no more access happens to this inode. We do not do - * it here as the caller may want to finish writing other extent mft records - * first to minimize on-disk metadata inconsistencies. + * record dirty and return -errno. */ static inline int write_mft_record(ntfs_inode *ni, MFT_RECORD *m, int sync) { -- cgit v1.2.3 From d4faf636d6f8d8940174e38317161eb08a7a57ec Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 16:05:11 +0000 Subject: NTFS: Limit name length in fs/ntfs/unistr.c::ntfs_nlstoucs() to maximum allowed by NTFS, i.e. 
255 Unicode characters, not including the terminating NULL (which is not stored on disk). Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 3 +++ fs/ntfs/unistr.c | 51 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 13e70d4e2fdb..41d0381be6ea 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -34,6 +34,9 @@ ToDo/Notes: - Add support for sparse files which have a compression unit of 0. - Remove all the make_bad_inode() calls. This should only be called from read inode and new inode code paths. + - Limit name length in fs/ntfs/unistr.c::ntfs_nlstoucs() to maximum + allowed by NTFS, i.e. 255 Unicode characters, not including the + terminating NULL (which is not stored on disk). 2.1.26 - Minor bug fixes and updates. diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index 0ea887fc859c..b123c0fa6bf6 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c @@ -1,7 +1,7 @@ /* * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2006 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -19,6 +19,8 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include + #include "types.h" #include "debug.h" #include "ntfs.h" @@ -242,7 +244,7 @@ int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1, * map dictates, into a little endian, 2-byte Unicode string. * * This function allocates the string and the caller is responsible for - * calling kmem_cache_free(ntfs_name_cache, @outs); when finished with it. + * calling kmem_cache_free(ntfs_name_cache, *@outs); when finished with it. * * On success the function returns the number of Unicode characters written to * the output string *@outs (>= 0), not counting the terminating Unicode NULL @@ -262,37 +264,48 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins, wchar_t wc; int i, o, wc_len; - /* We don't trust outside sources. */ - if (ins) { + /* We do not trust outside sources. 
*/ + if (likely(ins)) { ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS); - if (ucs) { + if (likely(ucs)) { for (i = o = 0; i < ins_len; i += wc_len) { wc_len = nls->char2uni(ins + i, ins_len - i, &wc); - if (wc_len >= 0) { - if (wc) { + if (likely(wc_len >= 0 && + o < NTFS_MAX_NAME_LEN)) { + if (likely(wc)) { ucs[o++] = cpu_to_le16(wc); continue; - } /* else (!wc) */ + } /* else if (!wc) */ break; - } /* else (wc_len < 0) */ - goto conversion_err; + } /* else if (wc_len < 0 || + o >= NTFS_MAX_NAME_LEN) */ + goto name_err; } ucs[o] = 0; *outs = ucs; return o; - } /* else (!ucs) */ - ntfs_error(vol->sb, "Failed to allocate name from " - "ntfs_name_cache!"); + } /* else if (!ucs) */ + ntfs_error(vol->sb, "Failed to allocate buffer for converted " + "name from ntfs_name_cache."); return -ENOMEM; - } /* else (!ins) */ - ntfs_error(NULL, "Received NULL pointer."); + } /* else if (!ins) */ + ntfs_error(vol->sb, "Received NULL pointer."); return -EINVAL; -conversion_err: - ntfs_error(vol->sb, "Name using character set %s contains characters " - "that cannot be converted to Unicode.", nls->charset); +name_err: kmem_cache_free(ntfs_name_cache, ucs); - return -EILSEQ; + if (wc_len < 0) { + ntfs_error(vol->sb, "Name using character set %s contains " + "characters that cannot be converted to " + "Unicode.", nls->charset); + i = -EILSEQ; + } else /* if (o >= NTFS_MAX_NAME_LEN) */ { + ntfs_error(vol->sb, "Name is too long (maximum length for a " + "name on NTFS is %d Unicode characters.", + NTFS_MAX_NAME_LEN); + i = -ENAMETOOLONG; + } + return i; } /** -- cgit v1.2.3 From 2c2c8c1c211c75d0cc9d7642a569ceac1aecd96d Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 16:09:40 +0000 Subject: NTFS: Improve comments on file attribute flags in fs/ntfs/layout.h. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 1 + fs/ntfs/layout.h | 25 ++++++++++++------------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 41d0381be6ea..a3a9d4b97979 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -37,6 +37,7 @@ ToDo/Notes: - Limit name length in fs/ntfs/unistr.c::ntfs_nlstoucs() to maximum allowed by NTFS, i.e. 255 Unicode characters, not including the terminating NULL (which is not stored on disk). + - Improve comments on file attribute flags in fs/ntfs/layout.h. 2.1.26 - Minor bug fixes and updates. diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index f4283e120709..d34b93cb8b48 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -801,13 +801,16 @@ typedef struct { typedef ATTR_RECORD ATTR_REC; /* - * File attribute flags (32-bit). + * File attribute flags (32-bit) appearing in the file_attributes fields of the + * STANDARD_INFORMATION attribute of MFT_RECORDs and the FILENAME_ATTR + * attributes of MFT_RECORDs and directory index entries. + * + * All of the below flags appear in the directory index entries but only some + * appear in the STANDARD_INFORMATION attribute whilst only some others appear + * in the FILENAME_ATTR attribute of MFT_RECORDs. Unless otherwise stated the + * flags appear in all of the above. */ enum { - /* - * The following flags are only present in the STANDARD_INFORMATION - * attribute (in the field file_attributes). - */ FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001), FILE_ATTR_HIDDEN = const_cpu_to_le32(0x00000002), FILE_ATTR_SYSTEM = const_cpu_to_le32(0x00000004), @@ -839,18 +842,14 @@ enum { F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. 
This mask is used to to obtain all flags that are valid for setting. */ /* - * The following flag is only present in the FILE_NAME attribute (in - * the field file_attributes). + * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all + * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION + * attribute of an mft record. */ FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000), /* Note, this is a copy of the corresponding bit from the mft record, telling us whether this is a directory or not, i.e. whether it has an index root attribute or not. */ - /* - * The following flag is present both in the STANDARD_INFORMATION - * attribute and in the FILE_NAME attribute (in the field - * file_attributes). - */ FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000), /* Note, this is a copy of the corresponding bit from the mft record, telling us whether this file has a view index present (eg. object id @@ -891,7 +890,7 @@ typedef struct { Windows this is only updated when accessed if some time delta has passed since the last update. Also, - last access times updates can be + last access time updates can be disabled altogether for speed. */ /* 32*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */ /* 36*/ union { -- cgit v1.2.3 From a778f217328a7391e0919b6463ec7f143851d12d Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 16:18:23 +0000 Subject: NTFS: Fix a bug in fs/ntfs/inode.c::ntfs_read_locked_index_inode() where we forgot to update a temporary variable so loading index inodes which have an index allocation attribute failed. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 3 +++ fs/ntfs/inode.c | 26 +++++++++++--------------- fs/ntfs/mft.c | 5 +---- 3 files changed, 15 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index a3a9d4b97979..5fb74e62f535 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -38,6 +38,9 @@ ToDo/Notes: allowed by NTFS, i.e. 255 Unicode characters, not including the terminating NULL (which is not stored on disk). - Improve comments on file attribute flags in fs/ntfs/layout.h. + - Fix a bug in fs/ntfs/inode.c::ntfs_read_locked_index_inode() where we + forgot to update a temporary variable so loading index inodes which + have an index allocation attribute failed. 2.1.26 - Minor bug fixes and updates. 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index ae341922f423..5f4b23d213b9 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -19,15 +19,19 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include #include -#include -#include +#include +#include #include #include +#include +#include +#include +#include #include "aops.h" #include "attrib.h" +#include "bitmap.h" #include "dir.h" #include "debug.h" #include "inode.h" @@ -1428,7 +1432,6 @@ err_out: "Run chkdsk.", err, vi->i_ino, ni->type, ni->name_len, base_vi->i_ino); make_bad_inode(vi); - make_bad_inode(base_vi); if (err != -ENOMEM) NVolSetErrors(vol); return err; @@ -1613,6 +1616,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) "$INDEX_ALLOCATION attribute."); goto unm_err_out; } + a = ctx->attr; if (!a->non_resident) { ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " "resident."); @@ -2845,11 +2849,8 @@ done: old_bad_out: old_size = -1; bad_out: - if (err != -ENOMEM && err != -EOPNOTSUPP) { - make_bad_inode(vi); - make_bad_inode(VFS_I(base_ni)); + if (err != -ENOMEM && err != -EOPNOTSUPP) NVolSetErrors(vol); - } if (err != -EOPNOTSUPP) NInoSetTruncateFailed(ni); else if (old_size >= 0) @@ -2864,11 +2865,8 @@ out: ntfs_debug("Failed. Returning error code %i.", err); return err; conv_err_out: - if (err != -ENOMEM && err != -EOPNOTSUPP) { - make_bad_inode(vi); - make_bad_inode(VFS_I(base_ni)); + if (err != -ENOMEM && err != -EOPNOTSUPP) NVolSetErrors(vol); - } if (err != -EOPNOTSUPP) NInoSetTruncateFailed(ni); else @@ -3116,9 +3114,7 @@ err_out: "retries later."); mark_inode_dirty(vi); } else { - ntfs_error(vi->i_sb, "Failed (error code %i): Marking inode " - "as bad. You should run chkdsk.", -err); - make_bad_inode(vi); + ntfs_error(vi->i_sb, "Failed (error %i): Run chkdsk.", -err); NVolSetErrors(ni->vol); } return err; diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 7254391b0e50..eb3eb143a32c 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -651,10 +651,7 @@ err_out: * fs/ntfs/aops.c::mark_ntfs_record_dirty(). * * On success, clean the mft record and return 0. On error, leave the mft - * record dirty and return -errno. The caller should call make_bad_inode() on - * the base inode to ensure no more access happens to this inode. We do not do - * it here as the caller may want to finish writing other extent mft records - * first to minimize on-disk metadata inconsistencies. + * record dirty and return -errno. * * NOTE: We always perform synchronous i/o and ignore the @sync parameter. * However, if the mft record has a counterpart in the mft mirror and @sync is -- cgit v1.2.3 From 20fdcf1d543b1285ef8b1c1993a9221f2eda52dc Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 16:21:02 +0000 Subject: NTFS: Add a missing call to flush_dcache_mft_record_page() in fs/ntfs/inode.c::ntfs_write_inode(). Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 2 ++ fs/ntfs/inode.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 5fb74e62f535..d20031587bbb 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -41,6 +41,8 @@ ToDo/Notes: - Fix a bug in fs/ntfs/inode.c::ntfs_read_locked_index_inode() where we forgot to update a temporary variable so loading index inodes which have an index allocation attribute failed. + - Add a missing call to flush_dcache_mft_record_page() in + fs/ntfs/inode.c::ntfs_write_inode(). 2.1.26 - Minor bug fixes and updates. 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 5f4b23d213b9..73791b2d9495 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -3064,9 +3064,12 @@ int ntfs_write_inode(struct inode *vi, int sync) * record will be cleaned and written out to disk below, i.e. before * this function returns. */ - if (modified && !NInoTestSetDirty(ctx->ntfs_ino)) - mark_ntfs_record_dirty(ctx->ntfs_ino->page, - ctx->ntfs_ino->page_ofs); + if (modified) { + flush_dcache_mft_record_page(ctx->ntfs_ino); + if (!NInoTestSetDirty(ctx->ntfs_ino)) { + mark_ntfs_record_dirty(ctx->ntfs_ino->page, + ctx->ntfs_ino->page_ofs); + } ntfs_attr_put_search_ctx(ctx); /* Now the access times are updated, write the base mft record. */ if (NInoDirty(ni)) -- cgit v1.2.3 From 834ba600cefe6847acaebe5e8e984476dfeebf55 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 16:25:23 +0000 Subject: NTFS: Handle the recently introduced -ENAMETOOLONG return value from fs/ntfs/unistr.c::ntfs_nlstoucs() in fs/ntfs/namei.c::ntfs_lookup(). Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 4 ++-- fs/ntfs/namei.c | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index d20031587bbb..9fb08ef3a7f8 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -16,8 +16,6 @@ ToDo/Notes: inode having been discarded already. Whether this can actually ever happen is unclear however so it is worth waiting until someone hits the problem. - - Enable the code for setting the NT4 compatibility flag when we start - making NTFS 1.2 specific modifications. 2.1.27 - Various bug fixes and cleanups. @@ -43,6 +41,8 @@ ToDo/Notes: have an index allocation attribute failed. - Add a missing call to flush_dcache_mft_record_page() in fs/ntfs/inode.c::ntfs_write_inode(). + - Handle the recently introduced -ENAMETOOLONG return value from + fs/ntfs/unistr.c::ntfs_nlstoucs() in fs/ntfs/namei.c::ntfs_lookup(). 2.1.26 - Minor bug fixes and updates. diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 78e0cf738e24..eddb2247cec5 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -115,7 +115,9 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len, &uname); if (uname_len < 0) { - ntfs_error(vol->sb, "Failed to convert name to Unicode."); + if (uname_len != -ENAMETOOLONG) + ntfs_error(vol->sb, "Failed to convert name to " + "Unicode."); return ERR_PTR(uname_len); } mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len, @@ -157,7 +159,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, /* Return the error code. */ return (struct dentry *)dent_inode; } - /* It is guaranteed that name is no longer allocated at this point. */ + /* It is guaranteed that @name is no longer allocated at this point. */ if (MREF_ERR(mref) == -ENOENT) { ntfs_debug("Entry was not found, adding negative dentry."); /* The dcache will handle negative entries. */ @@ -168,7 +170,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, ntfs_error(vol->sb, "ntfs_lookup_ino_by_name() failed with error " "code %i.", -MREF_ERR(mref)); return ERR_PTR(MREF_ERR(mref)); - // TODO: Consider moving this lot to a separate function! (AIA) handle_name: { -- cgit v1.2.3 From 4e5e529ad684f1b3fba957f5dd4eb7c2b534ee92 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 16:57:48 +0000 Subject: NTFS: Semaphore to mutex conversion. 
The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 1 + fs/ntfs/aops.c | 6 +++--- fs/ntfs/compress.c | 4 ++-- fs/ntfs/inode.c | 10 ++++----- fs/ntfs/inode.h | 13 ++++++------ fs/ntfs/mft.c | 62 +++++++++++++++++++++++++++--------------------------- fs/ntfs/ntfs.h | 2 +- fs/ntfs/super.c | 42 ++++++++++++++++++------------------ 8 files changed, 71 insertions(+), 69 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 9fb08ef3a7f8..35cc4b1d60f7 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -43,6 +43,7 @@ ToDo/Notes: fs/ntfs/inode.c::ntfs_write_inode(). - Handle the recently introduced -ENAMETOOLONG return value from fs/ntfs/unistr.c::ntfs_nlstoucs() in fs/ntfs/namei.c::ntfs_lookup(). + - Semaphore to mutex conversion. (Ingo Molnar) 2.1.26 - Minor bug fixes and updates. diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 1cf105b99202..580412d330cb 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1278,18 +1278,18 @@ unm_done: tni = locked_nis[nr_locked_nis]; /* Get the base inode. */ - down(&tni->extent_lock); + mutex_lock(&tni->extent_lock); if (tni->nr_extents >= 0) base_tni = tni; else { base_tni = tni->ext.base_ntfs_ino; BUG_ON(!base_tni); } - up(&tni->extent_lock); + mutex_unlock(&tni->extent_lock); ntfs_debug("Unlocking %s inode 0x%lx.", tni == base_tni ? "base" : "extent", tni->mft_no); - up(&tni->mrec_lock); + mutex_unlock(&tni->mrec_lock); atomic_dec(&tni->count); iput(VFS_I(base_tni)); } diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 25d24106f893..68a607ff9fd3 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -67,7 +67,7 @@ static DEFINE_SPINLOCK(ntfs_cb_lock); /** * allocate_compression_buffers - allocate the decompression buffers * - * Caller has to hold the ntfs_lock semaphore. + * Caller has to hold the ntfs_lock mutex. * * Return 0 on success or -ENOMEM if the allocations failed. */ @@ -84,7 +84,7 @@ int allocate_compression_buffers(void) /** * free_compression_buffers - free the decompression buffers * - * Caller has to hold the ntfs_lock semaphore. + * Caller has to hold the ntfs_lock mutex. */ void free_compression_buffers(void) { diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 73791b2d9495..4c86b7e1d1eb 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -388,7 +388,7 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) atomic_set(&ni->count, 1); ni->vol = NTFS_SB(sb); ntfs_init_runlist(&ni->runlist); - init_MUTEX(&ni->mrec_lock); + mutex_init(&ni->mrec_lock); ni->page = NULL; ni->page_ofs = 0; ni->attr_list_size = 0; @@ -400,7 +400,7 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) ni->itype.index.collation_rule = 0; ni->itype.index.block_size_bits = 0; ni->itype.index.vcn_size_bits = 0; - init_MUTEX(&ni->extent_lock); + mutex_init(&ni->extent_lock); ni->nr_extents = 0; ni->ext.base_ntfs_ino = NULL; } @@ -3066,7 +3066,7 @@ int ntfs_write_inode(struct inode *vi, int sync) */ if (modified) { flush_dcache_mft_record_page(ctx->ntfs_ino); - if (!NInoTestSetDirty(ctx->ntfs_ino)) { + if (!NInoTestSetDirty(ctx->ntfs_ino)) mark_ntfs_record_dirty(ctx->ntfs_ino->page, ctx->ntfs_ino->page_ofs); } @@ -3075,7 +3075,7 @@ int ntfs_write_inode(struct inode *vi, int sync) if (NInoDirty(ni)) err = write_mft_record(ni, m, sync); /* Write all attached extent mft records. 
*/ - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); if (ni->nr_extents > 0) { ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos; int i; @@ -3102,7 +3102,7 @@ int ntfs_write_inode(struct inode *vi, int sync) } } } - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); unmap_mft_record(ni); if (unlikely(err)) goto err_out; diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 3de5c0231966..f088291e017c 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -24,12 +24,13 @@ #ifndef _LINUX_NTFS_INODE_H #define _LINUX_NTFS_INODE_H -#include +#include + #include -#include #include -#include -#include +#include +#include +#include #include "layout.h" #include "volume.h" @@ -81,7 +82,7 @@ struct _ntfs_inode { * The following fields are only valid for real inodes and extent * inodes. */ - struct semaphore mrec_lock; /* Lock for serializing access to the + struct mutex mrec_lock; /* Lock for serializing access to the mft record belonging to this inode. */ struct page *page; /* The page containing the mft record of the inode. This should only be touched by the @@ -119,7 +120,7 @@ struct _ntfs_inode { u8 block_clusters; /* Number of clusters per cb. */ } compressed; } itype; - struct semaphore extent_lock; /* Lock for accessing/modifying the + struct mutex extent_lock; /* Lock for accessing/modifying the below . */ s32 nr_extents; /* For a base mft record, the number of attached extent inodes (0 if none), for extent records and for fake diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index eb3eb143a32c..4e72bc7afdf9 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -105,8 +105,8 @@ err_out: * map_mft_record - map, pin and lock an mft record * @ni: ntfs inode whose MFT record to map * - * First, take the mrec_lock semaphore. We might now be sleeping, while waiting - * for the semaphore if it was already locked by someone else. + * First, take the mrec_lock mutex. We might now be sleeping, while waiting + * for the mutex if it was already locked by someone else. * * The page of the record is mapped using map_mft_record_page() before being * returned to the caller. @@ -136,9 +136,9 @@ err_out: * So that code will end up having to own the mrec_lock of all mft * records/inodes present in the page before I/O can proceed. In that case we * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be - * accessing anything without owning the mrec_lock semaphore. But we do need - * to use them because of the read_cache_page() invocation and the code becomes - * so much simpler this way that it is well worth it. + * accessing anything without owning the mrec_lock mutex. But we do need to + * use them because of the read_cache_page() invocation and the code becomes so + * much simpler this way that it is well worth it. * * The mft record is now ours and we return a pointer to it. You need to check * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return @@ -161,13 +161,13 @@ MFT_RECORD *map_mft_record(ntfs_inode *ni) atomic_inc(&ni->count); /* Serialize access to this mft record. 
*/ - down(&ni->mrec_lock); + mutex_lock(&ni->mrec_lock); m = map_mft_record_page(ni); if (likely(!IS_ERR(m))) return m; - up(&ni->mrec_lock); + mutex_unlock(&ni->mrec_lock); atomic_dec(&ni->count); ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m)); return m; @@ -218,7 +218,7 @@ void unmap_mft_record(ntfs_inode *ni) ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); unmap_mft_record_page(ni); - up(&ni->mrec_lock); + mutex_unlock(&ni->mrec_lock); atomic_dec(&ni->count); /* * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to @@ -262,7 +262,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, * in which case just return it. If not found, add it to the base * inode before returning it. */ - down(&base_ni->extent_lock); + mutex_lock(&base_ni->extent_lock); if (base_ni->nr_extents > 0) { extent_nis = base_ni->ext.extent_ntfs_inos; for (i = 0; i < base_ni->nr_extents; i++) { @@ -275,7 +275,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, } } if (likely(ni != NULL)) { - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); /* We found the record; just have to map and return it. */ m = map_mft_record(ni); @@ -302,7 +302,7 @@ map_err_out: /* Record wasn't there. Get a new ntfs inode and initialize it. */ ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no); if (unlikely(!ni)) { - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); return ERR_PTR(-ENOMEM); } @@ -313,7 +313,7 @@ map_err_out: /* Now map the record. */ m = map_mft_record(ni); if (IS_ERR(m)) { - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); ntfs_clear_extent_inode(ni); goto map_err_out; @@ -348,14 +348,14 @@ map_err_out: base_ni->ext.extent_ntfs_inos = tmp; } base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni; - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); ntfs_debug("Done 2."); *ntfs_ino = ni; return m; unm_err_out: unmap_mft_record(ni); - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); /* * If the extent inode was not attached to the base inode we need to @@ -400,12 +400,12 @@ void __mark_mft_record_dirty(ntfs_inode *ni) BUG_ON(NInoAttr(ni)); mark_ntfs_record_dirty(ni->page, ni->page_ofs); /* Determine the base vfs inode and mark it dirty, too. */ - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); if (likely(ni->nr_extents >= 0)) base_ni = ni; else base_ni = ni->ext.base_ntfs_ino; - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC); } @@ -981,7 +981,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, } ntfs_debug("Inode 0x%lx is not dirty.", mft_no); /* The inode is not dirty, try to take the mft record lock. */ - if (unlikely(down_trylock(&ni->mrec_lock))) { + if (unlikely(!mutex_trylock(&ni->mrec_lock))) { ntfs_debug("Mft record 0x%lx is already locked, do " "not write it.", mft_no); atomic_dec(&ni->count); @@ -1041,13 +1041,13 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * corresponding to this extent mft record attached. */ ni = NTFS_I(vi); - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); if (ni->nr_extents <= 0) { /* * The base inode has no attached extent inodes, write this * extent mft record. 
*/ - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); iput(vi); ntfs_debug("Base inode 0x%lx has no attached extent inodes, " "write the extent record.", na.mft_no); @@ -1070,7 +1070,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * extent mft record. */ if (!eni) { - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); iput(vi); ntfs_debug("Extent inode 0x%lx is not attached to its base " "inode 0x%lx, write the extent record.", @@ -1081,12 +1081,12 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, mft_no, na.mft_no); /* Take a reference to the extent ntfs inode. */ atomic_inc(&eni->count); - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); /* * Found the extent inode coresponding to this extent mft record. * Try to take the mft record lock. */ - if (unlikely(down_trylock(&eni->mrec_lock))) { + if (unlikely(!mutex_trylock(&eni->mrec_lock))) { atomic_dec(&eni->count); iput(vi); ntfs_debug("Extent mft record 0x%lx is already locked, do " @@ -2709,7 +2709,7 @@ mft_rec_already_initialized: * have its page mapped and it is very easy to do. */ atomic_inc(&ni->count); - down(&ni->mrec_lock); + mutex_lock(&ni->mrec_lock); ni->page = page; ni->page_ofs = ofs; /* @@ -2796,22 +2796,22 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) BUG_ON(NInoAttr(ni)); BUG_ON(ni->nr_extents != -1); - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); base_ni = ni->ext.base_ntfs_ino; - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); BUG_ON(base_ni->nr_extents <= 0); ntfs_debug("Entering for extent inode 0x%lx, base inode 0x%lx.\n", mft_no, base_ni->mft_no); - down(&base_ni->extent_lock); + mutex_lock(&base_ni->extent_lock); /* Make sure we are holding the only reference to the extent inode. */ if (atomic_read(&ni->count) > 2) { ntfs_error(vol->sb, "Tried to free busy extent inode 0x%lx, " "not freeing.", base_ni->mft_no); - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); return -EBUSY; } @@ -2829,7 +2829,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) break; } - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); if (unlikely(err)) { ntfs_error(vol->sb, "Extent inode 0x%lx is not attached to " @@ -2888,7 +2888,7 @@ rollback_error: return 0; rollback: /* Rollback what we did... 
*/ - down(&base_ni->extent_lock); + mutex_lock(&base_ni->extent_lock); extent_nis = base_ni->ext.extent_ntfs_inos; if (!(base_ni->nr_extents & 3)) { int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*); @@ -2897,7 +2897,7 @@ rollback: if (unlikely(!extent_nis)) { ntfs_error(vol->sb, "Failed to allocate internal " "buffer during rollback.%s", es); - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); NVolSetErrors(vol); goto rollback_error; } @@ -2912,7 +2912,7 @@ rollback: m->flags |= MFT_RECORD_IN_USE; m->sequence_number = old_seq_no; extent_nis[base_ni->nr_extents++] = ni; - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); mark_mft_record_dirty(ni); return err; } diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index 653d2a5c4899..0624c8ef4d9c 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h @@ -91,7 +91,7 @@ extern void free_compression_buffers(void); /* From fs/ntfs/super.c */ #define default_upcase_len 0x10000 -extern struct semaphore ntfs_lock; +extern struct mutex ntfs_lock; typedef struct { int val; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index fd4aecc5548e..6816edafe4dd 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1677,11 +1677,11 @@ read_partial_upcase_page: ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).", i_size, 64 * 1024 * sizeof(ntfschar)); iput(ino); - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (!default_upcase) { ntfs_debug("Using volume specified $UpCase since default is " "not present."); - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); return TRUE; } max = default_upcase_len; @@ -1695,12 +1695,12 @@ read_partial_upcase_page: vol->upcase = default_upcase; vol->upcase_len = max; ntfs_nr_upcase_users++; - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); ntfs_debug("Volume specified $UpCase matches default. Using " "default."); return TRUE; } - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); ntfs_debug("Using volume specified $UpCase since it does not match " "the default."); return TRUE; @@ -1709,17 +1709,17 @@ iput_upcase_failed: ntfs_free(vol->upcase); vol->upcase = NULL; upcase_failed: - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (default_upcase) { vol->upcase = default_upcase; vol->upcase_len = default_upcase_len; ntfs_nr_upcase_users++; - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); ntfs_error(sb, "Failed to load $UpCase from the volume. Using " "default."); return TRUE; } - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); ntfs_error(sb, "Failed to initialize upcase table."); return FALSE; } @@ -2195,12 +2195,12 @@ iput_attrdef_err_out: iput_upcase_err_out: #endif /* NTFS_RW */ vol->upcase_len = 0; - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (vol->upcase == default_upcase) { ntfs_nr_upcase_users--; vol->upcase = NULL; } - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); if (vol->upcase) { ntfs_free(vol->upcase); vol->upcase = NULL; @@ -2405,7 +2405,7 @@ static void ntfs_put_super(struct super_block *sb) * Destroy the global default upcase table if necessary. Also decrease * the number of upcase users if we are a user. 
*/ - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (vol->upcase == default_upcase) { ntfs_nr_upcase_users--; vol->upcase = NULL; @@ -2416,7 +2416,7 @@ static void ntfs_put_super(struct super_block *sb) } if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users) free_compression_buffers(); - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); if (vol->upcase) { ntfs_free(vol->upcase); vol->upcase = NULL; @@ -2890,7 +2890,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) ntfs_error(sb, "Failed to load essential metadata."); goto iput_tmp_ino_err_out_now; } - down(&ntfs_lock); + mutex_lock(&ntfs_lock); /* * The current mount is a compression user if the cluster size is * less than or equal 4kiB. @@ -2901,7 +2901,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) ntfs_error(NULL, "Failed to allocate buffers " "for compression engine."); ntfs_nr_compression_users--; - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); goto iput_tmp_ino_err_out_now; } } @@ -2913,7 +2913,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) if (!default_upcase) default_upcase = generate_default_upcase(); ntfs_nr_upcase_users++; - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); /* * From now on, ignore @silent parameter. If we fail below this line, * it will be due to a corrupt fs or a system error, so we report it. @@ -2931,12 +2931,12 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) atomic_inc(&vol->root_ino->i_count); ntfs_debug("Exiting, status successful."); /* Release the default upcase if it has no users. */ - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (!--ntfs_nr_upcase_users && default_upcase) { ntfs_free(default_upcase); default_upcase = NULL; } - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); sb->s_export_op = &ntfs_export_ops; lock_kernel(); return 0; @@ -3004,12 +3004,12 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) vol->attrdef = NULL; } vol->upcase_len = 0; - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (vol->upcase == default_upcase) { ntfs_nr_upcase_users--; vol->upcase = NULL; } - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); if (vol->upcase) { ntfs_free(vol->upcase); vol->upcase = NULL; @@ -3024,14 +3024,14 @@ unl_upcase_iput_tmp_ino_err_out_now: * Decrease the number of upcase users and destroy the global default * upcase table if necessary. */ - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (!--ntfs_nr_upcase_users && default_upcase) { ntfs_free(default_upcase); default_upcase = NULL; } if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users) free_compression_buffers(); - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); iput_tmp_ino_err_out_now: iput(tmp_ino); if (vol->mft_ino && vol->mft_ino != tmp_ino) @@ -3091,7 +3091,7 @@ struct kmem_cache *ntfs_attr_ctx_cache; struct kmem_cache *ntfs_index_ctx_cache; /* Driver wide semaphore. */ -DECLARE_MUTEX(ntfs_lock); +DEFINE_MUTEX(ntfs_lock); static struct super_block *ntfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) -- cgit v1.2.3 From e750d1c7cc314b9ba1934b0b474b7d39f906f865 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 23 Mar 2006 17:04:12 +0000 Subject: NTFS: 2.1.27 - Various bug fixes and cleanups. 
Signed-off-by: Anton Altaparmakov --- Documentation/filesystems/ntfs.txt | 5 +++++ fs/ntfs/super.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt index 251168587899..638cbd3d2b00 100644 --- a/Documentation/filesystems/ntfs.txt +++ b/Documentation/filesystems/ntfs.txt @@ -457,6 +457,11 @@ ChangeLog Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog. +2.1.27: + - Implement page migration support so the kernel can move memory used + by NTFS files and directories around for management purposes. + - Add support for writing to sparse files created with Windows XP SP2. + - Many minor improvements and bug fixes. 2.1.26: - Implement support for sector sizes above 512 bytes (up to the maximum supported by NTFS which is 4096 bytes). diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 6816edafe4dd..7646b5059389 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3090,7 +3090,7 @@ static void ntfs_big_inode_init_once(void *foo, struct kmem_cache *cachep, struct kmem_cache *ntfs_attr_ctx_cache; struct kmem_cache *ntfs_index_ctx_cache; -/* Driver wide semaphore. */ +/* Driver wide mutex. */ DEFINE_MUTEX(ntfs_lock); static struct super_block *ntfs_get_sb(struct file_system_type *fs_type, -- cgit v1.2.3 From b86ff981a8252d83d6a7719ae09f3a05307e3592 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 23 Mar 2006 19:56:55 +0100 Subject: [PATCH] relay: migrate from relayfs to a generic relay API Original patch from Paul Mundt, sysfs parts removed by me since they were broken. Signed-off-by: Jens Axboe --- fs/Kconfig | 12 - fs/Makefile | 1 - fs/relayfs/Makefile | 4 - fs/relayfs/buffers.c | 190 ----------- fs/relayfs/buffers.h | 12 - fs/relayfs/inode.c | 581 ------------------------------- fs/relayfs/relay.c | 482 -------------------------- fs/relayfs/relay.h | 8 - include/linux/relay.h | 281 +++++++++++++++ init/Kconfig | 11 + kernel/Makefile | 1 + kernel/relay.c | 919 ++++++++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 1212 insertions(+), 1290 deletions(-) delete mode 100644 fs/relayfs/Makefile delete mode 100644 fs/relayfs/buffers.c delete mode 100644 fs/relayfs/buffers.h delete mode 100644 fs/relayfs/inode.c delete mode 100644 fs/relayfs/relay.c delete mode 100644 fs/relayfs/relay.h create mode 100644 include/linux/relay.h create mode 100644 kernel/relay.c (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index e9749b0eecd8..c8d0a209120c 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -859,18 +859,6 @@ config RAMFS To compile this as a module, choose M here: the module will be called ramfs. -config RELAYFS_FS - tristate "Relayfs file system support" - ---help--- - Relayfs is a high-speed data relay filesystem designed to provide - an efficient mechanism for tools and facilities to relay large - amounts of data from kernel space to user space. - - To compile this code as a module, choose M here: the module will be - called relayfs. - - If unsure, say N. 
- config CONFIGFS_FS tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)" depends on EXPERIMENTAL diff --git a/fs/Makefile b/fs/Makefile index 1db711319c80..080b3867be4d 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -91,7 +91,6 @@ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ obj-$(CONFIG_UDF_FS) += udf/ -obj-$(CONFIG_RELAYFS_FS) += relayfs/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_JFS_FS) += jfs/ obj-$(CONFIG_XFS_FS) += xfs/ diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile deleted file mode 100644 index e76e182cdb38..000000000000 --- a/fs/relayfs/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -obj-$(CONFIG_RELAYFS_FS) += relayfs.o - -relayfs-y := relay.o inode.o buffers.o - diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c deleted file mode 100644 index 10187812771e..000000000000 --- a/fs/relayfs/buffers.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * RelayFS buffer management code. - * - * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp - * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) - * - * This file is released under the GPL. - */ - -#include -#include -#include -#include -#include "relay.h" -#include "buffers.h" - -/* - * close() vm_op implementation for relayfs file mapping. - */ -static void relay_file_mmap_close(struct vm_area_struct *vma) -{ - struct rchan_buf *buf = vma->vm_private_data; - buf->chan->cb->buf_unmapped(buf, vma->vm_file); -} - -/* - * nopage() vm_op implementation for relayfs file mapping. - */ -static struct page *relay_buf_nopage(struct vm_area_struct *vma, - unsigned long address, - int *type) -{ - struct page *page; - struct rchan_buf *buf = vma->vm_private_data; - unsigned long offset = address - vma->vm_start; - - if (address > vma->vm_end) - return NOPAGE_SIGBUS; /* Disallow mremap */ - if (!buf) - return NOPAGE_OOM; - - page = vmalloc_to_page(buf->start + offset); - if (!page) - return NOPAGE_OOM; - get_page(page); - - if (type) - *type = VM_FAULT_MINOR; - - return page; -} - -/* - * vm_ops for relay file mappings. - */ -static struct vm_operations_struct relay_file_mmap_ops = { - .nopage = relay_buf_nopage, - .close = relay_file_mmap_close, -}; - -/** - * relay_mmap_buf: - mmap channel buffer to process address space - * @buf: relay channel buffer - * @vma: vm_area_struct describing memory to be mapped - * - * Returns 0 if ok, negative on error - * - * Caller should already have grabbed mmap_sem. 
- */ -int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) -{ - unsigned long length = vma->vm_end - vma->vm_start; - struct file *filp = vma->vm_file; - - if (!buf) - return -EBADF; - - if (length != (unsigned long)buf->chan->alloc_size) - return -EINVAL; - - vma->vm_ops = &relay_file_mmap_ops; - vma->vm_private_data = buf; - buf->chan->cb->buf_mapped(buf, filp); - - return 0; -} - -/** - * relay_alloc_buf - allocate a channel buffer - * @buf: the buffer struct - * @size: total size of the buffer - * - * Returns a pointer to the resulting buffer, NULL if unsuccessful - */ -static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size) -{ - void *mem; - unsigned int i, j, n_pages; - - size = PAGE_ALIGN(size); - n_pages = size >> PAGE_SHIFT; - - buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); - if (!buf->page_array) - return NULL; - - for (i = 0; i < n_pages; i++) { - buf->page_array[i] = alloc_page(GFP_KERNEL); - if (unlikely(!buf->page_array[i])) - goto depopulate; - } - mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL); - if (!mem) - goto depopulate; - - memset(mem, 0, size); - buf->page_count = n_pages; - return mem; - -depopulate: - for (j = 0; j < i; j++) - __free_page(buf->page_array[j]); - kfree(buf->page_array); - return NULL; -} - -/** - * relay_create_buf - allocate and initialize a channel buffer - * @alloc_size: size of the buffer to allocate - * @n_subbufs: number of sub-buffers in the channel - * - * Returns channel buffer if successful, NULL otherwise - */ -struct rchan_buf *relay_create_buf(struct rchan *chan) -{ - struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL); - if (!buf) - return NULL; - - buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL); - if (!buf->padding) - goto free_buf; - - buf->start = relay_alloc_buf(buf, chan->alloc_size); - if (!buf->start) - goto free_buf; - - buf->chan = chan; - kref_get(&buf->chan->kref); - return buf; - -free_buf: - kfree(buf->padding); - kfree(buf); - return NULL; -} - -/** - * relay_destroy_buf - destroy an rchan_buf struct and associated buffer - * @buf: the buffer struct - */ -void relay_destroy_buf(struct rchan_buf *buf) -{ - struct rchan *chan = buf->chan; - unsigned int i; - - if (likely(buf->start)) { - vunmap(buf->start); - for (i = 0; i < buf->page_count; i++) - __free_page(buf->page_array[i]); - kfree(buf->page_array); - } - kfree(buf->padding); - kfree(buf); - kref_put(&chan->kref, relay_destroy_channel); -} - -/** - * relay_remove_buf - remove a channel buffer - * - * Removes the file from the relayfs fileystem, which also frees the - * rchan_buf_struct and the channel buffer. Should only be called from - * kref_put(). 
- */ -void relay_remove_buf(struct kref *kref) -{ - struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); - buf->chan->cb->remove_buf_file(buf->dentry); - relay_destroy_buf(buf); -} diff --git a/fs/relayfs/buffers.h b/fs/relayfs/buffers.h deleted file mode 100644 index 37a12493f641..000000000000 --- a/fs/relayfs/buffers.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _BUFFERS_H -#define _BUFFERS_H - -/* This inspired by rtai/shmem */ -#define FIX_SIZE(x) (((x) - 1) & PAGE_MASK) + PAGE_SIZE - -extern int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma); -extern struct rchan_buf *relay_create_buf(struct rchan *chan); -extern void relay_destroy_buf(struct rchan_buf *buf); -extern void relay_remove_buf(struct kref *kref); - -#endif/* _BUFFERS_H */ diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c deleted file mode 100644 index 383523011aad..000000000000 --- a/fs/relayfs/inode.c +++ /dev/null @@ -1,581 +0,0 @@ -/* - * VFS-related code for RelayFS, a high-speed data relay filesystem. - * - * Copyright (C) 2003-2005 - Tom Zanussi , IBM Corp - * Copyright (C) 2003-2005 - Karim Yaghmour - * - * Based on ramfs, Copyright (C) 2002 - Linus Torvalds - * - * This file is released under the GPL. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "relay.h" -#include "buffers.h" - -#define RELAYFS_MAGIC 0xF0B4A981 - -static struct vfsmount * relayfs_mount; -static int relayfs_mount_count; - -static struct backing_dev_info relayfs_backing_dev_info = { - .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, -}; - -static struct inode *relayfs_get_inode(struct super_block *sb, - int mode, - struct file_operations *fops, - void *data) -{ - struct inode *inode; - - inode = new_inode(sb); - if (!inode) - return NULL; - - inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; - inode->i_blocks = 0; - inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - switch (mode & S_IFMT) { - case S_IFREG: - inode->i_fop = fops; - if (data) - inode->u.generic_ip = data; - break; - case S_IFDIR: - inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; - - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inode->i_nlink++; - break; - default: - break; - } - - return inode; -} - -/** - * relayfs_create_entry - create a relayfs directory or file - * @name: the name of the file to create - * @parent: parent directory - * @mode: mode - * @fops: file operations to use for the file - * @data: user-associated data for this file - * - * Returns the new dentry, NULL on failure - * - * Creates a file or directory with the specifed permissions. 
- */ -static struct dentry *relayfs_create_entry(const char *name, - struct dentry *parent, - int mode, - struct file_operations *fops, - void *data) -{ - struct dentry *d; - struct inode *inode; - int error = 0; - - BUG_ON(!name || !(S_ISREG(mode) || S_ISDIR(mode))); - - error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count); - if (error) { - printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error); - return NULL; - } - - if (!parent && relayfs_mount && relayfs_mount->mnt_sb) - parent = relayfs_mount->mnt_sb->s_root; - - if (!parent) { - simple_release_fs(&relayfs_mount, &relayfs_mount_count); - return NULL; - } - - parent = dget(parent); - mutex_lock(&parent->d_inode->i_mutex); - d = lookup_one_len(name, parent, strlen(name)); - if (IS_ERR(d)) { - d = NULL; - goto release_mount; - } - - if (d->d_inode) { - d = NULL; - goto release_mount; - } - - inode = relayfs_get_inode(parent->d_inode->i_sb, mode, fops, data); - if (!inode) { - d = NULL; - goto release_mount; - } - - d_instantiate(d, inode); - dget(d); /* Extra count - pin the dentry in core */ - - if (S_ISDIR(mode)) - parent->d_inode->i_nlink++; - - goto exit; - -release_mount: - simple_release_fs(&relayfs_mount, &relayfs_mount_count); - -exit: - mutex_unlock(&parent->d_inode->i_mutex); - dput(parent); - return d; -} - -/** - * relayfs_create_file - create a file in the relay filesystem - * @name: the name of the file to create - * @parent: parent directory - * @mode: mode, if not specied the default perms are used - * @fops: file operations to use for the file - * @data: user-associated data for this file - * - * Returns file dentry if successful, NULL otherwise. - * - * The file will be created user r on behalf of current user. - */ -struct dentry *relayfs_create_file(const char *name, - struct dentry *parent, - int mode, - struct file_operations *fops, - void *data) -{ - BUG_ON(!fops); - - if (!mode) - mode = S_IRUSR; - mode = (mode & S_IALLUGO) | S_IFREG; - - return relayfs_create_entry(name, parent, mode, fops, data); -} - -/** - * relayfs_create_dir - create a directory in the relay filesystem - * @name: the name of the directory to create - * @parent: parent directory, NULL if parent should be fs root - * - * Returns directory dentry if successful, NULL otherwise. - * - * The directory will be created world rwx on behalf of current user. - */ -struct dentry *relayfs_create_dir(const char *name, struct dentry *parent) -{ - int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; - return relayfs_create_entry(name, parent, mode, NULL, NULL); -} - -/** - * relayfs_remove - remove a file or directory in the relay filesystem - * @dentry: file or directory dentry - * - * Returns 0 if successful, negative otherwise. - */ -int relayfs_remove(struct dentry *dentry) -{ - struct dentry *parent; - int error = 0; - - if (!dentry) - return -EINVAL; - parent = dentry->d_parent; - if (!parent) - return -EINVAL; - - parent = dget(parent); - mutex_lock(&parent->d_inode->i_mutex); - if (dentry->d_inode) { - if (S_ISDIR(dentry->d_inode->i_mode)) - error = simple_rmdir(parent->d_inode, dentry); - else - error = simple_unlink(parent->d_inode, dentry); - if (!error) - d_delete(dentry); - } - if (!error) - dput(dentry); - mutex_unlock(&parent->d_inode->i_mutex); - dput(parent); - - if (!error) - simple_release_fs(&relayfs_mount, &relayfs_mount_count); - - return error; -} - -/** - * relayfs_remove_file - remove a file from relay filesystem - * @dentry: directory dentry - * - * Returns 0 if successful, negative otherwise. 
- */ -int relayfs_remove_file(struct dentry *dentry) -{ - return relayfs_remove(dentry); -} - -/** - * relayfs_remove_dir - remove a directory in the relay filesystem - * @dentry: directory dentry - * - * Returns 0 if successful, negative otherwise. - */ -int relayfs_remove_dir(struct dentry *dentry) -{ - return relayfs_remove(dentry); -} - -/** - * relay_file_open - open file op for relay files - * @inode: the inode - * @filp: the file - * - * Increments the channel buffer refcount. - */ -static int relay_file_open(struct inode *inode, struct file *filp) -{ - struct rchan_buf *buf = inode->u.generic_ip; - kref_get(&buf->kref); - filp->private_data = buf; - - return 0; -} - -/** - * relay_file_mmap - mmap file op for relay files - * @filp: the file - * @vma: the vma describing what to map - * - * Calls upon relay_mmap_buf to map the file into user space. - */ -static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct rchan_buf *buf = filp->private_data; - return relay_mmap_buf(buf, vma); -} - -/** - * relay_file_poll - poll file op for relay files - * @filp: the file - * @wait: poll table - * - * Poll implemention. - */ -static unsigned int relay_file_poll(struct file *filp, poll_table *wait) -{ - unsigned int mask = 0; - struct rchan_buf *buf = filp->private_data; - - if (buf->finalized) - return POLLERR; - - if (filp->f_mode & FMODE_READ) { - poll_wait(filp, &buf->read_wait, wait); - if (!relay_buf_empty(buf)) - mask |= POLLIN | POLLRDNORM; - } - - return mask; -} - -/** - * relay_file_release - release file op for relay files - * @inode: the inode - * @filp: the file - * - * Decrements the channel refcount, as the filesystem is - * no longer using it. - */ -static int relay_file_release(struct inode *inode, struct file *filp) -{ - struct rchan_buf *buf = filp->private_data; - kref_put(&buf->kref, relay_remove_buf); - - return 0; -} - -/** - * relay_file_read_consume - update the consumed count for the buffer - */ -static void relay_file_read_consume(struct rchan_buf *buf, - size_t read_pos, - size_t bytes_consumed) -{ - size_t subbuf_size = buf->chan->subbuf_size; - size_t n_subbufs = buf->chan->n_subbufs; - size_t read_subbuf; - - if (buf->bytes_consumed + bytes_consumed > subbuf_size) { - relay_subbufs_consumed(buf->chan, buf->cpu, 1); - buf->bytes_consumed = 0; - } - - buf->bytes_consumed += bytes_consumed; - read_subbuf = read_pos / buf->chan->subbuf_size; - if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) { - if ((read_subbuf == buf->subbufs_produced % n_subbufs) && - (buf->offset == subbuf_size)) - return; - relay_subbufs_consumed(buf->chan, buf->cpu, 1); - buf->bytes_consumed = 0; - } -} - -/** - * relay_file_read_avail - boolean, are there unconsumed bytes available? - */ -static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos) -{ - size_t bytes_produced, bytes_consumed, write_offset; - size_t subbuf_size = buf->chan->subbuf_size; - size_t n_subbufs = buf->chan->n_subbufs; - size_t produced = buf->subbufs_produced % n_subbufs; - size_t consumed = buf->subbufs_consumed % n_subbufs; - - write_offset = buf->offset > subbuf_size ? 
subbuf_size : buf->offset; - - if (consumed > produced) { - if ((produced > n_subbufs) && - (produced + n_subbufs - consumed <= n_subbufs)) - produced += n_subbufs; - } else if (consumed == produced) { - if (buf->offset > subbuf_size) { - produced += n_subbufs; - if (buf->subbufs_produced == buf->subbufs_consumed) - consumed += n_subbufs; - } - } - - if (buf->offset > subbuf_size) - bytes_produced = (produced - 1) * subbuf_size + write_offset; - else - bytes_produced = produced * subbuf_size + write_offset; - bytes_consumed = consumed * subbuf_size + buf->bytes_consumed; - - if (bytes_produced == bytes_consumed) - return 0; - - relay_file_read_consume(buf, read_pos, 0); - - return 1; -} - -/** - * relay_file_read_subbuf_avail - return bytes available in sub-buffer - */ -static size_t relay_file_read_subbuf_avail(size_t read_pos, - struct rchan_buf *buf) -{ - size_t padding, avail = 0; - size_t read_subbuf, read_offset, write_subbuf, write_offset; - size_t subbuf_size = buf->chan->subbuf_size; - - write_subbuf = (buf->data - buf->start) / subbuf_size; - write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset; - read_subbuf = read_pos / subbuf_size; - read_offset = read_pos % subbuf_size; - padding = buf->padding[read_subbuf]; - - if (read_subbuf == write_subbuf) { - if (read_offset + padding < write_offset) - avail = write_offset - (read_offset + padding); - } else - avail = (subbuf_size - padding) - read_offset; - - return avail; -} - -/** - * relay_file_read_start_pos - find the first available byte to read - * - * If the read_pos is in the middle of padding, return the - * position of the first actually available byte, otherwise - * return the original value. - */ -static size_t relay_file_read_start_pos(size_t read_pos, - struct rchan_buf *buf) -{ - size_t read_subbuf, padding, padding_start, padding_end; - size_t subbuf_size = buf->chan->subbuf_size; - size_t n_subbufs = buf->chan->n_subbufs; - - read_subbuf = read_pos / subbuf_size; - padding = buf->padding[read_subbuf]; - padding_start = (read_subbuf + 1) * subbuf_size - padding; - padding_end = (read_subbuf + 1) * subbuf_size; - if (read_pos >= padding_start && read_pos < padding_end) { - read_subbuf = (read_subbuf + 1) % n_subbufs; - read_pos = read_subbuf * subbuf_size; - } - - return read_pos; -} - -/** - * relay_file_read_end_pos - return the new read position - */ -static size_t relay_file_read_end_pos(struct rchan_buf *buf, - size_t read_pos, - size_t count) -{ - size_t read_subbuf, padding, end_pos; - size_t subbuf_size = buf->chan->subbuf_size; - size_t n_subbufs = buf->chan->n_subbufs; - - read_subbuf = read_pos / subbuf_size; - padding = buf->padding[read_subbuf]; - if (read_pos % subbuf_size + count + padding == subbuf_size) - end_pos = (read_subbuf + 1) * subbuf_size; - else - end_pos = read_pos + count; - if (end_pos >= subbuf_size * n_subbufs) - end_pos = 0; - - return end_pos; -} - -/** - * relay_file_read - read file op for relay files - * @filp: the file - * @buffer: the userspace buffer - * @count: number of bytes to read - * @ppos: position to read from - * - * Reads count bytes or the number of bytes available in the - * current sub-buffer being read, whichever is smaller. 
- */ -static ssize_t relay_file_read(struct file *filp, - char __user *buffer, - size_t count, - loff_t *ppos) -{ - struct rchan_buf *buf = filp->private_data; - struct inode *inode = filp->f_dentry->d_inode; - size_t read_start, avail; - ssize_t ret = 0; - void *from; - - mutex_lock(&inode->i_mutex); - if(!relay_file_read_avail(buf, *ppos)) - goto out; - - read_start = relay_file_read_start_pos(*ppos, buf); - avail = relay_file_read_subbuf_avail(read_start, buf); - if (!avail) - goto out; - - from = buf->start + read_start; - ret = count = min(count, avail); - if (copy_to_user(buffer, from, count)) { - ret = -EFAULT; - goto out; - } - relay_file_read_consume(buf, read_start, count); - *ppos = relay_file_read_end_pos(buf, read_start, count); -out: - mutex_unlock(&inode->i_mutex); - return ret; -} - -struct file_operations relay_file_operations = { - .open = relay_file_open, - .poll = relay_file_poll, - .mmap = relay_file_mmap, - .read = relay_file_read, - .llseek = no_llseek, - .release = relay_file_release, -}; - -static struct super_operations relayfs_ops = { - .statfs = simple_statfs, - .drop_inode = generic_delete_inode, -}; - -static int relayfs_fill_super(struct super_block * sb, void * data, int silent) -{ - struct inode *inode; - struct dentry *root; - int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; - - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = RELAYFS_MAGIC; - sb->s_op = &relayfs_ops; - inode = relayfs_get_inode(sb, mode, NULL, NULL); - - if (!inode) - return -ENOMEM; - - root = d_alloc_root(inode); - if (!root) { - iput(inode); - return -ENOMEM; - } - sb->s_root = root; - - return 0; -} - -static struct super_block * relayfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) -{ - return get_sb_single(fs_type, flags, data, relayfs_fill_super); -} - -static struct file_system_type relayfs_fs_type = { - .owner = THIS_MODULE, - .name = "relayfs", - .get_sb = relayfs_get_sb, - .kill_sb = kill_litter_super, -}; - -static int __init init_relayfs_fs(void) -{ - return register_filesystem(&relayfs_fs_type); -} - -static void __exit exit_relayfs_fs(void) -{ - - - - - - unregister_filesystem(&relayfs_fs_type); -} - -module_init(init_relayfs_fs) -module_exit(exit_relayfs_fs) - -EXPORT_SYMBOL_GPL(relay_file_operations); -EXPORT_SYMBOL_GPL(relayfs_create_dir); -EXPORT_SYMBOL_GPL(relayfs_remove_dir); -EXPORT_SYMBOL_GPL(relayfs_create_file); -EXPORT_SYMBOL_GPL(relayfs_remove_file); - -MODULE_AUTHOR("Tom Zanussi and Karim Yaghmour "); -MODULE_DESCRIPTION("Relay Filesystem"); -MODULE_LICENSE("GPL"); - diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c deleted file mode 100644 index abf3ceaace49..000000000000 --- a/fs/relayfs/relay.c +++ /dev/null @@ -1,482 +0,0 @@ -/* - * Public API and common code for RelayFS. - * - * See Documentation/filesystems/relayfs.txt for an overview of relayfs. - * - * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp - * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) - * - * This file is released under the GPL. - */ - -#include -#include -#include -#include -#include -#include -#include "relay.h" -#include "buffers.h" - -/** - * relay_buf_empty - boolean, is the channel buffer empty? - * @buf: channel buffer - * - * Returns 1 if the buffer is empty, 0 otherwise. - */ -int relay_buf_empty(struct rchan_buf *buf) -{ - return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; -} - -/** - * relay_buf_full - boolean, is the channel buffer full? 
- * @buf: channel buffer - * - * Returns 1 if the buffer is full, 0 otherwise. - */ -int relay_buf_full(struct rchan_buf *buf) -{ - size_t ready = buf->subbufs_produced - buf->subbufs_consumed; - return (ready >= buf->chan->n_subbufs) ? 1 : 0; -} - -/* - * High-level relayfs kernel API and associated functions. - */ - -/* - * rchan_callback implementations defining default channel behavior. Used - * in place of corresponding NULL values in client callback struct. - */ - -/* - * subbuf_start() default callback. Does nothing. - */ -static int subbuf_start_default_callback (struct rchan_buf *buf, - void *subbuf, - void *prev_subbuf, - size_t prev_padding) -{ - if (relay_buf_full(buf)) - return 0; - - return 1; -} - -/* - * buf_mapped() default callback. Does nothing. - */ -static void buf_mapped_default_callback(struct rchan_buf *buf, - struct file *filp) -{ -} - -/* - * buf_unmapped() default callback. Does nothing. - */ -static void buf_unmapped_default_callback(struct rchan_buf *buf, - struct file *filp) -{ -} - -/* - * create_buf_file_create() default callback. Creates file to represent buf. - */ -static struct dentry *create_buf_file_default_callback(const char *filename, - struct dentry *parent, - int mode, - struct rchan_buf *buf, - int *is_global) -{ - return relayfs_create_file(filename, parent, mode, - &relay_file_operations, buf); -} - -/* - * remove_buf_file() default callback. Removes file representing relay buffer. - */ -static int remove_buf_file_default_callback(struct dentry *dentry) -{ - return relayfs_remove(dentry); -} - -/* relay channel default callbacks */ -static struct rchan_callbacks default_channel_callbacks = { - .subbuf_start = subbuf_start_default_callback, - .buf_mapped = buf_mapped_default_callback, - .buf_unmapped = buf_unmapped_default_callback, - .create_buf_file = create_buf_file_default_callback, - .remove_buf_file = remove_buf_file_default_callback, -}; - -/** - * wakeup_readers - wake up readers waiting on a channel - * @private: the channel buffer - * - * This is the work function used to defer reader waking. The - * reason waking is deferred is that calling directly from write - * causes problems if you're writing from say the scheduler. - */ -static void wakeup_readers(void *private) -{ - struct rchan_buf *buf = private; - wake_up_interruptible(&buf->read_wait); -} - -/** - * __relay_reset - reset a channel buffer - * @buf: the channel buffer - * @init: 1 if this is a first-time initialization - * - * See relay_reset for description of effect. - */ -static inline void __relay_reset(struct rchan_buf *buf, unsigned int init) -{ - size_t i; - - if (init) { - init_waitqueue_head(&buf->read_wait); - kref_init(&buf->kref); - INIT_WORK(&buf->wake_readers, NULL, NULL); - } else { - cancel_delayed_work(&buf->wake_readers); - flush_scheduled_work(); - } - - buf->subbufs_produced = 0; - buf->subbufs_consumed = 0; - buf->bytes_consumed = 0; - buf->finalized = 0; - buf->data = buf->start; - buf->offset = 0; - - for (i = 0; i < buf->chan->n_subbufs; i++) - buf->padding[i] = 0; - - buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0); -} - -/** - * relay_reset - reset the channel - * @chan: the channel - * - * This has the effect of erasing all data from all channel buffers - * and restarting the channel in its initial state. The buffers - * are not freed, so any mappings are still in effect. - * - * NOTE: Care should be taken that the channel isn't actually - * being used by anything when this call is made. 
- */ -void relay_reset(struct rchan *chan) -{ - unsigned int i; - struct rchan_buf *prev = NULL; - - if (!chan) - return; - - for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i] || chan->buf[i] == prev) - break; - __relay_reset(chan->buf[i], 0); - prev = chan->buf[i]; - } -} - -/** - * relay_open_buf - create a new channel buffer in relayfs - * - * Internal - used by relay_open(). - */ -static struct rchan_buf *relay_open_buf(struct rchan *chan, - const char *filename, - struct dentry *parent, - int *is_global) -{ - struct rchan_buf *buf; - struct dentry *dentry; - - if (*is_global) - return chan->buf[0]; - - buf = relay_create_buf(chan); - if (!buf) - return NULL; - - /* Create file in fs */ - dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR, - buf, is_global); - if (!dentry) { - relay_destroy_buf(buf); - return NULL; - } - - buf->dentry = dentry; - __relay_reset(buf, 1); - - return buf; -} - -/** - * relay_close_buf - close a channel buffer - * @buf: channel buffer - * - * Marks the buffer finalized and restores the default callbacks. - * The channel buffer and channel buffer data structure are then freed - * automatically when the last reference is given up. - */ -static inline void relay_close_buf(struct rchan_buf *buf) -{ - buf->finalized = 1; - buf->chan->cb = &default_channel_callbacks; - cancel_delayed_work(&buf->wake_readers); - flush_scheduled_work(); - kref_put(&buf->kref, relay_remove_buf); -} - -static inline void setup_callbacks(struct rchan *chan, - struct rchan_callbacks *cb) -{ - if (!cb) { - chan->cb = &default_channel_callbacks; - return; - } - - if (!cb->subbuf_start) - cb->subbuf_start = subbuf_start_default_callback; - if (!cb->buf_mapped) - cb->buf_mapped = buf_mapped_default_callback; - if (!cb->buf_unmapped) - cb->buf_unmapped = buf_unmapped_default_callback; - if (!cb->create_buf_file) - cb->create_buf_file = create_buf_file_default_callback; - if (!cb->remove_buf_file) - cb->remove_buf_file = remove_buf_file_default_callback; - chan->cb = cb; -} - -/** - * relay_open - create a new relayfs channel - * @base_filename: base name of files to create - * @parent: dentry of parent directory, NULL for root directory - * @subbuf_size: size of sub-buffers - * @n_subbufs: number of sub-buffers - * @cb: client callback functions - * - * Returns channel pointer if successful, NULL otherwise. - * - * Creates a channel buffer for each cpu using the sizes and - * attributes specified. The created channel buffer files - * will be named base_filename0...base_filenameN-1. File - * permissions will be S_IRUSR. 
- */ -struct rchan *relay_open(const char *base_filename, - struct dentry *parent, - size_t subbuf_size, - size_t n_subbufs, - struct rchan_callbacks *cb) -{ - unsigned int i; - struct rchan *chan; - char *tmpname; - int is_global = 0; - - if (!base_filename) - return NULL; - - if (!(subbuf_size && n_subbufs)) - return NULL; - - chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL); - if (!chan) - return NULL; - - chan->version = RELAYFS_CHANNEL_VERSION; - chan->n_subbufs = n_subbufs; - chan->subbuf_size = subbuf_size; - chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); - setup_callbacks(chan, cb); - kref_init(&chan->kref); - - tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL); - if (!tmpname) - goto free_chan; - - for_each_online_cpu(i) { - sprintf(tmpname, "%s%d", base_filename, i); - chan->buf[i] = relay_open_buf(chan, tmpname, parent, - &is_global); - chan->buf[i]->cpu = i; - if (!chan->buf[i]) - goto free_bufs; - } - - kfree(tmpname); - return chan; - -free_bufs: - for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i]) - break; - relay_close_buf(chan->buf[i]); - if (is_global) - break; - } - kfree(tmpname); - -free_chan: - kref_put(&chan->kref, relay_destroy_channel); - return NULL; -} - -/** - * relay_switch_subbuf - switch to a new sub-buffer - * @buf: channel buffer - * @length: size of current event - * - * Returns either the length passed in or 0 if full. - - * Performs sub-buffer-switch tasks such as invoking callbacks, - * updating padding counts, waking up readers, etc. - */ -size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) -{ - void *old, *new; - size_t old_subbuf, new_subbuf; - - if (unlikely(length > buf->chan->subbuf_size)) - goto toobig; - - if (buf->offset != buf->chan->subbuf_size + 1) { - buf->prev_padding = buf->chan->subbuf_size - buf->offset; - old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; - buf->padding[old_subbuf] = buf->prev_padding; - buf->subbufs_produced++; - if (waitqueue_active(&buf->read_wait)) { - PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf); - schedule_delayed_work(&buf->wake_readers, 1); - } - } - - old = buf->data; - new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; - new = buf->start + new_subbuf * buf->chan->subbuf_size; - buf->offset = 0; - if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) { - buf->offset = buf->chan->subbuf_size + 1; - return 0; - } - buf->data = new; - buf->padding[new_subbuf] = 0; - - if (unlikely(length + buf->offset > buf->chan->subbuf_size)) - goto toobig; - - return length; - -toobig: - buf->chan->last_toobig = length; - return 0; -} - -/** - * relay_subbufs_consumed - update the buffer's sub-buffers-consumed count - * @chan: the channel - * @cpu: the cpu associated with the channel buffer to update - * @subbufs_consumed: number of sub-buffers to add to current buf's count - * - * Adds to the channel buffer's consumed sub-buffer count. - * subbufs_consumed should be the number of sub-buffers newly consumed, - * not the total consumed. - * - * NOTE: kernel clients don't need to call this function if the channel - * mode is 'overwrite'. 
- */ -void relay_subbufs_consumed(struct rchan *chan, - unsigned int cpu, - size_t subbufs_consumed) -{ - struct rchan_buf *buf; - - if (!chan) - return; - - if (cpu >= NR_CPUS || !chan->buf[cpu]) - return; - - buf = chan->buf[cpu]; - buf->subbufs_consumed += subbufs_consumed; - if (buf->subbufs_consumed > buf->subbufs_produced) - buf->subbufs_consumed = buf->subbufs_produced; -} - -/** - * relay_destroy_channel - free the channel struct - * - * Should only be called from kref_put(). - */ -void relay_destroy_channel(struct kref *kref) -{ - struct rchan *chan = container_of(kref, struct rchan, kref); - kfree(chan); -} - -/** - * relay_close - close the channel - * @chan: the channel - * - * Closes all channel buffers and frees the channel. - */ -void relay_close(struct rchan *chan) -{ - unsigned int i; - struct rchan_buf *prev = NULL; - - if (!chan) - return; - - for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i] || chan->buf[i] == prev) - break; - relay_close_buf(chan->buf[i]); - prev = chan->buf[i]; - } - - if (chan->last_toobig) - printk(KERN_WARNING "relayfs: one or more items not logged " - "[item size (%Zd) > sub-buffer size (%Zd)]\n", - chan->last_toobig, chan->subbuf_size); - - kref_put(&chan->kref, relay_destroy_channel); -} - -/** - * relay_flush - close the channel - * @chan: the channel - * - * Flushes all channel buffers i.e. forces buffer switch. - */ -void relay_flush(struct rchan *chan) -{ - unsigned int i; - struct rchan_buf *prev = NULL; - - if (!chan) - return; - - for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i] || chan->buf[i] == prev) - break; - relay_switch_subbuf(chan->buf[i], 0); - prev = chan->buf[i]; - } -} - -EXPORT_SYMBOL_GPL(relay_open); -EXPORT_SYMBOL_GPL(relay_close); -EXPORT_SYMBOL_GPL(relay_flush); -EXPORT_SYMBOL_GPL(relay_reset); -EXPORT_SYMBOL_GPL(relay_subbufs_consumed); -EXPORT_SYMBOL_GPL(relay_switch_subbuf); -EXPORT_SYMBOL_GPL(relay_buf_full); diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h deleted file mode 100644 index 0993d3e5753b..000000000000 --- a/fs/relayfs/relay.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _RELAY_H -#define _RELAY_H - -extern int relayfs_remove(struct dentry *dentry); -extern int relay_buf_empty(struct rchan_buf *buf); -extern void relay_destroy_channel(struct kref *kref); - -#endif /* _RELAY_H */ diff --git a/include/linux/relay.h b/include/linux/relay.h new file mode 100644 index 000000000000..4bcc1531d6a9 --- /dev/null +++ b/include/linux/relay.h @@ -0,0 +1,281 @@ +/* + * linux/include/linux/relay.h + * + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * + * CONFIG_RELAY definitions and declarations + */ + +#ifndef _LINUX_RELAY_H +#define _LINUX_RELAY_H + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Needs a _much_ better name... 
*/ +#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE) + +/* + * Tracks changes to rchan/rchan_buf structs + */ +#define RELAYFS_CHANNEL_VERSION 6 + +/* + * Per-cpu relay channel buffer + */ +struct rchan_buf +{ + void *start; /* start of channel buffer */ + void *data; /* start of current sub-buffer */ + size_t offset; /* current offset into sub-buffer */ + size_t subbufs_produced; /* count of sub-buffers produced */ + size_t subbufs_consumed; /* count of sub-buffers consumed */ + struct rchan *chan; /* associated channel */ + wait_queue_head_t read_wait; /* reader wait queue */ + struct work_struct wake_readers; /* reader wake-up work struct */ + struct dentry *dentry; /* channel file dentry */ + struct kref kref; /* channel buffer refcount */ + struct page **page_array; /* array of current buffer pages */ + unsigned int page_count; /* number of current buffer pages */ + unsigned int finalized; /* buffer has been finalized */ + size_t *padding; /* padding counts per sub-buffer */ + size_t prev_padding; /* temporary variable */ + size_t bytes_consumed; /* bytes consumed in cur read subbuf */ + unsigned int cpu; /* this buf's cpu */ +} ____cacheline_aligned; + +/* + * Relay channel data structure + */ +struct rchan +{ + u32 version; /* the version of this struct */ + size_t subbuf_size; /* sub-buffer size */ + size_t n_subbufs; /* number of sub-buffers per buffer */ + size_t alloc_size; /* total buffer size allocated */ + struct rchan_callbacks *cb; /* client callbacks */ + struct kref kref; /* channel refcount */ + void *private_data; /* for user-defined data */ + size_t last_toobig; /* tried to log event > subbuf size */ + struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */ +}; + +/* + * Relay channel client callbacks + */ +struct rchan_callbacks +{ + /* + * subbuf_start - called on buffer-switch to a new sub-buffer + * @buf: the channel buffer containing the new sub-buffer + * @subbuf: the start of the new sub-buffer + * @prev_subbuf: the start of the previous sub-buffer + * @prev_padding: unused space at the end of previous sub-buffer + * + * The client should return 1 to continue logging, 0 to stop + * logging. + * + * NOTE: subbuf_start will also be invoked when the buffer is + * created, so that the first sub-buffer can be initialized + * if necessary. In this case, prev_subbuf will be NULL. + * + * NOTE: the client can reserve bytes at the beginning of the new + * sub-buffer by calling subbuf_start_reserve() in this callback. 
+ */ + int (*subbuf_start) (struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding); + + /* + * buf_mapped - relay buffer mmap notification + * @buf: the channel buffer + * @filp: relay file pointer + * + * Called when a relay file is successfully mmapped + */ + void (*buf_mapped)(struct rchan_buf *buf, + struct file *filp); + + /* + * buf_unmapped - relay buffer unmap notification + * @buf: the channel buffer + * @filp: relay file pointer + * + * Called when a relay file is successfully unmapped + */ + void (*buf_unmapped)(struct rchan_buf *buf, + struct file *filp); + /* + * create_buf_file - create file to represent a relay channel buffer + * @filename: the name of the file to create + * @parent: the parent of the file to create + * @mode: the mode of the file to create + * @buf: the channel buffer + * @is_global: outparam - set non-zero if the buffer should be global + * + * Called during relay_open(), once for each per-cpu buffer, + * to allow the client to create a file to be used to + * represent the corresponding channel buffer. If the file is + * created outside of relay, the parent must also exist in + * that filesystem. + * + * The callback should return the dentry of the file created + * to represent the relay buffer. + * + * Setting the is_global outparam to a non-zero value will + * cause relay_open() to create a single global buffer rather + * than the default set of per-cpu buffers. + * + * See Documentation/filesystems/relayfs.txt for more info. + */ + struct dentry *(*create_buf_file)(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global); + + /* + * remove_buf_file - remove file representing a relay channel buffer + * @dentry: the dentry of the file to remove + * + * Called during relay_close(), once for each per-cpu buffer, + * to allow the client to remove a file used to represent a + * channel buffer. + * + * The callback should return 0 if successful, negative if not. + */ + int (*remove_buf_file)(struct dentry *dentry); +}; + +/* + * CONFIG_RELAY kernel API, kernel/relay.c + */ + +struct rchan *relay_open(const char *base_filename, + struct dentry *parent, + size_t subbuf_size, + size_t n_subbufs, + struct rchan_callbacks *cb); +extern void relay_close(struct rchan *chan); +extern void relay_flush(struct rchan *chan); +extern void relay_subbufs_consumed(struct rchan *chan, + unsigned int cpu, + size_t consumed); +extern void relay_reset(struct rchan *chan); +extern int relay_buf_full(struct rchan_buf *buf); + +extern size_t relay_switch_subbuf(struct rchan_buf *buf, + size_t length); + +/** + * relay_write - write data into the channel + * @chan: relay channel + * @data: data to be written + * @length: number of bytes to write + * + * Writes data into the current cpu's channel buffer. + * + * Protects the buffer by disabling interrupts. Use this + * if you might be logging from interrupt context. Try + * __relay_write() if you know you won't be logging from + * interrupt context. 
+ */ +static inline void relay_write(struct rchan *chan, + const void *data, + size_t length) +{ + unsigned long flags; + struct rchan_buf *buf; + + local_irq_save(flags); + buf = chan->buf[smp_processor_id()]; + if (unlikely(buf->offset + length > chan->subbuf_size)) + length = relay_switch_subbuf(buf, length); + memcpy(buf->data + buf->offset, data, length); + buf->offset += length; + local_irq_restore(flags); +} + +/** + * __relay_write - write data into the channel + * @chan: relay channel + * @data: data to be written + * @length: number of bytes to write + * + * Writes data into the current cpu's channel buffer. + * + * Protects the buffer by disabling preemption. Use + * relay_write() if you might be logging from interrupt + * context. + */ +static inline void __relay_write(struct rchan *chan, + const void *data, + size_t length) +{ + struct rchan_buf *buf; + + buf = chan->buf[get_cpu()]; + if (unlikely(buf->offset + length > buf->chan->subbuf_size)) + length = relay_switch_subbuf(buf, length); + memcpy(buf->data + buf->offset, data, length); + buf->offset += length; + put_cpu(); +} + +/** + * relay_reserve - reserve slot in channel buffer + * @chan: relay channel + * @length: number of bytes to reserve + * + * Returns pointer to reserved slot, NULL if full. + * + * Reserves a slot in the current cpu's channel buffer. + * Does not protect the buffer at all - caller must provide + * appropriate synchronization. + */ +static inline void *relay_reserve(struct rchan *chan, size_t length) +{ + void *reserved; + struct rchan_buf *buf = chan->buf[smp_processor_id()]; + + if (unlikely(buf->offset + length > buf->chan->subbuf_size)) { + length = relay_switch_subbuf(buf, length); + if (!length) + return NULL; + } + reserved = buf->data + buf->offset; + buf->offset += length; + + return reserved; +} + +/** + * subbuf_start_reserve - reserve bytes at the start of a sub-buffer + * @buf: relay channel buffer + * @length: number of bytes to reserve + * + * Helper function used to reserve bytes at the beginning of + * a sub-buffer in the subbuf_start() callback. + */ +static inline void subbuf_start_reserve(struct rchan_buf *buf, + size_t length) +{ + BUG_ON(length >= buf->chan->subbuf_size - 1); + buf->offset = length; +} + +/* + * exported relay file operations, kernel/relay.c + */ +extern struct file_operations relay_file_operations; + +#endif /* _LINUX_RELAY_H */ + diff --git a/init/Kconfig b/init/Kconfig index 38416a199def..1d19fd25204b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -214,6 +214,17 @@ config CPUSETS Say N if unsure. +config RELAY + bool "Kernel->user space relay support (formerly relayfs)" + help + This option enables support for relay interface support in + certain file systems (such as debugfs). + It is designed to provide an efficient mechanism for tools and + facilities to relay large amounts of data from kernel space to + user space. + + If unsure, say N. 
+ source "usr/Kconfig" config UID16 diff --git a/kernel/Makefile b/kernel/Makefile index 4ae0fbde815d..aebd7a78984e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o +obj-$(CONFIG_RELAY) += relay.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/relay.c b/kernel/relay.c new file mode 100644 index 000000000000..9358e8eb8476 --- /dev/null +++ b/kernel/relay.c @@ -0,0 +1,919 @@ +/* + * Public API and common code for kernel->userspace relay file support. + * + * See Documentation/filesystems/relayfs.txt for an overview of relayfs. + * + * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) + * + * Moved to kernel/relay.c by Paul Mundt, 2006. + * + * This file is released under the GPL. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * close() vm_op implementation for relay file mapping. + */ +static void relay_file_mmap_close(struct vm_area_struct *vma) +{ + struct rchan_buf *buf = vma->vm_private_data; + buf->chan->cb->buf_unmapped(buf, vma->vm_file); +} + +/* + * nopage() vm_op implementation for relay file mapping. + */ +static struct page *relay_buf_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + struct page *page; + struct rchan_buf *buf = vma->vm_private_data; + unsigned long offset = address - vma->vm_start; + + if (address > vma->vm_end) + return NOPAGE_SIGBUS; /* Disallow mremap */ + if (!buf) + return NOPAGE_OOM; + + page = vmalloc_to_page(buf->start + offset); + if (!page) + return NOPAGE_OOM; + get_page(page); + + if (type) + *type = VM_FAULT_MINOR; + + return page; +} + +/* + * vm_ops for relay file mappings. + */ +static struct vm_operations_struct relay_file_mmap_ops = { + .nopage = relay_buf_nopage, + .close = relay_file_mmap_close, +}; + +/** + * relay_mmap_buf: - mmap channel buffer to process address space + * @buf: relay channel buffer + * @vma: vm_area_struct describing memory to be mapped + * + * Returns 0 if ok, negative on error + * + * Caller should already have grabbed mmap_sem. 
+ */ +int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) +{ + unsigned long length = vma->vm_end - vma->vm_start; + struct file *filp = vma->vm_file; + + if (!buf) + return -EBADF; + + if (length != (unsigned long)buf->chan->alloc_size) + return -EINVAL; + + vma->vm_ops = &relay_file_mmap_ops; + vma->vm_private_data = buf; + buf->chan->cb->buf_mapped(buf, filp); + + return 0; +} + +/** + * relay_alloc_buf - allocate a channel buffer + * @buf: the buffer struct + * @size: total size of the buffer + * + * Returns a pointer to the resulting buffer, NULL if unsuccessful + */ +static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size) +{ + void *mem; + unsigned int i, j, n_pages; + + size = PAGE_ALIGN(size); + n_pages = size >> PAGE_SHIFT; + + buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); + if (!buf->page_array) + return NULL; + + for (i = 0; i < n_pages; i++) { + buf->page_array[i] = alloc_page(GFP_KERNEL); + if (unlikely(!buf->page_array[i])) + goto depopulate; + } + mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL); + if (!mem) + goto depopulate; + + memset(mem, 0, size); + buf->page_count = n_pages; + return mem; + +depopulate: + for (j = 0; j < i; j++) + __free_page(buf->page_array[j]); + kfree(buf->page_array); + return NULL; +} + +/** + * relay_create_buf - allocate and initialize a channel buffer + * @alloc_size: size of the buffer to allocate + * @n_subbufs: number of sub-buffers in the channel + * + * Returns channel buffer if successful, NULL otherwise + */ +struct rchan_buf *relay_create_buf(struct rchan *chan) +{ + struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL); + if (!buf) + return NULL; + + buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL); + if (!buf->padding) + goto free_buf; + + buf->start = relay_alloc_buf(buf, chan->alloc_size); + if (!buf->start) + goto free_buf; + + buf->chan = chan; + kref_get(&buf->chan->kref); + return buf; + +free_buf: + kfree(buf->padding); + kfree(buf); + return NULL; +} + +/** + * relay_destroy_channel - free the channel struct + * + * Should only be called from kref_put(). + */ +void relay_destroy_channel(struct kref *kref) +{ + struct rchan *chan = container_of(kref, struct rchan, kref); + kfree(chan); +} + +/** + * relay_destroy_buf - destroy an rchan_buf struct and associated buffer + * @buf: the buffer struct + */ +void relay_destroy_buf(struct rchan_buf *buf) +{ + struct rchan *chan = buf->chan; + unsigned int i; + + if (likely(buf->start)) { + vunmap(buf->start); + for (i = 0; i < buf->page_count; i++) + __free_page(buf->page_array[i]); + kfree(buf->page_array); + } + kfree(buf->padding); + kfree(buf); + kref_put(&chan->kref, relay_destroy_channel); +} + +/** + * relay_remove_buf - remove a channel buffer + * + * Removes the file from the fileystem, which also frees the + * rchan_buf_struct and the channel buffer. Should only be called from + * kref_put(). + */ +void relay_remove_buf(struct kref *kref) +{ + struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); + buf->chan->cb->remove_buf_file(buf->dentry); + relay_destroy_buf(buf); +} + +/** + * relay_buf_empty - boolean, is the channel buffer empty? + * @buf: channel buffer + * + * Returns 1 if the buffer is empty, 0 otherwise. + */ +int relay_buf_empty(struct rchan_buf *buf) +{ + return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; +} +EXPORT_SYMBOL_GPL(relay_buf_empty); + +/** + * relay_buf_full - boolean, is the channel buffer full? 
+ * @buf: channel buffer + * + * Returns 1 if the buffer is full, 0 otherwise. + */ +int relay_buf_full(struct rchan_buf *buf) +{ + size_t ready = buf->subbufs_produced - buf->subbufs_consumed; + return (ready >= buf->chan->n_subbufs) ? 1 : 0; +} +EXPORT_SYMBOL_GPL(relay_buf_full); + +/* + * High-level relay kernel API and associated functions. + */ + +/* + * rchan_callback implementations defining default channel behavior. Used + * in place of corresponding NULL values in client callback struct. + */ + +/* + * subbuf_start() default callback. Does nothing. + */ +static int subbuf_start_default_callback (struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding) +{ + if (relay_buf_full(buf)) + return 0; + + return 1; +} + +/* + * buf_mapped() default callback. Does nothing. + */ +static void buf_mapped_default_callback(struct rchan_buf *buf, + struct file *filp) +{ +} + +/* + * buf_unmapped() default callback. Does nothing. + */ +static void buf_unmapped_default_callback(struct rchan_buf *buf, + struct file *filp) +{ +} + +/* + * create_buf_file_create() default callback. Does nothing. + */ +static struct dentry *create_buf_file_default_callback(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + return NULL; +} + +/* + * remove_buf_file() default callback. Does nothing. + */ +static int remove_buf_file_default_callback(struct dentry *dentry) +{ + return -EINVAL; +} + +/* relay channel default callbacks */ +static struct rchan_callbacks default_channel_callbacks = { + .subbuf_start = subbuf_start_default_callback, + .buf_mapped = buf_mapped_default_callback, + .buf_unmapped = buf_unmapped_default_callback, + .create_buf_file = create_buf_file_default_callback, + .remove_buf_file = remove_buf_file_default_callback, +}; + +/** + * wakeup_readers - wake up readers waiting on a channel + * @private: the channel buffer + * + * This is the work function used to defer reader waking. The + * reason waking is deferred is that calling directly from write + * causes problems if you're writing from say the scheduler. + */ +static void wakeup_readers(void *private) +{ + struct rchan_buf *buf = private; + wake_up_interruptible(&buf->read_wait); +} + +/** + * __relay_reset - reset a channel buffer + * @buf: the channel buffer + * @init: 1 if this is a first-time initialization + * + * See relay_reset for description of effect. + */ +static inline void __relay_reset(struct rchan_buf *buf, unsigned int init) +{ + size_t i; + + if (init) { + init_waitqueue_head(&buf->read_wait); + kref_init(&buf->kref); + INIT_WORK(&buf->wake_readers, NULL, NULL); + } else { + cancel_delayed_work(&buf->wake_readers); + flush_scheduled_work(); + } + + buf->subbufs_produced = 0; + buf->subbufs_consumed = 0; + buf->bytes_consumed = 0; + buf->finalized = 0; + buf->data = buf->start; + buf->offset = 0; + + for (i = 0; i < buf->chan->n_subbufs; i++) + buf->padding[i] = 0; + + buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0); +} + +/** + * relay_reset - reset the channel + * @chan: the channel + * + * This has the effect of erasing all data from all channel buffers + * and restarting the channel in its initial state. The buffers + * are not freed, so any mappings are still in effect. + * + * NOTE: Care should be taken that the channel isn't actually + * being used by anything when this call is made. 
+ */ +void relay_reset(struct rchan *chan) +{ + unsigned int i; + struct rchan_buf *prev = NULL; + + if (!chan) + return; + + for (i = 0; i < NR_CPUS; i++) { + if (!chan->buf[i] || chan->buf[i] == prev) + break; + __relay_reset(chan->buf[i], 0); + prev = chan->buf[i]; + } +} +EXPORT_SYMBOL_GPL(relay_reset); + +/** + * relay_open_buf - create a new relay channel buffer + * + * Internal - used by relay_open(). + */ +static struct rchan_buf *relay_open_buf(struct rchan *chan, + const char *filename, + struct dentry *parent, + int *is_global) +{ + struct rchan_buf *buf; + struct dentry *dentry; + + if (*is_global) + return chan->buf[0]; + + buf = relay_create_buf(chan); + if (!buf) + return NULL; + + /* Create file in fs */ + dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR, + buf, is_global); + if (!dentry) { + relay_destroy_buf(buf); + return NULL; + } + + buf->dentry = dentry; + __relay_reset(buf, 1); + + return buf; +} + +/** + * relay_close_buf - close a channel buffer + * @buf: channel buffer + * + * Marks the buffer finalized and restores the default callbacks. + * The channel buffer and channel buffer data structure are then freed + * automatically when the last reference is given up. + */ +static inline void relay_close_buf(struct rchan_buf *buf) +{ + buf->finalized = 1; + cancel_delayed_work(&buf->wake_readers); + flush_scheduled_work(); + kref_put(&buf->kref, relay_remove_buf); +} + +static inline void setup_callbacks(struct rchan *chan, + struct rchan_callbacks *cb) +{ + if (!cb) { + chan->cb = &default_channel_callbacks; + return; + } + + if (!cb->subbuf_start) + cb->subbuf_start = subbuf_start_default_callback; + if (!cb->buf_mapped) + cb->buf_mapped = buf_mapped_default_callback; + if (!cb->buf_unmapped) + cb->buf_unmapped = buf_unmapped_default_callback; + if (!cb->create_buf_file) + cb->create_buf_file = create_buf_file_default_callback; + if (!cb->remove_buf_file) + cb->remove_buf_file = remove_buf_file_default_callback; + chan->cb = cb; +} + +/** + * relay_open - create a new relay channel + * @base_filename: base name of files to create + * @parent: dentry of parent directory, NULL for root directory + * @subbuf_size: size of sub-buffers + * @n_subbufs: number of sub-buffers + * @cb: client callback functions + * + * Returns channel pointer if successful, NULL otherwise. + * + * Creates a channel buffer for each cpu using the sizes and + * attributes specified. The created channel buffer files + * will be named base_filename0...base_filenameN-1. File + * permissions will be S_IRUSR. 
+ */ +struct rchan *relay_open(const char *base_filename, + struct dentry *parent, + size_t subbuf_size, + size_t n_subbufs, + struct rchan_callbacks *cb) +{ + unsigned int i; + struct rchan *chan; + char *tmpname; + int is_global = 0; + + if (!base_filename) + return NULL; + + if (!(subbuf_size && n_subbufs)) + return NULL; + + chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL); + if (!chan) + return NULL; + + chan->version = RELAYFS_CHANNEL_VERSION; + chan->n_subbufs = n_subbufs; + chan->subbuf_size = subbuf_size; + chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); + setup_callbacks(chan, cb); + kref_init(&chan->kref); + + tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL); + if (!tmpname) + goto free_chan; + + for_each_online_cpu(i) { + sprintf(tmpname, "%s%d", base_filename, i); + chan->buf[i] = relay_open_buf(chan, tmpname, parent, + &is_global); + if (!chan->buf[i]) + goto free_bufs; + + chan->buf[i]->cpu = i; + } + + kfree(tmpname); + return chan; + +free_bufs: + for (i = 0; i < NR_CPUS; i++) { + if (!chan->buf[i]) + break; + relay_close_buf(chan->buf[i]); + if (is_global) + break; + } + kfree(tmpname); + +free_chan: + kref_put(&chan->kref, relay_destroy_channel); + return NULL; +} +EXPORT_SYMBOL_GPL(relay_open); + +/** + * relay_switch_subbuf - switch to a new sub-buffer + * @buf: channel buffer + * @length: size of current event + * + * Returns either the length passed in or 0 if full. + * + * Performs sub-buffer-switch tasks such as invoking callbacks, + * updating padding counts, waking up readers, etc. + */ +size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) +{ + void *old, *new; + size_t old_subbuf, new_subbuf; + + if (unlikely(length > buf->chan->subbuf_size)) + goto toobig; + + if (buf->offset != buf->chan->subbuf_size + 1) { + buf->prev_padding = buf->chan->subbuf_size - buf->offset; + old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; + buf->padding[old_subbuf] = buf->prev_padding; + buf->subbufs_produced++; + if (waitqueue_active(&buf->read_wait)) { + PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf); + schedule_delayed_work(&buf->wake_readers, 1); + } + } + + old = buf->data; + new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; + new = buf->start + new_subbuf * buf->chan->subbuf_size; + buf->offset = 0; + if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) { + buf->offset = buf->chan->subbuf_size + 1; + return 0; + } + buf->data = new; + buf->padding[new_subbuf] = 0; + + if (unlikely(length + buf->offset > buf->chan->subbuf_size)) + goto toobig; + + return length; + +toobig: + buf->chan->last_toobig = length; + return 0; +} +EXPORT_SYMBOL_GPL(relay_switch_subbuf); + +/** + * relay_subbufs_consumed - update the buffer's sub-buffers-consumed count + * @chan: the channel + * @cpu: the cpu associated with the channel buffer to update + * @subbufs_consumed: number of sub-buffers to add to current buf's count + * + * Adds to the channel buffer's consumed sub-buffer count. + * subbufs_consumed should be the number of sub-buffers newly consumed, + * not the total consumed. + * + * NOTE: kernel clients don't need to call this function if the channel + * mode is 'overwrite'. 
+ */ +void relay_subbufs_consumed(struct rchan *chan, + unsigned int cpu, + size_t subbufs_consumed) +{ + struct rchan_buf *buf; + + if (!chan) + return; + + if (cpu >= NR_CPUS || !chan->buf[cpu]) + return; + + buf = chan->buf[cpu]; + buf->subbufs_consumed += subbufs_consumed; + if (buf->subbufs_consumed > buf->subbufs_produced) + buf->subbufs_consumed = buf->subbufs_produced; +} +EXPORT_SYMBOL_GPL(relay_subbufs_consumed); + +/** + * relay_close - close the channel + * @chan: the channel + * + * Closes all channel buffers and frees the channel. + */ +void relay_close(struct rchan *chan) +{ + unsigned int i; + struct rchan_buf *prev = NULL; + + if (!chan) + return; + + for (i = 0; i < NR_CPUS; i++) { + if (!chan->buf[i] || chan->buf[i] == prev) + break; + relay_close_buf(chan->buf[i]); + prev = chan->buf[i]; + } + + if (chan->last_toobig) + printk(KERN_WARNING "relay: one or more items not logged " + "[item size (%Zd) > sub-buffer size (%Zd)]\n", + chan->last_toobig, chan->subbuf_size); + + kref_put(&chan->kref, relay_destroy_channel); +} +EXPORT_SYMBOL_GPL(relay_close); + +/** + * relay_flush - close the channel + * @chan: the channel + * + * Flushes all channel buffers i.e. forces buffer switch. + */ +void relay_flush(struct rchan *chan) +{ + unsigned int i; + struct rchan_buf *prev = NULL; + + if (!chan) + return; + + for (i = 0; i < NR_CPUS; i++) { + if (!chan->buf[i] || chan->buf[i] == prev) + break; + relay_switch_subbuf(chan->buf[i], 0); + prev = chan->buf[i]; + } +} +EXPORT_SYMBOL_GPL(relay_flush); + +/** + * relay_file_open - open file op for relay files + * @inode: the inode + * @filp: the file + * + * Increments the channel buffer refcount. + */ +static int relay_file_open(struct inode *inode, struct file *filp) +{ + struct rchan_buf *buf = inode->u.generic_ip; + kref_get(&buf->kref); + filp->private_data = buf; + + return 0; +} + +/** + * relay_file_mmap - mmap file op for relay files + * @filp: the file + * @vma: the vma describing what to map + * + * Calls upon relay_mmap_buf to map the file into user space. + */ +static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct rchan_buf *buf = filp->private_data; + return relay_mmap_buf(buf, vma); +} + +/** + * relay_file_poll - poll file op for relay files + * @filp: the file + * @wait: poll table + * + * Poll implemention. + */ +static unsigned int relay_file_poll(struct file *filp, poll_table *wait) +{ + unsigned int mask = 0; + struct rchan_buf *buf = filp->private_data; + + if (buf->finalized) + return POLLERR; + + if (filp->f_mode & FMODE_READ) { + poll_wait(filp, &buf->read_wait, wait); + if (!relay_buf_empty(buf)) + mask |= POLLIN | POLLRDNORM; + } + + return mask; +} + +/** + * relay_file_release - release file op for relay files + * @inode: the inode + * @filp: the file + * + * Decrements the channel refcount, as the filesystem is + * no longer using it. 
+ */ +static int relay_file_release(struct inode *inode, struct file *filp) +{ + struct rchan_buf *buf = filp->private_data; + kref_put(&buf->kref, relay_remove_buf); + + return 0; +} + +/** + * relay_file_read_consume - update the consumed count for the buffer + */ +static void relay_file_read_consume(struct rchan_buf *buf, + size_t read_pos, + size_t bytes_consumed) +{ + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + size_t read_subbuf; + + if (buf->bytes_consumed + bytes_consumed > subbuf_size) { + relay_subbufs_consumed(buf->chan, buf->cpu, 1); + buf->bytes_consumed = 0; + } + + buf->bytes_consumed += bytes_consumed; + read_subbuf = read_pos / buf->chan->subbuf_size; + if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) { + if ((read_subbuf == buf->subbufs_produced % n_subbufs) && + (buf->offset == subbuf_size)) + return; + relay_subbufs_consumed(buf->chan, buf->cpu, 1); + buf->bytes_consumed = 0; + } +} + +/** + * relay_file_read_avail - boolean, are there unconsumed bytes available? + */ +static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos) +{ + size_t bytes_produced, bytes_consumed, write_offset; + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + size_t produced = buf->subbufs_produced % n_subbufs; + size_t consumed = buf->subbufs_consumed % n_subbufs; + + write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset; + + if (consumed > produced) { + if ((produced > n_subbufs) && + (produced + n_subbufs - consumed <= n_subbufs)) + produced += n_subbufs; + } else if (consumed == produced) { + if (buf->offset > subbuf_size) { + produced += n_subbufs; + if (buf->subbufs_produced == buf->subbufs_consumed) + consumed += n_subbufs; + } + } + + if (buf->offset > subbuf_size) + bytes_produced = (produced - 1) * subbuf_size + write_offset; + else + bytes_produced = produced * subbuf_size + write_offset; + bytes_consumed = consumed * subbuf_size + buf->bytes_consumed; + + if (bytes_produced == bytes_consumed) + return 0; + + relay_file_read_consume(buf, read_pos, 0); + + return 1; +} + +/** + * relay_file_read_subbuf_avail - return bytes available in sub-buffer + */ +static size_t relay_file_read_subbuf_avail(size_t read_pos, + struct rchan_buf *buf) +{ + size_t padding, avail = 0; + size_t read_subbuf, read_offset, write_subbuf, write_offset; + size_t subbuf_size = buf->chan->subbuf_size; + + write_subbuf = (buf->data - buf->start) / subbuf_size; + write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset; + read_subbuf = read_pos / subbuf_size; + read_offset = read_pos % subbuf_size; + padding = buf->padding[read_subbuf]; + + if (read_subbuf == write_subbuf) { + if (read_offset + padding < write_offset) + avail = write_offset - (read_offset + padding); + } else + avail = (subbuf_size - padding) - read_offset; + + return avail; +} + +/** + * relay_file_read_start_pos - find the first available byte to read + * + * If the read_pos is in the middle of padding, return the + * position of the first actually available byte, otherwise + * return the original value. 
+ */ +static size_t relay_file_read_start_pos(size_t read_pos, + struct rchan_buf *buf) +{ + size_t read_subbuf, padding, padding_start, padding_end; + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + + read_subbuf = read_pos / subbuf_size; + padding = buf->padding[read_subbuf]; + padding_start = (read_subbuf + 1) * subbuf_size - padding; + padding_end = (read_subbuf + 1) * subbuf_size; + if (read_pos >= padding_start && read_pos < padding_end) { + read_subbuf = (read_subbuf + 1) % n_subbufs; + read_pos = read_subbuf * subbuf_size; + } + + return read_pos; +} + +/** + * relay_file_read_end_pos - return the new read position + */ +static size_t relay_file_read_end_pos(struct rchan_buf *buf, + size_t read_pos, + size_t count) +{ + size_t read_subbuf, padding, end_pos; + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + + read_subbuf = read_pos / subbuf_size; + padding = buf->padding[read_subbuf]; + if (read_pos % subbuf_size + count + padding == subbuf_size) + end_pos = (read_subbuf + 1) * subbuf_size; + else + end_pos = read_pos + count; + if (end_pos >= subbuf_size * n_subbufs) + end_pos = 0; + + return end_pos; +} + +/** + * relay_file_read - read file op for relay files + * @filp: the file + * @buffer: the userspace buffer + * @count: number of bytes to read + * @ppos: position to read from + * + * Reads count bytes or the number of bytes available in the + * current sub-buffer being read, whichever is smaller. + */ +static ssize_t relay_file_read(struct file *filp, + char __user *buffer, + size_t count, + loff_t *ppos) +{ + struct rchan_buf *buf = filp->private_data; + struct inode *inode = filp->f_dentry->d_inode; + size_t read_start, avail; + ssize_t ret = 0; + void *from; + + mutex_lock(&inode->i_mutex); + if(!relay_file_read_avail(buf, *ppos)) + goto out; + + read_start = relay_file_read_start_pos(*ppos, buf); + avail = relay_file_read_subbuf_avail(read_start, buf); + if (!avail) + goto out; + + from = buf->start + read_start; + ret = count = min(count, avail); + if (copy_to_user(buffer, from, count)) { + ret = -EFAULT; + goto out; + } + relay_file_read_consume(buf, read_start, count); + *ppos = relay_file_read_end_pos(buf, read_start, count); +out: + mutex_unlock(&inode->i_mutex); + return ret; +} + +struct file_operations relay_file_operations = { + .open = relay_file_open, + .poll = relay_file_poll, + .mmap = relay_file_mmap, + .read = relay_file_read, + .llseek = no_llseek, + .release = relay_file_release, +}; +EXPORT_SYMBOL_GPL(relay_file_operations); -- cgit v1.2.3 From 2056a782f8e7e65fd4bfd027506b4ce1c5e9ccd4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 23 Mar 2006 20:00:26 +0100 Subject: [PATCH] Block queue IO tracing support (blktrace) as of 2006-03-23 Signed-off-by: Jens Axboe --- block/Kconfig | 12 + block/Makefile | 2 + block/blktrace.c | 538 +++++++++++++++++++++++++++++++++++++++++++ block/elevator.c | 4 + block/ioctl.c | 6 + block/ll_rw_blk.c | 44 +++- drivers/block/cciss.c | 2 + drivers/md/dm.c | 13 +- fs/bio.c | 4 + fs/compat_ioctl.c | 1 + include/linux/blkdev.h | 3 + include/linux/blktrace_api.h | 277 ++++++++++++++++++++++ include/linux/compat_ioctl.h | 4 + include/linux/fs.h | 4 + include/linux/sched.h | 1 + kernel/fork.c | 1 + mm/highmem.c | 3 + 17 files changed, 916 insertions(+), 3 deletions(-) create mode 100644 block/blktrace.c create mode 100644 include/linux/blktrace_api.h (limited to 'fs') diff --git a/block/Kconfig b/block/Kconfig index 377f6dd20e17..96783645092d 
100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -11,4 +11,16 @@ config LBD your machine, or if you want to have a raid or loopback device bigger than 2TB. Otherwise say N. +config BLK_DEV_IO_TRACE + bool "Support for tracing block io actions" + select RELAY + select DEBUG_FS + help + Say Y here, if you want to be able to trace the block layer actions + on a given queue. Tracing allows you to see any traffic happening + on a block device queue. For more information (and the user space + support tools needed), fetch the blktrace app from: + + git://brick.kernel.dk/data/git/blktrace.git + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index 7e4f93e2b44e..c05de0e0037f 100644 --- a/block/Makefile +++ b/block/Makefile @@ -8,3 +8,5 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_AS) += as-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o + +obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o diff --git a/block/blktrace.c b/block/blktrace.c new file mode 100644 index 000000000000..36f3a172275f --- /dev/null +++ b/block/blktrace.c @@ -0,0 +1,538 @@ +/* + * Copyright (C) 2006 Jens Axboe + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, }; +static unsigned int blktrace_seq __read_mostly = 1; + +/* + * Send out a notify for this process, if we haven't done so since a trace + * started + */ +static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) +{ + struct blk_io_trace *t; + + t = relay_reserve(bt->rchan, sizeof(*t) + sizeof(tsk->comm)); + if (t) { + t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; + t->device = bt->dev; + t->action = BLK_TC_ACT(BLK_TC_NOTIFY); + t->pid = tsk->pid; + t->cpu = smp_processor_id(); + t->pdu_len = sizeof(tsk->comm); + memcpy((void *) t + sizeof(*t), tsk->comm, t->pdu_len); + tsk->btrace_seq = blktrace_seq; + } +} + +static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, + pid_t pid) +{ + if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) + return 1; + if (sector < bt->start_lba || sector > bt->end_lba) + return 1; + if (bt->pid && pid != bt->pid) + return 1; + + return 0; +} + +/* + * Data direction bit lookup + */ +static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; + +/* + * Bio action bits of interest + */ +static u32 bio_act[3] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC) }; + +/* + * More could be added as needed, taking care to increment the decrementer + * to get correct indexing + */ +#define trace_barrier_bit(rw) \ + (((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0)) +#define trace_sync_bit(rw) \ + (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) + +/* + * The worker for the 
various blk_add_trace*() types. Fills out a + * blk_io_trace structure and places it in a per-cpu subbuffer. + */ +void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, + int rw, u32 what, int error, int pdu_len, void *pdu_data) +{ + struct task_struct *tsk = current; + struct blk_io_trace *t; + unsigned long flags; + unsigned long *sequence; + pid_t pid; + int cpu; + + if (unlikely(bt->trace_state != Blktrace_running)) + return; + + what |= ddir_act[rw & WRITE]; + what |= bio_act[trace_barrier_bit(rw)]; + what |= bio_act[trace_sync_bit(rw)]; + + pid = tsk->pid; + if (unlikely(act_log_check(bt, what, sector, pid))) + return; + + /* + * A word about the locking here - we disable interrupts to reserve + * some space in the relay per-cpu buffer, to prevent an irq + * from coming in and stepping on our toes. Once reserved, it's + * enough to get preemption disabled to prevent read of this data + * before we are through filling it. get_cpu()/put_cpu() does this + * for us + */ + local_irq_save(flags); + + if (unlikely(tsk->btrace_seq != blktrace_seq)) + trace_note_tsk(bt, tsk); + + t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); + if (t) { + cpu = smp_processor_id(); + sequence = per_cpu_ptr(bt->sequence, cpu); + + t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; + t->sequence = ++(*sequence); + t->time = sched_clock() - per_cpu(blk_trace_cpu_offset, cpu); + t->sector = sector; + t->bytes = bytes; + t->action = what; + t->pid = pid; + t->device = bt->dev; + t->cpu = cpu; + t->error = error; + t->pdu_len = pdu_len; + + if (pdu_len) + memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); + } + + local_irq_restore(flags); +} + +EXPORT_SYMBOL_GPL(__blk_add_trace); + +static struct dentry *blk_tree_root; +static struct mutex blk_tree_mutex; +static unsigned int root_users; + +static inline void blk_remove_root(void) +{ + if (blk_tree_root) { + debugfs_remove(blk_tree_root); + blk_tree_root = NULL; + } +} + +static void blk_remove_tree(struct dentry *dir) +{ + mutex_lock(&blk_tree_mutex); + debugfs_remove(dir); + if (--root_users == 0) + blk_remove_root(); + mutex_unlock(&blk_tree_mutex); +} + +static struct dentry *blk_create_tree(const char *blk_name) +{ + struct dentry *dir = NULL; + + mutex_lock(&blk_tree_mutex); + + if (!blk_tree_root) { + blk_tree_root = debugfs_create_dir("block", NULL); + if (!blk_tree_root) + goto err; + } + + dir = debugfs_create_dir(blk_name, blk_tree_root); + if (dir) + root_users++; + else + blk_remove_root(); + +err: + mutex_unlock(&blk_tree_mutex); + return dir; +} + +static void blk_trace_cleanup(struct blk_trace *bt) +{ + relay_close(bt->rchan); + debugfs_remove(bt->dropped_file); + blk_remove_tree(bt->dir); + free_percpu(bt->sequence); + kfree(bt); +} + +static int blk_trace_remove(request_queue_t *q) +{ + struct blk_trace *bt; + + bt = xchg(&q->blk_trace, NULL); + if (!bt) + return -EINVAL; + + if (bt->trace_state == Blktrace_setup || + bt->trace_state == Blktrace_stopped) + blk_trace_cleanup(bt); + + return 0; +} + +static int blk_dropped_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->u.generic_ip; + + return 0; +} + +static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct blk_trace *bt = filp->private_data; + char buf[16]; + + snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped)); + + return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); +} + +static struct file_operations blk_dropped_fops = { + .owner = THIS_MODULE, + .open = 
blk_dropped_open, + .read = blk_dropped_read, +}; + +/* + * Keep track of how many times we encountered a full subbuffer, to aid + * the user space app in telling how many lost events there were. + */ +static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, + void *prev_subbuf, size_t prev_padding) +{ + struct blk_trace *bt; + + if (!relay_buf_full(buf)) + return 1; + + bt = buf->chan->private_data; + atomic_inc(&bt->dropped); + return 0; +} + +static int blk_remove_buf_file_callback(struct dentry *dentry) +{ + debugfs_remove(dentry); + return 0; +} + +static struct dentry *blk_create_buf_file_callback(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + return debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations); +} + +static struct rchan_callbacks blk_relay_callbacks = { + .subbuf_start = blk_subbuf_start_callback, + .create_buf_file = blk_create_buf_file_callback, + .remove_buf_file = blk_remove_buf_file_callback, +}; + +/* + * Setup everything required to start tracing + */ +static int blk_trace_setup(request_queue_t *q, struct block_device *bdev, + char __user *arg) +{ + struct blk_user_trace_setup buts; + struct blk_trace *old_bt, *bt = NULL; + struct dentry *dir = NULL; + char b[BDEVNAME_SIZE]; + int ret, i; + + if (copy_from_user(&buts, arg, sizeof(buts))) + return -EFAULT; + + if (!buts.buf_size || !buts.buf_nr) + return -EINVAL; + + strcpy(buts.name, bdevname(bdev, b)); + + /* + * some device names have larger paths - convert the slashes + * to underscores for this to work as expected + */ + for (i = 0; i < strlen(buts.name); i++) + if (buts.name[i] == '/') + buts.name[i] = '_'; + + if (copy_to_user(arg, &buts, sizeof(buts))) + return -EFAULT; + + ret = -ENOMEM; + bt = kzalloc(sizeof(*bt), GFP_KERNEL); + if (!bt) + goto err; + + bt->sequence = alloc_percpu(unsigned long); + if (!bt->sequence) + goto err; + + ret = -ENOENT; + dir = blk_create_tree(buts.name); + if (!dir) + goto err; + + bt->dir = dir; + bt->dev = bdev->bd_dev; + atomic_set(&bt->dropped, 0); + + ret = -EIO; + bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops); + if (!bt->dropped_file) + goto err; + + bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks); + if (!bt->rchan) + goto err; + bt->rchan->private_data = bt; + + bt->act_mask = buts.act_mask; + if (!bt->act_mask) + bt->act_mask = (u16) -1; + + bt->start_lba = buts.start_lba; + bt->end_lba = buts.end_lba; + if (!bt->end_lba) + bt->end_lba = -1ULL; + + bt->pid = buts.pid; + bt->trace_state = Blktrace_setup; + + ret = -EBUSY; + old_bt = xchg(&q->blk_trace, bt); + if (old_bt) { + (void) xchg(&q->blk_trace, old_bt); + goto err; + } + + return 0; +err: + if (dir) + blk_remove_tree(dir); + if (bt) { + if (bt->dropped_file) + debugfs_remove(bt->dropped_file); + if (bt->sequence) + free_percpu(bt->sequence); + if (bt->rchan) + relay_close(bt->rchan); + kfree(bt); + } + return ret; +} + +static int blk_trace_startstop(request_queue_t *q, int start) +{ + struct blk_trace *bt; + int ret; + + if ((bt = q->blk_trace) == NULL) + return -EINVAL; + + /* + * For starting a trace, we can transition from a setup or stopped + * trace. 
For stopping a trace, the state must be running + */ + ret = -EINVAL; + if (start) { + if (bt->trace_state == Blktrace_setup || + bt->trace_state == Blktrace_stopped) { + blktrace_seq++; + smp_mb(); + bt->trace_state = Blktrace_running; + ret = 0; + } + } else { + if (bt->trace_state == Blktrace_running) { + bt->trace_state = Blktrace_stopped; + relay_flush(bt->rchan); + ret = 0; + } + } + + return ret; +} + +/** + * blk_trace_ioctl: - handle the ioctls associated with tracing + * @bdev: the block device + * @cmd: the ioctl cmd + * @arg: the argument data, if any + * + **/ +int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) +{ + request_queue_t *q; + int ret, start = 0; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + mutex_lock(&bdev->bd_mutex); + + switch (cmd) { + case BLKTRACESETUP: + ret = blk_trace_setup(q, bdev, arg); + break; + case BLKTRACESTART: + start = 1; + case BLKTRACESTOP: + ret = blk_trace_startstop(q, start); + break; + case BLKTRACETEARDOWN: + ret = blk_trace_remove(q); + break; + default: + ret = -ENOTTY; + break; + } + + mutex_unlock(&bdev->bd_mutex); + return ret; +} + +/** + * blk_trace_shutdown: - stop and cleanup trace structures + * @q: the request queue associated with the device + * + **/ +void blk_trace_shutdown(request_queue_t *q) +{ + blk_trace_startstop(q, 0); + blk_trace_remove(q); +} + +/* + * Average offset over two calls to sched_clock() with a gettimeofday() + * in the middle + */ +static void blk_check_time(unsigned long long *t) +{ + unsigned long long a, b; + struct timeval tv; + + a = sched_clock(); + do_gettimeofday(&tv); + b = sched_clock(); + + *t = tv.tv_sec * 1000000000 + tv.tv_usec * 1000; + *t -= (a + b) / 2; +} + +static void blk_trace_check_cpu_time(void *data) +{ + unsigned long long *t; + int cpu = get_cpu(); + + t = &per_cpu(blk_trace_cpu_offset, cpu); + + /* + * Just call it twice, hopefully the second call will be cache hot + * and a little more precise + */ + blk_check_time(t); + blk_check_time(t); + + put_cpu(); +} + +/* + * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU + * timings + */ +static void blk_trace_calibrate_offsets(void) +{ + unsigned long flags; + + smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1); + local_irq_save(flags); + blk_trace_check_cpu_time(NULL); + local_irq_restore(flags); +} + +static void blk_trace_set_ht_offsets(void) +{ +#if defined(CONFIG_SCHED_SMT) + int cpu, i; + + /* + * now make sure HT siblings have the same time offset + */ + preempt_disable(); + for_each_online_cpu(cpu) { + unsigned long long *cpu_off, *sibling_off; + + for_each_cpu_mask(i, cpu_sibling_map[cpu]) { + if (i == cpu) + continue; + + cpu_off = &per_cpu(blk_trace_cpu_offset, cpu); + sibling_off = &per_cpu(blk_trace_cpu_offset, i); + *sibling_off = *cpu_off; + } + } + preempt_enable(); +#endif +} + +static __init int blk_trace_init(void) +{ + mutex_init(&blk_tree_mutex); + blk_trace_calibrate_offsets(); + blk_trace_set_ht_offsets(); + + return 0; +} + +module_init(blk_trace_init); + diff --git a/block/elevator.c b/block/elevator.c index db3d0d8296a0..5e558c4689a4 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -333,6 +334,8 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) struct list_head *pos; unsigned ordseq; + blk_add_trace_rq(q, rq, BLK_TA_INSERT); + rq->q = q; switch (where) { @@ -499,6 +502,7 @@ struct request *elv_next_request(request_queue_t *q) * not be passed by new 
incoming requests */ rq->flags |= REQ_STARTED; + blk_add_trace_rq(q, rq, BLK_TA_ISSUE); } if (!q->boundary_rq || q->boundary_rq == rq) { diff --git a/block/ioctl.c b/block/ioctl.c index 35fdb7dc6512..9cfa2e1ecb24 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -5,6 +5,7 @@ #include #include #include +#include #include static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) @@ -189,6 +190,11 @@ static int blkdev_locked_ioctl(struct file *file, struct block_device *bdev, return put_ulong(arg, bdev->bd_inode->i_size >> 9); case BLKGETSIZE64: return put_u64(arg, bdev->bd_inode->i_size); + case BLKTRACESTART: + case BLKTRACESTOP: + case BLKTRACESETUP: + case BLKTRACETEARDOWN: + return blk_trace_ioctl(bdev, cmd, (char __user *) arg); } return -ENOIOCTLCMD; } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 6c793b196aa9..062067fa7ead 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -28,6 +28,7 @@ #include #include #include +#include /* * for max sense size @@ -1556,8 +1557,10 @@ void blk_plug_device(request_queue_t *q) if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) return; - if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); + blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); + } } EXPORT_SYMBOL(blk_plug_device); @@ -1621,14 +1624,21 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi, /* * devices don't necessarily have an ->unplug_fn defined */ - if (q->unplug_fn) + if (q->unplug_fn) { + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + q->unplug_fn(q); + } } static void blk_unplug_work(void *data) { request_queue_t *q = data; + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + q->unplug_fn(q); } @@ -1636,6 +1646,9 @@ static void blk_unplug_timeout(unsigned long data) { request_queue_t *q = (request_queue_t *)data; + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + kblockd_schedule_work(&q->unplug_work); } @@ -1753,6 +1766,9 @@ static void blk_release_queue(struct kobject *kobj) if (q->queue_tags) __blk_queue_free_tags(q); + if (q->blk_trace) + blk_trace_shutdown(q); + kmem_cache_free(requestq_cachep, q); } @@ -2129,6 +2145,8 @@ rq_starved: rq_init(q, rq); rq->rl = rl; + + blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); out: return rq; } @@ -2157,6 +2175,8 @@ static struct request *get_request_wait(request_queue_t *q, int rw, if (!rq) { struct io_context *ioc; + blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); + __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); io_schedule(); @@ -2210,6 +2230,8 @@ EXPORT_SYMBOL(blk_get_request); */ void blk_requeue_request(request_queue_t *q, struct request *rq) { + blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); + if (blk_rq_tagged(rq)) blk_queue_end_tag(q, rq); @@ -2844,6 +2866,8 @@ static int __make_request(request_queue_t *q, struct bio *bio) if (!q->back_merge_fn(q, req, bio)) break; + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); + req->biotail->bi_next = bio; req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += nr_sectors; @@ -2859,6 +2883,8 @@ static int __make_request(request_queue_t *q, struct bio *bio) if (!q->front_merge_fn(q, req, bio)) break; + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); + bio->bi_next = req->bio; req->bio = bio; @@ -2976,6 +3002,7 @@ void generic_make_request(struct bio *bio) 
request_queue_t *q; sector_t maxsector; int ret, nr_sectors = bio_sectors(bio); + dev_t old_dev; might_sleep(); /* Test device or partition size, when known. */ @@ -3002,6 +3029,8 @@ void generic_make_request(struct bio *bio) * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ + maxsector = -1; + old_dev = 0; do { char b[BDEVNAME_SIZE]; @@ -3034,6 +3063,15 @@ end_io: */ blk_partition_remap(bio); + if (maxsector != -1) + blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, + maxsector); + + blk_add_trace_bio(q, bio, BLK_TA_QUEUE); + + maxsector = bio->bi_sector; + old_dev = bio->bi_bdev->bd_dev; + ret = q->make_request_fn(q, bio); } while (ret); } @@ -3153,6 +3191,8 @@ static int __end_that_request_first(struct request *req, int uptodate, int total_bytes, bio_nbytes, error, next_idx = 0; struct bio *bio; + blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); + /* * extend uptodate bool to allow < 0 value to be direct io error */ diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index e29b8926f80e..1f2890989b56 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -2331,6 +2332,7 @@ static inline void complete_command( ctlr_info_t *h, CommandList_struct *cmd, cmd->rq->completion_data = cmd; cmd->rq->errors = status; + blk_add_trace_rq(cmd->rq->q, cmd->rq, BLK_TA_COMPLETE); blk_complete_request(cmd->rq); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 26b08ee425c7..8c82373f7ff3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -17,6 +17,7 @@ #include #include #include +#include static const char *_name = DM_NAME; @@ -334,6 +335,8 @@ static void dec_pending(struct dm_io *io, int error) /* nudge anyone waiting on suspend queue */ wake_up(&io->md->wait); + blk_add_trace_bio(io->md->queue, io->bio, BLK_TA_COMPLETE); + bio_endio(io->bio, io->bio->bi_size, io->error); free_io(io->md, io); } @@ -392,6 +395,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, struct target_io *tio) { int r; + sector_t sector; /* * Sanity checks. @@ -407,10 +411,17 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, * this io. 
*/ atomic_inc(&tio->io->io_count); + sector = clone->bi_sector; r = ti->type->map(ti, clone, &tio->info); - if (r > 0) + if (r > 0) { /* the bio has been remapped so dispatch it */ + + blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, + tio->io->bio->bi_bdev->bd_dev, sector, + clone->bi_sector); + generic_make_request(clone); + } else if (r < 0) { /* error the io and bail out */ diff --git a/fs/bio.c b/fs/bio.c index 8f1d2e815c96..0a8c59cb68f5 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -25,6 +25,7 @@ #include #include #include +#include #include /* for struct sg_iovec */ #define BIO_POOL_SIZE 256 @@ -1095,6 +1096,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) if (!bp) return bp; + blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi, + bi->bi_sector + first_sectors); + BUG_ON(bi->bi_vcnt != 1); BUG_ON(bi->bi_idx != 0); atomic_set(&bp->cnt, 3); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c666769a875d..7c031f00fd79 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -72,6 +72,7 @@ #include #include #include +#include #include /* siocdevprivate_ioctl */ #include diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 56bb6a4e15f3..c179966f1a2f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -22,6 +22,7 @@ typedef struct request_queue request_queue_t; struct elevator_queue; typedef struct elevator_queue elevator_t; struct request_pm_state; +struct blk_trace; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -416,6 +417,8 @@ struct request_queue unsigned int sg_reserved_size; int node; + struct blk_trace *blk_trace; + /* * reserved for flush operations */ diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h new file mode 100644 index 000000000000..b34d3e73d5ea --- /dev/null +++ b/include/linux/blktrace_api.h @@ -0,0 +1,277 @@ +#ifndef BLKTRACE_H +#define BLKTRACE_H + +#include +#include +#include + +/* + * Trace categories + */ +enum blktrace_cat { + BLK_TC_READ = 1 << 0, /* reads */ + BLK_TC_WRITE = 1 << 1, /* writes */ + BLK_TC_BARRIER = 1 << 2, /* barrier */ + BLK_TC_SYNC = 1 << 3, /* barrier */ + BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ + BLK_TC_REQUEUE = 1 << 5, /* requeueing */ + BLK_TC_ISSUE = 1 << 6, /* issue */ + BLK_TC_COMPLETE = 1 << 7, /* completions */ + BLK_TC_FS = 1 << 8, /* fs requests */ + BLK_TC_PC = 1 << 9, /* pc requests */ + BLK_TC_NOTIFY = 1 << 10, /* special message */ + + BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ +}; + +#define BLK_TC_SHIFT (16) +#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT) + +/* + * Basic trace actions + */ +enum blktrace_act { + __BLK_TA_QUEUE = 1, /* queued */ + __BLK_TA_BACKMERGE, /* back merged to existing rq */ + __BLK_TA_FRONTMERGE, /* front merge to existing rq */ + __BLK_TA_GETRQ, /* allocated new request */ + __BLK_TA_SLEEPRQ, /* sleeping on rq allocation */ + __BLK_TA_REQUEUE, /* request requeued */ + __BLK_TA_ISSUE, /* sent to driver */ + __BLK_TA_COMPLETE, /* completed by driver */ + __BLK_TA_PLUG, /* queue was plugged */ + __BLK_TA_UNPLUG_IO, /* queue was unplugged by io */ + __BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */ + __BLK_TA_INSERT, /* insert request */ + __BLK_TA_SPLIT, /* bio was split */ + __BLK_TA_BOUNCE, /* bio was bounced */ + __BLK_TA_REMAP, /* bio was remapped */ +}; + +/* + * Trace actions in full. 
Additionally, read or write is masked + */ +#define BLK_TA_QUEUE (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_BACKMERGE (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_FRONTMERGE (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_GETRQ (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_SLEEPRQ (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_REQUEUE (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE)) +#define BLK_TA_ISSUE (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE)) +#define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE)) +#define BLK_TA_PLUG (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_UNPLUG_IO (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_UNPLUG_TIMER (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_INSERT (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_SPLIT (__BLK_TA_SPLIT) +#define BLK_TA_BOUNCE (__BLK_TA_BOUNCE) +#define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE)) + +#define BLK_IO_TRACE_MAGIC 0x65617400 +#define BLK_IO_TRACE_VERSION 0x07 + +/* + * The trace itself + */ +struct blk_io_trace { + u32 magic; /* MAGIC << 8 | version */ + u32 sequence; /* event number */ + u64 time; /* in microseconds */ + u64 sector; /* disk offset */ + u32 bytes; /* transfer length */ + u32 action; /* what happened */ + u32 pid; /* who did it */ + u32 device; /* device number */ + u32 cpu; /* on what cpu did it happen */ + u16 error; /* completion error */ + u16 pdu_len; /* length of data after this trace */ +}; + +/* + * The remap event + */ +struct blk_io_trace_remap { + u32 device; + u32 __pad; + u64 sector; +}; + +enum { + Blktrace_setup = 1, + Blktrace_running, + Blktrace_stopped, +}; + +struct blk_trace { + int trace_state; + struct rchan *rchan; + unsigned long *sequence; + u16 act_mask; + u64 start_lba; + u64 end_lba; + u32 pid; + u32 dev; + struct dentry *dir; + struct dentry *dropped_file; + atomic_t dropped; +}; + +/* + * User setup structure passed with BLKTRACESTART + */ +struct blk_user_trace_setup { + char name[BDEVNAME_SIZE]; /* output */ + u16 act_mask; /* input */ + u32 buf_size; /* input */ + u32 buf_nr; /* input */ + u64 start_lba; + u64 end_lba; + u32 pid; +}; + +#if defined(CONFIG_BLK_DEV_IO_TRACE) +extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); +extern void blk_trace_shutdown(request_queue_t *); +extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); + +/** + * blk_add_trace_rq - Add a trace for a request oriented action + * @q: queue the io is for + * @rq: the source request + * @what: the action + * + * Description: + * Records an action against a request. Will log the bio offset + size. + * + **/ +static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + int rw = rq->flags & 0x07; + + if (likely(!bt)) + return; + + if (blk_pc_request(rq)) { + what |= BLK_TC_ACT(BLK_TC_PC); + __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); + } else { + what |= BLK_TC_ACT(BLK_TC_FS); + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL); + } +} + +/** + * blk_add_trace_bio - Add a trace for a bio oriented action + * @q: queue the io is for + * @bio: the source bio + * @what: the action + * + * Description: + * Records an action against a bio. Will log the bio offset + size. 
+ * + **/ +static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL); +} + +/** + * blk_add_trace_generic - Add a trace for a generic action + * @q: queue the io is for + * @bio: the source bio + * @rw: the data direction + * @what: the action + * + * Description: + * Records a simple trace + * + **/ +static inline void blk_add_trace_generic(struct request_queue *q, + struct bio *bio, int rw, u32 what) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + if (bio) + blk_add_trace_bio(q, bio, what); + else + __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL); +} + +/** + * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload + * @q: queue the io is for + * @what: the action + * @bio: the source bio + * @pdu: the integer payload + * + * Description: + * Adds a trace with some integer payload. This might be an unplug + * option given as the action, with the depth at unplug time given + * as the payload + * + **/ +static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what, + struct bio *bio, unsigned int pdu) +{ + struct blk_trace *bt = q->blk_trace; + u64 rpdu = cpu_to_be64(pdu); + + if (likely(!bt)) + return; + + if (bio) + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu); + else + __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); +} + +/** + * blk_add_trace_remap - Add a trace for a remap operation + * @q: queue the io is for + * @bio: the source bio + * @dev: target device + * @from: source sector + * @to: target sector + * + * Description: + * Device mapper or raid target sometimes need to split a bio because + * it spans a stripe (or similar). Add a trace for that action. 
+ * + **/ +static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from, sector_t to) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device = cpu_to_be32(dev); + r.sector = cpu_to_be64(to); + + __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); +} + +#else /* !CONFIG_BLK_DEV_IO_TRACE */ +#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) +#define blk_trace_shutdown(q) do { } while (0) +#define blk_add_trace_rq(q, rq, what) do { } while (0) +#define blk_add_trace_bio(q, rq, what) do { } while (0) +#define blk_add_trace_generic(q, rq, rw, what) do { } while (0) +#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0) +#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0) +#endif /* CONFIG_BLK_DEV_IO_TRACE */ + +#endif diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index ae7dfb790df3..efb518f16bb3 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -97,6 +97,10 @@ COMPATIBLE_IOCTL(BLKRRPART) COMPATIBLE_IOCTL(BLKFLSBUF) COMPATIBLE_IOCTL(BLKSECTSET) COMPATIBLE_IOCTL(BLKSSZGET) +COMPATIBLE_IOCTL(BLKTRACESTART) +COMPATIBLE_IOCTL(BLKTRACESTOP) +COMPATIBLE_IOCTL(BLKTRACESETUP) +COMPATIBLE_IOCTL(BLKTRACETEARDOWN) ULONG_IOCTL(BLKRASET) ULONG_IOCTL(BLKFRASET) /* RAID */ diff --git a/include/linux/fs.h b/include/linux/fs.h index f9c9dea636d0..9b34a1b03455 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -197,6 +197,10 @@ extern int dir_notify_enable; #define BLKBSZGET _IOR(0x12,112,size_t) #define BLKBSZSET _IOW(0x12,113,size_t) #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ +#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) +#define BLKTRACESTART _IO(0x12,116) +#define BLKTRACESTOP _IO(0x12,117) +#define BLKTRACETEARDOWN _IO(0x12,118) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 62e6314382f0..e60a91d5b369 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -706,6 +706,7 @@ struct task_struct { prio_array_t *array; unsigned short ioprio; + unsigned int btrace_seq; unsigned long sleep_avg; unsigned long long timestamp, last_ran; diff --git a/kernel/fork.c b/kernel/fork.c index c79ae0b19a49..c21bae8c93b9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -181,6 +181,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); atomic_set(&tsk->fs_excl, 0); + tsk->btrace_seq = 0; return tsk; } diff --git a/mm/highmem.c b/mm/highmem.c index ce2e7e8bbfa7..d0ea1eec6a9a 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -26,6 +26,7 @@ #include #include #include +#include #include static mempool_t *page_pool, *isa_page_pool; @@ -483,6 +484,8 @@ void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig) pool = isa_page_pool; } + blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); + /* * slow path */ -- cgit v1.2.3 From 9b04c997b1120feefa1e6ee8e2902270bc055cd2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 24 Mar 2006 03:15:10 -0800 Subject: [PATCH] vfs: MS_VERBOSE should be MS_SILENT The meaning of MS_VERBOSE is backwards; if the bit is set, it really means, "don't be verbose". This is confusing and counter-intuitive. 
In addition, there is also no way to set the MS_VERBOSE flag in the mount(8) program in util-linux, but interesting, it does define options which would do the right thing if MS_SILENT were defined, which unfortunately we do not: #ifdef MS_SILENT { "quiet", 0, 0, MS_SILENT }, /* be quiet */ { "loud", 0, 1, MS_SILENT }, /* print out messages. */ #endif So the obvious fix is to deprecate the use of MS_VERBOSE and replace it with MS_SILENT. Signed-off-by: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/super.c | 2 +- fs/cifs/cifsfs.c | 2 +- fs/jffs2/super.c | 4 ++-- fs/nfs/inode.c | 4 ++-- fs/super.c | 6 +++--- include/linux/fs.h | 4 +++- init/do_mounts.c | 2 +- 7 files changed, 13 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/afs/super.c b/fs/afs/super.c index d6fa8e5999df..53c56e7231ab 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -341,7 +341,7 @@ static struct super_block *afs_get_sb(struct file_system_type *fs_type, sb->s_flags = flags; - ret = afs_fill_super(sb, ¶ms, flags & MS_VERBOSE ? 1 : 0); + ret = afs_fill_super(sb, ¶ms, flags & MS_SILENT ? 1 : 0); if (ret < 0) { up_write(&sb->s_umount); deactivate_super(sb); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 79eeccd0437f..1cd044ce82a6 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -479,7 +479,7 @@ cifs_get_sb(struct file_system_type *fs_type, sb->s_flags = flags; - rc = cifs_read_super(sb, data, dev_name, flags & MS_VERBOSE ? 1 : 0); + rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0); if (rc) { up_write(&sb->s_umount); deactivate_super(sb); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 93883817cbd0..c8fac352a4cf 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -152,7 +152,7 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type, sb->s_op = &jffs2_super_operations; sb->s_flags = flags | MS_NOATIME; - ret = jffs2_do_fill_super(sb, data, (flags&MS_VERBOSE)?1:0); + ret = jffs2_do_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (ret) { /* Failure case... */ @@ -257,7 +257,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, } if (imajor(nd.dentry->d_inode) != MTD_BLOCK_MAJOR) { - if (!(flags & MS_VERBOSE)) /* Yes I mean this. Strangely */ + if (!(flags & MS_SILENT)) printk(KERN_NOTICE "Attempt to mount non-MTD device \"%s\" as JFFS2\n", dev_name); goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a77ee95b7efb..37e55c328ebc 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1679,7 +1679,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, s->s_flags = flags; - error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); + error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); @@ -1996,7 +1996,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, s->s_flags = flags; - error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); + error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); diff --git a/fs/super.c b/fs/super.c index 425861cb1caa..37554b876182 100644 --- a/fs/super.c +++ b/fs/super.c @@ -712,7 +712,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, data, flags & MS_VERBOSE ? 
1 : 0); + error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); @@ -756,7 +756,7 @@ struct super_block *get_sb_nodev(struct file_system_type *fs_type, s->s_flags = flags; - error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); + error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); @@ -785,7 +785,7 @@ struct super_block *get_sb_single(struct file_system_type *fs_type, return s; if (!s->s_root) { s->s_flags = flags; - error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); + error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9b34a1b03455..65e6df247ea5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -103,7 +103,9 @@ extern int dir_notify_enable; #define MS_BIND 4096 #define MS_MOVE 8192 #define MS_REC 16384 -#define MS_VERBOSE 32768 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ #define MS_UNBINDABLE (1<<17) /* change to unbindable */ #define MS_PRIVATE (1<<18) /* change to private */ diff --git a/init/do_mounts.c b/init/do_mounts.c index b27c11064409..8b671fe68afa 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -19,7 +19,7 @@ extern int get_filesystem_list(char * buf); int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */ -int root_mountflags = MS_RDONLY | MS_VERBOSE; +int root_mountflags = MS_RDONLY | MS_SILENT; char * __initdata root_device_name; static char __initdata saved_root_name[64]; -- cgit v1.2.3 From ef1597d527f119f14f093a7a3712221b9cb072c0 Mon Sep 17 00:00:00 2001 From: Horst Hummel Date: Fri, 24 Mar 2006 03:15:23 -0800 Subject: [PATCH] s390: Remove old history/whitespave from partition code Remove obsolete history and trailing whitespace. 
Signed-off-by: Horst Hummel Signed-off-by: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/partitions/ibm.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 1e4a93835fed..830c55d86ab1 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -1,15 +1,9 @@ /* - * File...........: linux/fs/partitions/ibm.c + * File...........: linux/fs/partitions/ibm.c * Author(s)......: Holger Smolinski * Volker Sameske * Bugreports.to..: * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 - - * History of changes (starts July 2000) - * 07/10/00 Fixed detection of CMS formatted disks - * 02/13/00 VTOC partition support added - * 12/27/01 fixed PL030593 (CMS reserved minidisk not detected on 64 bit) - * 07/24/03 no longer using contents of freed page for CMS label recognition (BZ3611) */ #include @@ -25,7 +19,7 @@ #include "ibm.h" /* - * compute the block number from a + * compute the block number from a * cyl-cyl-head-head structure */ static inline int @@ -34,9 +28,8 @@ cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) { ptr->hh * geo->sectors; } - /* - * compute the block number from a + * compute the block number from a * cyl-cyl-head-head-block structure */ static inline int @@ -48,7 +41,7 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { /* */ -int +int ibm_partition(struct parsed_partitions *state, struct block_device *bdev) { int blocksize, offset, size; @@ -77,7 +70,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) goto out_nogeo; if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL) goto out_nolab; - + if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 || ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) goto out_noioctl; @@ -154,13 +147,13 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) /* OK, we got valid partition data */ offset = cchh2blk(&f1.DS1EXT1.llimit, geo); - size = cchh2blk(&f1.DS1EXT1.ulimit, geo) - + size = cchh2blk(&f1.DS1EXT1.ulimit, geo) - offset + geo->sectors; if (counter >= state->limit) break; - put_partition(state, counter + 1, - offset * (blocksize >> 9), - size * (blocksize >> 9)); + put_partition(state, counter + 1, + offset * (blocksize >> 9), + size * (blocksize >> 9)); counter++; blk++; } @@ -175,7 +168,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) offset = (info->label_block + 1); size = i_size >> 9; put_partition(state, 1, offset*(blocksize >> 9), - size-offset*(blocksize >> 9)); + size-offset*(blocksize >> 9)); } printk("\n"); @@ -183,7 +176,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) kfree(geo); kfree(info); return 1; - + out_readerr: out_noioctl: kfree(label); -- cgit v1.2.3 From e8c96f8c29d89af0c13dc2819a9a00575846ca18 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 24 Mar 2006 03:15:34 -0800 Subject: [PATCH] fs: Use ARRAY_SIZE macro Use ARRAY_SIZE macro instead of sizeof(x)/sizeof(x[0]) and remove a duplicate of ARRAY_SIZE. Some trailing whitespaces are also deleted. 
Signed-off-by: Tobias Klauser Cc: David Howells Cc: Dave Kleikamp Acked-by: Trond Myklebust Cc: Neil Brown Cc: Chris Mason Cc: Jeff Mahoney Cc: Christoph Hellwig Cc: Nathan Scott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/cmservice.c | 2 +- fs/compat_ioctl.c | 5 ++--- fs/jfs/jfs_debug.c | 2 +- fs/lockd/mon.c | 6 +++--- fs/lockd/svc.c | 2 +- fs/lockd/xdr.c | 2 +- fs/nfs/inode.c | 4 ++-- fs/nfs/mount_clnt.c | 2 +- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs3xdr.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- fs/nfsctl.c | 2 +- fs/nfsd/nfs4acl.c | 4 ++-- fs/nfsd/nfs4callback.c | 4 ++-- fs/nfsd/nfs4xdr.c | 2 +- fs/nfsd/nfsctl.c | 2 +- fs/nfsd/nfssvc.c | 4 ++-- fs/nls/nls_euc-jp.c | 6 ++---- fs/reiserfs/prints.c | 9 +++------ fs/sysv/super.c | 4 ++-- fs/udf/super.c | 19 +++++++------------ fs/xfs/linux-2.6/xfs_stats.c | 2 +- fs/xfs/xfs_vnodeops.c | 2 +- 23 files changed, 40 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 9eef6bf156ab..3d097fddcb7a 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -94,7 +94,7 @@ static struct rxrpc_service AFSCM_service = { .error_func = afscm_error, .aemap_func = afscm_aemap, .ops_begin = &AFSCM_ops[0], - .ops_end = &AFSCM_ops[sizeof(AFSCM_ops) / sizeof(AFSCM_ops[0])], + .ops_end = &AFSCM_ops[ARRAY_SIZE(AFSCM_ops)], }; static DECLARE_COMPLETION(kafscmd_alive); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 7c031f00fd79..d2c38875ab29 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1522,8 +1522,7 @@ static struct { { ATM_QUERYLOOP32, ATM_QUERYLOOP } }; -#define NR_ATM_IOCTL (sizeof(atm_ioctl_map)/sizeof(atm_ioctl_map[0])) - +#define NR_ATM_IOCTL ARRAY_SIZE(atm_ioctl_map) static int do_atm_iobuf(unsigned int fd, unsigned int cmd, unsigned long arg) { @@ -1824,7 +1823,7 @@ static struct { { FDWERRORGET32, FDWERRORGET } }; -#define NR_FD_IOCTL_TRANS (sizeof(fd_ioctl_trans_table)/sizeof(fd_ioctl_trans_table[0])) +#define NR_FD_IOCTL_TRANS ARRAY_SIZE(fd_ioctl_trans_table) static int fd_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) { diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 4caea6b43b92..81f0e514c490 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -111,7 +111,7 @@ static struct { { "loglevel", loglevel_read, loglevel_write } #endif }; -#define NPROCENT (sizeof(Entries)/sizeof(Entries[0])) +#define NPROCENT ARRAY_SIZE(Entries) void jfs_proc_init(void) { diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 0edc03e67966..a89cb8aa2c88 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -224,8 +224,8 @@ static struct rpc_procinfo nsm_procedures[] = { }; static struct rpc_version nsm_version1 = { - .number = 1, - .nrprocs = sizeof(nsm_procedures)/sizeof(nsm_procedures[0]), + .number = 1, + .nrprocs = ARRAY_SIZE(nsm_procedures), .procs = nsm_procedures }; @@ -238,7 +238,7 @@ static struct rpc_stat nsm_stats; static struct rpc_program nsm_program = { .name = "statd", .number = SM_PROGRAM, - .nrvers = sizeof(nsm_version)/sizeof(nsm_version[0]), + .nrvers = ARRAY_SIZE(nsm_version), .version = nsm_version, .stats = &nsm_stats }; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 71a30b416d1a..5e85bde6c123 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -509,7 +509,7 @@ static struct svc_version * nlmsvc_version[] = { static struct svc_stat nlmsvc_stats; -#define NLM_NRVERS (sizeof(nlmsvc_version)/sizeof(nlmsvc_version[0])) +#define NLM_NRVERS ARRAY_SIZE(nlmsvc_version) static struct svc_program nlmsvc_program = { .pg_prog = NLM_PROGRAM, 
/* program number */ .pg_nvers = NLM_NRVERS, /* number of entries in nlmsvc_version */ diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 200fbda2c6d1..1d700a4dd0b5 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -599,7 +599,7 @@ static struct rpc_stat nlm_stats; struct rpc_program nlm_program = { .name = "lockd", .number = NLM_PROGRAM, - .nrvers = sizeof(nlm_versions) / sizeof(nlm_versions[0]), + .nrvers = ARRAY_SIZE(nlm_versions), .version = nlm_versions, .stats = &nlm_stats, }; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 37e55c328ebc..419d1d254f9f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -103,7 +103,7 @@ static struct rpc_version * nfs_version[] = { static struct rpc_program nfs_program = { .name = "nfs", .number = NFS_PROGRAM, - .nrvers = sizeof(nfs_version) / sizeof(nfs_version[0]), + .nrvers = ARRAY_SIZE(nfs_version), .version = nfs_version, .stats = &nfs_rpcstat, .pipe_dir_name = "/nfs", @@ -118,7 +118,7 @@ static struct rpc_version * nfsacl_version[] = { struct rpc_program nfsacl_program = { .name = "nfsacl", .number = NFS_ACL_PROGRAM, - .nrvers = sizeof(nfsacl_version) / sizeof(nfsacl_version[0]), + .nrvers = ARRAY_SIZE(nfsacl_version), .version = nfsacl_version, .stats = &nfsacl_rpcstat, }; diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index db99b8f678f8..0b9a78353d6e 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -174,7 +174,7 @@ static struct rpc_stat mnt_stats; static struct rpc_program mnt_program = { .name = "mount", .number = NFS_MNT_PROGRAM, - .nrvers = sizeof(mnt_version)/sizeof(mnt_version[0]), + .nrvers = ARRAY_SIZE(mnt_version), .version = mnt_version, .stats = &mnt_stats, }; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 7fc0560c89c9..6548a65de944 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -704,6 +704,6 @@ struct rpc_procinfo nfs_procedures[] = { struct rpc_version nfs_version2 = { .number = 2, - .nrprocs = sizeof(nfs_procedures)/sizeof(nfs_procedures[0]), + .nrprocs = ARRAY_SIZE(nfs_procedures), .procs = nfs_procedures }; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index b6c0b5012bce..5224a191efb6 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1138,7 +1138,7 @@ struct rpc_procinfo nfs3_procedures[] = { struct rpc_version nfs_version3 = { .number = 3, - .nrprocs = sizeof(nfs3_procedures)/sizeof(nfs3_procedures[0]), + .nrprocs = ARRAY_SIZE(nfs3_procedures), .procs = nfs3_procedures }; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4bbf5ef57785..0a1bd36a4837 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4384,7 +4384,7 @@ struct rpc_procinfo nfs4_procedures[] = { struct rpc_version nfs_version4 = { .number = 4, - .nrprocs = sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]), + .nrprocs = ARRAY_SIZE(nfs4_procedures), .procs = nfs4_procedures }; diff --git a/fs/nfsctl.c b/fs/nfsctl.c index 1c72c7f85ddc..a5a18d4aca40 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -101,7 +101,7 @@ asmlinkage sys_nfsservctl(int cmd, struct nfsctl_arg __user *arg, void __user *r if (version != NFSCTL_VERSION) return -EINVAL; - if (cmd < 0 || cmd >= sizeof(map)/sizeof(map[0]) || !map[cmd].name) + if (cmd < 0 || cmd >= ARRAY_SIZE(map) || !map[cmd].name) return -EINVAL; file = do_open(map[cmd].name, map[cmd].rsize ? 
O_RDWR : O_WRONLY); diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 4a2105552ac4..7391f4aabedb 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -907,7 +907,7 @@ nfs4_acl_get_whotype(char *p, u32 len) { int i; - for (i=0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) { + for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { if (s2t_map[i].stringlen == len && 0 == memcmp(s2t_map[i].string, p, len)) return s2t_map[i].type; @@ -920,7 +920,7 @@ nfs4_acl_write_who(int who, char *p) { int i; - for (i=0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) { + for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { if (s2t_map[i].type == who) { memcpy(p, s2t_map[i].string, s2t_map[i].stringlen); return s2t_map[i].stringlen; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d828662d737d..8d3d23c8a4d2 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -335,7 +335,7 @@ static struct rpc_procinfo nfs4_cb_procedures[] = { static struct rpc_version nfs_cb_version4 = { .number = 1, - .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]), + .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), .procs = nfs4_cb_procedures }; @@ -411,7 +411,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) /* Initialize rpc_program */ program->name = "nfs4_cb"; program->number = cb->cb_prog; - program->nrvers = sizeof(nfs_cb_version)/sizeof(nfs_cb_version[0]); + program->nrvers = ARRAY_SIZE(nfs_cb_version); program->version = nfs_cb_version; program->stats = stat; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 69d3501173a8..03857fd81126 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -992,7 +992,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (argp->opcnt > 100) goto xdr_error; - if (argp->opcnt > sizeof(argp->iops)/sizeof(argp->iops[0])) { + if (argp->opcnt > ARRAY_SIZE(argp->iops)) { argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); if (!argp->ops) { argp->ops = argp->iops; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index a0871b3efeb7..c8960aff0968 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -105,7 +105,7 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu char *data; ssize_t rv; - if (ino >= sizeof(write_op)/sizeof(write_op[0]) || !write_op[ino]) + if (ino >= ARRAY_SIZE(write_op) || !write_op[ino]) return -EINVAL; data = simple_transaction_get(file, buf, size); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1d163b616915..3790727e5dfd 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -72,7 +72,7 @@ static struct svc_version * nfsd_acl_version[] = { }; #define NFSD_ACL_MINVERS 2 -#define NFSD_ACL_NRVERS (sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0])) +#define NFSD_ACL_NRVERS ARRAY_SIZE(nfsd_acl_version) static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS]; static struct svc_program nfsd_acl_program = { @@ -101,7 +101,7 @@ static struct svc_version * nfsd_version[] = { }; #define NFSD_MINVERS 2 -#define NFSD_NRVERS (sizeof(nfsd_version)/sizeof(nfsd_version[0])) +#define NFSD_NRVERS ARRAY_SIZE(nfsd_version) static struct svc_version *nfsd_versions[NFSD_NRVERS]; struct svc_program nfsd_program = { diff --git a/fs/nls/nls_euc-jp.c b/fs/nls/nls_euc-jp.c index 80f108ae6661..06640c3e4021 100644 --- a/fs/nls/nls_euc-jp.c +++ b/fs/nls/nls_euc-jp.c @@ -268,8 +268,6 @@ static unsigned char euc2sjisibm_g3upper_map[][2] = { {0xFC, 0x4B}, }; -#define MAP_ELEMENT_OF(map) (sizeof(map) / sizeof(map[0])) - static inline int sjisibm2euc(unsigned char *euc, const unsigned char sjis_hi, 
const unsigned char sjis_lo); static inline int euc2sjisibm_jisx0212(unsigned char *sjis, const unsigned char euc_hi, @@ -310,7 +308,7 @@ static inline int euc2sjisibm_jisx0212(unsigned char *sjis, const unsigned char unsigned short euc; min_index = 0; - max_index = MAP_ELEMENT_OF(euc2sjisibm_jisx0212_map) - 1; + max_index = ARRAY_SIZE(euc2sjisibm_jisx0212_map) - 1; euc = (euc_hi << 8) | euc_lo; while (min_index <= max_index) { @@ -339,7 +337,7 @@ static inline int euc2sjisibm_g3upper(unsigned char *sjis, const unsigned char e else index = ((euc_hi << 8) | euc_lo) - 0xF4A1 + 12; - if ((index < 0) || (index >= MAP_ELEMENT_OF(euc2sjisibm_g3upper_map))) + if ((index < 0) || (index >= ARRAY_SIZE(euc2sjisibm_g3upper_map))) return 0; sjis[0] = euc2sjisibm_g3upper_map[index][0]; diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index d55e164bd5c2..78b40621b88b 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -601,8 +601,7 @@ void store_print_tb(struct tree_balance *tb) tb->tb_mode, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item); - for (h = 0; h < sizeof(tb->insert_size) / sizeof(tb->insert_size[0]); - h++) { + for (h = 0; h < ARRAY_SIZE(tb->insert_size); h++) { if (PATH_H_PATH_OFFSET(tb->tb_path, h) <= tb->tb_path->path_length && PATH_H_PATH_OFFSET(tb->tb_path, @@ -658,15 +657,13 @@ void store_print_tb(struct tree_balance *tb) /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */ h = 0; - for (i = 0; i < sizeof(tb->FEB) / sizeof(tb->FEB[0]); i++) + for (i = 0; i < ARRAY_SIZE(tb->FEB); i++) sprintf(print_tb_buf + strlen(print_tb_buf), "%p (%llu %d)%s", tb->FEB[i], tb->FEB[i] ? (unsigned long long)tb->FEB[i]-> b_blocknr : 0ULL, tb->FEB[i] ? atomic_read(&(tb->FEB[i]->b_count)) : 0, - (i == - sizeof(tb->FEB) / sizeof(tb->FEB[0]) - - 1) ? "\n" : ", "); + (i == ARRAY_SIZE(tb->FEB) - 1) ? 
"\n" : ", "); sprintf(print_tb_buf + strlen(print_tb_buf), "======================== the end ====================================\n"); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 59e76b51142b..e92b991e6dda 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -377,10 +377,10 @@ static int sysv_fill_super(struct super_block *sb, void *data, int silent) sbi->s_sb = sb; sbi->s_block_base = 0; sb->s_fs_info = sbi; - + sb_set_blocksize(sb, BLOCK_SIZE); - for (i = 0; i < sizeof(flavours)/sizeof(flavours[0]) && !size; i++) { + for (i = 0; i < ARRAY_SIZE(flavours) && !size; i++) { brelse(bh); bh = sb_bread(sb, flavours[i].block); if (!bh) diff --git a/fs/udf/super.c b/fs/udf/super.c index 9303c50c5d55..0d555616c939 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -660,8 +660,7 @@ udf_find_anchor(struct super_block *sb) * lastblock * however, if the disc isn't closed, it could be 512 */ - for (i=0; (!lastblock && ib_data)->tagLocation); udf_release_data(bh); } - + if (ident == TAG_IDENT_AVDP) { if (location == last[i] - UDF_SB_SESSION(sb)) @@ -753,8 +752,7 @@ udf_find_anchor(struct super_block *sb) } } - for (i=0; imainVolDescSeqExt.extLength ); main_e = main_e >> sb->s_blocksize_bits; main_e += main_s; - + /* Locate the reserve sequence */ reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation); reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength); @@ -1344,12 +1341,10 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) } } - if (i == sizeof(UDF_SB_ANCHOR(sb))/sizeof(int)) - { + if (i == ARRAY_SIZE(UDF_SB_ANCHOR(sb))) { udf_debug("No Anchor block found\n"); return 1; - } - else + } else udf_debug("Using anchor in block %d\n", UDF_SB_ANCHOR(sb)[i]); for (i=0; i Date: Fri, 24 Mar 2006 03:15:52 -0800 Subject: [PATCH] fs/9p/: possible cleanups - mux.c: v9fs_poll_mux() was inline but not static resuling in needless object size bloat - mux.c: remove all "inline"s: gcc should know best what to inline - #if 0 the following unused global functions: - 9p.c: v9fs_v9fs_t_flush() - conv.c: v9fs_create_tauth() - mux.c: v9fs_mux_rpcnb() Signed-off-by: Adrian Bunk Cc: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/9p.c | 2 ++ fs/9p/9p.h | 2 -- fs/9p/conv.c | 2 ++ fs/9p/conv.h | 1 - fs/9p/mux.c | 11 ++++++----- fs/9p/mux.h | 2 -- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/9p/9p.c b/fs/9p/9p.c index f86a28d1d6a6..c148e6ba07e5 100644 --- a/fs/9p/9p.c +++ b/fs/9p/9p.c @@ -148,6 +148,7 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid) return ret; } +#if 0 /** * v9fs_v9fs_t_flush - flush a pending transaction * @v9ses: 9P2000 session information @@ -171,6 +172,7 @@ int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag) return ret; } +#endif /* 0 */ /** * v9fs_t_stat - read a file's meta-data diff --git a/fs/9p/9p.h b/fs/9p/9p.h index 0cd374d94717..95d72aec1c1a 100644 --- a/fs/9p/9p.h +++ b/fs/9p/9p.h @@ -348,8 +348,6 @@ int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid); -int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag); - int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcall); diff --git a/fs/9p/conv.c b/fs/9p/conv.c index bf1f10067960..bba817142465 100644 --- a/fs/9p/conv.c +++ b/fs/9p/conv.c @@ -536,6 +536,7 @@ struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version) return fc; } +#if 0 struct v9fs_fcall *v9fs_create_tauth(u32 afid, 
char *uname, char *aname) { int size; @@ -559,6 +560,7 @@ struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname) error: return fc; } +#endif /* 0 */ struct v9fs_fcall * v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname) diff --git a/fs/9p/conv.h b/fs/9p/conv.h index 26a736e4a2e7..f5896628dae4 100644 --- a/fs/9p/conv.h +++ b/fs/9p/conv.h @@ -33,7 +33,6 @@ int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall, void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag); struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version); -struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname); struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname); struct v9fs_fcall *v9fs_create_tflush(u16 oldtag); diff --git a/fs/9p/mux.c b/fs/9p/mux.c index 8e8356c1c229..e2ae60adda99 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -143,7 +143,7 @@ void v9fs_mux_global_exit(void) * * The current implementation returns sqrt of the number of mounts. */ -inline int v9fs_mux_calc_poll_procs(int muxnum) +static int v9fs_mux_calc_poll_procs(int muxnum) { int n; @@ -384,7 +384,7 @@ v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address, /** * v9fs_poll_mux - polls a mux and schedules read or write works if necessary */ -static inline void v9fs_poll_mux(struct v9fs_mux_data *m) +static void v9fs_poll_mux(struct v9fs_mux_data *m) { int n; @@ -762,9 +762,8 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, return req; } -static inline void -v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, - int err) +static void v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, + struct v9fs_fcall *rc, int err) { v9fs_mux_req_callback cb; int tag; @@ -902,6 +901,7 @@ v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, return err; } +#if 0 /** * v9fs_mux_rpcnb - sends 9P request without waiting for response. * @m: mux data @@ -925,6 +925,7 @@ int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc, dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); return 0; } +#endif /* 0 */ /** * v9fs_mux_cancel - cancel all pending requests with error diff --git a/fs/9p/mux.h b/fs/9p/mux.h index 9473b84f24b2..17144fdfa11b 100644 --- a/fs/9p/mux.h +++ b/fs/9p/mux.h @@ -50,8 +50,6 @@ void v9fs_mux_destroy(struct v9fs_mux_data *); int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc); struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m); int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc); -int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc, - v9fs_mux_req_callback cb, void *a); void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush); void v9fs_mux_cancel(struct v9fs_mux_data *m, int err); -- cgit v1.2.3 From 2c2212901f8b3fc84f36cb98150cfc2f6b4752f8 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 24 Mar 2006 03:15:53 -0800 Subject: [PATCH] fs/ext2/: proper ext2_get_parent() prototype Add a proper prototype for ext2_get_parent(). 
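The idea, sketched here on a hypothetical "example" filesystem rather than ext2 itself (the names below are illustrative assumptions, not code from this patch): declare the function once in the subsystem header and drop the ad-hoc declaration from the .c file that uses it, so the definition and every user are checked against the same prototype.

	/* example.h */
	struct dentry *example_get_parent(struct dentry *child);

	/* super.c */
	#include <linux/fs.h>
	#include "example.h"

	static struct export_operations example_export_ops = {
		.get_parent	= example_get_parent,
	};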
Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/ext2.h | 3 +++ fs/ext2/super.c | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 00de0a7312a2..11035ac7986f 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -138,6 +138,9 @@ extern void ext2_set_inode_flags(struct inode *inode); extern int ext2_ioctl (struct inode *, struct file *, unsigned int, unsigned long); +/* namei.c */ +struct dentry *ext2_get_parent(struct dentry *child); + /* super.c */ extern void ext2_error (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index cb6f9bd658de..7f3899bdebab 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -258,7 +258,6 @@ static struct super_operations ext2_sops = { * systems, but can be improved upon. * Currently only get_parent is required. */ -struct dentry *ext2_get_parent(struct dentry *child); static struct export_operations ext2_export_ops = { .get_parent = ext2_get_parent, }; -- cgit v1.2.3 From c98d8cfbc600af88e9e6cffc84dd342280445760 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 24 Mar 2006 03:15:53 -0800 Subject: [PATCH] fs/coda/: proper prototypes Introduce a file fs/coda/coda_int.h with proper prototypes for some code. Signed-off-by: Adrian Bunk Acked-by: Jan Harkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coda/coda_int.h | 13 +++++++++++++ fs/coda/dir.c | 3 ++- fs/coda/file.c | 2 ++ fs/coda/inode.c | 2 ++ fs/coda/psdev.c | 9 ++------- 5 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 fs/coda/coda_int.h (limited to 'fs') diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h new file mode 100644 index 000000000000..9e6338fea514 --- /dev/null +++ b/fs/coda/coda_int.h @@ -0,0 +1,13 @@ +#ifndef _CODA_INT_ +#define _CODA_INT_ + +extern struct file_system_type coda_fs_type; + +void coda_destroy_inodecache(void); +int coda_init_inodecache(void); +int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, + int datasync); + +#endif /* _CODA_INT_ */ + + diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 8f1a517f8b4e..54f76de8a686 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -27,6 +27,8 @@ #include #include +#include "coda_int.h" + /* dir inode-ops */ static int coda_create(struct inode *dir, struct dentry *new, int mode, struct nameidata *nd); static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd); @@ -50,7 +52,6 @@ static int coda_dentry_delete(struct dentry *); /* support routines */ static int coda_venus_readdir(struct file *filp, filldir_t filldir, void *dirent, struct dentry *dir); -int coda_fsync(struct file *, struct dentry *dentry, int datasync); /* same as fs/bad_inode.c */ static int coda_return_EIO(void) diff --git a/fs/coda/file.c b/fs/coda/file.c index 30b4630bd735..146a991d6eb5 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -24,6 +24,8 @@ #include #include +#include "coda_int.h" + /* if CODA_STORE fails with EOPNOTSUPP, venus clearly doesn't support * CODA_STORE/CODA_RELEASE and we fall back on using the CODA_CLOSE upcall */ static int use_coda_close; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 04a73fb4848f..7d7d52f74b28 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -31,6 +31,8 @@ #include #include +#include "coda_int.h" + /* VFS super_block ops */ static void coda_clear_inode(struct inode *); static void coda_put_super(struct 
super_block *); diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 6a3df88accfe..98c74fe2e139 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -48,12 +48,9 @@ #include #include -#define upc_free(r) kfree(r) +#include "coda_int.h" -/* - * Coda stuff - */ -extern struct file_system_type coda_fs_type; +#define upc_free(r) kfree(r) /* statistics */ int coda_hard; /* allows signals during upcalls */ @@ -394,8 +391,6 @@ out: MODULE_AUTHOR("Peter J. Braam "); MODULE_LICENSE("GPL"); -extern int coda_init_inodecache(void); -extern void coda_destroy_inodecache(void); static int __init init_coda(void) { int status; -- cgit v1.2.3 From 4b6a9316fab51af611dc8671f296734089f6a22a Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Fri, 24 Mar 2006 03:16:05 -0800 Subject: [PATCH] cpuset memory spread: slab cache filesystems Mark file system inode and similar slab caches subject to SLAB_MEM_SPREAD memory spreading. If a slab cache is marked SLAB_MEM_SPREAD, then anytime that a task that's in a cpuset with the 'memory_spread_slab' option enabled goes to allocate from such a slab cache, the allocations are spread evenly over all the memory nodes (task->mems_allowed) allowed to that task, instead of favoring allocation on the node local to the current cpu. The following inode and similar caches are marked SLAB_MEM_SPREAD: file cache ==== ===== fs/adfs/super.c adfs_inode_cache fs/affs/super.c affs_inode_cache fs/befs/linuxvfs.c befs_inode_cache fs/bfs/inode.c bfs_inode_cache fs/block_dev.c bdev_cache fs/cifs/cifsfs.c cifs_inode_cache fs/coda/inode.c coda_inode_cache fs/dquot.c dquot fs/efs/super.c efs_inode_cache fs/ext2/super.c ext2_inode_cache fs/ext2/xattr.c (fs/mbcache.c) ext2_xattr fs/ext3/super.c ext3_inode_cache fs/ext3/xattr.c (fs/mbcache.c) ext3_xattr fs/fat/cache.c fat_cache fs/fat/inode.c fat_inode_cache fs/freevxfs/vxfs_super.c vxfs_inode fs/hpfs/super.c hpfs_inode_cache fs/isofs/inode.c isofs_inode_cache fs/jffs/inode-v23.c jffs_fm fs/jffs2/super.c jffs2_i fs/jfs/super.c jfs_ip fs/minix/inode.c minix_inode_cache fs/ncpfs/inode.c ncp_inode_cache fs/nfs/direct.c nfs_direct_cache fs/nfs/inode.c nfs_inode_cache fs/ntfs/super.c ntfs_big_inode_cache_name fs/ntfs/super.c ntfs_inode_cache fs/ocfs2/dlm/dlmfs.c dlmfs_inode_cache fs/ocfs2/super.c ocfs2_inode_cache fs/proc/inode.c proc_inode_cache fs/qnx4/inode.c qnx4_inode_cache fs/reiserfs/super.c reiser_inode_cache fs/romfs/inode.c romfs_inode_cache fs/smbfs/inode.c smb_inode_cache fs/sysv/inode.c sysv_inode_cache fs/udf/super.c udf_inode_cache fs/ufs/super.c ufs_inode_cache net/socket.c sock_inode_cache net/sunrpc/rpc_pipe.c rpc_inode_cache The choice of which slab caches to so mark was quite simple. I marked those already marked SLAB_RECLAIM_ACCOUNT, except for fs/xfs, dentry_cache, inode_cache, and buffer_head, which were marked in a previous patch. Even though SLAB_RECLAIM_ACCOUNT is for a different purpose, it marks the same potentially large file system i/o related slab caches as we need for memory spreading. Given that the rule now becomes "wherever you would have used a SLAB_RECLAIM_ACCOUNT slab cache flag before (usually the inode cache), use the SLAB_MEM_SPREAD flag too", this should be easy enough to maintain. Future file system writers will just copy one of the existing file system slab cache setups and tend to get it right without thinking. 
Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/adfs/super.c | 2 +- fs/affs/super.c | 2 +- fs/befs/linuxvfs.c | 2 +- fs/bfs/inode.c | 2 +- fs/block_dev.c | 2 +- fs/cifs/cifsfs.c | 2 +- fs/coda/inode.c | 2 +- fs/dquot.c | 2 +- fs/efs/super.c | 2 +- fs/ext2/super.c | 2 +- fs/ext3/super.c | 2 +- fs/fat/cache.c | 2 +- fs/fat/inode.c | 2 +- fs/freevxfs/vxfs_super.c | 2 +- fs/hpfs/super.c | 2 +- fs/isofs/inode.c | 2 +- fs/jffs/inode-v23.c | 4 ++-- fs/jffs2/super.c | 2 +- fs/jfs/super.c | 2 +- fs/mbcache.c | 2 +- fs/minix/inode.c | 2 +- fs/ncpfs/inode.c | 2 +- fs/nfs/direct.c | 2 +- fs/nfs/inode.c | 2 +- fs/ntfs/super.c | 4 ++-- fs/ocfs2/dlm/dlmfs.c | 2 +- fs/ocfs2/super.c | 2 +- fs/proc/inode.c | 2 +- fs/qnx4/inode.c | 2 +- fs/reiserfs/super.c | 2 +- fs/romfs/inode.c | 2 +- fs/smbfs/inode.c | 2 +- fs/sysv/inode.c | 2 +- fs/udf/super.c | 2 +- fs/ufs/super.c | 2 +- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 37 files changed, 39 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 243963228d10..80f798b7d760 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -241,7 +241,7 @@ static int init_inodecache(void) { adfs_inode_cachep = kmem_cache_create("adfs_inode_cache", sizeof(struct adfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (adfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/affs/super.c b/fs/affs/super.c index aaec015a16e4..216536d77a5f 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -98,7 +98,7 @@ static int init_inodecache(void) { affs_inode_cachep = kmem_cache_create("affs_inode_cache", sizeof(struct affs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (affs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index dd6048ce0532..ac031686d6d7 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -427,7 +427,7 @@ befs_init_inodecache(void) { befs_inode_cachep = kmem_cache_create("befs_inode_cache", sizeof (struct befs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (befs_inode_cachep == NULL) { printk(KERN_ERR "befs_init_inodecache: " diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 3af6c73c5b5a..584a0838ac61 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -257,7 +257,7 @@ static int init_inodecache(void) { bfs_inode_cachep = kmem_cache_create("bfs_inode_cache", sizeof(struct bfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (bfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/block_dev.c b/fs/block_dev.c index 44d05e6e34db..80f97729e57f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -319,7 +319,7 @@ void __init bdev_cache_init(void) { int err; bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_PANIC, init_once, NULL); err = register_filesystem(&bd_type); if (err) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 1cd044ce82a6..ba5a24b99a11 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -695,7 +695,7 @@ cifs_init_inodecache(void) { cifs_inode_cachep = kmem_cache_create("cifs_inode_cache", sizeof (struct cifsInodeInfo), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, cifs_init_once, NULL); if (cifs_inode_cachep == 
NULL) return -ENOMEM; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 7d7d52f74b28..ada1a81df6bd 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -71,7 +71,7 @@ int coda_init_inodecache(void) { coda_inode_cachep = kmem_cache_create("coda_inode_cache", sizeof(struct coda_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (coda_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/dquot.c b/fs/dquot.c index acf07e581f8c..1405755b8e4d 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1821,7 +1821,7 @@ static int __init dquot_init(void) dquot_cachep = kmem_cache_create("dquot", sizeof(struct dquot), sizeof(unsigned long) * 4, - SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_PANIC, NULL, NULL); order = 0; diff --git a/fs/efs/super.c b/fs/efs/super.c index afc4891feb36..dff623e3ddbf 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -81,7 +81,7 @@ static int init_inodecache(void) { efs_inode_cachep = kmem_cache_create("efs_inode_cache", sizeof(struct efs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (efs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7f3899bdebab..e153f0cc240b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -175,7 +175,7 @@ static int init_inodecache(void) { ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", sizeof(struct ext2_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (ext2_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index efe5b20d7a5a..e4a0a7cbb5b2 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -481,7 +481,7 @@ static int init_inodecache(void) { ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", sizeof(struct ext3_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (ext3_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 1acc941245fb..97b967b84fc6 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -49,7 +49,7 @@ int __init fat_cache_init(void) { fat_cache_cachep = kmem_cache_create("fat_cache", sizeof(struct fat_cache), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (fat_cache_cachep == NULL) return -ENOMEM; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index e78d7b4842cc..a75708901e73 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -518,7 +518,7 @@ static int __init fat_init_inodecache(void) { fat_inode_cachep = kmem_cache_create("fat_inode_cache", sizeof(struct msdos_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (fat_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 6aa6fbe4f8ee..b44c916d24a1 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -260,7 +260,7 @@ vxfs_init(void) { vxfs_inode_cachep = kmem_cache_create("vxfs_inode", sizeof(struct vxfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT, NULL, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); if (vxfs_inode_cachep) return register_filesystem(&vxfs_fs_type); return -ENOMEM; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 9488a794076e..25fbefe4ed00 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -191,7 +191,7 @@ static int init_inodecache(void) { hpfs_inode_cachep = 
kmem_cache_create("hpfs_inode_cache", sizeof(struct hpfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (hpfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 298f08be22d4..fcb68151ad86 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -87,7 +87,7 @@ static int init_inodecache(void) { isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", sizeof(struct iso_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (isofs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 890d7ff7456d..ffa49861d985 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -1812,14 +1812,14 @@ init_jffs_fs(void) } #endif fm_cache = kmem_cache_create("jffs_fm", sizeof(struct jffs_fm), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); if (!fm_cache) { return -ENOMEM; } node_cache = kmem_cache_create("jffs_node",sizeof(struct jffs_node), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); if (!node_cache) { kmem_cache_destroy(fm_cache); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index c8fac352a4cf..f2563389581c 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -331,7 +331,7 @@ static int __init init_jffs2_fs(void) jffs2_inode_cachep = kmem_cache_create("jffs2_i", sizeof(struct jffs2_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, jffs2_i_init_once, NULL); if (!jffs2_inode_cachep) { printk(KERN_ERR "JFFS2 error: Failed to initialise inode cache\n"); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 18f69e6aa719..4ac40bfdbd84 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -664,7 +664,7 @@ static int __init init_jfs_fs(void) jfs_inode_cachep = kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT, init_once, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (jfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/mbcache.c b/fs/mbcache.c index f5bbe4c97c58..73e754fea2d8 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -288,7 +288,7 @@ mb_cache_create(const char *name, struct mb_cache_op *cache_op, INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]); } cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, - SLAB_RECLAIM_ACCOUNT, NULL, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); if (!cache->c_entry_cache) goto fail; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 790cc0d0e970..4fabef0b6519 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -80,7 +80,7 @@ static int init_inodecache(void) { minix_inode_cachep = kmem_cache_create("minix_inode_cache", sizeof(struct minix_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (minix_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 0b521d3d97ce..2547ebaa6547 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -72,7 +72,7 @@ static int init_inodecache(void) { ncp_inode_cachep = kmem_cache_create("ncp_inode_cache", sizeof(struct ncp_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (ncp_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4e9b3a1b36c5..751f5b5e7e07 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c 
@@ -781,7 +781,7 @@ int nfs_init_directcache(void) { nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", sizeof(struct nfs_direct_req), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); if (nfs_direct_cachep == NULL) return -ENOMEM; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 419d1d254f9f..834c1e905ce1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2163,7 +2163,7 @@ static int nfs_init_inodecache(void) { nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", sizeof(struct nfs_inode), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (nfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 7646b5059389..27833f6df49f 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3163,7 +3163,7 @@ static int __init init_ntfs_fs(void) ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name, sizeof(ntfs_inode), 0, - SLAB_RECLAIM_ACCOUNT, NULL, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); if (!ntfs_inode_cache) { printk(KERN_CRIT "NTFS: Failed to create %s!\n", ntfs_inode_cache_name); @@ -3172,7 +3172,7 @@ static int __init init_ntfs_fs(void) ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name, sizeof(big_ntfs_inode), 0, - SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, ntfs_big_inode_init_once, NULL); if (!ntfs_big_inode_cache) { printk(KERN_CRIT "NTFS: Failed to create %s!\n", diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index dd2d24dc25e0..ca8587cc58bc 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -596,7 +596,7 @@ static int __init init_dlmfs_fs(void) dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", sizeof(struct dlmfs_inode_private), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, dlmfs_init_once, NULL); if (!dlmfs_inode_cache) return -ENOMEM; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 09e1c57a86a0..3fe7896c666a 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -951,7 +951,7 @@ static int ocfs2_initialize_mem_caches(void) { ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache", sizeof(struct ocfs2_inode_info), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, ocfs2_inode_init_once, NULL); if (!ocfs2_inode_cachep) return -ENOMEM; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 075d3e945602..5ac781801ae7 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -121,7 +121,7 @@ int __init proc_init_inodecache(void) { proc_inode_cachep = kmem_cache_create("proc_inode_cache", sizeof(struct proc_inode), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (proc_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 80f32911c0cb..463142c80ae8 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -546,7 +546,7 @@ static int init_inodecache(void) { qnx4_inode_cachep = kmem_cache_create("qnx4_inode_cache", sizeof(struct qnx4_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (qnx4_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d63da756eb49..bf4346057879 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -521,7 +521,7 @@ static int init_inodecache(void) reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", sizeof(struct 
reiserfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (reiserfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 0a13859fd57b..223bebb8b45e 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -579,7 +579,7 @@ static int init_inodecache(void) { romfs_inode_cachep = kmem_cache_create("romfs_inode_cache", sizeof(struct romfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (romfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 02e3e82d465c..9b14542cc186 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -80,7 +80,7 @@ static int init_inodecache(void) { smb_inode_cachep = kmem_cache_create("smb_inode_cache", sizeof(struct smb_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (smb_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index fa33eceb0011..3ff89cc5833a 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -342,7 +342,7 @@ int __init sysv_init_icache(void) { sysv_inode_cachep = kmem_cache_create("sysv_inode_cache", sizeof(struct sysv_inode_info), 0, - SLAB_RECLAIM_ACCOUNT, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (!sysv_inode_cachep) return -ENOMEM; diff --git a/fs/udf/super.c b/fs/udf/super.c index 0d555616c939..e120f33f8473 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -140,7 +140,7 @@ static int init_inodecache(void) { udf_inode_cachep = kmem_cache_create("udf_inode_cache", sizeof(struct udf_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (udf_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index e9055ef7f5ac..684018d3c58e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1184,7 +1184,7 @@ static int init_inodecache(void) { ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", sizeof(struct ufs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (ufs_inode_cachep == NULL) return -ENOMEM; diff --git a/net/socket.c b/net/socket.c index e2d5bae994de..7a5a56874efa 100644 --- a/net/socket.c +++ b/net/socket.c @@ -319,7 +319,7 @@ static int init_inodecache(void) { sock_inode_cachep = kmem_cache_create("sock_inode_cache", sizeof(struct socket_alloc), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (sock_inode_cachep == NULL) return -ENOMEM; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a5c0c7b6e151..befc0c5ca9e5 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -850,7 +850,7 @@ int register_rpc_pipefs(void) { rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", sizeof(struct rpc_inode), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); if (!rpc_inode_cachep) return -ENOMEM; -- cgit v1.2.3 From fffb60f93ce5880aade88e01d7133b52a4879710 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Fri, 24 Mar 2006 03:16:06 -0800 Subject: [PATCH] cpuset memory spread: slab cache format Rewrap the overly long source code lines resulting from the previous patch's addition of the slab cache flag SLAB_MEM_SPREAD. This patch contains only formatting changes, and no function change. 
Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/adfs/super.c | 3 ++- fs/affs/super.c | 3 ++- fs/befs/linuxvfs.c | 3 ++- fs/bfs/inode.c | 3 ++- fs/block_dev.c | 3 ++- fs/cifs/cifsfs.c | 3 ++- fs/dquot.c | 3 ++- fs/ext2/super.c | 3 ++- fs/ext3/super.c | 3 ++- fs/fat/inode.c | 3 ++- fs/hpfs/super.c | 3 ++- fs/isofs/inode.c | 3 ++- fs/jffs/inode-v23.c | 10 ++++++---- fs/jffs2/super.c | 3 ++- fs/jfs/super.c | 3 ++- fs/minix/inode.c | 3 ++- fs/ncpfs/inode.c | 3 ++- fs/nfs/direct.c | 3 ++- fs/nfs/inode.c | 3 ++- fs/ocfs2/dlm/dlmfs.c | 3 ++- fs/ocfs2/super.c | 8 +++++--- fs/proc/inode.c | 3 ++- fs/qnx4/inode.c | 3 ++- fs/reiserfs/super.c | 3 ++- fs/romfs/inode.c | 3 ++- fs/smbfs/inode.c | 3 ++- fs/udf/super.c | 3 ++- fs/ufs/super.c | 3 ++- net/socket.c | 3 ++- net/sunrpc/rpc_pipe.c | 7 ++++--- 30 files changed, 69 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 80f798b7d760..252abda0d200 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -241,7 +241,8 @@ static int init_inodecache(void) { adfs_inode_cachep = kmem_cache_create("adfs_inode_cache", sizeof(struct adfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (adfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/affs/super.c b/fs/affs/super.c index 216536d77a5f..4d7e5b19e5cd 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -98,7 +98,8 @@ static int init_inodecache(void) { affs_inode_cachep = kmem_cache_create("affs_inode_cache", sizeof(struct affs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (affs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index ac031686d6d7..044a59587829 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -427,7 +427,8 @@ befs_init_inodecache(void) { befs_inode_cachep = kmem_cache_create("befs_inode_cache", sizeof (struct befs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (befs_inode_cachep == NULL) { printk(KERN_ERR "befs_init_inodecache: " diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 584a0838ac61..55a7a78332f8 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -257,7 +257,8 @@ static int init_inodecache(void) { bfs_inode_cachep = kmem_cache_create("bfs_inode_cache", sizeof(struct bfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (bfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/block_dev.c b/fs/block_dev.c index 80f97729e57f..2d096057ab53 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -319,7 +319,8 @@ void __init bdev_cache_init(void) { int err; bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_PANIC, + 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD|SLAB_PANIC), init_once, NULL); err = register_filesystem(&bd_type); if (err) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index ba5a24b99a11..221b3334b737 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -695,7 +695,8 @@ cifs_init_inodecache(void) { cifs_inode_cachep = kmem_cache_create("cifs_inode_cache", sizeof (struct cifsInodeInfo), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), cifs_init_once, NULL); if (cifs_inode_cachep 
== NULL) return -ENOMEM; diff --git a/fs/dquot.c b/fs/dquot.c index 1405755b8e4d..6b3886920939 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1821,7 +1821,8 @@ static int __init dquot_init(void) dquot_cachep = kmem_cache_create("dquot", sizeof(struct dquot), sizeof(unsigned long) * 4, - SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_PANIC, + (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD|SLAB_PANIC), NULL, NULL); order = 0; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index e153f0cc240b..268b73f5847c 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -175,7 +175,8 @@ static int init_inodecache(void) { ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", sizeof(struct ext2_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (ext2_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index e4a0a7cbb5b2..a3e2a8e7dca2 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -481,7 +481,8 @@ static int init_inodecache(void) { ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", sizeof(struct ext3_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (ext3_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a75708901e73..297300fe81c2 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -518,7 +518,8 @@ static int __init fat_init_inodecache(void) { fat_inode_cachep = kmem_cache_create("fat_inode_cache", sizeof(struct msdos_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (fat_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 25fbefe4ed00..d72d8c87c996 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -191,7 +191,8 @@ static int init_inodecache(void) { hpfs_inode_cachep = kmem_cache_create("hpfs_inode_cache", sizeof(struct hpfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (hpfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index fcb68151ad86..70adbb98bad1 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -87,7 +87,8 @@ static int init_inodecache(void) { isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", sizeof(struct iso_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (isofs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index ffa49861d985..5a4519e834da 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -1812,15 +1812,17 @@ init_jffs_fs(void) } #endif fm_cache = kmem_cache_create("jffs_fm", sizeof(struct jffs_fm), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - NULL, NULL); + 0, + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + NULL, NULL); if (!fm_cache) { return -ENOMEM; } node_cache = kmem_cache_create("jffs_node",sizeof(struct jffs_node), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - NULL, NULL); + 0, + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + NULL, NULL); if (!node_cache) { kmem_cache_destroy(fm_cache); return -ENOMEM; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index f2563389581c..ffd8e84b22cc 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -331,7 +331,8 @@ static int __init 
init_jffs2_fs(void) jffs2_inode_cachep = kmem_cache_create("jffs2_i", sizeof(struct jffs2_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), jffs2_i_init_once, NULL); if (!jffs2_inode_cachep) { printk(KERN_ERR "JFFS2 error: Failed to initialise inode cache\n"); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4ac40bfdbd84..db6f41d6dd60 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -664,7 +664,8 @@ static int __init init_jfs_fs(void) jfs_inode_cachep = kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + init_once, NULL); if (jfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 4fabef0b6519..d9ffc43fee59 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -80,7 +80,8 @@ static int init_inodecache(void) { minix_inode_cachep = kmem_cache_create("minix_inode_cache", sizeof(struct minix_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (minix_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 2547ebaa6547..a1f3e972c6ef 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -72,7 +72,8 @@ static int init_inodecache(void) { ncp_inode_cachep = kmem_cache_create("ncp_inode_cache", sizeof(struct ncp_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (ncp_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 751f5b5e7e07..4ae2f3b33fef 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -781,7 +781,8 @@ int nfs_init_directcache(void) { nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", sizeof(struct nfs_direct_req), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), NULL, NULL); if (nfs_direct_cachep == NULL) return -ENOMEM; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 834c1e905ce1..3413996f9a86 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2163,7 +2163,8 @@ static int nfs_init_inodecache(void) { nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", sizeof(struct nfs_inode), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (nfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index ca8587cc58bc..7e88e24b3471 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -596,7 +596,8 @@ static int __init init_dlmfs_fs(void) dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", sizeof(struct dlmfs_inode_private), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), dlmfs_init_once, NULL); if (!dlmfs_inode_cache) return -ENOMEM; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 3fe7896c666a..44d8b5248236 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -950,9 +950,11 @@ static void ocfs2_inode_init_once(void *data, static int ocfs2_initialize_mem_caches(void) { ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache", - sizeof(struct ocfs2_inode_info), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - ocfs2_inode_init_once, NULL); + sizeof(struct ocfs2_inode_info), + 0, + (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + ocfs2_inode_init_once, NULL); if 
(!ocfs2_inode_cachep) return -ENOMEM; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 5ac781801ae7..722b9c463111 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -121,7 +121,8 @@ int __init proc_init_inodecache(void) { proc_inode_cachep = kmem_cache_create("proc_inode_cache", sizeof(struct proc_inode), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (proc_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 463142c80ae8..2ecd46f85e9f 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -546,7 +546,8 @@ static int init_inodecache(void) { qnx4_inode_cachep = kmem_cache_create("qnx4_inode_cache", sizeof(struct qnx4_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (qnx4_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index bf4346057879..93e6ef9360e3 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -521,7 +521,8 @@ static int init_inodecache(void) reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", sizeof(struct reiserfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (reiserfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 223bebb8b45e..c2fc424d7d5c 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -579,7 +579,8 @@ static int init_inodecache(void) { romfs_inode_cachep = kmem_cache_create("romfs_inode_cache", sizeof(struct romfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (romfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 9b14542cc186..44ed1d418b46 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -80,7 +80,8 @@ static int init_inodecache(void) { smb_inode_cachep = kmem_cache_create("smb_inode_cache", sizeof(struct smb_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (smb_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/udf/super.c b/fs/udf/super.c index e120f33f8473..e45789fe38e8 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -140,7 +140,8 @@ static int init_inodecache(void) { udf_inode_cachep = kmem_cache_create("udf_inode_cache", sizeof(struct udf_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (udf_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 684018d3c58e..d257644a1aeb 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1184,7 +1184,8 @@ static int init_inodecache(void) { ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", sizeof(struct ufs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (ufs_inode_cachep == NULL) return -ENOMEM; diff --git a/net/socket.c b/net/socket.c index 7a5a56874efa..5211ba270375 100644 --- a/net/socket.c +++ b/net/socket.c @@ -319,7 +319,8 @@ static int init_inodecache(void) { sock_inode_cachep = kmem_cache_create("sock_inode_cache", sizeof(struct socket_alloc), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (sock_inode_cachep == NULL) return -ENOMEM; diff --git 
a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index befc0c5ca9e5..ad9d9fc4e734 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -849,9 +849,10 @@ init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) int register_rpc_pipefs(void) { rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", - sizeof(struct rpc_inode), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - init_once, NULL); + sizeof(struct rpc_inode), + 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once, NULL); if (!rpc_inode_cachep) return -ENOMEM; register_filesystem(&rpc_pipe_fs_type); -- cgit v1.2.3 From b0196009d8c3ecf6ea6ec080c63d2ccc146e7ad9 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Fri, 24 Mar 2006 03:16:09 -0800 Subject: [PATCH] cpuset memory spread slab cache hooks Change the kmem_cache_create calls for certain slab caches to support cpuset memory spreading. See the previous patches, cpuset_mem_spread, for an explanation of cpuset memory spreading, and cpuset_mem_spread_slab_cache for the slab cache support for memory spreading. The slab caches marked for now are: dentry_cache, inode_cache, some xfs slab caches, and buffer_head. This list may change over time. In particular, other file system types that are used extensively on large NUMA systems may want to allow for spreading their directory and inode slab cache entries. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 7 +++++-- fs/dcache.c | 3 ++- fs/inode.c | 9 +++++++-- fs/xfs/linux-2.6/kmem.h | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 0d6ca7bac6c8..36c7253bea72 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3143,8 +3143,11 @@ void __init buffer_init(void) int nrpages; bh_cachep = kmem_cache_create("buffer_head", - sizeof(struct buffer_head), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL); + sizeof(struct buffer_head), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| + SLAB_MEM_SPREAD), + init_buffer_head, + NULL); /* * Limit the bh occupancy to 10% of ZONE_NORMAL diff --git a/fs/dcache.c b/fs/dcache.c index 11dc83092d4a..653f64ce98e2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1682,7 +1682,8 @@ static void __init dcache_init(unsigned long mempages) dentry_cache = kmem_cache_create("dentry_cache", sizeof(struct dentry), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| + SLAB_MEM_SPREAD), NULL, NULL); set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); diff --git a/fs/inode.c b/fs/inode.c index 25967b67903d..a51c671c54cf 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1375,8 +1375,13 @@ void __init inode_init(unsigned long mempages) int loop; /* inode slab cache */ - inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_once, NULL); + inode_cachep = kmem_cache_create("inode_cache", + sizeof(struct inode), + 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| + SLAB_MEM_SPREAD), + init_once, + NULL); set_shrinker(DEFAULT_SEEKS, shrink_icache_memory); /* Hash may have been set up in inode_init_early */ diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index f0268a84e6fd..2cfd33d4d8aa 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -100,7 +100,7 @@ extern void kmem_free(void *, size_t); #define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN #define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT -#define KM_ZONE_SPREAD 0 +#define KM_ZONE_SPREAD SLAB_MEM_SPREAD #define kmem_zone 
kmem_cache #define kmem_zone_t struct kmem_cache -- cgit v1.2.3 From 53b3531bbbf70ac7551b32d1acc229d94de52658 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 24 Mar 2006 03:16:13 -0800 Subject: [PATCH] s/;;/;/g Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-pxa/leds-mainstone.c | 6 +++--- arch/arm/mach-s3c2410/cpu.c | 2 +- arch/frv/kernel/gdb-stub.c | 2 +- arch/ia64/sn/kernel/bte.c | 2 +- arch/ia64/sn/pci/tioca_provider.c | 2 +- arch/mips/mm/dma-ip32.c | 6 +++--- arch/ppc/syslib/ppc85xx_setup.c | 2 +- arch/sparc64/kernel/irq.c | 2 +- drivers/char/pcmcia/synclink_cs.c | 2 +- drivers/char/synclink.c | 2 +- drivers/char/synclink_gt.c | 2 +- drivers/char/synclinkmp.c | 2 +- drivers/hwmon/gl520sm.c | 2 +- drivers/i2c/chips/rtc8564.c | 2 +- drivers/ide/ide-dma.c | 2 +- drivers/net/sk98lin/skge.c | 2 +- drivers/net/sky2.h | 2 +- drivers/net/wireless/prism54/oid_mgt.c | 4 ++-- drivers/net/wireless/spectrum_cs.c | 2 +- drivers/scsi/megaraid/megaraid_mbox.c | 2 +- drivers/usb/image/microtek.c | 2 +- drivers/usb/input/hid-core.c | 2 +- drivers/video/aty/radeon_pm.c | 2 +- fs/cifs/cifssmb.c | 2 +- fs/pnode.c | 2 +- include/acpi/acpi_bus.h | 2 +- include/asm-ia64/sn/sn_sal.h | 2 +- net/ipv4/inet_hashtables.c | 2 +- net/ipv4/netfilter/ip_nat_standalone.c | 2 +- net/tipc/link.c | 2 +- sound/ppc/toonie.c | 4 ++-- 31 files changed, 37 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/arch/arm/mach-pxa/leds-mainstone.c b/arch/arm/mach-pxa/leds-mainstone.c index bbd3f87a9fc2..c06d3d7a8dd4 100644 --- a/arch/arm/mach-pxa/leds-mainstone.c +++ b/arch/arm/mach-pxa/leds-mainstone.c @@ -85,7 +85,7 @@ void mainstone_leds_event(led_event_t evt) break; case led_green_on: - hw_led_state |= D21;; + hw_led_state |= D21; break; case led_green_off: @@ -93,7 +93,7 @@ void mainstone_leds_event(led_event_t evt) break; case led_amber_on: - hw_led_state |= D22;; + hw_led_state |= D22; break; case led_amber_off: @@ -101,7 +101,7 @@ void mainstone_leds_event(led_event_t evt) break; case led_red_on: - hw_led_state |= D23;; + hw_led_state |= D23; break; case led_red_off: diff --git a/arch/arm/mach-s3c2410/cpu.c b/arch/arm/mach-s3c2410/cpu.c index 00a379334b60..70c34fcf7858 100644 --- a/arch/arm/mach-s3c2410/cpu.c +++ b/arch/arm/mach-s3c2410/cpu.c @@ -146,7 +146,7 @@ void s3c24xx_set_board(struct s3c24xx_board *b) board = b; if (b->clocks_count != 0) { - struct clk **ptr = b->clocks;; + struct clk **ptr = b->clocks; for (i = b->clocks_count; i > 0; i--, ptr++) s3c24xx_register_clock(*ptr); diff --git a/arch/frv/kernel/gdb-stub.c b/arch/frv/kernel/gdb-stub.c index 8f860d9c4947..508601fad079 100644 --- a/arch/frv/kernel/gdb-stub.c +++ b/arch/frv/kernel/gdb-stub.c @@ -1406,7 +1406,7 @@ void gdbstub(int sigval) __debug_frame->psr |= PSR_S; __debug_regs->brr = (__debug_frame->tbr & TBR_TT) << 12; __debug_regs->brr |= BRR_EB; - sigval = SIGXCPU;; + sigval = SIGXCPU; } LEDS(0x5002); diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 1f11db470d90..e952ef4f6d91 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -36,7 +36,7 @@ static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface) nodepda_t *tmp_nodepda; if (nasid_to_cnodeid(nasid) == -1) - return (struct bteinfo_s *)NULL;; + return (struct bteinfo_s *)NULL; tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid)); return &tmp_nodepda->bte_if[interface]; diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 
7571a4025529..be0176912968 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -377,7 +377,7 @@ tioca_dma_mapped(struct pci_dev *pdev, u64 paddr, size_t req_size) struct tioca_dmamap *ca_dmamap; void *map; unsigned long flags; - struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev); tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info; tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private; diff --git a/arch/mips/mm/dma-ip32.c b/arch/mips/mm/dma-ip32.c index a7e3072ff78d..ec54ed0d26ff 100644 --- a/arch/mips/mm/dma-ip32.c +++ b/arch/mips/mm/dma-ip32.c @@ -138,7 +138,7 @@ dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, BUG(); } - addr = virt_to_phys(ptr)&RAM_OFFSET_MASK;; + addr = virt_to_phys(ptr)&RAM_OFFSET_MASK; if(dev == NULL) addr+=CRIME_HI_MEM_BASE; return (dma_addr_t)addr; @@ -179,7 +179,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, addr = (unsigned long) page_address(sg->page)+sg->offset; if (addr) __dma_sync(addr, sg->length, direction); - addr = __pa(addr)&RAM_OFFSET_MASK;; + addr = __pa(addr)&RAM_OFFSET_MASK; if(dev == NULL) addr += CRIME_HI_MEM_BASE; sg->dma_address = (dma_addr_t)addr; @@ -199,7 +199,7 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page, addr = (unsigned long) page_address(page) + offset; dma_cache_wback_inv(addr, size); - addr = __pa(addr)&RAM_OFFSET_MASK;; + addr = __pa(addr)&RAM_OFFSET_MASK; if(dev == NULL) addr += CRIME_HI_MEM_BASE; diff --git a/arch/ppc/syslib/ppc85xx_setup.c b/arch/ppc/syslib/ppc85xx_setup.c index e70b34ee6275..79b7089d7500 100644 --- a/arch/ppc/syslib/ppc85xx_setup.c +++ b/arch/ppc/syslib/ppc85xx_setup.c @@ -235,7 +235,7 @@ mpc85xx_setup_pci2(struct pci_controller *hose) (__ilog2(MPC85XX_PCI2_UPPER_MEM - MPC85XX_PCI2_LOWER_MEM + 1) - 1); /* Setup outbound IO windows @ MPC85XX_PCI2_IO_BASE */ - pci->potar2 = (MPC85XX_PCI2_LOWER_IO >> 12) & 0x000fffff;; + pci->potar2 = (MPC85XX_PCI2_LOWER_IO >> 12) & 0x000fffff; pci->potear2 = 0x00000000; pci->powbar2 = (MPC85XX_PCI2_IO_BASE >> 12) & 0x000fffff; /* Enable, IO R/W */ diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index e505a4125e35..11e645c9ec50 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -727,7 +727,7 @@ void handler_irq(int irq, struct pt_regs *regs) } #ifdef CONFIG_BLK_DEV_FD -extern irqreturn_t floppy_interrupt(int, void *, struct pt_regs *);; +extern irqreturn_t floppy_interrupt(int, void *, struct pt_regs *); /* XXX No easy way to include asm/floppy.h XXX */ extern unsigned char *pdma_vaddr; diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 8a8ca32822ba..e6b714b6390d 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -4181,7 +4181,7 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, } info->params.encoding = new_encoding; - info->params.crc_type = new_crctype;; + info->params.crc_type = new_crctype; /* if network interface up, reprogram hardware */ if (info->netcount) diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index ede688a4e141..d68be61f0a49 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -7770,7 +7770,7 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, } info->params.encoding = new_encoding; - info->params.crc_type = new_crctype;; + info->params.crc_type = new_crctype; /* if 
network interface up, reprogram hardware */ if (info->netcount) diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index b046390cd256..738ec2f4e563 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -1365,7 +1365,7 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, } info->params.encoding = new_encoding; - info->params.crc_type = new_crctype;; + info->params.crc_type = new_crctype; /* if network interface up, reprogram hardware */ if (info->netcount) diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 960adb256fbb..858740131115 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -1650,7 +1650,7 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, } info->params.encoding = new_encoding; - info->params.crc_type = new_crctype;; + info->params.crc_type = new_crctype; /* if network interface up, reprogram hardware */ if (info->netcount) diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c index 47b4d49f75c6..14e810f3c2c0 100644 --- a/drivers/hwmon/gl520sm.c +++ b/drivers/hwmon/gl520sm.c @@ -456,7 +456,7 @@ static ssize_t set_temp_max(struct i2c_client *client, struct gl520_data *data, long v = simple_strtol(buf, NULL, 10); mutex_lock(&data->update_lock); - data->temp_max[n - 1] = TEMP_TO_REG(v);; + data->temp_max[n - 1] = TEMP_TO_REG(v); gl520_write_value(client, reg, data->temp_max[n - 1]); mutex_unlock(&data->update_lock); return count; diff --git a/drivers/i2c/chips/rtc8564.c b/drivers/i2c/chips/rtc8564.c index ceaa6b0bdfd6..0d8699b3f488 100644 --- a/drivers/i2c/chips/rtc8564.c +++ b/drivers/i2c/chips/rtc8564.c @@ -53,7 +53,7 @@ static inline u8 _rtc8564_ctrl2(struct i2c_client *client) #define CTRL1(c) _rtc8564_ctrl1(c) #define CTRL2(c) _rtc8564_ctrl2(c) -static int debug;; +static int debug; module_param(debug, int, S_IRUGO | S_IWUSR); static struct i2c_driver rtc8564_driver; diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 0523da77425a..c481be8b807f 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -175,7 +175,7 @@ ide_startstop_t ide_dma_intr (ide_drive_t *drive) if (rq->rq_disk) { ide_driver_t *drv; - drv = *(ide_driver_t **)rq->rq_disk->private_data;; + drv = *(ide_driver_t **)rq->rq_disk->private_data; drv->end_request(drive, 1, rq->nr_sectors); } else ide_end_request(drive, 1, rq->nr_sectors); diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c index a5f2b1ee0752..38a26df4095f 100644 --- a/drivers/net/sk98lin/skge.c +++ b/drivers/net/sk98lin/skge.c @@ -1727,7 +1727,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ pTxd->VDataHigh = (SK_U32) (PhysAddr >> 32); pTxd->pMBuf = pMessage; - pTxd->TBControl = Control | BMU_OWN | sk_frag->size;; + pTxd->TBControl = Control | BMU_OWN | sk_frag->size; /* ** Do we have the last fragment? 
diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index 2838f661b393..62532b4e45c5 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h @@ -1804,7 +1804,7 @@ struct sky2_rx_le { __le16 length; u8 ctrl; u8 opcode; -} __attribute((packed));; +} __attribute((packed)); struct sky2_status_le { __le32 status; /* also checksum */ diff --git a/drivers/net/wireless/prism54/oid_mgt.c b/drivers/net/wireless/prism54/oid_mgt.c index eea2f04c8c6d..ebb238785839 100644 --- a/drivers/net/wireless/prism54/oid_mgt.c +++ b/drivers/net/wireless/prism54/oid_mgt.c @@ -332,7 +332,7 @@ mgt_le_to_cpu(int type, void *data) case OID_TYPE_ATTACH:{ struct obj_attachment *attach = data; attach->id = le16_to_cpu(attach->id); - attach->size = le16_to_cpu(attach->size);; + attach->size = le16_to_cpu(attach->size); break; } case OID_TYPE_SSID: @@ -401,7 +401,7 @@ mgt_cpu_to_le(int type, void *data) case OID_TYPE_ATTACH:{ struct obj_attachment *attach = data; attach->id = cpu_to_le16(attach->id); - attach->size = cpu_to_le16(attach->size);; + attach->size = cpu_to_le16(attach->size); break; } case OID_TYPE_SSID: diff --git a/drivers/net/wireless/spectrum_cs.c b/drivers/net/wireless/spectrum_cs.c index fee4be1ce810..5fa6fbe35bb9 100644 --- a/drivers/net/wireless/spectrum_cs.c +++ b/drivers/net/wireless/spectrum_cs.c @@ -147,7 +147,7 @@ struct pdi { __le16 _len; /* length of ID and data, in words */ __le16 _id; /* record ID */ char data[0]; /* plug data */ -} __attribute__ ((packed));; +} __attribute__ ((packed)); /* Functions for access to little-endian data */ diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index bf9f7f7ba354..c11e5ce6865e 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -2797,7 +2797,7 @@ mbox_post_sync_cmd(adapter_t *adapter, uint8_t raw_mbox[]) // available within 1 second, assume FW is initializing and wait // for an extended amount of time if (mbox->numstatus == 0xFF) { // status not yet available - udelay(25);; + udelay(25); for (i = 0; mbox->numstatus == 0xFF && i < 1000; i++) { rmb(); diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c index 28538db9eaf3..2a0e18a48748 100644 --- a/drivers/usb/image/microtek.c +++ b/drivers/usb/image/microtek.c @@ -360,7 +360,7 @@ static int mts_scsi_host_reset (Scsi_Cmnd *srb) rc = usb_lock_device_for_reset(desc->usb_dev, desc->usb_intf); if (rc < 0) return FAILED; - result = usb_reset_device(desc->usb_dev);; + result = usb_reset_device(desc->usb_dev); if (rc) usb_unlock_device(desc->usb_dev); return result ? 
FAILED : SUCCESS; diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c index 58b59f6e9881..d4bf1701046b 100644 --- a/drivers/usb/input/hid-core.c +++ b/drivers/usb/input/hid-core.c @@ -1215,7 +1215,7 @@ static void hid_irq_out(struct urb *urb, struct pt_regs *regs) if (hid->outhead != hid->outtail) { if (hid_submit_out(hid)) { - clear_bit(HID_OUT_RUNNING, &hid->iofl);; + clear_bit(HID_OUT_RUNNING, &hid->iofl); wake_up(&hid->wait); } spin_unlock_irqrestore(&hid->outlock, flags); diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c index 1f8d805c61e5..5886a2f1323e 100644 --- a/drivers/video/aty/radeon_pm.c +++ b/drivers/video/aty/radeon_pm.c @@ -2080,7 +2080,7 @@ static void radeon_reinitialize_M9P(struct radeonfb_info *rinfo) OUTREG(0x2ec, 0x6332a3f0); mdelay(17); - OUTPLL(pllPPLL_REF_DIV, rinfo->pll.ref_div);; + OUTPLL(pllPPLL_REF_DIV, rinfo->pll.ref_div); OUTPLL(pllPPLL_DIV_0, rinfo->save_regs[92]); mdelay(40); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b41e8b379652..a243fe2792d5 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4908,7 +4908,7 @@ SetEARetry: parm_data->list_len = cpu_to_le32(count); parm_data->list[0].EA_flags = 0; /* we checked above that name len is less than 255 */ - parm_data->list[0].name_len = (__u8)name_len;; + parm_data->list[0].name_len = (__u8)name_len; /* EA names are always ASCII */ if(ea_name) strncpy(parm_data->list[0].name,ea_name,name_len); diff --git a/fs/pnode.c b/fs/pnode.c index f1871f773f64..37b568ed0e05 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -130,7 +130,7 @@ static struct vfsmount *get_source(struct vfsmount *dest, { struct vfsmount *p_last_src = NULL; struct vfsmount *p_last_dest = NULL; - *type = CL_PROPAGATION;; + *type = CL_PROPAGATION; if (IS_MNT_SHARED(dest)) *type |= CL_MAKE_SHARED; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e496fac860ac..6dca3d542080 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -269,7 +269,7 @@ struct acpi_device_wakeup_state { struct acpi_device_wakeup { acpi_handle gpe_device; - acpi_integer gpe_number;; + acpi_integer gpe_number; acpi_integer sleep_state; struct acpi_handle_list resources; struct acpi_device_wakeup_state state; diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h index e77f0c9b7d3d..b546de2fdce5 100644 --- a/include/asm-ia64/sn/sn_sal.h +++ b/include/asm-ia64/sn/sn_sal.h @@ -1037,7 +1037,7 @@ ia64_sn_get_sn_info(int fc, u8 *shubtype, u16 *nasid_bitmask, u8 *nasid_shift, /***** BEGIN HACK - temp til old proms no longer supported ********/ if (ret_stuff.status == SALRET_NOT_IMPLEMENTED) { - int nasid = get_sapicid() & 0xfff;; + int nasid = get_sapicid() & 0xfff; #define SH_SHUB_ID_NODES_PER_BIT_MASK 0x001f000000000000UL #define SH_SHUB_ID_NODES_PER_BIT_SHFT 48 if (shubtype) *shubtype = 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 33228115cda4..ef7366fc132f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -315,7 +315,7 @@ ok: spin_unlock(&head->lock); if (tw) { - inet_twsk_deschedule(tw, death_row);; + inet_twsk_deschedule(tw, death_row); inet_twsk_put(tw); } diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index ab1f88fa21ec..380aef3d7865 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -394,7 +394,7 @@ static int init_or_cleanup(int init) ret = nf_register_hook(&ip_nat_local_out_ops); if (ret < 0) { 
printk("ip_nat_init: can't register local out hook.\n"); - goto cleanup_adjustout_ops;; + goto cleanup_adjustout_ops; } ret = nf_register_hook(&ip_nat_local_in_ops); if (ret < 0) { diff --git a/net/tipc/link.c b/net/tipc/link.c index 910b37e5083d..784b24b6d102 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1629,7 +1629,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, tipc_msg_print(TIPC_CONS, buf_msg(buf), ">RETR>"); info("...Retransmitted %u times\n", l_ptr->stale_count); - link_print(l_ptr, TIPC_CONS, "Resetting Link\n");; + link_print(l_ptr, TIPC_CONS, "Resetting Link\n"); tipc_link_reset(l_ptr); break; } diff --git a/sound/ppc/toonie.c b/sound/ppc/toonie.c index 210be20dc27e..4e595172e423 100644 --- a/sound/ppc/toonie.c +++ b/sound/ppc/toonie.c @@ -117,7 +117,7 @@ static int toonie_get_mute_switch(struct snd_kcontrol *kcontrol, gp = &mix->amp_mute_gpio; break; default: - return -EINVAL;; + return -EINVAL; } ucontrol->value.integer.value[0] = !check_audio_gpio(gp); return 0; @@ -145,7 +145,7 @@ static int toonie_put_mute_switch(struct snd_kcontrol *kcontrol, gp = &mix->amp_mute_gpio; break; default: - return -EINVAL;; + return -EINVAL; } val = ! check_audio_gpio(gp); if (val != ucontrol->value.integer.value[0]) { -- cgit v1.2.3 From 5b3cf3e0f019fcfe1e0c26aedb4d54b5e5be3a7b Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 24 Mar 2006 03:16:14 -0800 Subject: [PATCH] isofs: remove unused debugging macros Remove unused debugging macros from isofs. The referred debug functions do not exist in the kernel. Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/isofs/isofs.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'fs') diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 38c75151fc66..439a19b1bf3e 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -178,15 +178,3 @@ extern struct inode_operations isofs_dir_inode_operations; extern struct file_operations isofs_dir_operations; extern struct address_space_operations isofs_symlink_aops; extern struct export_operations isofs_export_ops; - -/* The following macros are used to check for memory leaks. */ -#ifdef LEAK_CHECK -#define free_s leak_check_free_s -#define malloc leak_check_malloc -#define sb_bread leak_check_bread -#define brelse leak_check_brelse -extern void * leak_check_malloc(unsigned int size); -extern void leak_check_free_s(void * obj, int size); -extern struct buffer_head * leak_check_bread(struct super_block *sb, int block); -extern void leak_check_brelse(struct buffer_head * bh); -#endif /* LEAK_CHECK */ -- cgit v1.2.3 From 09fe316a7b10219be592118626850e1dfdfcc1aa Mon Sep 17 00:00:00 2001 From: Alex Tomas Date: Fri, 24 Mar 2006 03:16:16 -0800 Subject: [PATCH] fast ext3_statfs Under I/O load it may take up to a dozen seconds to read all group descriptors. This is what ext3_statfs() does. At the same time, we already maintain global numbers of free inodes/blocks. Why don't we use them instead of group reading and summing? 
Cc: Ravikiran G Thirumalai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/super.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index a3e2a8e7dca2..86e443182de4 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2326,7 +2326,8 @@ restore_opts: static int ext3_statfs (struct super_block * sb, struct kstatfs * buf) { - struct ext3_super_block *es = EXT3_SB(sb)->s_es; + struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext3_super_block *es = sbi->s_es; unsigned long overhead; int i; @@ -2368,12 +2369,12 @@ static int ext3_statfs (struct super_block * sb, struct kstatfs * buf) buf->f_type = EXT3_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; - buf->f_bfree = ext3_count_free_blocks (sb); + buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) buf->f_bavail = 0; buf->f_files = le32_to_cpu(es->s_inodes_count); - buf->f_ffree = ext3_count_free_inodes (sb); + buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); buf->f_namelen = EXT3_NAME_LEN; return 0; } -- cgit v1.2.3 From 38885bd4c2a4b59ddb22271d3e6c621859c76f02 Mon Sep 17 00:00:00 2001 From: Coywolf Qi Hunt Date: Fri, 24 Mar 2006 03:18:05 -0800 Subject: [PATCH] sb_set_blocksize cleanup sb_set_blocksize() cleanup: make sb_set_blocksize() use blksize_bits(). Signed-off-by: Coywolf Qi Hunt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/block_dev.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 2d096057ab53..573fc8e0b67a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -86,16 +86,12 @@ EXPORT_SYMBOL(set_blocksize); int sb_set_blocksize(struct super_block *sb, int size) { - int bits = 9; /* 2^9 = 512 */ - if (set_blocksize(sb->s_bdev, size)) return 0; /* If we get here, we know size is power of two * and it's value is between 512 and PAGE_SIZE */ sb->s_blocksize = size; - for (size >>= 10; size; size >>= 1) - ++bits; - sb->s_blocksize_bits = bits; + sb->s_blocksize_bits = blksize_bits(size); return sb->s_blocksize; } -- cgit v1.2.3 From 8a14342683b1e3adcf5f78660a42fcbd95b44a35 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Mar 2006 03:18:10 -0800 Subject: [PATCH] HOTPLUG_CPU: avoid hitting too many cachelines in recalc_bh_state() Instead of using for_each_cpu(i), we can use for_each_online_cpu(i). When a CPU goes offline (ie removed from online map), it might have a non null bh_accounting.nr, so this patch adds a transfer of this counter to an online CPU counter. We already have a hotcpu_notifier, (function buffer_cpu_notify()), where we can do this bh_accounting. 
Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 36c7253bea72..11ca6eb46a33 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3078,7 +3078,7 @@ static void recalc_bh_state(void) if (__get_cpu_var(bh_accounting).ratelimit++ < 4096) return; __get_cpu_var(bh_accounting).ratelimit = 0; - for_each_cpu(i) + for_each_online_cpu(i) tot += per_cpu(bh_accounting, i).nr; buffer_heads_over_limit = (tot > max_buffer_heads); } @@ -3127,6 +3127,9 @@ static void buffer_exit_cpu(int cpu) brelse(b->bhs[i]); b->bhs[i] = NULL; } + get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr; + per_cpu(bh_accounting, cpu).nr = 0; + put_cpu_var(bh_accounting); } static int buffer_cpu_notify(struct notifier_block *self, -- cgit v1.2.3 From 4741c9fd36b3bcadd37238321c469049da94a4b9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 24 Mar 2006 03:18:11 -0800 Subject: [PATCH] set_page_dirty() return value fixes We need set_page_dirty() to return true if it actually transitioned the page from a clean to dirty state. This wasn't right in a couple of places. Do a kernel-wide audit, fix things up. This leaves open the possibility of returning a negative errno from set_page_dirty() sometime in the future. But we don't do that at present. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v32/drivers/cryptocop.c | 2 +- drivers/block/rd.c | 3 ++- fs/buffer.c | 2 +- include/linux/fs.h | 2 +- mm/page-writeback.c | 11 ++++++----- 5 files changed, 11 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index 501fa52d8d3a..c59ee28a35f4 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2944,7 +2944,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig int spdl_err; /* Mark output pages dirty. */ spdl_err = set_page_dirty_lock(outpages[i]); - DEBUG(if (spdl_err)printk("cryptocop_ioctl_process: set_page_dirty_lock returned %d\n", spdl_err)); + DEBUG(if (spdl_err < 0)printk("cryptocop_ioctl_process: set_page_dirty_lock returned %d\n", spdl_err)); } for (i = 0; i < nooutpages; i++){ put_page(outpages[i]); diff --git a/drivers/block/rd.c b/drivers/block/rd.c index 1c54f46d3f70..940bfd7951e5 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -186,7 +186,8 @@ static int ramdisk_writepages(struct address_space *mapping, */ static int ramdisk_set_page_dirty(struct page *page) { - SetPageDirty(page); + if (!TestSetPageDirty(page)) + return 1; return 0; } diff --git a/fs/buffer.c b/fs/buffer.c index 11ca6eb46a33..24262ea8cc50 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -865,8 +865,8 @@ int __set_page_dirty_buffers(struct page *page) } write_unlock_irq(&mapping->tree_lock); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return 1; } - return 0; } EXPORT_SYMBOL(__set_page_dirty_buffers); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0ad70c1e5e55..092cfaee0cd2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -350,7 +350,7 @@ struct address_space_operations { /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); - /* Set a page dirty */ + /* Set a page dirty. 
Return true if this dirtied it */ int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c67ddc464721..893d7677579e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -628,8 +628,6 @@ EXPORT_SYMBOL(write_one_page); */ int __set_page_dirty_nobuffers(struct page *page) { - int ret = 0; - if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); struct address_space *mapping2; @@ -651,8 +649,9 @@ int __set_page_dirty_nobuffers(struct page *page) I_DIRTY_PAGES); } } + return 1; } - return ret; + return 0; } EXPORT_SYMBOL(__set_page_dirty_nobuffers); @@ -682,8 +681,10 @@ int fastcall set_page_dirty(struct page *page) return (*spd)(page); return __set_page_dirty_buffers(page); } - if (!PageDirty(page)) - SetPageDirty(page); + if (!PageDirty(page)) { + if (!TestSetPageDirty(page)) + return 1; + } return 0; } EXPORT_SYMBOL(set_page_dirty); -- cgit v1.2.3 From 18e79b40ed9c5223b88771f805c69f5993fc131b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 24 Mar 2006 03:18:14 -0800 Subject: [PATCH] fsync: extract internal code Pull the guts out of do_fsync() - we can use it elsewhere. Cc: Hugh Dickins Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 43 +++++++++++++++++++++++-------------------- include/linux/fs.h | 1 + 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 24262ea8cc50..6d77ce9f54e5 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -327,31 +327,24 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) return ret; } -static long do_fsync(unsigned int fd, int datasync) +long do_fsync(struct file *file, int datasync) { - struct file * file; - struct address_space *mapping; - int ret, err; - - ret = -EBADF; - file = fget(fd); - if (!file) - goto out; + int ret; + int err; + struct address_space *mapping = file->f_mapping; - ret = -EINVAL; if (!file->f_op || !file->f_op->fsync) { /* Why? We can still call filemap_fdatawrite */ - goto out_putf; + ret = -EINVAL; + goto out; } - mapping = file->f_mapping; - current->flags |= PF_SYNCWRITE; ret = filemap_fdatawrite(mapping); /* - * We need to protect against concurrent writers, - * which could cause livelocks in fsync_buffers_list + * We need to protect against concurrent writers, which could cause + * livelocks in fsync_buffers_list(). 
*/ mutex_lock(&mapping->host->i_mutex); err = file->f_op->fsync(file, file->f_dentry, datasync); @@ -362,21 +355,31 @@ static long do_fsync(unsigned int fd, int datasync) if (!ret) ret = err; current->flags &= ~PF_SYNCWRITE; - -out_putf: - fput(file); out: return ret; } +static long __do_fsync(unsigned int fd, int datasync) +{ + struct file *file; + int ret = -EBADF; + + file = fget(fd); + if (file) { + ret = do_fsync(file, datasync); + fput(file); + } + return ret; +} + asmlinkage long sys_fsync(unsigned int fd) { - return do_fsync(fd, 0); + return __do_fsync(fd, 0); } asmlinkage long sys_fdatasync(unsigned int fd) { - return do_fsync(fd, 1); + return __do_fsync(fd, 1); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 092cfaee0cd2..215696a0f16f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1478,6 +1478,7 @@ extern int wait_on_page_writeback_range(struct address_space *mapping, extern int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); +extern long do_fsync(struct file *file, int datasync); extern void sync_supers(void); extern void sync_filesystems(int wait); extern void emergency_sync(void); -- cgit v1.2.3 From 82d821ddca8f5c990067cc37543010aa9346a172 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Fri, 24 Mar 2006 03:18:20 -0800 Subject: [PATCH] Conditionalize compat_sys_newfstatat If we don't want sys_newfstatat because __ARCH_WANT_STAT64 is defined, then we certainly don't want compat_sys_newfstatat either. Signed-off-by: Grant Grundler Signed-off-by: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index 5333c7d7427f..2a88477330fc 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -114,6 +114,7 @@ asmlinkage long compat_sys_newlstat(char __user * filename, return error; } +#ifndef __ARCH_WANT_STAT64 asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, struct compat_stat __user *statbuf, int flag) { @@ -134,6 +135,7 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, out: return error; } +#endif asmlinkage long compat_sys_newfstat(unsigned int fd, struct compat_stat __user * statbuf) -- cgit v1.2.3 From b5a7c4f5835ae2805d00ca39709002cb03364143 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Fri, 24 Mar 2006 03:18:37 -0800 Subject: [PATCH] ext3: Properly report backup block present in a group In filesystems with the meta block group flag on, ext3_bg_num_gdb() fails to report the correct number of blocks used to store the group descriptor backups in a given group. It happens because meta_bg follows a different logic from the original ext3 backup placement in groups multiples of 3, 5 and 7. Signed-off-by: Glauber de Oliveira Costa Cc: Andreas Dilger Cc: "Stephen C. 
Tweedie" Cc: Alex Tomas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/balloc.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 6250fcdf14a1..46623f77666b 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1493,12 +1493,33 @@ static int ext3_group_sparse(int group) */ int ext3_bg_has_super(struct super_block *sb, int group) { - if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&& - !ext3_group_sparse(group)) + if (EXT3_HAS_RO_COMPAT_FEATURE(sb, + EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) && + !ext3_group_sparse(group)) return 0; return 1; } +static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group) +{ + unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb); + unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb); + unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1; + + if (group == first || group == first + 1 || group == last) + return 1; + return 0; +} + +static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group) +{ + if (EXT3_HAS_RO_COMPAT_FEATURE(sb, + EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) && + !ext3_group_sparse(group)) + return 0; + return EXT3_SB(sb)->s_gdb_count; +} + /** * ext3_bg_num_gdb - number of blocks used by the group table in group * @sb: superblock for filesystem @@ -1510,9 +1531,14 @@ int ext3_bg_has_super(struct super_block *sb, int group) */ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group) { - if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&& - !ext3_group_sparse(group)) - return 0; - return EXT3_SB(sb)->s_gdb_count; -} + unsigned long first_meta_bg = + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg); + unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb); + + if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) || + metagroup < first_meta_bg) + return ext3_bg_num_gdb_nometa(sb,group); + return ext3_bg_num_gdb_meta(sb,group); + +} -- cgit v1.2.3 From c690a72253b962b7274559f2cdf4844553076c03 Mon Sep 17 00:00:00 2001 From: Michael Owen Date: Fri, 24 Mar 2006 18:21:44 +0100 Subject: typo patch for fs/ufs/super.c Quick and simple typo fix. 
neTXstep -> neXTstep Signed-off-by: Adrian Bunk --- fs/ufs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ufs/super.c b/fs/ufs/super.c index d257644a1aeb..db98a4c71e63 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -575,7 +575,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!silent) printk("You didn't specify the type of your ufs filesystem\n\n" "mount -t ufs -o ufstype=" - "sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|netxstep-cd|openstep ...\n\n" + "sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n" ">>>WARNING<<< Wrong ufstype may corrupt your filesystem, " "default is ufstype=old\n"); ufs_set_opt (sbi->s_mount_opt, UFSTYPE_OLD); -- cgit v1.2.3 From c30fe7f73194650148b58ee80908c1bc38246397 Mon Sep 17 00:00:00 2001 From: Uwe Zeisberger Date: Fri, 24 Mar 2006 18:23:14 +0100 Subject: fix typos "wich" -> "which" Signed-off-by: Uwe Zeisberger Signed-off-by: Adrian Bunk --- Documentation/filesystems/proc.txt | 6 +++--- Documentation/networking/packet_mmap.txt | 10 +++++----- drivers/acpi/Kconfig | 3 ++- drivers/isdn/i4l/isdn_x25iface.c | 2 +- drivers/serial/mpc52xx_uart.c | 2 +- drivers/usb/serial/option.c | 2 +- drivers/video/sstfb.c | 2 +- drivers/w1/masters/matrox_w1.c | 2 +- fs/befs/datastream.c | 2 +- scripts/Makefile.modpost | 2 +- 10 files changed, 17 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 944cf109a6f5..99902ae6804e 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -121,7 +121,7 @@ Table 1-1: Process specific entries in /proc .............................................................................. File Content cmdline Command line arguments - cpu Current and last cpu in wich it was executed (2.4)(smp) + cpu Current and last cpu in which it was executed (2.4)(smp) cwd Link to the current working directory environ Values of environment variables exe Link to the executable of this process @@ -309,13 +309,13 @@ is the same by default: > cat /proc/irq/0/smp_affinity ffffffff -It's a bitmask, in wich you can specify wich CPUs can handle the IRQ, you can +It's a bitmask, in which you can specify which CPUs can handle the IRQ, you can set it by doing: > echo 1 > /proc/irq/prof_cpu_mask This means that only the first CPU will handle the IRQ, but you can also echo 5 -wich means that only the first and fourth CPU can handle the IRQ. +which means that only the first and fourth CPU can handle the IRQ. The way IRQs are routed is handled by the IO-APIC, and it's Round Robin between all the CPUs which are allowed to handle it. As usual the kernel has diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 8d4cf78258e4..4fc8e9874320 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -40,7 +40,7 @@ network interface card supports some sort of interrupt load mitigation or + How to use CONFIG_PACKET_MMAP -------------------------------------------------------------------------------- -From the user standpoint, you should use the higher level libpcap library, wich +From the user standpoint, you should use the higher level libpcap library, which is a de facto standard, portable across nearly all operating systems including Win32. @@ -217,8 +217,8 @@ called pg_vec, its size limits the number of blocks that can be allocated. 
kmalloc allocates any number of bytes of phisically contiguous memory from a pool of pre-determined sizes. This pool of memory is mantained by the slab -allocator wich is at the end the responsible for doing the allocation and -hence wich imposes the maximum memory that kmalloc can allocate. +allocator which is at the end the responsible for doing the allocation and +hence which imposes the maximum memory that kmalloc can allocate. In a 2.4/2.6 kernel and the i386 architecture, the limit is 131072 bytes. The predetermined sizes that kmalloc uses can be checked in the "size-" @@ -254,7 +254,7 @@ and, the number of frames be * / -Suposse the following parameters, wich apply for 2.6 kernel and an +Suposse the following parameters, which apply for 2.6 kernel and an i386 architecture: = 131072 bytes @@ -360,7 +360,7 @@ TP_STATUS_LOSING : indicates there were packet drops from last time statistics where checked with getsockopt() and the PACKET_STATISTICS option. -TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets wich +TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets which it's checksum will be done in hardware. So while reading the packet we should not try to check the checksum. diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 82710ae39228..5cb96300eb0f 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -256,7 +256,8 @@ config ACPI_CUSTOM_DSDT_FILE depends on ACPI_CUSTOM_DSDT default "" help - Enter the full path name to the file wich includes the AmlCode declaration. + Enter the full path name to the file which includes the AmlCode + declaration. config ACPI_BLACKLIST_YEAR int "Disable ACPI for systems before Jan 1st this year" if X86_32 diff --git a/drivers/isdn/i4l/isdn_x25iface.c b/drivers/isdn/i4l/isdn_x25iface.c index edf14a2aa3c8..743ac4077f35 100644 --- a/drivers/isdn/i4l/isdn_x25iface.c +++ b/drivers/isdn/i4l/isdn_x25iface.c @@ -7,7 +7,7 @@ * * stuff needed to support the Linux X.25 PLP code on top of devices that * can provide a lab_b service using the concap_proto mechanism. - * This module supports a network interface wich provides lapb_sematics + * This module supports a network interface which provides lapb_sematics * -- as defined in Documentation/networking/x25-iface.txt -- to * the upper layer and assumes that the lower layer provides a reliable * data link service by means of the concap_device_ops callbacks. diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c index 928e6cf12dca..6459edc7f5c5 100644 --- a/drivers/serial/mpc52xx_uart.c +++ b/drivers/serial/mpc52xx_uart.c @@ -40,7 +40,7 @@ * and so on). So the PSC1 is mapped to /dev/ttyPSC0, PSC2 to /dev/ttyPSC1 and * so on. But be warned, it's an ABSOLUTE REQUIREMENT ! This is needed mainly * fpr the console code : without this 1:1 mapping, at early boot time, when we - * are parsing the kernel args console=ttyPSC?, we wouldn't know wich PSC it + * are parsing the kernel args console=ttyPSC?, we wouldn't know which PSC it * will be mapped to. */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index a8455c9e79dd..495db5755df9 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -102,7 +102,7 @@ static struct usb_driver option_driver = { .no_dynamic_id = 1, }; -/* The card has three separate interfaces, wich the serial driver +/* The card has three separate interfaces, which the serial driver * recognizes separately, thus num_port=1. 
*/ static struct usb_serial_driver option_3port_device = { diff --git a/drivers/video/sstfb.c b/drivers/video/sstfb.c index 99921df35474..8c1a8b5135c6 100644 --- a/drivers/video/sstfb.c +++ b/drivers/video/sstfb.c @@ -32,7 +32,7 @@ -TODO: at one time or another test that the mode is acceptable by the monitor -ASK: Can I choose different ordering for the color bitfields (rgba argb ...) - wich one should i use ? is there any preferred one ? It seems ARGB is + which one should i use ? is there any preferred one ? It seems ARGB is the one ... -TODO: in set_var check the validity of timings (hsync vsync)... -TODO: check and recheck the use of sst_wait_idle : we don't flush the fifo via diff --git a/drivers/w1/masters/matrox_w1.c b/drivers/w1/masters/matrox_w1.c index 591809cbbb97..2788b8ca9bb1 100644 --- a/drivers/w1/masters/matrox_w1.c +++ b/drivers/w1/masters/matrox_w1.c @@ -98,7 +98,7 @@ static void matrox_w1_write_ddc_bit(void *, u8); * * Using tristate pins, since i can't find any open-drain pin in whole motherboard. * Unfortunately we can't connect to Intel's 82801xx IO controller - * since we don't know motherboard schema, wich has pretty unused(may be not) GPIO. + * since we don't know motherboard schema, which has pretty unused(may be not) GPIO. * * I've heard that PIIX also has open drain pin. * diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index 785f6b2d5d10..b7d6b920f65f 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -118,7 +118,7 @@ befs_fblock2brun(struct super_block *sb, befs_data_stream * data, * befs_read_lsmylink - read long symlink from datastream. * @sb: Filesystem superblock * @ds: Datastrem to read from - * @buf: Buffer in wich to place long symlink data + * @buf: Buffer in which to place long symlink data * @len: Length of the long symlink in bytes * * Returns the number of bytes read diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index bf96a61d4b86..9a3ec20b0be2 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -4,7 +4,7 @@ # # Stage one of module building created the following: # a) The individual .o files used for the module -# b) A .o file wich is the .o files above linked together +# b) A .o file which is the .o files above linked together # c) A .mod file in $(MODVERDIR)/, listing the name of the # the preliminary .o file, plus all .o files -- cgit v1.2.3 From 88bcd51262ed45212d1b3a65abbac46eaf36bfeb Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Fri, 24 Mar 2006 18:38:48 +0100 Subject: BUG_ON() Conversion in fs/binfmt_elf_fdpic.c this changes if() BUG(); constructs to BUG_ON() which is cleaner and can better optimized away Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/binfmt_elf_fdpic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 5b3076e8ee90..a2e48c999c24 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -572,8 +572,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, csp -= sizeof(unsigned long); __put_user(bprm->argc, (unsigned long *) csp); - if (csp != sp) - BUG(); + BUG_ON(csp != sp); /* fill in the argv[] array */ #ifdef CONFIG_MMU -- cgit v1.2.3 From c5d3237c2424c4a3cf69d33abc1f229943468367 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Fri, 24 Mar 2006 18:42:13 +0100 Subject: BUG_ON() Conversion in fs/coda/ this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can better optimized away. 
Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/coda/cache.c | 2 +- fs/coda/cnode.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/coda/cache.c b/fs/coda/cache.c index c607d923350a..5d0527133266 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -51,7 +51,7 @@ void coda_cache_clear_all(struct super_block *sb) struct coda_sb_info *sbi; sbi = coda_sbp(sb); - if (!sbi) BUG(); + BUG_ON(!sbi); atomic_inc(&permission_epoch); } diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 23aeef5aa814..4c9fecbfa91f 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -120,8 +120,7 @@ void coda_replace_fid(struct inode *inode, struct CodaFid *oldfid, cii = ITOC(inode); - if (!coda_fideq(&cii->c_fid, oldfid)) - BUG(); + BUG_ON(!coda_fideq(&cii->c_fid, oldfid)); /* replace fid and rehash inode */ /* XXX we probably need to hold some lock here! */ -- cgit v1.2.3 From a74e1f0e8a7858c9ba6065480c88d7feba3520ac Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 3 Mar 2006 10:18:58 -0800 Subject: ocfs2: use __attribute__ format Use the "format" attribute on ocfs2_error() and ocfs2_abort() so that the compiler will warn when we get calls to those functions wrong. Signed-off-by: Mark Fasheh --- fs/ocfs2/super.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h index c564177dfbdc..783f5270f2a1 100644 --- a/fs/ocfs2/super.h +++ b/fs/ocfs2/super.h @@ -33,12 +33,16 @@ int ocfs2_publish_get_mount_state(struct ocfs2_super *osb, void __ocfs2_error(struct super_block *sb, const char *function, - const char *fmt, ...); + const char *fmt, ...) + __attribute__ ((format (printf, 3, 4))); + #define ocfs2_error(sb, fmt, args...) __ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, ##args) void __ocfs2_abort(struct super_block *sb, const char *function, - const char *fmt, ...); + const char *fmt, ...) + __attribute__ ((format (printf, 3, 4))); + #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) #endif /* OCFS2_SUPER_H */ -- cgit v1.2.3 From 9c6510a5bfe2f1c5f5b93386c06954be02e974e4 Mon Sep 17 00:00:00 2001 From: Kurt Hackel Date: Thu, 2 Mar 2006 18:09:26 -0800 Subject: [PATCH] ocfs2: fix hang in dlm lock resource mastery fixes hangs in lock mastery related to refcounting on the mle structure Signed-off-by: Kurt Hackel Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmmaster.c | 124 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 847dd3cc4cf5..78ac3a00eb54 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -792,7 +792,15 @@ redo_request: mlog_errno(ret); if (mle->master != O2NM_MAX_NODES) { /* found a master ! */ - break; + if (mle->master <= nodenum) + break; + /* if our master request has not reached the master + * yet, keep going until it does. this is how the + * master will know that asserts are needed back to + * the lower nodes. 
*/ + mlog(0, "%s:%.*s: requests only up to %u but master " + "is %u, keep going\n", dlm->name, namelen, + lockid, nodenum, mle->master); } } @@ -860,7 +868,19 @@ recheck: /* check if another node has already become the owner */ spin_lock(&res->spinlock); if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) { + mlog(0, "%s:%.*s: owner is suddenly %u\n", dlm->name, + res->lockname.len, res->lockname.name, res->owner); spin_unlock(&res->spinlock); + /* this will cause the master to re-assert across + * the whole cluster, freeing up mles */ + ret = dlm_do_master_request(mle, res->owner); + if (ret < 0) { + /* give recovery a chance to run */ + mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret); + msleep(500); + goto recheck; + } + ret = 0; goto leave; } spin_unlock(&res->spinlock); @@ -1244,13 +1264,14 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data) { u8 response = DLM_MASTER_RESP_MAYBE; struct dlm_ctxt *dlm = data; - struct dlm_lock_resource *res; + struct dlm_lock_resource *res = NULL; struct dlm_master_request *request = (struct dlm_master_request *) msg->buf; struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL; char *name; unsigned int namelen; int found, ret; int set_maybe; + int dispatch_assert = 0; if (!dlm_grab(dlm)) return DLM_MASTER_RESP_NO; @@ -1287,7 +1308,6 @@ way_up_top: } if (res->owner == dlm->node_num) { - u32 flags = DLM_ASSERT_MASTER_MLE_CLEANUP; spin_unlock(&res->spinlock); // mlog(0, "this node is the master\n"); response = DLM_MASTER_RESP_YES; @@ -1300,16 +1320,7 @@ way_up_top: * caused all nodes up to this one to * create mles. this node now needs to * go back and clean those up. */ - mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", - dlm->node_num, res->lockname.len, res->lockname.name); - ret = dlm_dispatch_assert_master(dlm, res, 1, - request->node_idx, - flags); - if (ret < 0) { - mlog(ML_ERROR, "failed to dispatch assert " - "master work\n"); - response = DLM_MASTER_RESP_ERROR; - } + dispatch_assert = 1; goto send_response; } else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) { spin_unlock(&res->spinlock); @@ -1357,9 +1368,13 @@ way_up_top: } } else if (tmpmle->master != DLM_LOCK_RES_OWNER_UNKNOWN) { set_maybe = 0; - if (tmpmle->master == dlm->node_num) + if (tmpmle->master == dlm->node_num) { response = DLM_MASTER_RESP_YES; - else + /* this node will be the owner. + * go back and clean the mles on any + * other nodes */ + dispatch_assert = 1; + } else response = DLM_MASTER_RESP_NO; } else { // mlog(0, "this node is attempting to " @@ -1398,8 +1413,8 @@ way_up_top: mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, GFP_KERNEL); if (!mle) { - // bad bad bad... this sucks. 
response = DLM_MASTER_RESP_ERROR; + mlog_errno(-ENOMEM); goto send_response; } spin_lock(&dlm->spinlock); @@ -1418,25 +1433,19 @@ way_up_top: // mlog(0, "mle was found\n"); set_maybe = 1; spin_lock(&tmpmle->spinlock); + if (tmpmle->master == dlm->node_num) { + mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n"); + BUG(); + } if (tmpmle->type == DLM_MLE_BLOCK) response = DLM_MASTER_RESP_NO; else if (tmpmle->type == DLM_MLE_MIGRATION) { mlog(0, "migration mle was found (%u->%u)\n", tmpmle->master, tmpmle->new_master); - if (tmpmle->master == dlm->node_num) { - mlog(ML_ERROR, "no lockres, but migration mle " - "says that this node is master!\n"); - BUG(); - } /* real master can respond on its own */ response = DLM_MASTER_RESP_NO; - } else { - if (tmpmle->master == dlm->node_num) { - response = DLM_MASTER_RESP_YES; - set_maybe = 0; - } else - response = DLM_MASTER_RESP_MAYBE; - } + } else + response = DLM_MASTER_RESP_MAYBE; if (set_maybe) set_bit(request->node_idx, tmpmle->maybe_map); spin_unlock(&tmpmle->spinlock); @@ -1449,6 +1458,24 @@ way_up_top: dlm_put_mle(tmpmle); } send_response: + + if (dispatch_assert) { + if (response != DLM_MASTER_RESP_YES) + mlog(ML_ERROR, "invalid response %d\n", response); + if (!res) { + mlog(ML_ERROR, "bad lockres while trying to assert!\n"); + BUG(); + } + mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", + dlm->node_num, res->lockname.len, res->lockname.name); + ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, + DLM_ASSERT_MASTER_MLE_CLEANUP); + if (ret < 0) { + mlog(ML_ERROR, "failed to dispatch assert master work\n"); + response = DLM_MASTER_RESP_ERROR; + } + } + dlm_put(dlm); return response; } @@ -1471,8 +1498,11 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname, int to, tmpret; struct dlm_node_iter iter; int ret = 0; + int reassert; BUG_ON(namelen > O2NM_MAX_NAME_LEN); +again: + reassert = 0; /* note that if this nodemap is empty, it returns 0 */ dlm_node_iter_init(nodemap, &iter); @@ -1504,9 +1534,17 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname, "got %d.\n", namelen, lockname, to, r); dlm_dump_lock_resources(dlm); BUG(); + } else if (r == EAGAIN) { + mlog(0, "%.*s: node %u create mles on other " + "nodes and requests a re-assert\n", + namelen, lockname, to); + reassert = 1; } } + if (reassert) + goto again; + return ret; } @@ -1528,6 +1566,8 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data) char *name; unsigned int namelen; u32 flags; + int master_request = 0; + int ret = 0; if (!dlm_grab(dlm)) return 0; @@ -1642,11 +1682,22 @@ ok: // mlog(0, "woo! 
got an assert_master from node %u!\n", // assert->node_idx); if (mle) { - int extra_ref; + int extra_ref = 0; + int nn = -1; spin_lock(&mle->spinlock); - extra_ref = !!(mle->type == DLM_MLE_BLOCK - || mle->type == DLM_MLE_MIGRATION); + if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) + extra_ref = 1; + else { + /* MASTER mle: if any bits set in the response map + * then the calling node needs to re-assert to clear + * up nodes that this node contacted */ + while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, + nn+1)) < O2NM_MAX_NODES) { + if (nn != dlm->node_num && nn != assert->node_idx) + master_request = 1; + } + } mle->master = assert->node_idx; atomic_set(&mle->woken, 1); wake_up(&mle->wq); @@ -1677,10 +1728,15 @@ ok: } done: + ret = 0; if (res) dlm_lockres_put(res); dlm_put(dlm); - return 0; + if (master_request) { + mlog(0, "need to tell master to reassert\n"); + ret = EAGAIN; // positive. negative would shoot down the node. + } + return ret; kill: /* kill the caller! */ @@ -1713,6 +1769,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, item->u.am.request_from = request_from; item->u.am.flags = flags; + if (ignore_higher) + mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, + res->lockname.name); + spin_lock(&dlm->work_lock); list_add_tail(&item->list, &dlm->work_list); spin_unlock(&dlm->work_lock); -- cgit v1.2.3 From c03872f5f50bc10f2a1a485f08879a8d01bcfe49 Mon Sep 17 00:00:00 2001 From: Kurt Hackel Date: Mon, 6 Mar 2006 14:08:49 -0800 Subject: [PATCH] ocfs2: dlm recovery fixes when starting lock mastery (excepting the recovery lock) wait on any nodes needing recovery. fix one instance where lock resources were left attached to the recovery list after recovery completed. ensure that the node_down code is run uniformly regardless of which node found the dead node first. 
Signed-off-by: Kurt Hackel Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmcommon.h | 6 +++ fs/ocfs2/dlm/dlmlock.c | 14 +++++- fs/ocfs2/dlm/dlmmaster.c | 103 +++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/dlm/dlmrecovery.c | 38 +++++++++-------- 4 files changed, 142 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 9c772583744a..a8aec9341347 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -658,6 +658,7 @@ void dlm_complete_thread(struct dlm_ctxt *dlm); int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); void dlm_wait_for_recovery(struct dlm_ctxt *dlm); +void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); @@ -762,6 +763,11 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data); int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data); int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data); int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data); +int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, + u8 nodenum, u8 *real_master); +int dlm_lockres_master_requery(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, u8 *real_master); + int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 671d4ff222cc..6fea28318d6d 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -141,13 +141,23 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, res->lockname.len)) { kick_thread = 1; call_ast = 1; + } else { + mlog(0, "%s: returning DLM_NORMAL to " + "node %u for reco lock\n", dlm->name, + lock->ml.node); } } else { /* for NOQUEUE request, unless we get the * lock right away, return DLM_NOTQUEUED */ - if (flags & LKM_NOQUEUE) + if (flags & LKM_NOQUEUE) { status = DLM_NOTQUEUED; - else { + if (dlm_is_recovery_lock(res->lockname.name, + res->lockname.len)) { + mlog(0, "%s: returning NOTQUEUED to " + "node %u for reco lock\n", dlm->name, + lock->ml.node); + } + } else { dlm_lock_get(lock); list_add_tail(&lock->list, &res->blocked); kick_thread = 1; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 78ac3a00eb54..940be4c13b1f 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -239,6 +239,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, u8 target); +static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res); int dlm_is_host_down(int errno) @@ -677,6 +679,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, struct dlm_node_iter iter; unsigned int namelen; int tries = 0; + int bit, wait_on_recovery = 0; BUG_ON(!lockid); @@ -762,6 +765,18 @@ lookup: dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0); set_bit(dlm->node_num, mle->maybe_map); list_add(&mle->list, &dlm->master_list); + + /* still holding the dlm spinlock, check the recovery map + * to see if there are any nodes that still need to be + * considered. these will not appear in the mle nodemap + * but they might own this lockres. wait on them. 
*/ + bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); + if (bit < O2NM_MAX_NODES) { + mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to" + "recover before lock mastery can begin\n", + dlm->name, namelen, (char *)lockid, bit); + wait_on_recovery = 1; + } } /* at this point there is either a DLM_MLE_BLOCK or a @@ -779,6 +794,39 @@ lookup: spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); + while (wait_on_recovery) { + /* any cluster changes that occurred after dropping the + * dlm spinlock would be detectable be a change on the mle, + * so we only need to clear out the recovery map once. */ + if (dlm_is_recovery_lock(lockid, namelen)) { + mlog(ML_NOTICE, "%s: recovery map is not empty, but " + "must master $RECOVERY lock now\n", dlm->name); + if (!dlm_pre_master_reco_lockres(dlm, res)) + wait_on_recovery = 0; + else { + mlog(0, "%s: waiting 500ms for heartbeat state " + "change\n", dlm->name); + msleep(500); + } + continue; + } + + dlm_kick_recovery_thread(dlm); + msleep(100); + dlm_wait_for_recovery(dlm); + + spin_lock(&dlm->spinlock); + bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); + if (bit < O2NM_MAX_NODES) { + mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to" + "recover before lock mastery can begin\n", + dlm->name, namelen, (char *)lockid, bit); + wait_on_recovery = 1; + } else + wait_on_recovery = 0; + spin_unlock(&dlm->spinlock); + } + /* must wait for lock to be mastered elsewhere */ if (blocked) goto wait; @@ -1835,6 +1883,61 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data) mlog(0, "finished with dlm_assert_master_worker\n"); } +/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread. + * We cannot wait for node recovery to complete to begin mastering this + * lockres because this lockres is used to kick off recovery! ;-) + * So, do a pre-check on all living nodes to see if any of those nodes + * think that $RECOVERY is currently mastered by a dead node. If so, + * we wait a short time to allow that node to get notified by its own + * heartbeat stack, then check again. All $RECOVERY lock resources + * mastered by dead nodes are purged when the hearbeat callback is + * fired, so we can know for sure that it is safe to continue once + * the node returns a live node or no node. */ +static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) +{ + struct dlm_node_iter iter; + int nodenum; + int ret = 0; + u8 master = DLM_LOCK_RES_OWNER_UNKNOWN; + + spin_lock(&dlm->spinlock); + dlm_node_iter_init(dlm->domain_map, &iter); + spin_unlock(&dlm->spinlock); + + while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { + /* do not send to self */ + if (nodenum == dlm->node_num) + continue; + ret = dlm_do_master_requery(dlm, res, nodenum, &master); + if (ret < 0) { + mlog_errno(ret); + if (!dlm_is_host_down(ret)) + BUG(); + /* host is down, so answer for that node would be + * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */ + } + + if (master != DLM_LOCK_RES_OWNER_UNKNOWN) { + /* check to see if this master is in the recovery map */ + spin_lock(&dlm->spinlock); + if (test_bit(master, dlm->recovery_map)) { + mlog(ML_NOTICE, "%s: node %u has not seen " + "node %u go down yet, and thinks the " + "dead node is mastering the recovery " + "lock. 
must wait.\n", dlm->name, + nodenum, master); + ret = -EAGAIN; + } + spin_unlock(&dlm->spinlock); + mlog(0, "%s: reco lock master is %u\n", dlm->name, + master); + break; + } + } + return ret; +} + /* * DLM_MIGRATE_LOCKRES diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 1e232000f3f7..36610bdf1231 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -58,7 +58,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); static int dlm_recovery_thread(void *data); void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); -static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); +void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); static int dlm_do_recovery(struct dlm_ctxt *dlm); static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); @@ -78,15 +78,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, u8 send_to, struct dlm_lock_resource *res, int total_locks); -static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 *real_master); static int dlm_process_recovery_data(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, struct dlm_migratable_lockres *mres); -static int dlm_do_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 nodenum, u8 *real_master); static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm); static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to); @@ -165,7 +159,7 @@ void dlm_dispatch_work(void *data) * RECOVERY THREAD */ -static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm) +void dlm_kick_recovery_thread(struct dlm_ctxt *dlm) { /* wake the recovery thread * this will wake the reco thread in one of three places @@ -1316,9 +1310,8 @@ leave: -static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 *real_master) +int dlm_lockres_master_requery(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, u8 *real_master) { struct dlm_node_iter iter; int nodenum; @@ -1360,8 +1353,10 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, ret = dlm_do_master_requery(dlm, res, nodenum, real_master); if (ret < 0) { mlog_errno(ret); - BUG(); - /* TODO: need to figure a way to restart this */ + if (!dlm_is_host_down(ret)) + BUG(); + /* host is down, so answer for that node would be + * DLM_LOCK_RES_OWNER_UNKNOWN. continue. 
*/ } if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) { mlog(0, "lock master is %u\n", *real_master); @@ -1372,9 +1367,8 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, } -static int dlm_do_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 nodenum, u8 *real_master) +int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, + u8 nodenum, u8 *real_master) { int ret = -EINVAL; struct dlm_master_requery req; @@ -1739,6 +1733,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, } else continue; + if (!list_empty(&res->recovering)) { + mlog(0, "%s:%.*s: lockres was " + "marked RECOVERING, owner=%u\n", + dlm->name, res->lockname.len, + res->lockname.name, res->owner); + list_del_init(&res->recovering); + } spin_lock(&res->spinlock); dlm_change_lockres_owner(dlm, res, new_master); res->state &= ~DLM_LOCK_RES_RECOVERING; @@ -2258,7 +2259,10 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data) mlog(0, "%u not in domain/live_nodes map " "so setting it in reco map manually\n", br->dead_node); - set_bit(br->dead_node, dlm->recovery_map); + /* force the recovery cleanup in __dlm_hb_node_down + * both of these will be cleared in a moment */ + set_bit(br->dead_node, dlm->domain_map); + set_bit(br->dead_node, dlm->live_nodes_map); __dlm_hb_node_down(dlm, br->dead_node); } spin_unlock(&dlm->spinlock); -- cgit v1.2.3 From 70bacbdbfa6f63f8cd10432891f9ecee62397ff2 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 2 Mar 2006 11:10:05 -0800 Subject: ocfs2: don't use MLF* in cluster/ files Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/heartbeat.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index d08971d29b63..bff0f0d06867 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -449,11 +449,11 @@ static u32 o2hb_compute_block_crc_le(struct o2hb_region *reg, static void o2hb_dump_slot(struct o2hb_disk_heartbeat_block *hb_block) { - mlog(ML_ERROR, "Dump slot information: seq = 0x%"MLFx64", node = %u, " - "cksum = 0x%x, generation 0x%"MLFx64"\n", - le64_to_cpu(hb_block->hb_seq), hb_block->hb_node, - le32_to_cpu(hb_block->hb_cksum), - le64_to_cpu(hb_block->hb_generation)); + mlog(ML_ERROR, "Dump slot information: seq = 0x%llx, node = %u, " + "cksum = 0x%x, generation 0x%llx\n", + (long long)le64_to_cpu(hb_block->hb_seq), + hb_block->hb_node, le32_to_cpu(hb_block->hb_cksum), + (long long)le64_to_cpu(hb_block->hb_generation)); } static int o2hb_verify_crc(struct o2hb_region *reg, @@ -516,8 +516,9 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg, hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg, hb_block)); - mlog(ML_HB_BIO, "our node generation = 0x%"MLFx64", cksum = 0x%x\n", - cpu_to_le64(generation), le32_to_cpu(hb_block->hb_cksum)); + mlog(ML_HB_BIO, "our node generation = 0x%llx, cksum = 0x%x\n", + (long long)cpu_to_le64(generation), + le32_to_cpu(hb_block->hb_cksum)); } static void o2hb_fire_callbacks(struct o2hb_callback *hbcall, @@ -686,19 +687,20 @@ static int o2hb_check_slot(struct o2hb_region *reg, if (slot->ds_last_generation != le64_to_cpu(hb_block->hb_generation)) { gen_changed = 1; slot->ds_equal_samples = 0; - mlog(ML_HEARTBEAT, "Node %d changed generation (0x%"MLFx64" " - "to 0x%"MLFx64")\n", slot->ds_node_num, - slot->ds_last_generation, - le64_to_cpu(hb_block->hb_generation)); + mlog(ML_HEARTBEAT, "Node 
%d changed generation (0x%llx " + "to 0x%llx)\n", slot->ds_node_num, + (long long)slot->ds_last_generation, + (long long)le64_to_cpu(hb_block->hb_generation)); } slot->ds_last_generation = le64_to_cpu(hb_block->hb_generation); - mlog(ML_HEARTBEAT, "Slot %d gen 0x%"MLFx64" cksum 0x%x " - "seq %"MLFu64" last %"MLFu64" changed %u equal %u\n", - slot->ds_node_num, slot->ds_last_generation, - le32_to_cpu(hb_block->hb_cksum), le64_to_cpu(hb_block->hb_seq), - slot->ds_last_time, slot->ds_changed_samples, + mlog(ML_HEARTBEAT, "Slot %d gen 0x%llx cksum 0x%x " + "seq %llu last %llu changed %u equal %u\n", + slot->ds_node_num, (long long)slot->ds_last_generation, + le32_to_cpu(hb_block->hb_cksum), + (unsigned long long)le64_to_cpu(hb_block->hb_seq), + (unsigned long long)slot->ds_last_time, slot->ds_changed_samples, slot->ds_equal_samples); spin_lock(&o2hb_live_lock); @@ -708,8 +710,8 @@ fire_callbacks: * changes at any time during their dead time */ if (list_empty(&slot->ds_live_item) && slot->ds_changed_samples >= O2HB_LIVE_THRESHOLD) { - mlog(ML_HEARTBEAT, "Node %d (id 0x%"MLFx64") joined my " - "region\n", slot->ds_node_num, slot->ds_last_generation); + mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", + slot->ds_node_num, (long long)slot->ds_last_generation); /* first on the list generates a callback */ if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { -- cgit v1.2.3 From 29004858a76ba9e26393dd8a85e653f105a33753 Mon Sep 17 00:00:00 2001 From: Kurt Hackel Date: Thu, 2 Mar 2006 16:43:36 -0800 Subject: ocfs2: don't use MLF* in dlm/ files Signed-off-by: Kurt Hackel Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmast.c | 22 +++++++++++++++------- fs/ocfs2/dlm/dlmcommon.h | 15 +++++++++++++++ fs/ocfs2/dlm/dlmconvert.c | 11 +++++++---- fs/ocfs2/dlm/dlmdebug.c | 18 ++++++++++++------ fs/ocfs2/dlm/dlmrecovery.c | 12 ++++++++---- fs/ocfs2/dlm/dlmunlock.c | 11 +++++++---- 6 files changed, 64 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 8d17d28ef91c..355593dd8ef8 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -307,8 +307,11 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) if (past->type != DLM_AST && past->type != DLM_BAST) { - mlog(ML_ERROR, "Unknown ast type! %d, cookie=%"MLFu64", " - "name=%.*s\n", past->type, cookie, locklen, name); + mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" + "name=%.*s\n", past->type, + dlm_get_lock_cookie_node(cookie), + dlm_get_lock_cookie_seq(cookie), + locklen, name); ret = DLM_IVLOCKID; goto leave; } @@ -316,9 +319,11 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) res = dlm_lookup_lockres(dlm, name, locklen); if (!res) { mlog(ML_ERROR, "got %sast for unknown lockres! " - "cookie=%"MLFu64", name=%.*s, namelen=%u\n", + "cookie=%u:%llu, name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", - cookie, locklen, name, locklen); + dlm_get_lock_cookie_node(cookie), + dlm_get_lock_cookie_seq(cookie), + locklen, name, locklen); ret = DLM_IVLOCKID; goto leave; } @@ -360,9 +365,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) goto do_ast; } - mlog(ML_ERROR, "got %sast for unknown lock! cookie=%"MLFu64", " - "name=%.*s, namelen=%u\n", - past->type == DLM_AST ? "" : "b", cookie, locklen, name, locklen); + mlog(ML_ERROR, "got %sast for unknown lock! cookie=%u:%llu, " + "name=%.*s, namelen=%u\n", + past->type == DLM_AST ? 
"" : "b", + dlm_get_lock_cookie_node(cookie), + dlm_get_lock_cookie_seq(cookie), + locklen, name, locklen); ret = DLM_NORMAL; unlock_out: diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index a8aec9341347..88cc43df18f1 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -630,6 +630,21 @@ __dlm_lockres_state_to_status(struct dlm_lock_resource *res) return status; } +static inline u8 dlm_get_lock_cookie_node(u64 cookie) +{ + u8 ret; + cookie >>= 56; + ret = (u8)(cookie & 0xffULL); + return ret; +} + +static inline unsigned long long dlm_get_lock_cookie_seq(u64 cookie) +{ + unsigned long long ret; + ret = ((unsigned long long)cookie) & 0x00ffffffffffffffULL; + return ret; +} + struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, struct dlm_lockstatus *lksb); void dlm_lock_get(struct dlm_lock *lock); diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index f66e2d818ccd..8285228d9e37 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -284,8 +284,10 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, if (lock->ml.convert_type != LKM_IVMODE) { __dlm_print_one_lock_resource(res); mlog(ML_ERROR, "converting a remote lock that is already " - "converting! (cookie=%"MLFu64", conv=%d)\n", - lock->ml.cookie, lock->ml.convert_type); + "converting! (cookie=%u:%llu, conv=%d)\n", + dlm_get_lock_cookie_node(lock->ml.cookie), + dlm_get_lock_cookie_seq(lock->ml.cookie), + lock->ml.convert_type); status = DLM_DENIED; goto bail; } @@ -513,8 +515,9 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) leave: if (!lock) mlog(ML_ERROR, "did not find lock to convert on grant queue! " - "cookie=%"MLFu64"\n", - cnv->cookie); + "cookie=%u:%llu\n", + dlm_get_lock_cookie_node(cnv->cookie), + dlm_get_lock_cookie_seq(cnv->cookie)); else dlm_lock_put(lock); diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 54f61b76ab51..c7eae5d3324e 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -72,8 +72,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) lock = list_entry(iter2, struct dlm_lock, list); spin_lock(&lock->spinlock); mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " - "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", - lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, + "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", + lock->ml.type, lock->ml.convert_type, lock->ml.node, + dlm_get_lock_cookie_node(lock->ml.cookie), + dlm_get_lock_cookie_seq(lock->ml.cookie), list_empty(&lock->ast_list) ? 'y' : 'n', lock->ast_pending ? 'y' : 'n', list_empty(&lock->bast_list) ? 'y' : 'n', @@ -85,8 +87,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) lock = list_entry(iter2, struct dlm_lock, list); spin_lock(&lock->spinlock); mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " - "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", - lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, + "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", + lock->ml.type, lock->ml.convert_type, lock->ml.node, + dlm_get_lock_cookie_node(lock->ml.cookie), + dlm_get_lock_cookie_seq(lock->ml.cookie), list_empty(&lock->ast_list) ? 'y' : 'n', lock->ast_pending ? 'y' : 'n', list_empty(&lock->bast_list) ? 
'y' : 'n', @@ -98,8 +102,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) lock = list_entry(iter2, struct dlm_lock, list); spin_lock(&lock->spinlock); mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " - "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", - lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, + "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", + lock->ml.type, lock->ml.convert_type, lock->ml.node, + dlm_get_lock_cookie_node(lock->ml.cookie), + dlm_get_lock_cookie_seq(lock->ml.cookie), list_empty(&lock->ast_list) ? 'y' : 'n', lock->ast_pending ? 'y' : 'n', list_empty(&lock->bast_list) ? 'y' : 'n', diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 36610bdf1231..805cbabac051 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -744,10 +744,12 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data) dlm->name, dlm->reco.dead_node, dlm->reco.new_master, dead_node, reco_master); mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u " - "entry[0]={c=%"MLFu64",l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n", + "entry[0]={c=%u:%llu,l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n", dlm->name, mres->lockname_len, mres->lockname, mres->master, mres->num_locks, mres->total_locks, mres->flags, - mres->ml[0].cookie, mres->ml[0].list, mres->ml[0].flags, + dlm_get_lock_cookie_node(mres->ml[0].cookie), + dlm_get_lock_cookie_seq(mres->ml[0].cookie), + mres->ml[0].list, mres->ml[0].flags, mres->ml[0].type, mres->ml[0].convert_type, mres->ml[0].highest_blocked, mres->ml[0].node); BUG(); @@ -1513,9 +1515,11 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, /* lock is always created locally first, and * destroyed locally last. it must be on the list */ if (!lock) { + u64 c = ml->cookie; mlog(ML_ERROR, "could not find local lock " - "with cookie %"MLFu64"!\n", - ml->cookie); + "with cookie %u:%llu!\n", + dlm_get_lock_cookie_node(c), + dlm_get_lock_cookie_seq(c)); BUG(); } BUG_ON(lock->ml.node != ml->node); diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index c95f08d2e925..7b1a27542674 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -244,8 +244,10 @@ leave: if (actions & DLM_UNLOCK_FREE_LOCK) { /* this should always be coupled with list removal */ BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK)); - mlog(0, "lock %"MLFu64" should be gone now! refs=%d\n", - lock->ml.cookie, atomic_read(&lock->lock_refs.refcount)-1); + mlog(0, "lock %u:%llu should be gone now! refs=%d\n", + dlm_get_lock_cookie_node(lock->ml.cookie), + dlm_get_lock_cookie_seq(lock->ml.cookie), + atomic_read(&lock->lock_refs.refcount)-1); dlm_lock_put(lock); } if (actions & DLM_UNLOCK_CALL_AST) @@ -493,8 +495,9 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data) not_found: if (!found) mlog(ML_ERROR, "failed to find lock to unlock! 
" - "cookie=%"MLFu64"\n", - unlock->cookie); + "cookie=%u:%llu\n", + dlm_get_lock_cookie_node(unlock->cookie), + dlm_get_lock_cookie_seq(unlock->cookie)); else { /* send the lksb->status back to the other node */ status = lksb->status; -- cgit v1.2.3 From b0697053f9e8de9cea3d510d9e290851ece9460b Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 3 Mar 2006 10:24:33 -0800 Subject: ocfs2: don't use MLF* in the file system Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 88 ++++++++++++++++++----------------- fs/ocfs2/aops.c | 18 +++---- fs/ocfs2/buffer_head_io.c | 11 +++-- fs/ocfs2/dcache.c | 9 ++-- fs/ocfs2/dir.c | 42 ++++++++--------- fs/ocfs2/dlmglue.c | 101 ++++++++++++++++++++-------------------- fs/ocfs2/export.c | 24 +++++----- fs/ocfs2/extent_map.c | 34 +++++++------- fs/ocfs2/file.c | 42 +++++++++-------- fs/ocfs2/inode.c | 116 ++++++++++++++++++++++++---------------------- fs/ocfs2/journal.c | 27 +++++------ fs/ocfs2/localalloc.c | 17 +++---- fs/ocfs2/namei.c | 79 ++++++++++++++++--------------- fs/ocfs2/ocfs2.h | 12 ++--- fs/ocfs2/suballoc.c | 72 +++++++++++++++------------- fs/ocfs2/super.c | 14 +++--- fs/ocfs2/uptodate.c | 40 +++++++++------- fs/ocfs2/vote.c | 63 +++++++++++++------------ 18 files changed, 425 insertions(+), 384 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 6b9812db3779..edaab05a93e0 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -566,9 +566,8 @@ static int ocfs2_do_insert_extent(struct ocfs2_super *osb, next_free = le16_to_cpu(el->l_next_free_rec); if (next_free == 0) { ocfs2_error(inode->i_sb, - "Dinode %"MLFu64" has a bad " - "extent list", - OCFS2_I(inode)->ip_blkno); + "Dinode %llu has a bad extent list", + (unsigned long long)OCFS2_I(inode)->ip_blkno); status = -EIO; goto bail; } @@ -611,9 +610,8 @@ static int ocfs2_do_insert_extent(struct ocfs2_super *osb, next_free = le16_to_cpu(el->l_next_free_rec); if (next_free == 0) { ocfs2_error(inode->i_sb, - "Dinode %"MLFu64" has a bad " - "extent list", - OCFS2_I(inode)->ip_blkno); + "Dinode %llu has a bad extent list", + (unsigned long long)OCFS2_I(inode)->ip_blkno); status = -EIO; goto bail; } @@ -652,8 +650,9 @@ static int ocfs2_do_insert_extent(struct ocfs2_super *osb, /* having an empty extent at eof is legal. 
*/ if (el->l_recs[i].e_cpos != fe->i_clusters) { ocfs2_error(inode->i_sb, - "Dinode %"MLFu64" trailing extent is bad: " + "Dinode %llu trailing extent is bad: " "cpos (%u) != number of clusters (%u)", + (unsigned long long)OCFS2_I(inode)->ip_blkno, le32_to_cpu(el->l_recs[i].e_cpos), le32_to_cpu(fe->i_clusters)); status = -EIO; @@ -747,19 +746,19 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, while(le16_to_cpu(el->l_tree_depth) > 1) { if (le16_to_cpu(el->l_next_free_rec) == 0) { - ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has empty " + ocfs2_error(inode->i_sb, "Dinode %llu has empty " "extent list (next_free_rec == 0)", - OCFS2_I(inode)->ip_blkno); + (unsigned long long)OCFS2_I(inode)->ip_blkno); status = -EIO; goto bail; } i = le16_to_cpu(el->l_next_free_rec) - 1; blkno = le64_to_cpu(el->l_recs[i].e_blkno); if (!blkno) { - ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has extent " + ocfs2_error(inode->i_sb, "Dinode %llu has extent " "list where extent # %d has no physical " "block start", - OCFS2_I(inode)->ip_blkno, i); + (unsigned long long)OCFS2_I(inode)->ip_blkno, i); status = -EIO; goto bail; } @@ -826,9 +825,9 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, mlog_entry_void(); - mlog(0, "add %u clusters starting at block %"MLFu64" to " - "inode %"MLFu64"\n", - new_clusters, start_blk, OCFS2_I(inode)->ip_blkno); + mlog(0, "add %u clusters starting at block %llu to inode %llu\n", + new_clusters, (unsigned long long)start_blk, + (unsigned long long)OCFS2_I(inode)->ip_blkno); fe = (struct ocfs2_dinode *) fe_bh->b_data; el = &fe->id2.i_list; @@ -963,8 +962,8 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb, struct ocfs2_dinode *di; struct ocfs2_truncate_log *tl; - mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk, - num_clusters); + mlog_entry("start_blk = %llu, num_clusters = %u\n", + (unsigned long long)start_blk, num_clusters); BUG_ON(mutex_trylock(&tl_inode->i_mutex)); @@ -981,8 +980,9 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb, tl_count = le16_to_cpu(tl->tl_count); mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || tl_count == 0, - "Truncate record count on #%"MLFu64" invalid (" - "wanted %u, actual %u\n", OCFS2_I(tl_inode)->ip_blkno, + "Truncate record count on #%llu invalid " + "wanted %u, actual %u\n", + (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, ocfs2_truncate_recs_per_inode(osb->sb), le16_to_cpu(tl->tl_count)); @@ -1002,8 +1002,8 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb, } mlog(0, "Log truncate of %u clusters starting at cluster %u to " - "%"MLFu64" (index = %d)\n", num_clusters, start_cluster, - OCFS2_I(tl_inode)->ip_blkno, index); + "%llu (index = %d)\n", num_clusters, start_cluster, + (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index); if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) { /* @@ -1134,8 +1134,8 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) } num_to_flush = le16_to_cpu(tl->tl_used); - mlog(0, "Flush %u records from truncate log #%"MLFu64"\n", - num_to_flush, OCFS2_I(tl_inode)->ip_blkno); + mlog(0, "Flush %u records from truncate log #%llu\n", + num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); if (!num_to_flush) { status = 0; goto bail; @@ -1360,8 +1360,8 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, tl = &tl_copy->id2.i_dealloc; num_recs = le16_to_cpu(tl->tl_used); - mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs, - tl_copy->i_blkno); + mlog(0, "cleanup %u records from 
%llu\n", num_recs, + (unsigned long long)tl_copy->i_blkno); mutex_lock(&tl_inode->i_mutex); for(i = 0; i < num_recs; i++) { @@ -1529,7 +1529,8 @@ static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb, *new_last_eb = bh; get_bh(*new_last_eb); - mlog(0, "returning block %"MLFu64"\n", le64_to_cpu(eb->h_blkno)); + mlog(0, "returning block %llu\n", + (unsigned long long)le64_to_cpu(eb->h_blkno)); bail: if (bh) brelse(bh); @@ -1646,8 +1647,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, /* if our tree depth > 0, update all the tree blocks below us. */ while (depth) { - mlog(0, "traveling tree (depth = %d, next_eb = %"MLFu64")\n", - depth, next_eb); + mlog(0, "traveling tree (depth = %d, next_eb = %llu)\n", + depth, (unsigned long long)next_eb); status = ocfs2_read_block(osb, next_eb, &eb_bh, OCFS2_BH_CACHED, inode); if (status < 0) { @@ -1674,12 +1675,12 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, i = le16_to_cpu(el->l_next_free_rec) - 1; - mlog(0, "extent block %"MLFu64", before: record %d: " - "(%u, %u, %"MLFu64"), next = %u\n", - le64_to_cpu(eb->h_blkno), i, + mlog(0, "extent block %llu, before: record %d: " + "(%u, %u, %llu), next = %u\n", + (unsigned long long)le64_to_cpu(eb->h_blkno), i, le32_to_cpu(el->l_recs[i].e_cpos), le32_to_cpu(el->l_recs[i].e_clusters), - le64_to_cpu(el->l_recs[i].e_blkno), + (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno), le16_to_cpu(el->l_next_free_rec)); BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del); @@ -1697,12 +1698,12 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, BUG_ON(!el->l_next_free_rec); le16_add_cpu(&el->l_next_free_rec, -1); } - mlog(0, "extent block %"MLFu64", after: record %d: " - "(%u, %u, %"MLFu64"), next = %u\n", - le64_to_cpu(eb->h_blkno), i, + mlog(0, "extent block %llu, after: record %d: " + "(%u, %u, %llu), next = %u\n", + (unsigned long long)le64_to_cpu(eb->h_blkno), i, le32_to_cpu(el->l_recs[i].e_cpos), le32_to_cpu(el->l_recs[i].e_clusters), - le64_to_cpu(el->l_recs[i].e_blkno), + (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno), le16_to_cpu(el->l_next_free_rec)); status = ocfs2_journal_dirty(handle, eb_bh); @@ -1792,10 +1793,10 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, last_eb = le64_to_cpu(fe->i_last_eb_blk); start: mlog(0, "ocfs2_commit_truncate: fe->i_clusters = %u, " - "last_eb = %"MLFu64", fe->i_last_eb_blk = %"MLFu64", " + "last_eb = %llu, fe->i_last_eb_blk = %llu, " "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n", - le32_to_cpu(fe->i_clusters), last_eb, - le64_to_cpu(fe->i_last_eb_blk), + le32_to_cpu(fe->i_clusters), (unsigned long long)last_eb, + (unsigned long long)le64_to_cpu(fe->i_last_eb_blk), le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh); if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) { @@ -1934,16 +1935,17 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, fe = (struct ocfs2_dinode *) fe_bh->b_data; mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size =" - "%"MLFu64"\n", fe->i_clusters, new_i_clusters, fe->i_size); + "%llu\n", fe->i_clusters, new_i_clusters, + (unsigned long long)fe->i_size); if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) { - ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has cluster count " - "%u and size %"MLFu64" whereas struct inode has " + ocfs2_error(inode->i_sb, "Dinode %llu has cluster count " + "%u and size %llu whereas struct inode has " "cluster count %u and size %llu which caused an " "invalid truncate to %u clusters.", - le64_to_cpu(fe->i_blkno), + (unsigned long 
long)le64_to_cpu(fe->i_blkno), le32_to_cpu(fe->i_clusters), - le64_to_cpu(fe->i_size), + (unsigned long long)le64_to_cpu(fe->i_size), OCFS2_I(inode)->ip_clusters, i_size_read(inode), new_i_clusters); mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 8f4467a930a5..bf931ba1d364 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -74,8 +74,8 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, fe = (struct ocfs2_dinode *) bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { - mlog(ML_ERROR, "Invalid dinode #%"MLFu64": signature = %.*s\n", - fe->i_blkno, 7, fe->i_signature); + mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", + (unsigned long long)fe->i_blkno, 7, fe->i_signature); goto bail; } @@ -162,8 +162,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, NULL); if (err) { mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " - "%"MLFu64", NULL)\n", err, inode, - (unsigned long long)iblock, p_blkno); + "%llu, NULL)\n", err, inode, (unsigned long long)iblock, + (unsigned long long)p_blkno); goto bail; } @@ -171,13 +171,15 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, if (bh_result->b_blocknr == 0) { err = -EIO; - mlog(ML_ERROR, "iblock = %llu p_blkno = %"MLFu64" " - "blkno=(%"MLFu64")\n", (unsigned long long)iblock, - p_blkno, OCFS2_I(inode)->ip_blkno); + mlog(ML_ERROR, "iblock = %llu p_blkno = %llu blkno=(%llu)\n", + (unsigned long long)iblock, + (unsigned long long)p_blkno, + (unsigned long long)OCFS2_I(inode)->ip_blkno); } past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); - mlog(0, "Inode %lu, past_eof = %"MLFu64"\n", inode->i_ino, past_eof); + mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, + (unsigned long long)past_eof); if (create && (iblock >= past_eof)) set_buffer_new(bh_result); diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index bae3d7548bea..9a24adf9be6e 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -97,8 +97,8 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, int i, ignore_cache = 0; struct buffer_head *bh; - mlog_entry("(block=(%"MLFu64"), nr=(%d), flags=%d, inode=%p)\n", - block, nr, flags, inode); + mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", + (unsigned long long)block, nr, flags, inode); if (osb == NULL || osb->sb == NULL || bhs == NULL) { status = -EINVAL; @@ -143,9 +143,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, if (flags & OCFS2_BH_CACHED && !ocfs2_buffer_uptodate(inode, bh)) { mlog(ML_UPTODATE, - "bh (%llu), inode %"MLFu64" not uptodate\n", + "bh (%llu), inode %llu not uptodate\n", (unsigned long long)bh->b_blocknr, - OCFS2_I(inode)->ip_blkno); + (unsigned long long)OCFS2_I(inode)->ip_blkno); ignore_cache = 1; } @@ -222,7 +222,8 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, if (inode) mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); - mlog(ML_BH_IO, "block=(%"MLFu64"), nr=(%d), cached=%s\n", block, nr, + mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", + (unsigned long long)block, nr, (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes"); bail: diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index bd85182e97bc..1a01380e3878 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -64,15 +64,16 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, /* did we or someone else delete this inode? 
*/ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { spin_unlock(&OCFS2_I(inode)->ip_lock); - mlog(0, "inode (%"MLFu64") deleted, returning false\n", - OCFS2_I(inode)->ip_blkno); + mlog(0, "inode (%llu) deleted, returning false\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); goto bail; } spin_unlock(&OCFS2_I(inode)->ip_lock); if (!inode->i_nlink) { - mlog(0, "Inode %"MLFu64" orphaned, returning false " - "dir = %d\n", OCFS2_I(inode)->ip_blkno, + mlog(0, "Inode %llu orphaned, returning false " + "dir = %d\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, S_ISDIR(inode->i_mode)); goto bail; } diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 57158fa75d91..ae47f450792f 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -83,7 +83,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) struct super_block * sb = inode->i_sb; int have_disk_lock = 0; - mlog_entry("dirino=%"MLFu64"\n", OCFS2_I(inode)->ip_blkno); + mlog_entry("dirino=%llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); stored = 0; bh = NULL; @@ -104,9 +105,9 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) blk = (filp->f_pos) >> sb->s_blocksize_bits; bh = ocfs2_bread(inode, blk, &err, 0); if (!bh) { - mlog(ML_ERROR, "directory #%"MLFu64" contains a hole " - "at offset %lld\n", - OCFS2_I(inode)->ip_blkno, + mlog(ML_ERROR, + "directory #%llu contains a hole at offset %lld\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, filp->f_pos); filp->f_pos += sb->s_blocksize - offset; continue; @@ -214,9 +215,9 @@ int ocfs2_find_files_on_disk(const char *name, int status = -ENOENT; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - mlog_entry("(osb=%p, parent=%"MLFu64", name='%.*s', blkno=%p, " - "inode=%p)\n", - osb, OCFS2_I(inode)->ip_blkno, namelen, name, blkno, inode); + mlog_entry("(osb=%p, parent=%llu, name='%.*s', blkno=%p, inode=%p)\n", + osb, (unsigned long long)OCFS2_I(inode)->ip_blkno, + namelen, name, blkno, inode); *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent); if (!*dirent_bh || !*dirent) { @@ -255,8 +256,8 @@ int ocfs2_check_dir_for_entry(struct inode *dir, struct buffer_head *dirent_bh = NULL; struct ocfs2_dir_entry *dirent = NULL; - mlog_entry("dir %"MLFu64", name '%.*s'\n", OCFS2_I(dir)->ip_blkno, - namelen, name); + mlog_entry("dir %llu, name '%.*s'\n", + (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); ret = -EEXIST; dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent); @@ -287,9 +288,8 @@ int ocfs2_empty_dir(struct inode *inode) if ((i_size_read(inode) < (OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) || !(bh = ocfs2_bread(inode, 0, &err, 0))) { - mlog(ML_ERROR, "bad directory (dir #%"MLFu64") - " - "no data block\n", - OCFS2_I(inode)->ip_blkno); + mlog(ML_ERROR, "bad directory (dir #%llu) - no data block\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); return 1; } @@ -300,9 +300,8 @@ int ocfs2_empty_dir(struct inode *inode) !le64_to_cpu(de1->inode) || strcmp(".", de->name) || strcmp("..", de1->name)) { - mlog(ML_ERROR, "bad directory (dir #%"MLFu64") - " - "no `.' or `..'\n", - OCFS2_I(inode)->ip_blkno); + mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' 
or `..'\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); brelse(bh); return 1; } @@ -314,9 +313,8 @@ int ocfs2_empty_dir(struct inode *inode) bh = ocfs2_bread(inode, offset >> sb->s_blocksize_bits, &err, 0); if (!bh) { - mlog(ML_ERROR, "directory #%"MLFu64" contains " - "a hole at offset %lu\n", - OCFS2_I(inode)->ip_blkno, offset); + mlog(ML_ERROR, "dir %llu has a hole at %lu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, offset); offset += sb->s_blocksize; continue; } @@ -406,8 +404,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, mlog_entry_void(); dir_i_size = i_size_read(dir); - mlog(0, "extending dir %"MLFu64" (i_size = %lld)\n", - OCFS2_I(dir)->ip_blkno, dir_i_size); + mlog(0, "extending dir %llu (i_size = %lld)\n", + (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); handle = ocfs2_alloc_handle(osb); if (handle == NULL) { @@ -531,8 +529,8 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, mlog_entry_void(); - mlog(0, "getting ready to insert namelen %d into dir %"MLFu64"\n", - namelen, OCFS2_I(dir)->ip_blkno); + mlog(0, "getting ready to insert namelen %d into dir %llu\n", + namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); BUG_ON(!S_ISDIR(dir->i_mode)); fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index e971ec2f8407..84f153aca692 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -231,9 +231,9 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type, BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); - len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016"MLFx64"%08x", - ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, blkno, - generation); + len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", + ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, + (long long)blkno, generation); BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); @@ -533,8 +533,8 @@ static void ocfs2_inode_ast_func(void *opaque) inode = ocfs2_lock_res_inode(lockres); - mlog(0, "AST fired for inode %"MLFu64", l_action = %u, type = %s\n", - OCFS2_I(inode)->ip_blkno, lockres->l_action, + mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action, ocfs2_lock_type_string(lockres->l_type)); BUG_ON(!ocfs2_is_inode_lock(lockres)); @@ -544,8 +544,8 @@ static void ocfs2_inode_ast_func(void *opaque) lksb = &(lockres->l_lksb); if (lksb->status != DLM_NORMAL) { mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u " - "on inode %"MLFu64"\n", lksb->status, - OCFS2_I(inode)->ip_blkno); + "on inode %llu\n", lksb->status, + (unsigned long long)OCFS2_I(inode)->ip_blkno); spin_unlock_irqrestore(&lockres->l_lock, flags); mlog_exit_void(); return; @@ -646,10 +646,9 @@ static void ocfs2_inode_bast_func(void *opaque, int level) inode = ocfs2_lock_res_inode(lockres); osb = OCFS2_SB(inode->i_sb); - mlog(0, "BAST fired for inode %"MLFu64", blocking = %d, level = %d " - "type = %s\n", OCFS2_I(inode)->ip_blkno, level, - lockres->l_level, - ocfs2_lock_type_string(lockres->l_type)); + mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, level, + lockres->l_level, ocfs2_lock_type_string(lockres->l_type)); ocfs2_generic_bast_func(osb, lockres, level); @@ -1104,7 +1103,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode) mlog_entry_void(); - mlog(0, "Inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno); + mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); /* NOTE: That we don't increment any of 
the holder counts, nor * do we add anything to a journal handle. Since this is @@ -1149,8 +1148,8 @@ int ocfs2_rw_lock(struct inode *inode, int write) mlog_entry_void(); - mlog(0, "inode %"MLFu64" take %s RW lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu take %s RW lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); lockres = &OCFS2_I(inode)->ip_rw_lockres; @@ -1173,8 +1172,8 @@ void ocfs2_rw_unlock(struct inode *inode, int write) mlog_entry_void(); - mlog(0, "inode %"MLFu64" drop %s RW lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu drop %s RW lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); @@ -1193,8 +1192,8 @@ int ocfs2_data_lock_full(struct inode *inode, mlog_entry_void(); - mlog(0, "inode %"MLFu64" take %s DATA lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu take %s DATA lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); /* We'll allow faking a readonly data lock for @@ -1278,8 +1277,8 @@ void ocfs2_data_unlock(struct inode *inode, mlog_entry_void(); - mlog(0, "inode %"MLFu64" drop %s DATA lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu drop %s DATA lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) @@ -1462,9 +1461,9 @@ static int ocfs2_meta_lock_update(struct inode *inode, spin_lock(&oi->ip_lock); if (oi->ip_flags & OCFS2_INODE_DELETED) { - mlog(0, "Orphaned inode %"MLFu64" was deleted while we " + mlog(0, "Orphaned inode %llu was deleted while we " "were waiting on a lock. ip_flags = 0x%x\n", - oi->ip_blkno, oi->ip_flags); + (unsigned long long)oi->ip_blkno, oi->ip_flags); spin_unlock(&oi->ip_lock); status = -ENOENT; goto bail; @@ -1485,8 +1484,8 @@ static int ocfs2_meta_lock_update(struct inode *inode, ocfs2_extent_map_trunc(inode, 0); if (ocfs2_meta_lvb_is_trustable(lockres)) { - mlog(0, "Trusting LVB on inode %"MLFu64"\n", - oi->ip_blkno); + mlog(0, "Trusting LVB on inode %llu\n", + (unsigned long long)oi->ip_blkno); ocfs2_refresh_inode_from_lvb(inode); } else { /* Boo, we have to go to disk. */ @@ -1514,15 +1513,16 @@ static int ocfs2_meta_lock_update(struct inode *inode, } mlog_bug_on_msg(inode->i_generation != le32_to_cpu(fe->i_generation), - "Invalid dinode %"MLFu64" disk generation: %u " + "Invalid dinode %llu disk generation: %u " "inode->i_generation: %u\n", - oi->ip_blkno, le32_to_cpu(fe->i_generation), + (unsigned long long)oi->ip_blkno, + le32_to_cpu(fe->i_generation), inode->i_generation); mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), - "Stale dinode %"MLFu64" dtime: %"MLFu64" " - "flags: 0x%x\n", oi->ip_blkno, - le64_to_cpu(fe->i_dtime), + "Stale dinode %llu dtime: %llu flags: 0x%x\n", + (unsigned long long)oi->ip_blkno, + (unsigned long long)le64_to_cpu(fe->i_dtime), le32_to_cpu(fe->i_flags)); ocfs2_refresh_inode(inode, fe); @@ -1581,8 +1581,8 @@ int ocfs2_meta_lock_full(struct inode *inode, mlog_entry_void(); - mlog(0, "inode %"MLFu64", take %s META lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu, take %s META lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, ex ? 
"EXMODE" : "PRMODE"); status = 0; @@ -1716,8 +1716,8 @@ void ocfs2_meta_unlock(struct inode *inode, mlog_entry_void(); - mlog(0, "inode %"MLFu64" drop %s META lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu drop %s META lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, ex ? "EXMODE" : "PRMODE"); if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) @@ -2686,8 +2686,8 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, mapping = inode->i_mapping; if (filemap_fdatawrite(mapping)) { - mlog(ML_ERROR, "Could not sync inode %"MLFu64" for downconvert!", - OCFS2_I(inode)->ip_blkno); + mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", + (unsigned long long)OCFS2_I(inode)->ip_blkno); } sync_mapping_buffers(mapping); if (blocking == LKM_EXMODE) { @@ -2717,7 +2717,8 @@ int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, inode = ocfs2_lock_res_inode(lockres); osb = OCFS2_SB(inode->i_sb); - mlog(0, "unblock inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno); + mlog(0, "unblock inode %llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); status = ocfs2_generic_unblock_lock(osb, lockres, @@ -2726,8 +2727,8 @@ int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, if (status < 0) mlog_errno(status); - mlog(0, "inode %"MLFu64", requeue = %d\n", - OCFS2_I(inode)->ip_blkno, *requeue); + mlog(0, "inode %llu, requeue = %d\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); mlog_exit(status); return status; @@ -2767,14 +2768,15 @@ int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres, inode = ocfs2_lock_res_inode(lockres); - mlog(0, "unblock inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno); + mlog(0, "unblock inode %llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); status = ocfs2_do_unblock_meta(inode, requeue); if (status < 0) mlog_errno(status); - mlog(0, "inode %"MLFu64", requeue = %d\n", - OCFS2_I(inode)->ip_blkno, *requeue); + mlog(0, "inode %llu, requeue = %d\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); mlog_exit(status); return status; @@ -2893,12 +2895,13 @@ void ocfs2_dump_meta_lvb_info(u64 level, lockres->l_name, function, line); mlog(level, "version: %u, clusters: %u\n", be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters)); - mlog(level, "size: %"MLFu64", uid %u, gid %u, mode 0x%x\n", - be64_to_cpu(lvb->lvb_isize), be32_to_cpu(lvb->lvb_iuid), - be32_to_cpu(lvb->lvb_igid), be16_to_cpu(lvb->lvb_imode)); - mlog(level, "nlink %u, atime_packed 0x%"MLFx64", " - "ctime_packed 0x%"MLFx64", mtime_packed 0x%"MLFx64"\n", - be16_to_cpu(lvb->lvb_inlink), be64_to_cpu(lvb->lvb_iatime_packed), - be64_to_cpu(lvb->lvb_ictime_packed), - be64_to_cpu(lvb->lvb_imtime_packed)); + mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", + (unsigned long long)be64_to_cpu(lvb->lvb_isize), + be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), + be16_to_cpu(lvb->lvb_imode)); + mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " + "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink), + (long long)be64_to_cpu(lvb->lvb_iatime_packed), + (long long)be64_to_cpu(lvb->lvb_ictime_packed), + (long long)be64_to_cpu(lvb->lvb_imtime_packed)); } diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 5810160d92a8..ec55ab3c1214 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -95,8 +95,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) mlog_entry("(0x%p, '%.*s')\n", child, child->d_name.len, child->d_name.name); - mlog(0, "find parent of directory %"MLFu64"\n", - OCFS2_I(dir)->ip_blkno); + mlog(0, "find 
parent of directory %llu\n", + (unsigned long long)OCFS2_I(dir)->ip_blkno); status = ocfs2_meta_lock(dir, NULL, NULL, 0); if (status < 0) { @@ -115,7 +115,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); if (IS_ERR(inode)) { - mlog(ML_ERROR, "Unable to create inode %"MLFu64"\n", blkno); + mlog(ML_ERROR, "Unable to create inode %llu\n", + (unsigned long long)blkno); parent = ERR_PTR(-EACCES); goto bail_unlock; } @@ -160,8 +161,8 @@ static int ocfs2_encode_fh(struct dentry *dentry, __be32 *fh, int *max_len, blkno = OCFS2_I(inode)->ip_blkno; generation = inode->i_generation; - mlog(0, "Encoding fh: blkno: %"MLFu64", generation: %u\n", - blkno, generation); + mlog(0, "Encoding fh: blkno: %llu, generation: %u\n", + (unsigned long long)blkno, generation); len = 3; fh[0] = cpu_to_le32((u32)(blkno >> 32)); @@ -186,8 +187,8 @@ static int ocfs2_encode_fh(struct dentry *dentry, __be32 *fh, int *max_len, len = 6; type = 2; - mlog(0, "Encoding parent: blkno: %"MLFu64", generation: %u\n", - blkno, generation); + mlog(0, "Encoding parent: blkno: %llu, generation: %u\n", + (unsigned long long)blkno, generation); } *max_len = len; @@ -220,16 +221,17 @@ static struct dentry *ocfs2_decode_fh(struct super_block *sb, __be32 *fh, parent.ih_blkno |= (u64)le32_to_cpu(fh[4]); parent.ih_generation = le32_to_cpu(fh[5]); - mlog(0, "Decoding parent: blkno: %"MLFu64", generation: %u\n", - parent.ih_blkno, parent.ih_generation); + mlog(0, "Decoding parent: blkno: %llu, generation: %u\n", + (unsigned long long)parent.ih_blkno, + parent.ih_generation); } handle.ih_blkno = (u64)le32_to_cpu(fh[0]) << 32; handle.ih_blkno |= (u64)le32_to_cpu(fh[1]); handle.ih_generation = le32_to_cpu(fh[2]); - mlog(0, "Encoding fh: blkno: %"MLFu64", generation: %u\n", - handle.ih_blkno, handle.ih_generation); + mlog(0, "Encoding fh: blkno: %llu, generation: %u\n", + (unsigned long long)handle.ih_blkno, handle.ih_generation); ret = ocfs2_export_ops.find_exported_dentry(sb, &handle, &parent, acceptable, context); diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index e6f207eebab4..4601fc256f11 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -182,10 +182,10 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode, if (rec_end > OCFS2_I(inode)->ip_clusters) { mlog_errno(ret); ocfs2_error(inode->i_sb, - "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n", + "Extent %d at e_blkno %llu of inode %llu goes past ip_clusters of %u\n", i, - le64_to_cpu(rec->e_blkno), - OCFS2_I(inode)->ip_blkno, + (unsigned long long)le64_to_cpu(rec->e_blkno), + (unsigned long long)OCFS2_I(inode)->ip_blkno, OCFS2_I(inode)->ip_clusters); goto out_free; } @@ -233,11 +233,11 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode, if (blkno) { mlog_errno(ret); ocfs2_error(inode->i_sb, - "Multiple extents for (cpos = %u, clusters = %u) on inode %"MLFu64"; e_blkno %"MLFu64" and rec %d at e_blkno %"MLFu64"\n", + "Multiple extents for (cpos = %u, clusters = %u) on inode %llu; e_blkno %llu and rec %d at e_blkno %llu\n", cpos, clusters, - OCFS2_I(inode)->ip_blkno, - blkno, i, - le64_to_cpu(rec->e_blkno)); + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)blkno, i, + (unsigned long long)le64_to_cpu(rec->e_blkno)); goto out_free; } @@ -251,9 +251,9 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode, ret = -EBADR; if (!blkno) { ocfs2_error(inode->i_sb, - "No record found for (cpos = %u, clusters = %u) on inode %"MLFu64"\n", 
+ "No record found for (cpos = %u, clusters = %u) on inode %llu\n", cpos, clusters, - OCFS2_I(inode)->ip_blkno); + (unsigned long long)OCFS2_I(inode)->ip_blkno); mlog_errno(ret); goto out_free; } @@ -288,10 +288,10 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode, ret = -EBADR; mlog_errno(ret); ocfs2_error(inode->i_sb, - "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n", + "Extent %d at e_blkno %llu of inode %llu goes past ip_clusters of %u\n", i, - le64_to_cpu(rec->e_blkno), - OCFS2_I(inode)->ip_blkno, + (unsigned long long)le64_to_cpu(rec->e_blkno), + (unsigned long long)OCFS2_I(inode)->ip_blkno, OCFS2_I(inode)->ip_clusters); return ret; } @@ -557,9 +557,9 @@ static int ocfs2_extent_map_insert(struct inode *inode, ret = -EBADR; mlog_errno(ret); ocfs2_error(inode->i_sb, - "Zero e_clusters on non-tail extent record at e_blkno %"MLFu64" on inode %"MLFu64"\n", - le64_to_cpu(rec->e_blkno), - OCFS2_I(inode)->ip_blkno); + "Zero e_clusters on non-tail extent record at e_blkno %llu on inode %llu\n", + (unsigned long long)le64_to_cpu(rec->e_blkno), + (unsigned long long)OCFS2_I(inode)->ip_blkno); return ret; } @@ -660,10 +660,10 @@ int ocfs2_extent_map_append(struct inode *inode, mlog_bug_on_msg((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) != (em->em_clusters + new_clusters), - "Inode %"MLFu64":\n" + "Inode %llu:\n" "rec->e_cpos = %u + rec->e_clusters = %u = %u\n" "em->em_clusters = %u + new_clusters = %u = %u\n", - OCFS2_I(inode)->ip_blkno, + (unsigned long long)OCFS2_I(inode)->ip_blkno, le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters), le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters), em->em_clusters, new_clusters, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8a4048b55fdc..4b4cbadd5838 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -220,8 +220,9 @@ static int ocfs2_truncate_file(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_truncate_context *tc = NULL; - mlog_entry("(inode = %"MLFu64", new_i_size = %"MLFu64"\n", - OCFS2_I(inode)->ip_blkno, new_i_size); + mlog_entry("(inode = %llu, new_i_size = %llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)new_i_size); truncate_inode_pages(inode->i_mapping, new_i_size); @@ -233,23 +234,26 @@ static int ocfs2_truncate_file(struct inode *inode, } mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), - "Inode %"MLFu64", inode i_size = %lld != di " - "i_size = %"MLFu64", i_flags = 0x%x\n", - OCFS2_I(inode)->ip_blkno, + "Inode %llu, inode i_size = %lld != di " + "i_size = %llu, i_flags = 0x%x\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), - le64_to_cpu(fe->i_size), le32_to_cpu(fe->i_flags)); + (unsigned long long)le64_to_cpu(fe->i_size), + le32_to_cpu(fe->i_flags)); if (new_i_size > le64_to_cpu(fe->i_size)) { - mlog(0, "asked to truncate file with size (%"MLFu64") " - "to size (%"MLFu64")!\n", - le64_to_cpu(fe->i_size), new_i_size); + mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n", + (unsigned long long)le64_to_cpu(fe->i_size), + (unsigned long long)new_i_size); status = -EINVAL; mlog_errno(status); goto bail; } - mlog(0, "inode %"MLFu64", i_size = %"MLFu64", new_i_size = %"MLFu64"\n", - le64_to_cpu(fe->i_blkno), le64_to_cpu(fe->i_size), new_i_size); + mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n", + (unsigned long long)le64_to_cpu(fe->i_blkno), + (unsigned long long)le64_to_cpu(fe->i_size), + (unsigned long long)new_i_size); /* lets handle the simple 
truncate cases before doing any more * cluster locking. */ @@ -378,8 +382,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, } block = ocfs2_clusters_to_blocks(osb->sb, bit_off); - mlog(0, "Allocating %u clusters at block %u for inode %"MLFu64"\n", - num_bits, bit_off, OCFS2_I(inode)->ip_blkno); + mlog(0, "Allocating %u clusters at block %u for inode %llu\n", + num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); status = ocfs2_insert_extent(osb, handle, inode, fe_bh, block, num_bits, meta_ac); if (status < 0) { @@ -449,9 +453,9 @@ static int ocfs2_extend_allocation(struct inode *inode, restart_all: BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); - mlog(0, "extend inode %"MLFu64", i_size = %lld, fe->i_clusters = %u, " + mlog(0, "extend inode %llu, i_size = %lld, fe->i_clusters = %u, " "clusters_to_add = %u\n", - OCFS2_I(inode)->ip_blkno, i_size_read(inode), + (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), fe->i_clusters, clusters_to_add); handle = ocfs2_alloc_handle(osb); @@ -569,8 +573,8 @@ restarted_transaction: } } - mlog(0, "fe: i_clusters = %u, i_size=%"MLFu64"\n", - fe->i_clusters, fe->i_size); + mlog(0, "fe: i_clusters = %u, i_size=%llu\n", + fe->i_clusters, (unsigned long long)fe->i_size); mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", OCFS2_I(inode)->ip_clusters, i_size_read(inode)); @@ -865,8 +869,8 @@ static int ocfs2_write_remove_suid(struct inode *inode) struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di; - mlog_entry("(Inode %"MLFu64", mode 0%o)\n", oi->ip_blkno, - inode->i_mode); + mlog_entry("(Inode %llu, mode 0%o)\n", + (unsigned long long)oi->ip_blkno, inode->i_mode); handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS); if (handle == NULL) { diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 315472a5c192..327a5b7b86ed 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -95,7 +95,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) struct super_block *sb = osb->sb; struct ocfs2_find_inode_args args; - mlog_entry("(blkno = %"MLFu64")\n", blkno); + mlog_entry("(blkno = %llu)\n", (unsigned long long)blkno); /* Ok. By now we've either got the offsets passed to us by the * caller, or we just pulled them off the bh. Lets do some @@ -134,8 +134,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) bail: if (!IS_ERR(inode)) { - mlog(0, "returning inode with number %"MLFu64"\n", - OCFS2_I(inode)->ip_blkno); + mlog(0, "returning inode with number %llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); mlog_exit_ptr(inode); } else mlog_errno(PTR_ERR(inode)); @@ -219,7 +219,8 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, struct ocfs2_super *osb; int status = -EINVAL; - mlog_entry("(0x%p, size:%"MLFu64")\n", inode, fe->i_size); + mlog_entry("(0x%p, size:%llu)\n", inode, + (unsigned long long)fe->i_size); sb = inode->i_sb; osb = OCFS2_SB(sb); @@ -228,9 +229,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, * today. change if needed. 
*/ if (!OCFS2_IS_VALID_DINODE(fe) || !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { - mlog(ML_ERROR, "Invalid dinode: i_ino=%lu, i_blkno=%"MLFu64", " + mlog(ML_ERROR, "Invalid dinode: i_ino=%lu, i_blkno=%llu, " "signature = %.*s, flags = 0x%x\n", - inode->i_ino, le64_to_cpu(fe->i_blkno), 7, + inode->i_ino, + (unsigned long long)le64_to_cpu(fe->i_blkno), 7, fe->i_signature, le32_to_cpu(fe->i_flags)); goto bail; } @@ -268,8 +270,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, if (OCFS2_I(inode)->ip_blkno != le64_to_cpu(fe->i_blkno)) mlog(ML_ERROR, - "ip_blkno %"MLFu64" != i_blkno %"MLFu64"!\n", - OCFS2_I(inode)->ip_blkno, fe->i_blkno); + "ip_blkno %llu != i_blkno %llu!\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)fe->i_blkno); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; @@ -278,8 +281,8 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_ino = ino_from_blkno(inode->i_sb, le64_to_cpu(fe->i_blkno)); - mlog(0, "blkno = %"MLFu64", ino = %lu, create_ino = %s\n", - fe->i_blkno, inode->i_ino, create_ino ? "true" : "false"); + mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n", + (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false"); inode->i_nlink = le16_to_cpu(fe->i_links_count); @@ -371,8 +374,8 @@ static int ocfs2_read_locked_inode(struct inode *inode, fe = (struct ocfs2_dinode *) bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { - mlog(ML_ERROR, "Invalid dinode #%"MLFu64": signature = %.*s\n", - fe->i_blkno, 7, fe->i_signature); + mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", + (unsigned long long)fe->i_blkno, 7, fe->i_signature); make_bad_inode(inode); goto bail; } @@ -386,8 +389,8 @@ static int ocfs2_read_locked_inode(struct inode *inode, status = -EINVAL; if (ocfs2_populate_inode(inode, fe, 0) < 0) { - mlog(ML_ERROR, "populate inode failed! i_blkno=%"MLFu64", " - "i_ino=%lu\n", fe->i_blkno, inode->i_ino); + mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", + (unsigned long long)fe->i_blkno, inode->i_ino); make_bad_inode(inode); goto bail; } @@ -675,8 +678,8 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) * never get here as system file inodes should always have a * positive link count. */ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) { - mlog(ML_ERROR, "Skipping delete of system file %"MLFu64".\n", - oi->ip_blkno); + mlog(ML_ERROR, "Skipping delete of system file %llu\n", + (unsigned long long)oi->ip_blkno); goto bail_unlock; } @@ -715,16 +718,16 @@ static int ocfs2_query_inode_wipe(struct inode *inode, * ocfs2_delete_inode, another node might have asked to delete * the inode. Recheck our flags to catch this. */ if (!ocfs2_inode_is_valid_to_delete(inode)) { - mlog(0, "Skipping delete of %"MLFu64" because flags changed\n", - oi->ip_blkno); + mlog(0, "Skipping delete of %llu because flags changed\n", + (unsigned long long)oi->ip_blkno); goto bail; } /* Now that we have an up to date inode, we can double check * the link count. */ if (inode->i_nlink) { - mlog(0, "Skipping delete of %"MLFu64" because nlink = %u\n", - oi->ip_blkno, inode->i_nlink); + mlog(0, "Skipping delete of %llu because nlink = %u\n", + (unsigned long long)oi->ip_blkno, inode->i_nlink); goto bail; } @@ -734,9 +737,11 @@ static int ocfs2_query_inode_wipe(struct inode *inode, /* for lack of a better error? */ status = -EEXIST; mlog(ML_ERROR, - "Inode %"MLFu64" (on-disk %"MLFu64") not orphaned! 
" + "Inode %llu (on-disk %llu) not orphaned! " "Disk flags 0x%x, inode flags 0x%x\n", - oi->ip_blkno, di->i_blkno, di->i_flags, oi->ip_flags); + (unsigned long long)oi->ip_blkno, + (unsigned long long)di->i_blkno, di->i_flags, + oi->ip_flags); goto bail; } @@ -753,8 +758,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode, * disk and let them worry about deleting it. */ if (status == -EBUSY) { status = 0; - mlog(0, "Skipping delete of %"MLFu64" because it is in use on" - "other nodes\n", oi->ip_blkno); + mlog(0, "Skipping delete of %llu because it is in use on" + "other nodes\n", (unsigned long long)oi->ip_blkno); goto bail; } if (status < 0) { @@ -768,13 +773,13 @@ static int ocfs2_query_inode_wipe(struct inode *inode, * into. This may happen during node death and * recovery knows how to clean it up so we can safely * ignore this inode for now on. */ - mlog(0, "Nobody knew where inode %"MLFu64" was orphaned!\n", - oi->ip_blkno); + mlog(0, "Nobody knew where inode %llu was orphaned!\n", + (unsigned long long)oi->ip_blkno); } else { *wipe = 1; - mlog(0, "Inode %"MLFu64" is ok to wipe from orphan dir %d\n", - oi->ip_blkno, oi->ip_orphaned_slot); + mlog(0, "Inode %llu is ok to wipe from orphan dir %d\n", + (unsigned long long)oi->ip_blkno, oi->ip_orphaned_slot); } spin_unlock(&oi->ip_lock); @@ -788,8 +793,8 @@ bail: static void ocfs2_cleanup_delete_inode(struct inode *inode, int sync_data) { - mlog(0, "Cleanup inode %"MLFu64", sync = %d\n", - OCFS2_I(inode)->ip_blkno, sync_data); + mlog(0, "Cleanup inode %llu, sync = %d\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); if (sync_data) write_inode_now(inode, 1); truncate_inode_pages(&inode->i_data, 0); @@ -897,8 +902,8 @@ void ocfs2_clear_inode(struct inode *inode) if (!inode) goto bail; - mlog(0, "Clearing inode: %"MLFu64", nlink = %u\n", - OCFS2_I(inode)->ip_blkno, inode->i_nlink); + mlog(0, "Clearing inode: %llu, nlink = %u\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink); mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, "Inode=%lu\n", inode->i_ino); @@ -919,8 +924,8 @@ void ocfs2_clear_inode(struct inode *inode) ocfs2_checkpoint_inode(inode); mlog_bug_on_msg(!list_empty(&oi->ip_io_markers), - "Clear inode of %"MLFu64", inode has io markers\n", - oi->ip_blkno); + "Clear inode of %llu, inode has io markers\n", + (unsigned long long)oi->ip_blkno); ocfs2_extent_map_drop(inode, 0); ocfs2_extent_map_init(inode); @@ -936,20 +941,20 @@ void ocfs2_clear_inode(struct inode *inode) ocfs2_metadata_cache_purge(inode); mlog_bug_on_msg(oi->ip_metadata_cache.ci_num_cached, - "Clear inode of %"MLFu64", inode has %u cache items\n", - oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached); + "Clear inode of %llu, inode has %u cache items\n", + (unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached); mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE), - "Clear inode of %"MLFu64", inode has a bad flag\n", - oi->ip_blkno); + "Clear inode of %llu, inode has a bad flag\n", + (unsigned long long)oi->ip_blkno); mlog_bug_on_msg(spin_is_locked(&oi->ip_lock), - "Clear inode of %"MLFu64", inode is locked\n", - oi->ip_blkno); + "Clear inode of %llu, inode is locked\n", + (unsigned long long)oi->ip_blkno); mlog_bug_on_msg(!mutex_trylock(&oi->ip_io_mutex), - "Clear inode of %"MLFu64", io_mutex is locked\n", - oi->ip_blkno); + "Clear inode of %llu, io_mutex is locked\n", + (unsigned long long)oi->ip_blkno); mutex_unlock(&oi->ip_io_mutex); /* @@ -957,19 +962,19 @@ void ocfs2_clear_inode(struct inode *inode) * kernel 1, 
world 0 */ mlog_bug_on_msg(!down_write_trylock(&oi->ip_alloc_sem), - "Clear inode of %"MLFu64", alloc_sem is locked\n", - oi->ip_blkno); + "Clear inode of %llu, alloc_sem is locked\n", + (unsigned long long)oi->ip_blkno); up_write(&oi->ip_alloc_sem); mlog_bug_on_msg(oi->ip_open_count, - "Clear inode of %"MLFu64" has open count %d\n", - oi->ip_blkno, oi->ip_open_count); + "Clear inode of %llu has open count %d\n", + (unsigned long long)oi->ip_blkno, oi->ip_open_count); mlog_bug_on_msg(!list_empty(&oi->ip_handle_list), - "Clear inode of %"MLFu64" has non empty handle list\n", - oi->ip_blkno); + "Clear inode of %llu has non empty handle list\n", + (unsigned long long)oi->ip_blkno); mlog_bug_on_msg(oi->ip_handle, - "Clear inode of %"MLFu64" has non empty handle pointer\n", - oi->ip_blkno); + "Clear inode of %llu has non empty handle pointer\n", + (unsigned long long)oi->ip_blkno); /* Clear all other flags. */ oi->ip_flags = OCFS2_INODE_CACHE_INLINE; @@ -991,8 +996,8 @@ void ocfs2_drop_inode(struct inode *inode) mlog_entry_void(); - mlog(0, "Drop inode %"MLFu64", nlink = %u, ip_flags = 0x%x\n", - oi->ip_blkno, inode->i_nlink, oi->ip_flags); + mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n", + (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); /* Testing ip_orphaned_slot here wouldn't work because we may * not have gotten a delete_inode vote from any other nodes @@ -1069,8 +1074,8 @@ int ocfs2_inode_revalidate(struct dentry *dentry) struct inode *inode = dentry->d_inode; int status = 0; - mlog_entry("(inode = 0x%p, ino = %"MLFu64")\n", inode, - inode ? OCFS2_I(inode)->ip_blkno : 0ULL); + mlog_entry("(inode = 0x%p, ino = %llu)\n", inode, + inode ? (unsigned long long)OCFS2_I(inode)->ip_blkno : 0ULL); if (!inode) { mlog(0, "eep, no inode!\n"); @@ -1114,7 +1119,8 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle, int status; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; - mlog_entry("(inode %"MLFu64")\n", OCFS2_I(inode)->ip_blkno); + mlog_entry("(inode %llu)\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); status = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4be801f4559b..ae3440ca083c 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -503,8 +503,8 @@ static void ocfs2_handle_cleanup_locks(struct ocfs2_journal *journal, ocfs2_meta_unlock(inode, 1); if (atomic_read(&inode->i_count) == 1) mlog(ML_ERROR, - "Inode %"MLFu64", I'm doing a last iput for!", - OCFS2_I(inode)->ip_blkno); + "Inode %llu, I'm doing a last iput for!", + (unsigned long long)OCFS2_I(inode)->ip_blkno); iput(inode); kmem_cache_free(ocfs2_lock_cache, lock); } @@ -640,8 +640,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, /* This is called from startup/shutdown which will * handle the errors in a specific manner, so no need * to call ocfs2_error() here. 
*/ - mlog(ML_ERROR, "Journal dinode %"MLFu64" has invalid " - "signature: %.*s", fe->i_blkno, 7, fe->i_signature); + mlog(ML_ERROR, "Journal dinode %llu has invalid " + "signature: %.*s", (unsigned long long)fe->i_blkno, 7, + fe->i_signature); status = -EIO; goto out; } @@ -934,8 +935,8 @@ void ocfs2_complete_recovery(void *data) la_dinode = item->lri_la_dinode; if (la_dinode) { - mlog(0, "Clean up local alloc %"MLFu64"\n", - la_dinode->i_blkno); + mlog(0, "Clean up local alloc %llu\n", + (unsigned long long)la_dinode->i_blkno); ret = ocfs2_complete_local_alloc_recovery(osb, la_dinode); @@ -947,8 +948,8 @@ void ocfs2_complete_recovery(void *data) tl_dinode = item->lri_tl_dinode; if (tl_dinode) { - mlog(0, "Clean up truncate log %"MLFu64"\n", - tl_dinode->i_blkno); + mlog(0, "Clean up truncate log %llu\n", + (unsigned long long)tl_dinode->i_blkno); ret = ocfs2_complete_truncate_log_recovery(osb, tl_dinode); @@ -1473,11 +1474,11 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, if (de->file_type > OCFS2_FT_MAX) { mlog(ML_ERROR, "block %llu contains invalid de: " - "inode = %"MLFu64", rec_len = %u, " + "inode = %llu, rec_len = %u, " "name_len = %u, file_type = %u, " "name='%.*s'\n", (unsigned long long)bh->b_blocknr, - le64_to_cpu(de->inode), + (unsigned long long)le64_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, de->file_type, @@ -1494,8 +1495,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, if (IS_ERR(iter)) continue; - mlog(0, "queue orphan %"MLFu64"\n", - OCFS2_I(iter)->ip_blkno); + mlog(0, "queue orphan %llu\n", + (unsigned long long)OCFS2_I(iter)->ip_blkno); /* No locking is required for the next_orphan * queue as there is only ever a single * process doing orphan recovery. */ @@ -1588,7 +1589,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, while (inode) { oi = OCFS2_I(inode); - mlog(0, "iput orphan %"MLFu64"\n", oi->ip_blkno); + mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno); iter = oi->ip_next_orphan; diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 149b35181666..0d1973ea32b0 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -143,8 +143,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) if (!(le32_to_cpu(alloc->i_flags) & (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { - mlog(ML_ERROR, "Invalid local alloc inode, %"MLFu64"\n", - OCFS2_I(inode)->ip_blkno); + mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); status = -EINVAL; goto bail; } @@ -493,9 +493,9 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, if (le32_to_cpu(alloc->id1.bitmap1.i_used) != ocfs2_local_alloc_count_bits(alloc)) { - ocfs2_error(osb->sb, "local alloc inode %"MLFu64" says it has " + ocfs2_error(osb->sb, "local alloc inode %llu says it has " "%u free bits, but a count shows %u", - le64_to_cpu(alloc->i_blkno), + (unsigned long long)le64_to_cpu(alloc->i_blkno), le32_to_cpu(alloc->id1.bitmap1.i_used), ocfs2_local_alloc_count_bits(alloc)); status = -EIO; @@ -753,10 +753,11 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, ocfs2_clusters_to_blocks(osb->sb, start - count); - mlog(0, "freeing %u bits starting at local " - "alloc bit %u (la_start_blk = %"MLFu64", " - "blkno = %"MLFu64")\n", count, start - count, - la_start_blk, blkno); + mlog(0, "freeing %u bits starting at local alloc bit " + "%u (la_start_blk = %llu, blkno = %llu)\n", + count, start - count, + (unsigned long long)la_start_blk, + (unsigned long long)blkno); status = 
ocfs2_free_clusters(handle, main_bm_inode, main_bm_bh, blkno, count); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index f6b77ff1d2bf..274f61d0cda9 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -161,8 +161,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, goto bail; } - mlog(0, "find name %.*s in directory %"MLFu64"\n", dentry->d_name.len, - dentry->d_name.name, OCFS2_I(dir)->ip_blkno); + mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, + dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); status = ocfs2_meta_lock(dir, NULL, NULL, 0); if (status < 0) { @@ -180,7 +180,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); if (IS_ERR(inode)) { - mlog(ML_ERROR, "Unable to create inode %"MLFu64"\n", blkno); + mlog(ML_ERROR, "Unable to create inode %llu\n", + (unsigned long long)blkno); ret = ERR_PTR(-EACCES); goto bail_unlock; } @@ -310,8 +311,8 @@ static int ocfs2_mknod(struct inode *dir, osb = OCFS2_SB(dir->i_sb); if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) { - mlog(ML_ERROR, "inode %"MLFu64" has i_nlink of %u\n", - OCFS2_I(dir)->ip_blkno, dir->i_nlink); + mlog(ML_ERROR, "inode %llu has i_nlink of %u\n", + (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink); status = -EMLINK; goto leave; } @@ -562,9 +563,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, if (ocfs2_populate_inode(inode, fe, 1) < 0) { mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, " - "i_blkno=%"MLFu64", i_ino=%lu\n", + "i_blkno=%llu, i_ino=%lu\n", (unsigned long long) (*new_fe_bh)->b_blocknr, - fe->i_blkno, inode->i_ino); + (unsigned long long)fe->i_blkno, inode->i_ino); BUG(); } @@ -765,7 +766,7 @@ static int ocfs2_unlink(struct inode *dir, BUG_ON(dentry->d_parent->d_inode != dir); - mlog(0, "ino = %"MLFu64"\n", OCFS2_I(inode)->ip_blkno); + mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); if (inode == osb->root_inode) { mlog(0, "Cannot delete the root directory\n"); @@ -799,9 +800,9 @@ static int ocfs2_unlink(struct inode *dir, if (OCFS2_I(inode)->ip_blkno != blkno) { status = -ENOENT; - mlog(0, "ip_blkno (%"MLFu64") != dirent blkno (%"MLFu64") " - "ip_flags = %x\n", OCFS2_I(inode)->ip_blkno, blkno, - OCFS2_I(inode)->ip_flags); + mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)blkno, OCFS2_I(inode)->ip_flags); goto leave; } @@ -946,8 +947,9 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, struct buffer_head **tmpbh; struct inode *tmpinode; - mlog_entry("(inode1 = %"MLFu64", inode2 = %"MLFu64")\n", - oi1->ip_blkno, oi2->ip_blkno); + mlog_entry("(inode1 = %llu, inode2 = %llu)\n", + (unsigned long long)oi1->ip_blkno, + (unsigned long long)oi2->ip_blkno); BUG_ON(!handle); @@ -1187,9 +1189,9 @@ static int ocfs2_rename(struct inode *old_dir, if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) { status = -EACCES; - mlog(0, "Inode blkno (%"MLFu64") and dir (%"MLFu64") " - "disagree. ip_flags = %x\n", - OCFS2_I(new_inode)->ip_blkno, newfe_blkno, + mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n", + (unsigned long long)OCFS2_I(new_inode)->ip_blkno, + (unsigned long long)newfe_blkno, OCFS2_I(new_inode)->ip_flags); goto bail; } @@ -1215,9 +1217,9 @@ static int ocfs2_rename(struct inode *old_dir, newfe = (struct ocfs2_dinode *) newfe_bh->b_data; - mlog(0, "aha rename over existing... 
new_de=%p " - "new_blkno=%"MLFu64" newfebh=%p bhblocknr=%llu\n", - new_de, newfe_blkno, newfe_bh, newfe_bh ? + mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu " + "newfebh=%p bhblocknr=%llu\n", new_de, + (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ? (unsigned long long)newfe_bh->b_blocknr : 0ULL); if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { @@ -1354,8 +1356,8 @@ static int ocfs2_rename(struct inode *old_dir, if (new_dir_nlink != new_dir->i_nlink) { if (!new_dir_bh) { mlog(ML_ERROR, "need to change nlink for new " - "dir %"MLFu64" from %d to %d but bh is " - "NULL\n", OCFS2_I(new_dir)->ip_blkno, + "dir %llu from %d to %d but bh is NULL\n", + (unsigned long long)OCFS2_I(new_dir)->ip_blkno, (int)new_dir_nlink, new_dir->i_nlink); } else { struct ocfs2_dinode *fe; @@ -1372,10 +1374,9 @@ static int ocfs2_rename(struct inode *old_dir, if (old_dir_nlink != old_dir->i_nlink) { if (!old_dir_bh) { mlog(ML_ERROR, "need to change nlink for old dir " - "%"MLFu64" from %d to %d but bh is NULL!\n", - OCFS2_I(old_dir)->ip_blkno, - (int)old_dir_nlink, - old_dir->i_nlink); + "%llu from %d to %d but bh is NULL!\n", + (unsigned long long)OCFS2_I(old_dir)->ip_blkno, + (int)old_dir_nlink, old_dir->i_nlink); } else { struct ocfs2_dinode *fe; status = ocfs2_journal_access(handle, old_dir, @@ -1634,9 +1635,9 @@ static int ocfs2_symlink(struct inode *dir, NULL); if (status < 0) { if (status != -ENOSPC && status != -EINTR) { - mlog(ML_ERROR, "Failed to extend file to " - "%"MLFu64"\n", - newsize); + mlog(ML_ERROR, + "Failed to extend file to %llu\n", + (unsigned long long)newsize); mlog_errno(status); status = -ENOSPC; } @@ -1716,10 +1717,11 @@ int ocfs2_check_dir_entry(struct inode * dir, error_msg = "directory entry across blocks"; if (error_msg != NULL) - mlog(ML_ERROR, "bad entry in directory #%"MLFu64": %s - " - "offset=%lu, inode=%"MLFu64", rec_len=%d, name_len=%d\n", - OCFS2_I(dir)->ip_blkno, error_msg, offset, - le64_to_cpu(de->inode), rlen, de->name_len); + mlog(ML_ERROR, "bad entry in directory #%llu: %s - " + "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n", + (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg, + offset, (unsigned long long)le64_to_cpu(de->inode), rlen, + de->name_len); return error_msg == NULL ? 
1 : 0; } @@ -2021,8 +2023,8 @@ static int ocfs2_blkno_stringify(u64 blkno, char *name) mlog_entry_void(); - namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016"MLFx64, - blkno); + namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx", + (long long)blkno); if (namelen <= 0) { if (namelen) status = namelen; @@ -2167,8 +2169,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, OCFS2_I(inode)->ip_orphaned_slot = osb->slot_num; spin_unlock(&OCFS2_I(inode)->ip_lock); - mlog(0, "Inode %"MLFu64" orphaned in slot %d\n", - OCFS2_I(inode)->ip_blkno, osb->slot_num); + mlog(0, "Inode %llu orphaned in slot %d\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); leave: if (orphan_dir_inode) @@ -2202,8 +2204,9 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, goto leave; } - mlog(0, "removing '%s' from orphan dir %"MLFu64" (namelen=%d)\n", - name, OCFS2_I(orphan_dir_inode)->ip_blkno, OCFS2_ORPHAN_NAMELEN); + mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n", + name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, + OCFS2_ORPHAN_NAMELEN); /* find it's spot in the orphan directory */ target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index e89de9b6e491..da1093039c01 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -357,8 +357,8 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) #define OCFS2_RO_ON_INVALID_DINODE(__sb, __di) do { \ typeof(__di) ____di = (__di); \ ocfs2_error((__sb), \ - "Dinode # %"MLFu64" has bad signature %.*s", \ - (____di)->i_blkno, 7, \ + "Dinode # %llu has bad signature %.*s", \ + (unsigned long long)(____di)->i_blkno, 7, \ (____di)->i_signature); \ } while (0); @@ -368,8 +368,8 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) #define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb) do { \ typeof(__eb) ____eb = (__eb); \ ocfs2_error((__sb), \ - "Extent Block # %"MLFu64" has bad signature %.*s", \ - (____eb)->h_blkno, 7, \ + "Extent Block # %llu has bad signature %.*s", \ + (unsigned long long)(____eb)->h_blkno, 7, \ (____eb)->h_signature); \ } while (0); @@ -379,8 +379,8 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) #define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd) do { \ typeof(__gd) ____gd = (__gd); \ ocfs2_error((__sb), \ - "Group Descriptor # %"MLFu64" has bad signature %.*s", \ - (____gd)->bg_blkno, 7, \ + "Group Descriptor # %llu has bad signature %.*s", \ + (unsigned long long)(____gd)->bg_blkno, 7, \ (____gd)->bg_signature); \ } while (0); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c46c164aefbb..195523090c87 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -157,8 +157,9 @@ static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, mlog_entry_void(); if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { - ocfs2_error(alloc_inode->i_sb, "group block (%"MLFu64") " - "!= b_blocknr (%llu)", group_blkno, + ocfs2_error(alloc_inode->i_sb, "group block (%llu) != " + "b_blocknr (%llu)", + (unsigned long long)group_blkno, (unsigned long long) bg_bh->b_blocknr); status = -EIO; goto bail; @@ -280,8 +281,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, /* setup the group */ bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); - mlog(0, "new descriptor, record %u, at block %"MLFu64"\n", - alloc_rec, bg_blkno); + mlog(0, "new descriptor, record %u, at block %llu\n", + alloc_rec, (unsigned long long)bg_blkno); bg_bh = sb_getblk(osb->sb, bg_blkno); if 
(!bg_bh) { @@ -382,8 +383,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, goto bail; } if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { - ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator " - "# %"MLFu64, le64_to_cpu(fe->i_blkno)); + ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", + (unsigned long long)le64_to_cpu(fe->i_blkno)); status = -EIO; goto bail; } @@ -829,9 +830,10 @@ static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, goto out; } - mlog(0, "In suballoc %"MLFu64", chain %u, move group %"MLFu64" to " - "top, prev = %"MLFu64"\n", - fe->i_blkno, chain, bg->bg_blkno, prev_bg->bg_blkno); + mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", + (unsigned long long)fe->i_blkno, chain, + (unsigned long long)bg->bg_blkno, + (unsigned long long)prev_bg->bg_blkno); fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); bg_ptr = le64_to_cpu(bg->bg_next_group); @@ -974,8 +976,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, struct ocfs2_group_desc *bg; chain = ac->ac_chain; - mlog(0, "trying to alloc %u bits from chain %u, inode %"MLFu64"\n", - bits_wanted, chain, OCFS2_I(alloc_inode)->ip_blkno); + mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n", + bits_wanted, chain, + (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), le64_to_cpu(cl->cl_recs[chain].c_blkno), @@ -1027,8 +1030,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } - mlog(0, "alloc succeeds: we give %u bits from block group %"MLFu64"\n", - tmp_bits, bg->bg_blkno); + mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", + tmp_bits, (unsigned long long)bg->bg_blkno); *num_bits = tmp_bits; @@ -1092,8 +1095,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } - mlog(0, "Allocated %u bits from suballocator %"MLFu64"\n", - *num_bits, fe->i_blkno); + mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, + (unsigned long long)fe->i_blkno); *bg_blkno = le64_to_cpu(bg->bg_blkno); bail: @@ -1134,9 +1137,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, } if (le32_to_cpu(fe->id1.bitmap1.i_used) >= le32_to_cpu(fe->id1.bitmap1.i_total)) { - ocfs2_error(osb->sb, "Chain allocator dinode %"MLFu64" has %u" - "used bits but only %u total.", - le64_to_cpu(fe->i_blkno), + ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " + "bits but only %u total.", + (unsigned long long)le64_to_cpu(fe->i_blkno), le32_to_cpu(fe->id1.bitmap1.i_used), le32_to_cpu(fe->id1.bitmap1.i_total)); status = -EIO; @@ -1479,10 +1482,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, } BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); - mlog(0, "suballocator %"MLFu64": freeing %u bits from group %"MLFu64 - ", starting at %u\n", - OCFS2_I(alloc_inode)->ip_blkno, count, bg_blkno, - start_bit); + mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", + (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, + (unsigned long long)bg_blkno, start_bit); status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, alloc_inode); @@ -1592,10 +1594,10 @@ int ocfs2_free_clusters(struct ocfs2_journal_handle *handle, ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, &bg_start_bit); - mlog(0, "want to free %u clusters starting at block %"MLFu64"\n", - num_clusters, start_blk); - mlog(0, "bg_blkno = %"MLFu64", bg_start_bit = %u\n", - 
bg_blkno, bg_start_bit); + mlog(0, "want to free %u clusters starting at block %llu\n", + num_clusters, (unsigned long long)start_blk); + mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", + (unsigned long long)bg_blkno, bg_start_bit); status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, bg_start_bit, bg_blkno, @@ -1616,18 +1618,22 @@ static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count); printk("bg_chain: %u\n", bg->bg_chain); printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation)); - printk("bg_next_group: %"MLFu64"\n", bg->bg_next_group); - printk("bg_parent_dinode: %"MLFu64"\n", bg->bg_parent_dinode); - printk("bg_blkno: %"MLFu64"\n", bg->bg_blkno); + printk("bg_next_group: %llu\n", + (unsigned long long)bg->bg_next_group); + printk("bg_parent_dinode: %llu\n", + (unsigned long long)bg->bg_parent_dinode); + printk("bg_blkno: %llu\n", + (unsigned long long)bg->bg_blkno); } static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) { int i; - printk("Suballoc Inode %"MLFu64":\n", fe->i_blkno); + printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno); printk("i_signature: %s\n", fe->i_signature); - printk("i_size: %"MLFu64"\n", fe->i_size); + printk("i_size: %llu\n", + (unsigned long long)fe->i_size); printk("i_clusters: %u\n", fe->i_clusters); printk("i_generation: %u\n", le32_to_cpu(fe->i_generation)); @@ -1645,7 +1651,7 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) fe->id2.i_chain.cl_recs[i].c_free); printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i, fe->id2.i_chain.cl_recs[i].c_total); - printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %"MLFu64"\n", i, - fe->id2.i_chain.cl_recs[i].c_blkno); + printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i, + (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); } } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 44d8b5248236..949b3dac30f1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1428,8 +1428,9 @@ static int ocfs2_initialize_super(struct super_block *sb, osb->fs_generation = le32_to_cpu(di->i_fs_generation); mlog(0, "vol_label: %s\n", osb->vol_label); mlog(0, "uuid: %s\n", osb->uuid_str); - mlog(0, "root_blkno=%"MLFu64", system_dir_blkno=%"MLFu64"\n", - osb->root_blkno, osb->system_dir_blkno); + mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", + (unsigned long long)osb->root_blkno, + (unsigned long long)osb->system_dir_blkno); osb->osb_dlm_debug = ocfs2_new_dlm_debug(); if (!osb->osb_dlm_debug) { @@ -1472,8 +1473,8 @@ static int ocfs2_initialize_super(struct super_block *sb, osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total); brelse(bitmap_bh); - mlog(0, "cluster bitmap inode: %"MLFu64", clusters per group: %u\n", - osb->bitmap_blkno, osb->bitmap_cpg); + mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", + (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); status = ocfs2_init_slot_info(osb); if (status < 0) { @@ -1531,8 +1532,9 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, OCFS2_MINOR_REV_LEVEL); } else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) { mlog(ML_ERROR, "bad block number on superblock: " - "found %"MLFu64", should be %llu\n", - di->i_blkno, (unsigned long long)bh->b_blocknr); + "found %llu, should be %llu\n", + (unsigned long long)di->i_blkno, + (unsigned long long)bh->b_blocknr); } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 || 
le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) { mlog(ML_ERROR, "bad cluster size found: %u\n", diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 300b5bedfb21..04a684dfdd96 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -119,8 +119,8 @@ void ocfs2_metadata_cache_purge(struct inode *inode) tree = !(oi->ip_flags & OCFS2_INODE_CACHE_INLINE); to_purge = ci->ci_num_cached; - mlog(0, "Purge %u %s items from Inode %"MLFu64"\n", to_purge, - tree ? "array" : "tree", oi->ip_blkno); + mlog(0, "Purge %u %s items from Inode %llu\n", to_purge, + tree ? "array" : "tree", (unsigned long long)oi->ip_blkno); /* If we're a tree, save off the root so that we can safely * initialize the cache. We do the work to free tree members @@ -136,8 +136,8 @@ void ocfs2_metadata_cache_purge(struct inode *inode) * easily detect counting errors. Unfortunately, this is only * meaningful for trees. */ if (tree && purged != to_purge) - mlog(ML_ERROR, "Inode %"MLFu64", count = %u, purged = %u\n", - oi->ip_blkno, to_purge, purged); + mlog(ML_ERROR, "Inode %llu, count = %u, purged = %u\n", + (unsigned long long)oi->ip_blkno, to_purge, purged); } /* Returns the index in the cache array, -1 if not found. @@ -186,8 +186,9 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi, spin_lock(&oi->ip_lock); - mlog(0, "Inode %"MLFu64", query block %llu (inline = %u)\n", - oi->ip_blkno, (unsigned long long) bh->b_blocknr, + mlog(0, "Inode %llu, query block %llu (inline = %u)\n", + (unsigned long long)oi->ip_blkno, + (unsigned long long) bh->b_blocknr, !!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE)); if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) @@ -293,12 +294,12 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi, struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; mlog_bug_on_msg(ci->ci_num_cached != OCFS2_INODE_MAX_CACHE_ARRAY, - "Inode %"MLFu64", num cached = %u, should be %u\n", - oi->ip_blkno, ci->ci_num_cached, + "Inode %llu, num cached = %u, should be %u\n", + (unsigned long long)oi->ip_blkno, ci->ci_num_cached, OCFS2_INODE_MAX_CACHE_ARRAY); mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE), - "Inode %"MLFu64" not marked as inline anymore!\n", - oi->ip_blkno); + "Inode %llu not marked as inline anymore!\n", + (unsigned long long)oi->ip_blkno); assert_spin_locked(&oi->ip_lock); /* Be careful to initialize the tree members *first* because @@ -316,8 +317,8 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi, tree[i] = NULL; } - mlog(0, "Expanded %"MLFu64" to a tree cache: flags 0x%x, num = %u\n", - oi->ip_blkno, oi->ip_flags, ci->ci_num_cached); + mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n", + (unsigned long long)oi->ip_blkno, oi->ip_flags, ci->ci_num_cached); } /* Slow path function - memory allocation is necessary. 
See the @@ -332,8 +333,9 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, struct ocfs2_meta_cache_item *tree[OCFS2_INODE_MAX_CACHE_ARRAY] = { NULL, }; - mlog(0, "Inode %"MLFu64", block %llu, expand = %d\n", - oi->ip_blkno, (unsigned long long) block, expand_tree); + mlog(0, "Inode %llu, block %llu, expand = %d\n", + (unsigned long long)oi->ip_blkno, + (unsigned long long)block, expand_tree); new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_KERNEL); if (!new) { @@ -414,8 +416,9 @@ void ocfs2_set_buffer_uptodate(struct inode *inode, if (ocfs2_buffer_cached(oi, bh)) return; - mlog(0, "Inode %"MLFu64", inserting block %llu\n", oi->ip_blkno, - (unsigned long long) bh->b_blocknr); + mlog(0, "Inode %llu, inserting block %llu\n", + (unsigned long long)oi->ip_blkno, + (unsigned long long)bh->b_blocknr); /* No need to recheck under spinlock - insertion is guarded by * ip_io_mutex */ @@ -504,8 +507,9 @@ void ocfs2_remove_from_cache(struct inode *inode, struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; spin_lock(&oi->ip_lock); - mlog(0, "Inode %"MLFu64", remove %llu, items = %u, array = %u\n", - oi->ip_blkno, (unsigned long long) block, ci->ci_num_cached, + mlog(0, "Inode %llu, remove %llu, items = %u, array = %u\n", + (unsigned long long)oi->ip_blkno, + (unsigned long long) block, ci->ci_num_cached, oi->ip_flags & OCFS2_INODE_CACHE_INLINE); if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) { diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 021978e0576b..53049a204197 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c @@ -190,20 +190,21 @@ static int ocfs2_process_delete_request(struct inode *inode, OCFS2_INVALID_SLOT && OCFS2_I(inode)->ip_orphaned_slot != (*orphaned_slot), - "Inode %"MLFu64": This node thinks it's " + "Inode %llu: This node thinks it's " "orphaned in slot %d, messaged it's in %d\n", - OCFS2_I(inode)->ip_blkno, + (unsigned long long)OCFS2_I(inode)->ip_blkno, OCFS2_I(inode)->ip_orphaned_slot, *orphaned_slot); - mlog(0, "Setting orphaned slot for inode %"MLFu64" to %d\n", - OCFS2_I(inode)->ip_blkno, *orphaned_slot); + mlog(0, "Setting orphaned slot for inode %llu to %d\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + *orphaned_slot); OCFS2_I(inode)->ip_orphaned_slot = *orphaned_slot; } else { - mlog(0, "Sending back orphaned slot %d for inode %"MLFu64"\n", + mlog(0, "Sending back orphaned slot %d for inode %llu\n", OCFS2_I(inode)->ip_orphaned_slot, - OCFS2_I(inode)->ip_blkno); + (unsigned long long)OCFS2_I(inode)->ip_blkno); *orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot; } @@ -226,8 +227,8 @@ static int ocfs2_process_delete_request(struct inode *inode, } if (filemap_fdatawrite(inode->i_mapping)) { - mlog(ML_ERROR, "Could not sync inode %"MLFu64" for delete!\n", - OCFS2_I(inode)->ip_blkno); + mlog(ML_ERROR, "Could not sync inode %llu for delete!\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); goto done; } sync_mapping_buffers(inode->i_mapping); @@ -302,8 +303,8 @@ static void ocfs2_process_dentry_request(struct inode *inode, struct list_head *p; struct ocfs2_inode_info *oi = OCFS2_I(inode); - mlog(0, "parent %"MLFu64", namelen = %u, name = %.*s\n", parent_blkno, - namelen, namelen, name); + mlog(0, "parent %llu, namelen = %u, name = %.*s\n", + (unsigned long long)parent_blkno, namelen, namelen, name); spin_lock(&dcache_lock); @@ -370,9 +371,10 @@ static void ocfs2_process_vote(struct ocfs2_super *osb, if (request == OCFS2_VOTE_REQ_DELETE) orphaned_slot = be32_to_cpu(msg->md1.v_orphaned_slot); - mlog(0, "processing vote: request = %u, blkno = 
%"MLFu64", " + mlog(0, "processing vote: request = %u, blkno = %llu, " "generation = %u, node_num = %u, priv1 = %u\n", request, - blkno, generation, node_num, be32_to_cpu(msg->md1.v_generic1)); + (unsigned long long)blkno, generation, node_num, + be32_to_cpu(msg->md1.v_generic1)); if (!ocfs2_is_valid_vote_request(request)) { mlog(ML_ERROR, "Invalid vote request %d from node %u\n", @@ -419,11 +421,12 @@ static void ocfs2_process_vote(struct ocfs2_super *osb, * we had not found an inode in the first place. */ if (inode->i_generation != generation) { mlog(0, "generation passed %u != inode generation = %u, " - "ip_flags = %x, ip_blkno = %"MLFu64", msg %"MLFu64", " - "i_count = %u, message type = %u\n", - generation, inode->i_generation, OCFS2_I(inode)->ip_flags, - OCFS2_I(inode)->ip_blkno, blkno, - atomic_read(&inode->i_count), request); + "ip_flags = %x, ip_blkno = %llu, msg %llu, i_count = %u, " + "message type = %u\n", generation, inode->i_generation, + OCFS2_I(inode)->ip_flags, + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)blkno, atomic_read(&inode->i_count), + request); iput(inode); inode = NULL; goto respond; @@ -830,8 +833,9 @@ static void ocfs2_delete_response_cb(void *priv, orphaned_slot = be32_to_cpu(resp->r_orphaned_slot); node = be32_to_cpu(resp->r_hdr.h_node_num); - mlog(0, "node %d tells us that inode %"MLFu64" is orphaned in slot " - "%d\n", node, OCFS2_I(inode)->ip_blkno, orphaned_slot); + mlog(0, "node %d tells us that inode %llu is orphaned in slot %d\n", + node, (unsigned long long)OCFS2_I(inode)->ip_blkno, + orphaned_slot); /* The other node may not actually know which slot the inode * is orphaned in. */ @@ -845,9 +849,9 @@ static void ocfs2_delete_response_cb(void *priv, spin_lock(&OCFS2_I(inode)->ip_lock); mlog_bug_on_msg(OCFS2_I(inode)->ip_orphaned_slot != orphaned_slot && OCFS2_I(inode)->ip_orphaned_slot - != OCFS2_INVALID_SLOT, "Inode %"MLFu64": Node %d " - "says it's orphaned in slot %d, we think it's in %d\n", - OCFS2_I(inode)->ip_blkno, + != OCFS2_INVALID_SLOT, "Inode %llu: Node %d says it's " + "orphaned in slot %d, we think it's in %d\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, be32_to_cpu(resp->r_hdr.h_node_num), orphaned_slot, OCFS2_I(inode)->ip_orphaned_slot); @@ -869,8 +873,8 @@ int ocfs2_request_delete_vote(struct inode *inode) delete_cb.rc_cb = ocfs2_delete_response_cb; delete_cb.rc_priv = inode; - mlog(0, "Inode %"MLFu64", we start thinking orphaned slot is %d\n", - OCFS2_I(inode)->ip_blkno, orphaned_slot); + mlog(0, "Inode %llu, we start thinking orphaned slot is %d\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, orphaned_slot); status = -ENOMEM; request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno, @@ -895,8 +899,8 @@ static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request, * d_delete against it. Parent directory block and full name * should suffice. 
*/ - mlog(0, "unlink/rename request: parent: %"MLFu64" name: %.*s\n", - OCFS2_I(parent)->ip_blkno, dentry->d_name.len, + mlog(0, "unlink/rename request: parent: %llu name: %.*s\n", + (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len, dentry->d_name.name); request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno); @@ -1082,7 +1086,8 @@ static int ocfs2_handle_response_message(struct o2net_msg *msg, mlog(0, "received response message:\n"); mlog(0, "h_response_id = %u\n", response_id); mlog(0, "h_request = %u\n", be32_to_cpu(resp->r_hdr.h_request)); - mlog(0, "h_blkno = %"MLFu64"\n", be64_to_cpu(resp->r_hdr.h_blkno)); + mlog(0, "h_blkno = %llu\n", + (unsigned long long)be64_to_cpu(resp->r_hdr.h_blkno)); mlog(0, "h_generation = %u\n", be32_to_cpu(resp->r_hdr.h_generation)); mlog(0, "h_node_num = %u\n", node_num); mlog(0, "r_response = %d\n", response_status); @@ -1138,8 +1143,8 @@ static int ocfs2_handle_vote_message(struct o2net_msg *msg, mlog(0, "h_response_id = %u\n", be32_to_cpu(work->w_msg.v_hdr.h_response_id)); mlog(0, "h_request = %u\n", be32_to_cpu(work->w_msg.v_hdr.h_request)); - mlog(0, "h_blkno = %"MLFu64"\n", - be64_to_cpu(work->w_msg.v_hdr.h_blkno)); + mlog(0, "h_blkno = %llu\n", + (unsigned long long)be64_to_cpu(work->w_msg.v_hdr.h_blkno)); mlog(0, "h_generation = %u\n", be32_to_cpu(work->w_msg.v_hdr.h_generation)); mlog(0, "h_node_num = %u\n", -- cgit v1.2.3 From ea8aa68d36026c4c3d938170b06957aeac9682f4 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Mon, 6 Mar 2006 12:33:14 -0800 Subject: ocfs2: finally remove MLF* macros Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/masklog.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 2cadc3009c83..73edad782537 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -256,16 +256,6 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; } \ } while (0) -#if (BITS_PER_LONG == 32) || defined(CONFIG_X86_64) || (defined(CONFIG_UML_X86) && defined(CONFIG_64BIT)) -#define MLFi64 "lld" -#define MLFu64 "llu" -#define MLFx64 "llx" -#else -#define MLFi64 "ld" -#define MLFu64 "lu" -#define MLFx64 "lx" -#endif - #include #include int mlog_sys_init(struct subsystem *o2cb_subsys); -- cgit v1.2.3 From 214fda1f6e1b8ef2a5292b0372744037fc80d318 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 25 Mar 2006 03:06:36 -0800 Subject: [PATCH] Optimise d_find_alias() The attached patch optimises d_find_alias() to only take the spinlock if there's anything in the the inode's alias list. If there isn't, it returns NULL immediately. With respect to the superblock sharing patch, this should reduce by one the number of times the dcache_lock is taken by nfs_lookup() for ordinary directory lookups. Only in the case where there's already a dentry for particular directory inode (such as might happen when another mountpoint is rooted at that dentry) will the lock then be taken the extra time. 
Signed-off-by: David Howells Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 653f64ce98e2..139e5fd22fa6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -325,10 +325,13 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon) struct dentry * d_find_alias(struct inode *inode) { - struct dentry *de; - spin_lock(&dcache_lock); - de = __d_find_alias(inode, 0); - spin_unlock(&dcache_lock); + struct dentry *de = NULL; + + if (!list_empty(&inode->i_dentry)) { + spin_lock(&dcache_lock); + de = __d_find_alias(inode, 0); + spin_unlock(&dcache_lock); + } return de; } -- cgit v1.2.3 From 871751e25d956ad24f129ca972b7851feaa61d53 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 25 Mar 2006 03:06:39 -0800 Subject: [PATCH] slab: implement /proc/slab_allocators Implement /proc/slab_allocators. It produces output like: idr_layer_cache: 80 idr_pre_get+0x33/0x4e buffer_head: 2555 alloc_buffer_head+0x20/0x75 mm_struct: 9 mm_alloc+0x1e/0x42 mm_struct: 20 dup_mm+0x36/0x370 vm_area_struct: 384 dup_mm+0x18f/0x370 vm_area_struct: 151 do_mmap_pgoff+0x2e0/0x7c3 vm_area_struct: 1 split_vma+0x5a/0x10e vm_area_struct: 11 do_brk+0x206/0x2e2 vm_area_struct: 2 copy_vma+0xda/0x142 vm_area_struct: 9 setup_arg_pages+0x99/0x214 fs_cache: 8 copy_fs_struct+0x21/0x133 fs_cache: 29 copy_process+0xf38/0x10e3 files_cache: 30 alloc_files+0x1b/0xcf signal_cache: 81 copy_process+0xbaa/0x10e3 sighand_cache: 77 copy_process+0xe65/0x10e3 sighand_cache: 1 de_thread+0x4d/0x5f8 anon_vma: 241 anon_vma_prepare+0xd9/0xf3 size-2048: 1 add_sect_attrs+0x5f/0x145 size-2048: 2 journal_init_revoke+0x99/0x302 size-2048: 2 journal_init_revoke+0x137/0x302 size-2048: 2 journal_init_inode+0xf9/0x1c4 Cc: Manfred Spraul Cc: Alexander Nyberg Cc: Pekka Enberg Cc: Christoph Lameter Cc: Ravikiran Thirumalai Signed-off-by: Al Viro DESC slab-leaks3-locking-fix EDESC From: Andrew Morton Update for slab-remove-cachep-spinlock.patch Cc: Al Viro Cc: Manfred Spraul Cc: Alexander Nyberg Cc: Pekka Enberg Cc: Christoph Lameter Cc: Ravikiran Thirumalai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 37 +++++++++++ include/linux/slab.h | 6 +- lib/Kconfig.debug | 4 ++ mm/slab.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++++-- mm/util.c | 4 +- net/core/skbuff.c | 2 +- 6 files changed, 222 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 826c131994c3..1e9ea37d457e 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -485,6 +485,40 @@ static struct file_operations proc_slabinfo_operations = { .llseek = seq_lseek, .release = seq_release, }; + +#ifdef CONFIG_DEBUG_SLAB_LEAK +extern struct seq_operations slabstats_op; +static int slabstats_open(struct inode *inode, struct file *file) +{ + unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL); + int ret = -ENOMEM; + if (n) { + ret = seq_open(file, &slabstats_op); + if (!ret) { + struct seq_file *m = file->private_data; + *n = PAGE_SIZE / (2 * sizeof(unsigned long)); + m->private = n; + n = NULL; + } + kfree(n); + } + return ret; +} + +static int slabstats_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + kfree(m->private); + return seq_release(inode, file); +} + +static struct file_operations proc_slabstats_operations = { + .open = slabstats_open, + .read = seq_read, + .llseek = 
seq_lseek, + .release = slabstats_release, +}; +#endif #endif static int show_stat(struct seq_file *p, void *v) @@ -744,6 +778,9 @@ void __init proc_misc_init(void) create_seq_entry("interrupts", 0, &proc_interrupts_operations); #ifdef CONFIG_SLAB create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); +#ifdef CONFIG_DEBUG_SLAB_LEAK + create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); +#endif #endif create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); diff --git a/include/linux/slab.h b/include/linux/slab.h index e2ee5b268797..f88e08a5802c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -77,11 +77,12 @@ struct cache_sizes { }; extern struct cache_sizes malloc_sizes[]; -#ifndef CONFIG_DEBUG_SLAB extern void *__kmalloc(size_t, gfp_t); +#ifndef CONFIG_DEBUG_SLAB +#define ____kmalloc(size, flags) __kmalloc(size, flags) #else extern void *__kmalloc_track_caller(size_t, gfp_t, void*); -#define __kmalloc(size, flags) \ +#define ____kmalloc(size, flags) \ __kmalloc_track_caller(size, flags, __builtin_return_address(0)) #endif @@ -173,6 +174,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) #define kmem_ptr_validate(a, b) (0) #define kmem_cache_alloc_node(c, f, n) kmem_cache_alloc(c, f) #define kmalloc_node(s, f, n) kmalloc(s, f) +#define ____kmalloc kmalloc #endif /* CONFIG_SLOB */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f2618e1c2b93..1fe3f897145f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -85,6 +85,10 @@ config DEBUG_SLAB allocation as well as poisoning memory on free to catch use of freed memory. This can make kmalloc/kfree-intensive workloads much slower. +config DEBUG_SLAB_LEAK + bool "Memory leak debugging" + depends on DEBUG_SLAB + config DEBUG_PREEMPT bool "Debug preemptible kernel" depends on DEBUG_KERNEL && PREEMPT diff --git a/mm/slab.c b/mm/slab.c index 26138c9f8f00..a5047161084e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -204,7 +204,8 @@ typedef unsigned int kmem_bufctl_t; #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) -#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2) +#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) +#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) /* Max number of objs-per-slab for caches which use off-slab slabs. * Needed to avoid a possible looping condition in cache_grow(). 
@@ -2399,7 +2400,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, /* Verify that the slab belongs to the intended node */ WARN_ON(slabp->nodeid != nodeid); - if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { + if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { printk(KERN_ERR "slab: double free detected in cache " "'%s', objp %p\n", cachep->name, objp); BUG(); @@ -2605,6 +2606,9 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, */ cachep->dtor(objp + obj_offset(cachep), cachep, 0); } +#ifdef CONFIG_DEBUG_SLAB_LEAK + slab_bufctl(slabp)[objnr] = BUFCTL_FREE; +#endif if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { @@ -2788,6 +2792,16 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, *dbg_redzone1(cachep, objp) = RED_ACTIVE; *dbg_redzone2(cachep, objp) = RED_ACTIVE; } +#ifdef CONFIG_DEBUG_SLAB_LEAK + { + struct slab *slabp; + unsigned objnr; + + slabp = page_get_slab(virt_to_page(objp)); + objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; + slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; + } +#endif objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) { unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; @@ -3220,22 +3234,23 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, return __cache_alloc(cachep, flags, caller); } -#ifndef CONFIG_DEBUG_SLAB void *__kmalloc(size_t size, gfp_t flags) { +#ifndef CONFIG_DEBUG_SLAB return __do_kmalloc(size, flags, NULL); +#else + return __do_kmalloc(size, flags, __builtin_return_address(0)); +#endif } EXPORT_SYMBOL(__kmalloc); -#else - +#ifdef CONFIG_DEBUG_SLAB void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) { return __do_kmalloc(size, flags, caller); } EXPORT_SYMBOL(__kmalloc_track_caller); - #endif #ifdef CONFIG_SMP @@ -3899,6 +3914,159 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, res = count; return res; } + +#ifdef CONFIG_DEBUG_SLAB_LEAK + +static void *leaks_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + struct list_head *p; + + mutex_lock(&cache_chain_mutex); + p = cache_chain.next; + while (n--) { + p = p->next; + if (p == &cache_chain) + return NULL; + } + return list_entry(p, struct kmem_cache, next); +} + +static inline int add_caller(unsigned long *n, unsigned long v) +{ + unsigned long *p; + int l; + if (!v) + return 1; + l = n[1]; + p = n + 2; + while (l) { + int i = l/2; + unsigned long *q = p + 2 * i; + if (*q == v) { + q[1]++; + return 1; + } + if (*q > v) { + l = i; + } else { + p = q + 2; + l -= i + 1; + } + } + if (++n[1] == n[0]) + return 0; + memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n)); + p[0] = v; + p[1] = 1; + return 1; +} + +static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) +{ + void *p; + int i; + if (n[0] == n[1]) + return; + for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) { + if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) + continue; + if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) + return; + } +} + +static void show_symbol(struct seq_file *m, unsigned long address) +{ +#ifdef CONFIG_KALLSYMS + char *modname; + const char *name; + unsigned long offset, size; + char namebuf[KSYM_NAME_LEN+1]; + + name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); + + if (name) { + seq_printf(m, "%s+%#lx/%#lx", name, offset, size); + if (modname) + seq_printf(m, " [%s]", modname); + 
return; + } +#endif + seq_printf(m, "%p", (void *)address); +} + +static int leaks_show(struct seq_file *m, void *p) +{ + struct kmem_cache *cachep = p; + struct list_head *q; + struct slab *slabp; + struct kmem_list3 *l3; + const char *name; + unsigned long *n = m->private; + int node; + int i; + + if (!(cachep->flags & SLAB_STORE_USER)) + return 0; + if (!(cachep->flags & SLAB_RED_ZONE)) + return 0; + + /* OK, we can do it */ + + n[1] = 0; + + for_each_online_node(node) { + l3 = cachep->nodelists[node]; + if (!l3) + continue; + + check_irq_on(); + spin_lock_irq(&l3->list_lock); + + list_for_each(q, &l3->slabs_full) { + slabp = list_entry(q, struct slab, list); + handle_slab(n, cachep, slabp); + } + list_for_each(q, &l3->slabs_partial) { + slabp = list_entry(q, struct slab, list); + handle_slab(n, cachep, slabp); + } + spin_unlock_irq(&l3->list_lock); + } + name = cachep->name; + if (n[0] == n[1]) { + /* Increase the buffer size */ + mutex_unlock(&cache_chain_mutex); + m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); + if (!m->private) { + /* Too bad, we are really out */ + m->private = n; + mutex_lock(&cache_chain_mutex); + return -ENOMEM; + } + *(unsigned long *)m->private = n[0] * 2; + kfree(n); + mutex_lock(&cache_chain_mutex); + /* Now make sure this entry will be retried */ + m->count = m->size; + return 0; + } + for (i = 0; i < n[1]; i++) { + seq_printf(m, "%s: %lu ", name, n[2*i+3]); + show_symbol(m, n[2*i+2]); + seq_putc(m, '\n'); + } + return 0; +} + +struct seq_operations slabstats_op = { + .start = leaks_start, + .next = s_next, + .stop = s_stop, + .show = leaks_show, +}; +#endif #endif /** diff --git a/mm/util.c b/mm/util.c index 49e29f751b50..b68d3d7d0359 100644 --- a/mm/util.c +++ b/mm/util.c @@ -11,7 +11,7 @@ */ void *kzalloc(size_t size, gfp_t flags) { - void *ret = kmalloc(size, flags); + void *ret = ____kmalloc(size, flags); if (ret) memset(ret, 0, size); return ret; @@ -33,7 +33,7 @@ char *kstrdup(const char *s, gfp_t gfp) return NULL; len = strlen(s) + 1; - buf = kmalloc(len, gfp); + buf = ____kmalloc(len, gfp); if (buf) memcpy(buf, s, len); return buf; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c9f878454531..09464fa8d72f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -149,7 +149,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, /* Get the DATA. Size must match skb_add_mtu(). */ size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); if (!data) goto nodata; -- cgit v1.2.3 From e3df18983ea090a2e00dd5c2c6167bb431a0e0a2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Mar 2006 03:06:53 -0800 Subject: [PATCH] jbd: embed j_commit_timer in journal struct The kjournald timer is currently on the kernel thread's stack and the journal structure points at it. Save a pointer hop by moving the timer into the journal structure. 
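As a quick illustration of what the embedding buys (not part of the patch itself), the plain user-space model below contrasts the old pointer field with the new embedded member and mimics setup_timer(), which the conversion uses in place of the open-coded init_timer()/function/data assignments. Every type here is a stand-in, and the real setup_timer() also runs init_timer() on the timer.

/* embedded-timer sketch: plain C, no kernel headers required */
#include <stdio.h>

struct timer_list {
	unsigned long expires;
	void (*function)(unsigned long);
	unsigned long data;
};

/* models setup_timer(): wire up callback and argument in one call */
static void setup_timer(struct timer_list *t,
			void (*fn)(unsigned long), unsigned long data)
{
	t->function = fn;
	t->data = data;
}

struct journal_s {
	/* was: struct timer_list *j_commit_timer, pointing into the
	 * kjournald thread's stack frame */
	struct timer_list j_commit_timer;	/* now owned by the journal */
};

static void commit_timeout(unsigned long data)
{
	printf("commit wakeup for task at %#lx\n", data);
}

int main(void)
{
	struct journal_s journal;

	setup_timer(&journal.j_commit_timer, commit_timeout, 0xbeef);
	journal.j_commit_timer.expires = 100;	/* jiffies in the kernel */

	/* pretend the commit interval expired */
	journal.j_commit_timer.function(journal.j_commit_timer.data);
	printf("expires was %lu\n", journal.j_commit_timer.expires);
	return 0;
}
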
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/journal.c | 19 +++++++++---------- fs/jbd/transaction.c | 4 ++-- include/linux/jbd.h | 4 +++- 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 95a628d8cac8..6bd4647dc5a1 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -111,18 +111,17 @@ static void commit_timeout(unsigned long __data) static int kjournald(void *arg) { - journal_t *journal = (journal_t *) arg; + journal_t *journal = arg; transaction_t *transaction; - struct timer_list timer; daemonize("kjournald"); - /* Set up an interval timer which can be used to trigger a - commit wakeup after the commit interval expires */ - init_timer(&timer); - timer.data = (unsigned long) current; - timer.function = commit_timeout; - journal->j_commit_timer = &timer; + /* + * Set up an interval timer which can be used to trigger a commit wakeup + * after the commit interval expires + */ + setup_timer(&journal->j_commit_timer, commit_timeout, + (unsigned long)current); /* Record that the journal thread is running */ journal->j_task = current; @@ -146,7 +145,7 @@ loop: if (journal->j_commit_sequence != journal->j_commit_request) { jbd_debug(1, "OK, requests differ\n"); spin_unlock(&journal->j_state_lock); - del_timer_sync(journal->j_commit_timer); + del_timer_sync(&journal->j_commit_timer); journal_commit_transaction(journal); spin_lock(&journal->j_state_lock); goto loop; @@ -203,7 +202,7 @@ loop: end_loop: spin_unlock(&journal->j_state_lock); - del_timer_sync(journal->j_commit_timer); + del_timer_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); jbd_debug(1, "Journal thread exiting.\n"); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 5fc40888f4cf..ada31fa272e3 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -53,8 +53,8 @@ get_transaction(journal_t *journal, transaction_t *transaction) spin_lock_init(&transaction->t_handle_lock); /* Set up the commit timer for the new transaction. 
*/ - journal->j_commit_timer->expires = transaction->t_expires; - add_timer(journal->j_commit_timer); + journal->j_commit_timer.expires = transaction->t_expires; + add_timer(&journal->j_commit_timer); J_ASSERT(journal->j_running_transaction == NULL); journal->j_running_transaction = transaction; diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 2ccbfb6340ba..4fc7dffd66ef 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -29,6 +29,8 @@ #include #include #include +#include + #include #endif @@ -787,7 +789,7 @@ struct journal_s unsigned long j_commit_interval; /* The timer used to wakeup the commit thread: */ - struct timer_list *j_commit_timer; + struct timer_list j_commit_timer; /* * The revoke table: maintains the list of revoked blocks in the -- cgit v1.2.3 From 8d8c85117fbcbaea7718870ad4b1ddd12940d9b0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Mar 2006 03:06:53 -0800 Subject: [PATCH] jbd: convert kjournald to kthread API Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/journal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 6bd4647dc5a1..7f96b5cb6781 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -33,9 +33,11 @@ #include #include #include +#include +#include + #include #include -#include EXPORT_SYMBOL(journal_start); EXPORT_SYMBOL(journal_restart); @@ -114,8 +116,6 @@ static int kjournald(void *arg) journal_t *journal = arg; transaction_t *transaction; - daemonize("kjournald"); - /* * Set up an interval timer which can be used to trigger a commit wakeup * after the commit interval expires @@ -211,7 +211,7 @@ end_loop: static void journal_start_thread(journal_t *journal) { - kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES); + kthread_run(kjournald, journal, "kjournald"); wait_event(journal->j_wait_done_commit, journal->j_task != 0); } -- cgit v1.2.3 From 4af4c52f34606bdaab6930a845550c6fb02078a4 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Sat, 25 Mar 2006 03:06:54 -0800 Subject: [PATCH] Missed error checking for intent's filp in open_namei(). It seems there is error check missing in open_namei for errors returned through intent.open.file (from lookup_instantiate_filp). If there is plain open performed, then such a check done inside __path_lookup_intent_open called from path_lookup_open(), but when the open is performed with O_CREAT flag set, then __path_lookup_intent_open is only called with LOOKUP_PARENT set where no file opening can occur yet. Later on lookup_hash is called where exact opening might take place and intent.open.file may be filled. If it is filled with error value of some sort, then we get kernel attempting to dereference this error value as address (and corresponding oops) in nameidata_to_filp() called from filp_open(). While this is relatively simple to workaround in ->lookup() method by just checking lookup_instantiate_filp() return value and returning error as needed, this is not so easy in ->d_revalidate(), where we can only return "yes, dentry is valid" or "no, dentry is invalid, perform full lookup again", and just returning 0 on error would cause extra lookup (with potential extra costly RPCs). So in short, I believe that there should be no difference in error handling for opening a file and creating a file in open_namei() and propose this simple patch as a solution. 
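The oops described above comes from the kernel's ERR_PTR convention: a pointer-typed return can carry a negated errno in its top address range, and it must be screened with IS_ERR() before being dereferenced. The compilable user-space model below shows that convention and the shape of the check this patch adds; the struct file stub and the intent_file_after_lookup() helper are hypothetical stand-ins for illustration, not kernel code.

/* ERR_PTR/IS_ERR model: build with any C compiler */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095	/* same bound the kernel uses for IS_ERR() */

static void *ERR_PTR(long error)
{
	return (void *)error;
}

static long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct file { int f_flags; };	/* stub, stands in for the real struct file */

/* hypothetical stand-in for what lookup_instantiate_filp() may have left
 * in nd->intent.open.file: either a real file or an encoded error */
static struct file *intent_file_after_lookup(int fail)
{
	static struct file good = { .f_flags = 0 };

	return fail ? ERR_PTR(-EACCES) : &good;
}

int main(void)
{
	struct file *filp = intent_file_after_lookup(1);

	/* the check open_namei() gains: stop here rather than hand an
	 * error-encoded pointer on to nameidata_to_filp() */
	if (IS_ERR(filp)) {
		printf("open fails with %ld\n", PTR_ERR(filp));	/* -EACCES */
		return 1;
	}

	printf("opened, flags %d\n", filp->f_flags);
	return 0;
}
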
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index c72b940797fc..1baf1b06fe47 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1628,6 +1628,12 @@ do_last: goto exit; } + if (IS_ERR(nd->intent.open.file)) { + mutex_unlock(&dir->d_inode->i_mutex); + error = PTR_ERR(nd->intent.open.file); + goto exit_dput; + } + /* Negative dentry, just create the file */ if (!path.dentry->d_inode) { if (!IS_POSIXACL(dir->d_inode)) -- cgit v1.2.3 From bdfc326614b90e7bc47ee4a8fed05988555f0169 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 25 Mar 2006 03:06:56 -0800 Subject: [PATCH] fs/inode.c: make iprune_mutex static There's no reason for iprune_mutex being global. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 2 +- include/linux/fs.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index a51c671c54cf..85da11044adc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -91,7 +91,7 @@ DEFINE_SPINLOCK(inode_lock); * from its final dispose_list, the struct super_block they refer to * (for inode->i_sb->s_op) may already have been freed and reused. */ -DEFINE_MUTEX(iprune_mutex); +static DEFINE_MUTEX(iprune_mutex); /* * Statistics gathering.. diff --git a/include/linux/fs.h b/include/linux/fs.h index 215696a0f16f..7c750312261b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1558,7 +1558,6 @@ extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern int remove_suid(struct dentry *); extern void remove_dquot_ref(struct super_block *, int, struct list_head *); -extern struct mutex iprune_mutex; extern void __insert_inode_hash(struct inode *, unsigned long hashval); extern void remove_inode_hash(struct inode *); -- cgit v1.2.3 From 23f9e0f891c9b159a199629d4426f6ae0c383508 Mon Sep 17 00:00:00 2001 From: Alexander Zarochentzev Date: Sat, 25 Mar 2006 03:06:57 -0800 Subject: [PATCH] reiserfs: fix transaction overflowing This patch fixes a bug in reiserfs truncate. A transaction might overflow when truncating long highly fragmented file. The fix is to split truncation into several transactions to avoid overflowing. Signed-off-by: Vladimir V. Saveliev Cc; Charles McColgan Cc: Alexander Zarochentsev Cc: Hans Reiser Cc: Chris Mason Cc: Jeff Mahoney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/stree.c | 210 +++++++++++++++----------------------------- include/linux/reiserfs_fs.h | 5 ++ 2 files changed, 77 insertions(+), 138 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index e2d08d7bcffc..d2b25e1ba6e9 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -981,6 +981,8 @@ static inline int prepare_for_direntry_item(struct path *path, return M_CUT; } +#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) + /* If the path points to a directory or direct item, calculate mode and the size cut, for balance. If the path points to an indirect item, remove some number of its unformatted nodes. In case of file truncate calculate whether this item must be deleted/truncated or last @@ -1020,148 +1022,79 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st /* Case of an indirect item. */ { - int n_unfm_number, /* Number of the item unformatted nodes. 
*/ - n_counter, n_blk_size; - __le32 *p_n_unfm_pointer; /* Pointer to the unformatted node number. */ - __u32 tmp; - struct item_head s_ih; /* Item header. */ - char c_mode; /* Returned mode of the balance. */ - int need_research; - - n_blk_size = p_s_sb->s_blocksize; - - /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */ - do { - need_research = 0; - p_s_bh = PATH_PLAST_BUFFER(p_s_path); - /* Copy indirect item header to a temp variable. */ - copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); - /* Calculate number of unformatted nodes in this item. */ - n_unfm_number = I_UNFM_NUM(&s_ih); - - RFALSE(!is_indirect_le_ih(&s_ih) || !n_unfm_number || - pos_in_item(p_s_path) + 1 != n_unfm_number, - "PAP-5240: invalid item %h " - "n_unfm_number = %d *p_n_pos_in_item = %d", - &s_ih, n_unfm_number, pos_in_item(p_s_path)); - - /* Calculate balance mode and position in the item to remove unformatted nodes. */ - if (n_new_file_length == max_reiserfs_offset(inode)) { /* Case of delete. */ - pos_in_item(p_s_path) = 0; - *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih)); - c_mode = M_DELETE; - } else { /* Case of truncate. */ - if (n_new_file_length < le_ih_k_offset(&s_ih)) { - pos_in_item(p_s_path) = 0; - *p_n_cut_size = - -(IH_SIZE + ih_item_len(&s_ih)); - c_mode = M_DELETE; /* Delete this item. */ - } else { - /* indirect item must be truncated starting from *p_n_pos_in_item-th position */ - pos_in_item(p_s_path) = - (n_new_file_length + n_blk_size - - le_ih_k_offset(&s_ih)) >> p_s_sb-> - s_blocksize_bits; - - RFALSE(pos_in_item(p_s_path) > - n_unfm_number, - "PAP-5250: invalid position in the item"); - - /* Either convert last unformatted node of indirect item to direct item or increase - its free space. */ - if (pos_in_item(p_s_path) == - n_unfm_number) { - *p_n_cut_size = 0; /* Nothing to cut. */ - return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */ - } - /* Calculate size to cut. */ - *p_n_cut_size = - -(ih_item_len(&s_ih) - - pos_in_item(p_s_path) * - UNFM_P_SIZE); - - c_mode = M_CUT; /* Cut from this indirect item. */ - } - } + int blk_size = p_s_sb->s_blocksize; + struct item_head s_ih; + int need_re_search; + int delete = 0; + int result = M_CUT; + int pos = 0; + + if ( n_new_file_length == max_reiserfs_offset (inode) ) { + /* prepare_for_delete_or_cut() is called by + * reiserfs_delete_item() */ + n_new_file_length = 0; + delete = 1; + } + + do { + need_re_search = 0; + *p_n_cut_size = 0; + p_s_bh = PATH_PLAST_BUFFER(p_s_path); + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + pos = I_UNFM_NUM(&s_ih); - RFALSE(n_unfm_number <= pos_in_item(p_s_path), - "PAP-5260: invalid position in the indirect item"); - - /* pointers to be cut */ - n_unfm_number -= pos_in_item(p_s_path); - /* Set pointer to the last unformatted node pointer that is to be cut. */ - p_n_unfm_pointer = - (__le32 *) B_I_PITEM(p_s_bh, - &s_ih) + I_UNFM_NUM(&s_ih) - - 1 - *p_n_removed; - - /* We go through the unformatted nodes pointers of the indirect - item and look for the unformatted nodes in the cache. If we - found some of them we free it, zero corresponding indirect item - entry and log buffer containing that indirect item. For this we - need to prepare last path element for logging. If some - unformatted node has b_count > 1 we must not free this - unformatted node since it is in use. 
*/ - reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1); - // note: path could be changed, first line in for loop takes care - // of it + while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) { + __u32 *unfm, block; - for (n_counter = *p_n_removed; - n_counter < n_unfm_number; - n_counter++, p_n_unfm_pointer--) { + /* Each unformatted block deletion may involve one additional + * bitmap block into the transaction, thereby the initial + * journal space reservation might not be enough. */ + if (!delete && (*p_n_cut_size) != 0 && + reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { + break; + } - cond_resched(); - if (item_moved(&s_ih, p_s_path)) { - need_research = 1; - break; - } - RFALSE(p_n_unfm_pointer < - (__le32 *) B_I_PITEM(p_s_bh, &s_ih) - || p_n_unfm_pointer > - (__le32 *) B_I_PITEM(p_s_bh, - &s_ih) + - I_UNFM_NUM(&s_ih) - 1, - "vs-5265: pointer out of range"); - - /* Hole, nothing to remove. */ - if (!get_block_num(p_n_unfm_pointer, 0)) { - (*p_n_removed)++; - continue; - } + unfm = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1; + block = get_block_num(unfm, 0); - (*p_n_removed)++; + if (block != 0) { + reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1); + put_block_num(unfm, 0, 0); + journal_mark_dirty (th, p_s_sb, p_s_bh); + reiserfs_free_block(th, inode, block, 1); + } - tmp = get_block_num(p_n_unfm_pointer, 0); - put_block_num(p_n_unfm_pointer, 0, 0); - journal_mark_dirty(th, p_s_sb, p_s_bh); - reiserfs_free_block(th, inode, tmp, 1); - if (item_moved(&s_ih, p_s_path)) { - need_research = 1; - break; - } - } + cond_resched(); - /* a trick. If the buffer has been logged, this - ** will do nothing. If we've broken the loop without - ** logging it, it will restore the buffer - ** - */ - reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); - - /* This loop can be optimized. */ - } while ((*p_n_removed < n_unfm_number || need_research) && - search_for_position_by_key(p_s_sb, p_s_item_key, - p_s_path) == - POSITION_FOUND); - - RFALSE(*p_n_removed < n_unfm_number, - "PAP-5310: indirect item is not found"); - RFALSE(item_moved(&s_ih, p_s_path), - "after while, comp failed, retry"); - - if (c_mode == M_CUT) - pos_in_item(p_s_path) *= UNFM_P_SIZE; - return c_mode; + if (item_moved (&s_ih, p_s_path)) { + need_re_search = 1; + break; + } + + pos --; + (*p_n_removed) ++; + (*p_n_cut_size) -= UNFM_P_SIZE; + + if (pos == 0) { + (*p_n_cut_size) -= IH_SIZE; + result = M_DELETE; + break; + } + } + /* a trick. If the buffer has been logged, this will do nothing. If + ** we've broken the loop without logging it, it will restore the + ** buffer */ + reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); + } while (need_re_search && + search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND); + pos_in_item(p_s_path) = pos * UNFM_P_SIZE; + + if (*p_n_cut_size == 0) { + /* Nothing were cut. maybe convert last unformatted node to the + * direct item? 
*/ + result = M_CONVERT; + } + return result; } } @@ -1948,7 +1881,8 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p ** sure the file is consistent before ending the current trans ** and starting a new one */ - if (journal_transaction_should_end(th, th->t_blocks_allocated)) { + if (journal_transaction_should_end(th, 0) || + reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { int orig_len_alloc = th->t_blocks_allocated; decrement_counters_in_path(&s_search_path); @@ -1962,7 +1896,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p if (err) goto out; err = journal_begin(th, p_s_inode->i_sb, - JOURNAL_PER_BALANCE_CNT * 6); + JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ; if (err) goto out; reiserfs_update_inode_transaction(p_s_inode); diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index dad78cecfd20..912f1b7cb18f 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1704,6 +1704,11 @@ static inline int reiserfs_transaction_running(struct super_block *s) return 0; } +static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th) +{ + return th->t_blocks_allocated - th->t_blocks_logged; +} + int reiserfs_async_progress_wait(struct super_block *s); struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct -- cgit v1.2.3 From a44c94a7b82a425b73384c104d5cb3dd3caa075e Mon Sep 17 00:00:00 2001 From: Alexander Zarochentsev Date: Sat, 25 Mar 2006 03:06:59 -0800 Subject: [PATCH] reiserfs: handle trans_id overflow Reiserfs does not handle transaction ID overflow correctly. Transaction ID == 0 causes reiserfs to crash. The patch fixes all places where the transaction ID is incremented. 
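As a small sketch of the wraparound rule the patch applies (an illustration under stated assumptions, not the journal code itself): the id is treated as a plain unsigned counter, 0 is reserved to mean "no transaction", and the restart value 10 matches the one used in the journal.c hunks below.

#include <stdio.h>

static unsigned int next_trans_id(unsigned int id)
{
        if (++id == 0)          /* counter wrapped past the maximum value */
                id = 10;        /* never hand out the reserved id 0 */
        return id;
}

int main(void)
{
        unsigned int id = 0xfffffffeU;

        for (int i = 0; i < 4; i++) {
                id = next_trans_id(id);
                printf("trans_id = %u\n", id);  /* 4294967295, 10, 11, 12 */
        }
        return 0;
}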
Signed-off-by: Alexander Zarochentsev Signed-off-by: Hans Reiser Cc: Chris Mason Cc: Jeff Mahoney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/journal.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 5a9d2722fa0a..1b73529b8099 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2227,6 +2227,9 @@ static int journal_read_transaction(struct super_block *p_s_sb, journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); journal->j_last_flush_trans_id = trans_id; journal->j_trans_id = trans_id + 1; + /* check for trans_id overflow */ + if (journal->j_trans_id == 0) + journal->j_trans_id = 10; brelse(c_bh); brelse(d_bh); kfree(log_blocks); @@ -2450,6 +2453,9 @@ static int journal_read(struct super_block *p_s_sb) journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); journal->j_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; + /* check for trans_id overflow */ + if (journal->j_trans_id == 0) + journal->j_trans_id = 10; journal->j_last_flush_trans_id = le32_to_cpu(jh->j_last_flush_trans_id); journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; @@ -3873,8 +3879,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int cur_write_start = 0; /* start index of current log write */ int old_start; int i; - int flush = flags & FLUSH_ALL; - int wait_on_commit = flags & WAIT; + int flush; + int wait_on_commit; struct reiserfs_journal_list *jl, *temp_jl; struct list_head *entry, *safe; unsigned long jindex; @@ -3884,6 +3890,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, BUG_ON(th->t_refcount > 1); BUG_ON(!th->t_trans_id); + /* protect flush_older_commits from doing mistakes if the + transaction ID counter gets overflowed. */ + if (th->t_trans_id == ~0UL) + flags |= FLUSH_ALL | COMMIT_NOW | WAIT; + flush = flags & FLUSH_ALL; + wait_on_commit = flags & WAIT; + put_fs_excl(); current->journal_info = th->t_handle_save; reiserfs_check_lock_depth(p_s_sb, "journal end"); @@ -4105,7 +4118,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, journal->j_first = NULL; journal->j_len = 0; journal->j_trans_start_time = 0; - journal->j_trans_id++; + /* check for trans_id overflow */ + if (++journal->j_trans_id == 0) + journal->j_trans_id = 10; journal->j_current_jl->j_trans_id = journal->j_trans_id; journal->j_must_wait = 0; journal->j_len_alloc = 0; -- cgit v1.2.3 From 619d5d8a2b3f800ea3a0301a58ede570684956b0 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sat, 25 Mar 2006 03:07:00 -0800 Subject: [PATCH] reiserfs: reiserfs_file_write() will lose error code when a 0-length write occurs w/ O_SYNC When an error occurs in reiserfs_file_write before any data is written, and O_SYNC is set, the return code of generic_osync_write will overwrite the error code, losing it. This patch ensures that generic_osync_inode() doesn't run under an error condition, losing the error. This duplicates the logic from generic_file_buffered_write(). 
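A minimal sketch of the error-preservation pattern being applied, with sync_step() as a hypothetical stand-in for the O_SYNC metadata/data sync call:

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

/* Hypothetical stand-in for the sync call: returns 0 on success. */
static long sync_step(void)
{
        return 0;
}

static long write_with_osync(bool write_failed, bool o_sync)
{
        long res = write_failed ? -ENOSPC : 0;  /* outcome of the buffered write */

        /*
         * The guard mirrors the patch: run the sync only when the write
         * itself succeeded, so its return value cannot overwrite an
         * earlier error.
         */
        if (res >= 0 && o_sync)
                res = sync_step();

        return res;
}

int main(void)
{
        printf("write ok,   O_SYNC: %ld\n", write_with_osync(false, true)); /* 0 */
        printf("write fail, O_SYNC: %ld\n", write_with_osync(true, true));  /* error preserved */
        return 0;
}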
Signed-off-by: Jeff Mahoney Cc: Chris Mason Cc: Jeff Mahoney Cc: Alexander Zarochentsev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index be12879bb179..044de8be39a7 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1546,10 +1546,10 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t } } - if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) - res = - generic_osync_inode(inode, file->f_mapping, - OSYNC_METADATA | OSYNC_DATA); + if (likely(res >= 0) && + (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode)))) + res = generic_osync_inode(inode, file->f_mapping, + OSYNC_METADATA | OSYNC_DATA); mutex_unlock(&inode->i_mutex); reiserfs_async_progress_wait(inode->i_sb); -- cgit v1.2.3 From b500531e6f5f234ed267bd7060ee06d144faf0ca Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Sat, 25 Mar 2006 03:07:01 -0800 Subject: [PATCH] Introduce FMODE_EXEC file flag Introduce FMODE_EXEC file flag, to indicate that file is being opened for execution. This is useful for distributed filesystems to maintain consistent behavior for returning ETXTBUSY when opening for write and execution happens on different nodes. akpm: Needed by Lustre at present. I assume their objective to to work towards being able to install Lustre on an unmodified distro kernel, which seems sane. It should have zero runtime cost. Trond and Chuck indicate that NFS4 can probably use this too, for the same thing. Steven says it's also on the GFS todo list. Signed-off-by: Oleg Drokin Cc: Trond Myklebust Cc: Chuck Lever Cc: Steven Whitehouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 4 ++-- include/linux/fs.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 0b515ac53134..d8c477a56257 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -127,7 +127,7 @@ asmlinkage long sys_uselib(const char __user * library) struct nameidata nd; int error; - error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ); + error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); if (error) goto out; @@ -477,7 +477,7 @@ struct file *open_exec(const char *name) int err; struct file *file; - err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ); + err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); file = ERR_PTR(err); if (!err) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 7c750312261b..21e8cf795c38 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -65,6 +65,11 @@ extern int dir_notify_enable; #define FMODE_PREAD 8 #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ +/* File is being opened for execution. 
Primary users of this flag are + distributed filesystems that can use it to achieve correct ETXTBUSY + behavior for cross-node execution/opening_for_writing of files */ +#define FMODE_EXEC 16 + #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 -- cgit v1.2.3 From 9a56c213929d83139fd1e12727e1037d71b519f8 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Sat, 25 Mar 2006 03:07:02 -0800 Subject: [PATCH] Add lookup_instantiate_filp usage warning I think it would be nice to put a usage warning in the header comment of lookup_instantiate_filp() to indicate that it is unsafe to use it on anything but regular files (even that is potentially unsafe, but there your ->open() is usually in your own hands anyway), so that others won't fall into the same trap I did. Signed-off-by: Oleg Drokin Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/open.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 1091dadd6c38..7d02d19bd0a2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -890,6 +890,10 @@ EXPORT_SYMBOL(filp_open); * a fully instantiated struct file to the caller. * This function is meant to be called from within a filesystem's * lookup method. + * Beware of calling it for non-regular files! Those ->open methods might block + * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo, + * leading to a deadlock, as nobody can open that fifo anymore, because + * another process to open fifo will block on locked parent when doing lookup). * Note that in case of error, nd->intent.open.file is destroyed, but the * path information remains valid. * If the open callback is set to NULL, then the standard f_op->open() -- cgit v1.2.3 From c32ccd87bfd1414b0aabfcd8dbc7539ad23bcbaa Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Mar 2006 03:07:09 -0800 Subject: [PATCH] inotify: lock avoidance with parent watch status in dentry Previous inotify work avoidance is good when inotify is completely unused, but it breaks down if even a single watch is in place anywhere in the system. Robin Holt noticed that udev is one such culprit - it slows down a 512-thread application on a 512 CPU system from 6 seconds to 22 minutes. Solve this by adding a flag in the dentry that tells inotify whether or not its parent inode has a watch on it. Event queueing to parent will skip taking locks if this flag is cleared. Setting and clearing this flag on all child dentries, versus event delivery, introduces nothing new in terms of race cases, and that was shown to be equivalent to always performing the check. The essential behaviour is that activity occurring _after_ a watch has been added and _before_ it has been removed will generate events.
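A condensed userspace sketch of the lockless fast path this flag enables; struct fake_dentry and queue_parent_event() are illustrative names only, not the kernel structures or functions:

#include <stdio.h>

#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020

struct fake_dentry {
        unsigned int d_flags;
};

static void queue_parent_event(struct fake_dentry *dentry)
{
        /* Lockless check: if no parent watch was ever set up for this
         * dentry, return immediately instead of taking any locks. */
        if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
                return;

        /* Slow path: lock, find the parent inode, deliver the event ... */
        printf("delivering event to parent watches\n");
}

int main(void)
{
        struct fake_dentry unwatched = { .d_flags = 0 };
        struct fake_dentry watched   = { .d_flags = DCACHE_INOTIFY_PARENT_WATCHED };

        queue_parent_event(&unwatched); /* no output, no locks taken */
        queue_parent_event(&watched);   /* falls into the slow path */
        return 0;
}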
Signed-off-by: Nick Piggin Cc: Robert Love Cc: John McCutchan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 8 +++++ fs/inotify.c | 87 ++++++++++++++++++++++++++++++++++++++++++------ include/linux/dcache.h | 2 ++ include/linux/fsnotify.h | 19 +++++++++++ include/linux/inotify.h | 11 ++++++ 5 files changed, 117 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 139e5fd22fa6..0f7ec12d65ff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -802,6 +802,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode) if (inode) list_add(&entry->d_alias, &inode->i_dentry); entry->d_inode = inode; + fsnotify_d_instantiate(entry, inode); spin_unlock(&dcache_lock); security_d_instantiate(entry, inode); } @@ -853,6 +854,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) list_add(&entry->d_alias, &inode->i_dentry); do_negative: entry->d_inode = inode; + fsnotify_d_instantiate(entry, inode); spin_unlock(&dcache_lock); security_d_instantiate(entry, inode); return NULL; @@ -983,6 +985,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); + fsnotify_d_instantiate(new, inode); spin_unlock(&dcache_lock); security_d_instantiate(new, inode); d_rehash(dentry); @@ -992,6 +995,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) /* d_instantiate takes dcache_lock, so we do it by hand */ list_add(&dentry->d_alias, &inode->i_dentry); dentry->d_inode = inode; + fsnotify_d_instantiate(dentry, inode); spin_unlock(&dcache_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); @@ -1176,6 +1180,9 @@ void d_delete(struct dentry * dentry) spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); if (atomic_read(&dentry->d_count) == 1) { + /* remove this and other inotify debug checks after 2.6.18 */ + dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; + dentry_iput(dentry); fsnotify_nameremove(dentry, isdir); return; @@ -1342,6 +1349,7 @@ already_unhashed: list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); spin_unlock(&target->d_lock); + fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); diff --git a/fs/inotify.c b/fs/inotify.c index 0ee39ef591c6..a61e93e17853 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -38,7 +38,6 @@ #include static atomic_t inotify_cookie; -static atomic_t inotify_watches; static kmem_cache_t *watch_cachep; static kmem_cache_t *event_cachep; @@ -380,6 +379,48 @@ static int find_inode(const char __user *dirname, struct nameidata *nd, return error; } +/* + * inotify_inode_watched - returns nonzero if there are watches on this inode + * and zero otherwise. We call this lockless, we do not care if we race. + */ +static inline int inotify_inode_watched(struct inode *inode) +{ + return !list_empty(&inode->inotify_watches); +} + +/* + * Get child dentry flag into synch with parent inode. + * Flag should always be clear for negative dentrys. 
+ */ +static void set_dentry_child_flags(struct inode *inode, int watched) +{ + struct dentry *alias; + + spin_lock(&dcache_lock); + list_for_each_entry(alias, &inode->i_dentry, d_alias) { + struct dentry *child; + + list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { + if (!child->d_inode) { + WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); + continue; + } + spin_lock(&child->d_lock); + if (watched) { + WARN_ON(child->d_flags & + DCACHE_INOTIFY_PARENT_WATCHED); + child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; + } else { + WARN_ON(!(child->d_flags & + DCACHE_INOTIFY_PARENT_WATCHED)); + child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED; + } + spin_unlock(&child->d_lock); + } + } + spin_unlock(&dcache_lock); +} + /* * create_watch - creates a watch on the given device. * @@ -426,7 +467,6 @@ static struct inotify_watch *create_watch(struct inotify_device *dev, get_inotify_watch(watch); atomic_inc(&dev->user->inotify_watches); - atomic_inc(&inotify_watches); return watch; } @@ -458,8 +498,10 @@ static void remove_watch_no_event(struct inotify_watch *watch, list_del(&watch->i_list); list_del(&watch->d_list); + if (!inotify_inode_watched(watch->inode)) + set_dentry_child_flags(watch->inode, 0); + atomic_dec(&dev->user->inotify_watches); - atomic_dec(&inotify_watches); idr_remove(&dev->idr, watch->wd); put_inotify_watch(watch); } @@ -481,16 +523,39 @@ static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev) remove_watch_no_event(watch, dev); } +/* Kernel API */ + /* - * inotify_inode_watched - returns nonzero if there are watches on this inode - * and zero otherwise. We call this lockless, we do not care if we race. + * inotify_d_instantiate - instantiate dcache entry for inode */ -static inline int inotify_inode_watched(struct inode *inode) +void inotify_d_instantiate(struct dentry *entry, struct inode *inode) { - return !list_empty(&inode->inotify_watches); + struct dentry *parent; + + if (!inode) + return; + + WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); + spin_lock(&entry->d_lock); + parent = entry->d_parent; + if (inotify_inode_watched(parent->d_inode)) + entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; + spin_unlock(&entry->d_lock); } -/* Kernel API */ +/* + * inotify_d_move - dcache entry has been moved + */ +void inotify_d_move(struct dentry *entry) +{ + struct dentry *parent; + + parent = entry->d_parent; + if (inotify_inode_watched(parent->d_inode)) + entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; + else + entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; +} /** * inotify_inode_queue_event - queue an event to all watches on this inode @@ -538,7 +603,7 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask, struct dentry *parent; struct inode *inode; - if (!atomic_read (&inotify_watches)) + if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED)) return; spin_lock(&dentry->d_lock); @@ -993,6 +1058,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) goto out; } + if (!inotify_inode_watched(inode)) + set_dentry_child_flags(inode, 1); + /* Add the watch to the device's and the inode's list */ list_add(&watch->d_list, &dev->watches); list_add(&watch->i_list, &inode->inotify_watches); @@ -1065,7 +1133,6 @@ static int __init inotify_setup(void) inotify_max_user_watches = 8192; atomic_set(&inotify_cookie, 0); - atomic_set(&inotify_watches, 0); watch_cachep = kmem_cache_create("inotify_watch_cache", sizeof(struct inotify_watch), diff --git a/include/linux/dcache.h b/include/linux/dcache.h 
index 4361f3789975..d10bd30c337e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -162,6 +162,8 @@ d_iput: no no no yes #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 +#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ + extern spinlock_t dcache_lock; /** diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 03b8e7932b83..f7e517c1f1bd 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -16,6 +16,25 @@ #include #include +/* + * fsnotify_d_instantiate - instantiate a dentry for inode + * Called with dcache_lock held. + */ +static inline void fsnotify_d_instantiate(struct dentry *entry, + struct inode *inode) +{ + inotify_d_instantiate(entry, inode); +} + +/* + * fsnotify_d_move - entry has been moved + * Called with dcache_lock and entry->d_lock held. + */ +static inline void fsnotify_d_move(struct dentry *entry) +{ + inotify_d_move(entry); +} + /* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir */ diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 267c88b5f742..09e00433c78e 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -71,6 +71,8 @@ struct inotify_event { #ifdef CONFIG_INOTIFY +extern void inotify_d_instantiate(struct dentry *, struct inode *); +extern void inotify_d_move(struct dentry *); extern void inotify_inode_queue_event(struct inode *, __u32, __u32, const char *); extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, @@ -81,6 +83,15 @@ extern u32 inotify_get_cookie(void); #else +static inline void inotify_d_instantiate(struct dentry *dentry, + struct inode *inode) +{ +} + +static inline void inotify_d_move(struct dentry *dentry) +{ +} + static inline void inotify_inode_queue_event(struct inode *inode, __u32 mask, __u32 cookie, const char *filename) -- cgit v1.2.3 From cd02b966bfcad12d1b2e265dc8dbc331d4c184c4 Mon Sep 17 00:00:00 2001 From: "Vladimir V. Saveliev" Date: Sat, 25 Mar 2006 03:07:15 -0800 Subject: [PATCH] reiserfs: cleanups Clean up several places where gcc issues warnings when -W is specified. Thanks to Neil for finding that. Signed-off-by: Vladimir V. 
Saveliev Cc: Neil Brown Signed-off-by: Hans Reiser Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/fix_node.c | 4 +--- fs/reiserfs/super.c | 8 ++++---- include/linux/reiserfs_xattr.h | 6 +++--- 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index aa22588019ec..5600d3d60cf7 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -191,9 +191,7 @@ static void create_virtual_node(struct tree_balance *tb, int h) "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c", key, vn->vn_affected_item_num, vn->vn_mode, M_DELETE); - } else - /* we can delete directory item, that has only one directory entry in it */ - ; + } } #endif diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 93e6ef9360e3..cae2abbc0c71 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -685,14 +685,14 @@ static const arg_desc_t logging_mode[] = { (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_WRITEBACK)}, {"writeback", 1 << REISERFS_DATA_WRITEBACK, (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_LOG)}, - {NULL, 0} + {.value = NULL} }; /* possible values for -o barrier= */ static const arg_desc_t barrier_mode[] = { {"none", 1 << REISERFS_BARRIER_NONE, 1 << REISERFS_BARRIER_FLUSH}, {"flush", 1 << REISERFS_BARRIER_FLUSH, 1 << REISERFS_BARRIER_NONE}, - {NULL, 0} + {.value = NULL} }; /* possible values for "-o block-allocator=" and bits which are to be set in @@ -890,7 +890,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin {"acl",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, {"noacl",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, #endif - {"nolog",}, /* This is unsupported */ + {.option_name = "nolog"}, {"replayonly",.setmask = 1 << REPLAYONLY}, {"block-allocator",.arg_required = 'a',.values = balloc}, {"data",.arg_required = 'd',.values = logging_mode}, @@ -908,7 +908,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin {"grpjquota",.arg_required = 'g' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL}, {"jqfmt",.arg_required = 'f',.values = NULL}, - {NULL,} + {.option_name = NULL} }; *blocks = 0; diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index 87280eb6083d..5353afb11db3 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -101,13 +101,13 @@ static inline void reiserfs_mark_inode_private(struct inode *inode) #else #define is_reiserfs_priv_object(inode) 0 -#define reiserfs_mark_inode_private(inode) +#define reiserfs_mark_inode_private(inode) do {;} while(0) #define reiserfs_getxattr NULL #define reiserfs_setxattr NULL #define reiserfs_listxattr NULL #define reiserfs_removexattr NULL -#define reiserfs_write_lock_xattrs(sb) -#define reiserfs_write_unlock_xattrs(sb) +#define reiserfs_write_lock_xattrs(sb) do {;} while(0) +#define reiserfs_write_unlock_xattrs(sb) do {;} while(0) #define reiserfs_read_lock_xattrs(sb) #define reiserfs_read_unlock_xattrs(sb) -- cgit v1.2.3 From 5930860296ca438071d3824bf7306ad0dfd33fc1 Mon Sep 17 00:00:00 2001 From: Alexander Zarochentsev Date: Sat, 25 Mar 2006 03:07:16 -0800 Subject: [PATCH] reiserfs: use balance_dirty_pages_ratelimited_nr in reiserfs_file_write() Use the new balance_dirty_pages_ratelimited_nr in reiserfs "largeio" file write. 
Signed-off-by: Hans Reiser Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 044de8be39a7..d0c1e865963e 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1532,7 +1532,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t buf += write_bytes; *ppos = pos += write_bytes; count -= write_bytes; - balance_dirty_pages_ratelimited(inode->i_mapping); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); } /* this is only true on error */ -- cgit v1.2.3 From 27979bb2ff748613dba96ae66392a76fb0678527 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Sat, 25 Mar 2006 03:07:23 -0800 Subject: [PATCH] v9fs: consolidate trans_sock into trans_fd Here is a new trans_fd.c that replaces the current trans_fd.c and trans_sock.c. Signed-off-by: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/Makefile | 1 - fs/9p/trans_fd.c | 297 +++++++++++++++++++++++++++++++---------------- fs/9p/trans_sock.c | 334 ----------------------------------------------------- 3 files changed, 196 insertions(+), 436 deletions(-) delete mode 100644 fs/9p/trans_sock.c (limited to 'fs') diff --git a/fs/9p/Makefile b/fs/9p/Makefile index 2f4ce43f7b6c..5db5af9dbf25 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -2,7 +2,6 @@ obj-$(CONFIG_9P_FS) := 9p2000.o 9p2000-objs := \ trans_fd.o \ - trans_sock.o \ mux.o \ 9p.o \ conv.o \ diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c index 5b2ce21b10fa..8029844d6b28 100644 --- a/fs/9p/trans_fd.c +++ b/fs/9p/trans_fd.c @@ -1,10 +1,13 @@ /* * linux/fs/9p/trans_fd.c * - * File Descriptor Transport Layer + * Fd transport layer. Includes deprecated socket layer. * - * Copyright (C) 2005 by Latchesar Ionkov - * Copyright (C) 2005 by Eric Van Hensbergen + * Copyright (C) 2006 by Russ Cox + * Copyright (C) 2004-2005 by Latchesar Ionkov + * Copyright (C) 2004-2005 by Eric Van Hensbergen + * Copyright (C) 1997-2002 by Ron Minnich + * Copyright (C) 1995, 1996 by Olaf Kirch * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,6 +28,7 @@ */ #include +#include #include #include #include @@ -40,89 +44,119 @@ #include "v9fs.h" #include "transport.h" +#define V9FS_PORT 564 + struct v9fs_trans_fd { - struct file *in_file; - struct file *out_file; + struct file *rd; + struct file *wr; }; /** - * v9fs_fd_recv - receive from a socket + * v9fs_fd_read- read from a fd * @v9ses: session information * @v: buffer to receive data into * @len: size of receive buffer * */ - -static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len) +static int v9fs_fd_read(struct v9fs_transport *trans, void *v, int len) { - struct v9fs_trans_fd *ts = trans ? 
trans->priv : NULL; + int ret; + struct v9fs_trans_fd *ts; - if (!trans || trans->status != Connected || !ts) - return -EIO; + if (!trans || trans->status == Disconnected || !(ts = trans->priv)) + return -EREMOTEIO; - return kernel_read(ts->in_file, ts->in_file->f_pos, v, len); + if (!(ts->rd->f_flags & O_NONBLOCK)) + dprintk(DEBUG_ERROR, "blocking read ...\n"); + + ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + trans->status = Disconnected; + return ret; } /** - * v9fs_fd_send - send to a socket + * v9fs_fd_write - write to a socket * @v9ses: session information * @v: buffer to send data from * @len: size of send buffer * */ - -static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len) +static int v9fs_fd_write(struct v9fs_transport *trans, void *v, int len) { - struct v9fs_trans_fd *ts = trans ? trans->priv : NULL; - mm_segment_t oldfs = get_fs(); - int ret = 0; + int ret; + mm_segment_t oldfs; + struct v9fs_trans_fd *ts; - if (!trans || trans->status != Connected || !ts) - return -EIO; + if (!trans || trans->status == Disconnected || !(ts = trans->priv)) + return -EREMOTEIO; + + if (!(ts->wr->f_flags & O_NONBLOCK)) + dprintk(DEBUG_ERROR, "blocking write ...\n"); oldfs = get_fs(); set_fs(get_ds()); /* The cast to a user pointer is valid due to the set_fs() */ - ret = vfs_write(ts->out_file, (void __user *)v, len, &ts->out_file->f_pos); + ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); set_fs(oldfs); + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + trans->status = Disconnected; return ret; } -/** - * v9fs_fd_init - initialize file descriptor transport - * @v9ses: session information - * @addr: address of server to mount - * @data: mount options - * - */ - -static int -v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data) +static unsigned int +v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt) { - struct v9fs_trans_fd *ts = NULL; - struct v9fs_transport *trans = v9ses->transport; + int ret, n; + struct v9fs_trans_fd *ts; + mm_segment_t oldfs; - if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) { - printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); - return -ENOPROTOOPT; - } + if (!trans || trans->status != Connected || !(ts = trans->priv)) + return -EREMOTEIO; - ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL); + if (!ts->rd->f_op || !ts->rd->f_op->poll) + return -EIO; - if (!ts) - return -ENOMEM; + if (!ts->wr->f_op || !ts->wr->f_op->poll) + return -EIO; - ts->in_file = fget( v9ses->rfdno ); - ts->out_file = fget( v9ses->wfdno ); + oldfs = get_fs(); + set_fs(get_ds()); - if (!ts->in_file || !ts->out_file) { - if (ts->in_file) - fput(ts->in_file); + ret = ts->rd->f_op->poll(ts->rd, pt); + if (ret < 0) + goto end; - if (ts->out_file) - fput(ts->out_file); + if (ts->rd != ts->wr) { + n = ts->wr->f_op->poll(ts->wr, pt); + if (n < 0) { + ret = n; + goto end; + } + ret = (ret & ~POLLOUT) | (n & ~POLLIN); + } + end: + set_fs(oldfs); + return ret; +} + +static int v9fs_fd_open(struct v9fs_session_info *v9ses, int rfd, int wfd) +{ + struct v9fs_transport *trans = v9ses->transport; + struct v9fs_trans_fd *ts = kmalloc(sizeof(struct v9fs_trans_fd), + GFP_KERNEL); + if (!ts) + return -ENOMEM; + + ts->rd = fget(rfd); + ts->wr = fget(wfd); + if (!ts->rd || !ts->wr) { + if (ts->rd) + fput(ts->rd); + if (ts->wr) + fput(ts->wr); kfree(ts); return -EIO; } @@ -133,84 +167,145 @@ v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char 
*data) return 0; } - -/** - * v9fs_fd_close - shutdown file descriptor - * @trans: private socket structure - * - */ - -static void v9fs_fd_close(struct v9fs_transport *trans) +static int v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, + char *data) { - struct v9fs_trans_fd *ts; - - if (!trans) - return; - - ts = xchg(&trans->priv, NULL); + if (v9ses->rfdno == ~0 || v9ses->wfdno == ~0) { + printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); + return -ENOPROTOOPT; + } - if (!ts) - return; + return v9fs_fd_open(v9ses, v9ses->rfdno, v9ses->wfdno); +} - trans->status = Disconnected; - if (ts->in_file) - fput(ts->in_file); +static int v9fs_socket_open(struct v9fs_session_info *v9ses, + struct socket *csocket) +{ + int fd, ret; + + csocket->sk->sk_allocation = GFP_NOIO; + if ((fd = sock_map_fd(csocket)) < 0) { + eprintk(KERN_ERR, "v9fs_socket_open: failed to map fd\n"); + ret = fd; + release_csocket: + sock_release(csocket); + return ret; + } - if (ts->out_file) - fput(ts->out_file); + if ((ret = v9fs_fd_open(v9ses, fd, fd)) < 0) { + sockfd_put(csocket); + eprintk(KERN_ERR, "v9fs_socket_open: failed to open fd\n"); + goto release_csocket; + } - kfree(ts); + ((struct v9fs_trans_fd *)v9ses->transport->priv)->rd->f_flags |= + O_NONBLOCK; + return 0; } -static unsigned int -v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt) +static int v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, + char *data) { - int ret, n; - struct v9fs_trans_fd *ts; - mm_segment_t oldfs; + int ret; + struct socket *csocket = NULL; + struct sockaddr_in sin_server; + + sin_server.sin_family = AF_INET; + sin_server.sin_addr.s_addr = in_aton(addr); + sin_server.sin_port = htons(v9ses->port); + sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); + + if (!csocket) { + eprintk(KERN_ERR, "v9fs_trans_tcp: problem creating socket\n"); + return -1; + } - if (!trans) - return -EIO; + ret = csocket->ops->connect(csocket, + (struct sockaddr *)&sin_server, + sizeof(struct sockaddr_in), 0); + if (ret < 0) { + eprintk(KERN_ERR, + "v9fs_trans_tcp: problem connecting socket to %s\n", + addr); + return ret; + } - ts = trans->priv; - if (trans->status != Connected || !ts) - return -EIO; + return v9fs_socket_open(v9ses, csocket); +} - oldfs = get_fs(); - set_fs(get_ds()); +static int +v9fs_unix_init(struct v9fs_session_info *v9ses, const char *addr, char *data) +{ + int ret; + struct socket *csocket; + struct sockaddr_un sun_server; + + if (strlen(addr) > UNIX_PATH_MAX) { + eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n", + addr); + return -ENAMETOOLONG; + } - if (!ts->in_file->f_op || !ts->in_file->f_op->poll) { - ret = -EIO; - goto end; + sun_server.sun_family = PF_UNIX; + strcpy(sun_server.sun_path, addr); + sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + ret = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, + sizeof(struct sockaddr_un) - 1, 0); + if (ret < 0) { + eprintk(KERN_ERR, + "v9fs_trans_unix: problem connecting socket: %s: %d\n", + addr, ret); + return ret; } - ret = ts->in_file->f_op->poll(ts->in_file, pt); + return v9fs_socket_open(v9ses, csocket); +} - if (ts->out_file != ts->in_file) { - if (!ts->out_file->f_op || !ts->out_file->f_op->poll) { - ret = -EIO; - goto end; - } +/** + * v9fs_sock_close - shutdown socket + * @trans: private socket structure + * + */ +static void v9fs_fd_close(struct v9fs_transport *trans) +{ + struct v9fs_trans_fd *ts; - n = ts->out_file->f_op->poll(ts->out_file, pt); + if (!trans) + return; - ret 
&= ~POLLOUT; - n &= ~POLLIN; + ts = xchg(&trans->priv, NULL); - ret |= n; - } + if (!ts) + return; -end: - set_fs(oldfs); - return ret; + trans->status = Disconnected; + if (ts->rd) + fput(ts->rd); + if (ts->wr) + fput(ts->wr); + kfree(ts); } - struct v9fs_transport v9fs_trans_fd = { .init = v9fs_fd_init, - .write = v9fs_fd_send, - .read = v9fs_fd_recv, + .write = v9fs_fd_write, + .read = v9fs_fd_read, .close = v9fs_fd_close, .poll = v9fs_fd_poll, }; +struct v9fs_transport v9fs_trans_tcp = { + .init = v9fs_tcp_init, + .write = v9fs_fd_write, + .read = v9fs_fd_read, + .close = v9fs_fd_close, + .poll = v9fs_fd_poll, +}; + +struct v9fs_transport v9fs_trans_unix = { + .init = v9fs_unix_init, + .write = v9fs_fd_write, + .read = v9fs_fd_read, + .close = v9fs_fd_close, + .poll = v9fs_fd_poll, +}; diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c deleted file mode 100644 index 44e830697acb..000000000000 --- a/fs/9p/trans_sock.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * linux/fs/9p/trans_socket.c - * - * Socket Transport Layer - * - * Copyright (C) 2004-2005 by Latchesar Ionkov - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 1997-2002 by Ron Minnich - * Copyright (C) 1995, 1996 by Olaf Kirch - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "debug.h" -#include "v9fs.h" -#include "transport.h" - -#define V9FS_PORT 564 - -struct v9fs_trans_sock { - struct socket *s; - struct file *filp; -}; - -/** - * v9fs_sock_recv - receive from a socket - * @v9ses: session information - * @v: buffer to receive data into - * @len: size of receive buffer - * - */ - -static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len) -{ - int ret; - struct v9fs_trans_sock *ts; - - if (!trans || trans->status == Disconnected) { - dprintk(DEBUG_ERROR, "disconnected ...\n"); - return -EREMOTEIO; - } - - ts = trans->priv; - - if (!(ts->filp->f_flags & O_NONBLOCK)) - dprintk(DEBUG_ERROR, "blocking read ...\n"); - - ret = kernel_read(ts->filp, ts->filp->f_pos, v, len); - if (ret <= 0) { - if (ret != -ERESTARTSYS && ret != -EAGAIN) - trans->status = Disconnected; - } - - return ret; -} - -/** - * v9fs_sock_send - send to a socket - * @v9ses: session information - * @v: buffer to send data from - * @len: size of send buffer - * - */ - -static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len) -{ - int ret; - mm_segment_t oldfs; - struct v9fs_trans_sock *ts; - - if (!trans || trans->status == Disconnected) { - dprintk(DEBUG_ERROR, "disconnected ...\n"); - return -EREMOTEIO; - } - - ts = trans->priv; - if (!ts) { - dprintk(DEBUG_ERROR, "no transport ...\n"); - return -EREMOTEIO; - } - - if (!(ts->filp->f_flags & O_NONBLOCK)) - dprintk(DEBUG_ERROR, "blocking write ...\n"); - - oldfs 
= get_fs(); - set_fs(get_ds()); - ret = vfs_write(ts->filp, (void __user *)v, len, &ts->filp->f_pos); - set_fs(oldfs); - - if (ret < 0) { - if (ret != -ERESTARTSYS) - trans->status = Disconnected; - } - - return ret; -} - -static unsigned int v9fs_sock_poll(struct v9fs_transport *trans, - struct poll_table_struct *pt) { - - int ret; - struct v9fs_trans_sock *ts; - mm_segment_t oldfs; - - if (!trans) { - dprintk(DEBUG_ERROR, "no transport\n"); - return -EIO; - } - - ts = trans->priv; - if (trans->status != Connected || !ts) { - dprintk(DEBUG_ERROR, "transport disconnected: %d\n", trans->status); - return -EIO; - } - - oldfs = get_fs(); - set_fs(get_ds()); - - if (!ts->filp->f_op || !ts->filp->f_op->poll) { - dprintk(DEBUG_ERROR, "no poll operation\n"); - ret = -EIO; - goto end; - } - - ret = ts->filp->f_op->poll(ts->filp, pt); - -end: - set_fs(oldfs); - return ret; -} - - -/** - * v9fs_tcp_init - initialize TCP socket - * @v9ses: session information - * @addr: address of server to mount - * @data: mount options - * - */ - -static int -v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data) -{ - struct socket *csocket = NULL; - struct sockaddr_in sin_server; - int rc = 0; - struct v9fs_trans_sock *ts = NULL; - struct v9fs_transport *trans = v9ses->transport; - int fd; - - trans->status = Disconnected; - - ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL); - - if (!ts) - return -ENOMEM; - - trans->priv = ts; - ts->s = NULL; - ts->filp = NULL; - - if (!addr) - return -EINVAL; - - dprintk(DEBUG_TRANS, "Connecting to %s\n", addr); - - sin_server.sin_family = AF_INET; - sin_server.sin_addr.s_addr = in_aton(addr); - sin_server.sin_port = htons(v9ses->port); - sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); - rc = csocket->ops->connect(csocket, - (struct sockaddr *)&sin_server, - sizeof(struct sockaddr_in), 0); - if (rc < 0) { - eprintk(KERN_ERR, - "v9fs_trans_tcp: problem connecting socket to %s\n", - addr); - return rc; - } - csocket->sk->sk_allocation = GFP_NOIO; - - fd = sock_map_fd(csocket); - if (fd < 0) { - sock_release(csocket); - kfree(ts); - trans->priv = NULL; - return fd; - } - - ts->s = csocket; - ts->filp = fget(fd); - ts->filp->f_flags |= O_NONBLOCK; - trans->status = Connected; - - return 0; -} - -/** - * v9fs_unix_init - initialize UNIX domain socket - * @v9ses: session information - * @dev_name: path to named pipe - * @data: mount options - * - */ - -static int -v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, - char *data) -{ - int rc, fd; - struct socket *csocket; - struct sockaddr_un sun_server; - struct v9fs_transport *trans; - struct v9fs_trans_sock *ts; - - rc = 0; - csocket = NULL; - trans = v9ses->transport; - - trans->status = Disconnected; - - if (strlen(dev_name) > UNIX_PATH_MAX) { - eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n", - dev_name); - return -ENOMEM; - } - - ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL); - if (!ts) - return -ENOMEM; - - trans->priv = ts; - ts->s = NULL; - ts->filp = NULL; - - sun_server.sun_family = PF_UNIX; - strcpy(sun_server.sun_path, dev_name); - sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); - rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, - sizeof(struct sockaddr_un) - 1, 0); /* -1 *is* important */ - if (rc < 0) { - eprintk(KERN_ERR, - "v9fs_trans_unix: problem connecting socket: %s: %d\n", - dev_name, rc); - return rc; - } - csocket->sk->sk_allocation = GFP_NOIO; - - fd = sock_map_fd(csocket); - if (fd < 0) { - 
sock_release(csocket); - kfree(ts); - trans->priv = NULL; - return fd; - } - - ts->s = csocket; - ts->filp = fget(fd); - ts->filp->f_flags |= O_NONBLOCK; - trans->status = Connected; - - return 0; -} - -/** - * v9fs_sock_close - shutdown socket - * @trans: private socket structure - * - */ - -static void v9fs_sock_close(struct v9fs_transport *trans) -{ - struct v9fs_trans_sock *ts; - - if (!trans) - return; - - ts = trans->priv; - - if ((ts) && (ts->filp)) { - fput(ts->filp); - ts->filp = NULL; - ts->s = NULL; - trans->status = Disconnected; - } - - kfree(ts); - - trans->priv = NULL; -} - -struct v9fs_transport v9fs_trans_tcp = { - .init = v9fs_tcp_init, - .write = v9fs_sock_send, - .read = v9fs_sock_recv, - .close = v9fs_sock_close, - .poll = v9fs_sock_poll, -}; - -struct v9fs_transport v9fs_trans_unix = { - .init = v9fs_unix_init, - .write = v9fs_sock_send, - .read = v9fs_sock_recv, - .close = v9fs_sock_close, - .poll = v9fs_sock_poll, -}; -- cgit v1.2.3 From 4a26c2429b8c1ab2be140a4b29aaf16d4dcd8f92 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Sat, 25 Mar 2006 03:07:24 -0800 Subject: [PATCH] v9fs: rename tids to tags to be consistent with Plan 9 documentation The code talks about these things called tids, which I eventually figured out are tags. Signed-off-by: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/9p.c | 2 +- fs/9p/mux.c | 12 ++++++------ fs/9p/v9fs.c | 2 +- fs/9p/v9fs.h | 3 --- 4 files changed, 8 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/9p/9p.c b/fs/9p/9p.c index c148e6ba07e5..ea2cf9692ff4 100644 --- a/fs/9p/9p.c +++ b/fs/9p/9p.c @@ -152,7 +152,7 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid) /** * v9fs_v9fs_t_flush - flush a pending transaction * @v9ses: 9P2000 session information - * @tag: tid to release + * @tag: tag to release * */ diff --git a/fs/9p/mux.c b/fs/9p/mux.c index e2ae60adda99..3b10a36cefdb 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -70,7 +70,7 @@ struct v9fs_mux_data { int msize; unsigned char *extended; struct v9fs_transport *trans; - struct v9fs_idpool tidpool; + struct v9fs_idpool tagpool; int err; wait_queue_head_t equeue; struct list_head req_list; @@ -280,8 +280,8 @@ struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize, m->msize = msize; m->extended = extended; m->trans = trans; - idr_init(&m->tidpool.pool); - init_MUTEX(&m->tidpool.lock); + idr_init(&m->tagpool.pool); + init_MUTEX(&m->tagpool.lock); m->err = 0; init_waitqueue_head(&m->equeue); INIT_LIST_HEAD(&m->req_list); @@ -965,7 +965,7 @@ static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m) { int tag; - tag = v9fs_get_idpool(&m->tidpool); + tag = v9fs_get_idpool(&m->tagpool); if (tag < 0) return V9FS_NOTAG; else @@ -974,6 +974,6 @@ static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m) static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag) { - if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tidpool)) - v9fs_put_idpool(tag, &m->tidpool); + if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tagpool)) + v9fs_put_idpool(tag, &m->tagpool); } diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 61352491ba36..daf623cd61c5 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -289,7 +289,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses, /* set global debug level */ v9fs_debug_level = v9ses->debug; - /* id pools that are session-dependent: FIDs and TIDs */ + /* id pools that are session-dependent: fids and tags */ idr_init(&v9ses->fidpool.pool); init_MUTEX(&v9ses->fidpool.lock); diff --git 
a/fs/9p/v9fs.h b/fs/9p/v9fs.h index f337da7a0eec..9f63ab8106dd 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -91,6 +91,3 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses); #define V9FS_DEFUSER "nobody" #define V9FS_DEFANAME "" -/* inital pool sizes for fids and tags */ -#define V9FS_START_FIDS 8192 -#define V9FS_START_TIDS 256 -- cgit v1.2.3 From 5174fdab9f58181249debab6e959ae4fd4abd0ed Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Sat, 25 Mar 2006 03:07:25 -0800 Subject: [PATCH] v9fs: print 9p messages Print 9p messages. Signed-off-by: Latchesar Ionkov Cc: Eric Van Hensbergen Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/9p.h | 1 + fs/9p/Makefile | 3 +- fs/9p/debug.h | 1 + fs/9p/fcprint.c | 347 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/9p/mux.c | 15 +++ fs/9p/vfs_dentry.c | 2 +- fs/9p/vfs_inode.c | 2 +- 7 files changed, 368 insertions(+), 3 deletions(-) create mode 100644 fs/9p/fcprint.c (limited to 'fs') diff --git a/fs/9p/9p.h b/fs/9p/9p.h index 95d72aec1c1a..1df9e69b794b 100644 --- a/fs/9p/9p.h +++ b/fs/9p/9p.h @@ -372,3 +372,4 @@ int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count, const char __user * data, struct v9fs_fcall **rcall); +int v9fs_printfcall(char *, int, struct v9fs_fcall *, int); diff --git a/fs/9p/Makefile b/fs/9p/Makefile index 5db5af9dbf25..12d52421d58b 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -13,5 +13,6 @@ obj-$(CONFIG_9P_FS) := 9p2000.o vfs_dentry.o \ error.o \ v9fs.o \ - fid.o + fid.o \ + fcprint.o diff --git a/fs/9p/debug.h b/fs/9p/debug.h index fe551032788b..ff54a7b0b232 100644 --- a/fs/9p/debug.h +++ b/fs/9p/debug.h @@ -30,6 +30,7 @@ #define DEBUG_MUX (1<<5) #define DEBUG_TRANS (1<<6) #define DEBUG_SLABS (1<<7) +#define DEBUG_FCALL (1<<8) #define DEBUG_DUMP_PKT 0 diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c new file mode 100644 index 000000000000..fc8a98437b73 --- /dev/null +++ b/fs/9p/fcprint.c @@ -0,0 +1,347 @@ +/* + * linux/fs/9p/fcprint.c + * + * Print 9P call. + * + * Copyright (C) 2005 by Latchesar Ionkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "mux.h" + +static int +v9fs_printqid(char *buf, int buflen, struct v9fs_qid *q) +{ + int n; + char b[10]; + + n = 0; + if (q->type & V9FS_QTDIR) + b[n++] = 'd'; + if (q->type & V9FS_QTAPPEND) + b[n++] = 'a'; + if (q->type & V9FS_QTAUTH) + b[n++] = 'A'; + if (q->type & V9FS_QTEXCL) + b[n++] = 'l'; + if (q->type & V9FS_QTTMP) + b[n++] = 't'; + if (q->type & V9FS_QTSYMLINK) + b[n++] = 'L'; + b[n] = '\0'; + + return scnprintf(buf, buflen, "(%.16llx %x %s)", (long long int) q->path, + q->version, b); +} + +static int +v9fs_printperm(char *buf, int buflen, int perm) +{ + int n; + char b[15]; + + n = 0; + if (perm & V9FS_DMDIR) + b[n++] = 'd'; + if (perm & V9FS_DMAPPEND) + b[n++] = 'a'; + if (perm & V9FS_DMAUTH) + b[n++] = 'A'; + if (perm & V9FS_DMEXCL) + b[n++] = 'l'; + if (perm & V9FS_DMTMP) + b[n++] = 't'; + if (perm & V9FS_DMDEVICE) + b[n++] = 'D'; + if (perm & V9FS_DMSOCKET) + b[n++] = 'S'; + if (perm & V9FS_DMNAMEDPIPE) + b[n++] = 'P'; + if (perm & V9FS_DMSYMLINK) + b[n++] = 'L'; + b[n] = '\0'; + + return scnprintf(buf, buflen, "%s%03o", b, perm&077); +} + +static int +v9fs_printstat(char *buf, int buflen, struct v9fs_stat *st, int extended) +{ + int n; + + n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len, + st->name.str, st->uid.len, st->uid.str); + if (extended) + n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid); + + n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str); + if (extended) + n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid); + + n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str); + if (extended) + n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid); + + n += scnprintf(buf+n, buflen-n, " q "); + n += v9fs_printqid(buf+n, buflen-n, &st->qid); + n += scnprintf(buf+n, buflen-n, " m "); + n += v9fs_printperm(buf+n, buflen-n, st->mode); + n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld", + st->atime, st->mtime, (long long int) st->length); + + if (extended) + n += scnprintf(buf+n, buflen-n, " ext '%.*s'", + st->extension.len, st->extension.str); + + return n; +} + +static int +v9fs_dumpdata(char *buf, int buflen, u8 *data, int datalen) +{ + int i, n; + + i = n = 0; + while (i < datalen) { + n += scnprintf(buf + n, buflen - n, "%02x", data[i]); + if (i%4 == 3) + n += scnprintf(buf + n, buflen - n, " "); + if (i%32 == 31) + n += scnprintf(buf + n, buflen - n, "\n"); + + i++; + } + n += scnprintf(buf + n, buflen - n, "\n"); + + return n; +} + +static int +v9fs_printdata(char *buf, int buflen, u8 *data, int datalen) +{ + return v9fs_dumpdata(buf, buflen, data, datalen<16?datalen:16); +} + +int +v9fs_printfcall(char *buf, int buflen, struct v9fs_fcall *fc, int extended) +{ + int i, ret, type, tag; + + if (!fc) + return scnprintf(buf, buflen, ""); + + type = fc->id; + tag = fc->tag; + + ret = 0; + switch (type) { + case TVERSION: + ret += scnprintf(buf+ret, buflen-ret, + "Tversion tag %u msize %u version '%.*s'", tag, + fc->params.tversion.msize, fc->params.tversion.version.len, + fc->params.tversion.version.str); + break; + + case RVERSION: + ret += scnprintf(buf+ret, buflen-ret, + "Rversion tag %u msize %u version '%.*s'", tag, + fc->params.rversion.msize, fc->params.rversion.version.len, + 
fc->params.rversion.version.str); + break; + + case TAUTH: + ret += scnprintf(buf+ret, buflen-ret, + "Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag, + fc->params.tauth.afid, fc->params.tauth.uname.len, + fc->params.tauth.uname.str, fc->params.tauth.aname.len, + fc->params.tauth.aname.str); + break; + + case RAUTH: + ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag); + v9fs_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid); + break; + + case TATTACH: + ret += scnprintf(buf+ret, buflen-ret, + "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'", + tag, fc->params.tattach.fid, fc->params.tattach.afid, + fc->params.tattach.uname.len, fc->params.tattach.uname.str, + fc->params.tattach.aname.len, fc->params.tattach.aname.str); + break; + + case RATTACH: + ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", tag); + v9fs_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid); + break; + + case RERROR: + ret += scnprintf(buf+ret, buflen-ret, "Rerror tag %u ename '%.*s'", + tag, fc->params.rerror.error.len, + fc->params.rerror.error.str); + if (extended) + ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n", + fc->params.rerror.errno); + break; + + case TFLUSH: + ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u", + tag, fc->params.tflush.oldtag); + break; + + case RFLUSH: + ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag); + break; + + case TWALK: + ret += scnprintf(buf+ret, buflen-ret, + "Twalk tag %u fid %d newfid %d nwname %d", tag, + fc->params.twalk.fid, fc->params.twalk.newfid, + fc->params.twalk.nwname); + for(i = 0; i < fc->params.twalk.nwname; i++) + ret += scnprintf(buf+ret, buflen-ret," '%.*s'", + fc->params.twalk.wnames[i].len, + fc->params.twalk.wnames[i].str); + break; + + case RWALK: + ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d", + tag, fc->params.rwalk.nwqid); + for(i = 0; i < fc->params.rwalk.nwqid; i++) + ret += v9fs_printqid(buf+ret, buflen-ret, + &fc->params.rwalk.wqids[i]); + break; + + case TOPEN: + ret += scnprintf(buf+ret, buflen-ret, + "Topen tag %u fid %d mode %d", tag, + fc->params.topen.fid, fc->params.topen.mode); + break; + + case ROPEN: + ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag); + ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid); + ret += scnprintf(buf+ret, buflen-ret," iounit %d", + fc->params.ropen.iounit); + break; + + case TCREATE: + ret += scnprintf(buf+ret, buflen-ret, + "Tcreate tag %u fid %d name '%.*s' perm ", tag, + fc->params.tcreate.fid, fc->params.tcreate.name.len, + fc->params.tcreate.name.str); + + ret += v9fs_printperm(buf+ret, buflen-ret, fc->params.tcreate.perm); + ret += scnprintf(buf+ret, buflen-ret, " mode %d", + fc->params.tcreate.mode); + break; + + case RCREATE: + ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag); + ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.rcreate.qid); + ret += scnprintf(buf+ret, buflen-ret, " iounit %d", + fc->params.rcreate.iounit); + break; + + case TREAD: + ret += scnprintf(buf+ret, buflen-ret, + "Tread tag %u fid %d offset %lld count %u", tag, + fc->params.tread.fid, + (long long int) fc->params.tread.offset, + fc->params.tread.count); + break; + + case RREAD: + ret += scnprintf(buf+ret, buflen-ret, + "Rread tag %u count %u data ", tag, + fc->params.rread.count); + ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.rread.data, + fc->params.rread.count); + break; + + case TWRITE: + ret += scnprintf(buf+ret, buflen-ret, + "Twrite tag %u fid %d offset %lld count %u data ", + tag, 
fc->params.twrite.fid, + (long long int) fc->params.twrite.offset, + fc->params.twrite.count); + ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.twrite.data, + fc->params.twrite.count); + break; + + case RWRITE: + ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u", + tag, fc->params.rwrite.count); + break; + + case TCLUNK: + ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d", + tag, fc->params.tclunk.fid); + break; + + case RCLUNK: + ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag); + break; + + case TREMOVE: + ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d", + tag, fc->params.tremove.fid); + break; + + case RREMOVE: + ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag); + break; + + case TSTAT: + ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d", + tag, fc->params.tstat.fid); + break; + + case RSTAT: + ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag); + ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat, + extended); + break; + + case TWSTAT: + ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ", + tag, fc->params.twstat.fid); + ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.twstat.stat, + extended); + break; + + case RWSTAT: + ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag); + break; + + default: + ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type); + break; + } + + return ret; +} diff --git a/fs/9p/mux.c b/fs/9p/mux.c index 3b10a36cefdb..d16f4f488fda 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -635,6 +635,14 @@ static void v9fs_read_work(void *a) goto error; } + if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) { + char buf[150]; + + v9fs_printfcall(buf, sizeof(buf), m->rcall, + *m->extended); + printk(KERN_NOTICE ">>> %p %s\n", m, buf); + } + rcall = m->rcall; rbuf = m->rbuf; if (m->rpos > n) { @@ -740,6 +748,13 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, v9fs_set_tag(tc, n); + if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) { + char buf[150]; + + v9fs_printfcall(buf, sizeof(buf), tc, *m->extended); + printk(KERN_NOTICE "<<< %p %s\n", m, buf); + } + req->tag = n; req->tcall = tc; req->rcall = NULL; diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 12c9cc926b71..0cf2b840219d 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -51,7 +51,7 @@ * */ -int v9fs_dentry_delete(struct dentry *dentry) +static int v9fs_dentry_delete(struct dentry *dentry) { dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); return 1; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 651a9e14d9a9..e46bb5a82e1a 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -348,7 +348,7 @@ error: return ERR_PTR(err); } -struct inode * +static struct inode * v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid, struct super_block *sb) { -- cgit v1.2.3 From 16cce6d27ef52e00cc124196046bbae7150024c1 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Sat, 25 Mar 2006 03:07:26 -0800 Subject: [PATCH] v9fs: add extension field to Tcreate Implement a new way of creating special files. Instead of Tcreate+Twstat, add one more field to Tcreate that contains special file description. 
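
As a rough standalone illustration of the wire-format change (a sketch only, not the kernel code; tcreate_size and the sample names below are hypothetical), the optional extension string simply adds one more counted-string field to the Tcreate body when the session speaks 9P2000.u, mirroring the size arithmetic in the conv.c hunk below:

#include <stdio.h>
#include <string.h>

/*
 * Hypothetical helper (not in the kernel tree) mirroring the Tcreate
 * body-size arithmetic: fid[4] name[s] perm[4] mode[1], plus an
 * optional extension[s] counted string when the session is extended.
 */
static size_t tcreate_size(const char *name, const char *extension, int extended)
{
	size_t size = 4 + 2 + strlen(name) + 4 + 1;	/* fid[4] name[s] perm[4] mode[1] */

	if (extended && extension != NULL)
		size += 2 + strlen(extension);		/* extension[s] */

	return size;
}

int main(void)
{
	printf("legacy Tcreate body:   %zu bytes\n", tcreate_size("dev0", NULL, 0));
	printf("9P2000.u Tcreate body: %zu bytes\n", tcreate_size("dev0", "b 8 1", 1));
	return 0;
}

A legacy (non-extended) server never sees the extra field, which is why the encoder only appends it when the session negotiated the .u extension.
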
Signed-off-by: Latchesar Ionkov Signed-off-by: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/9p.c | 8 +++++--- fs/9p/9p.h | 3 ++- fs/9p/conv.c | 8 +++++++- fs/9p/conv.h | 3 ++- fs/9p/vfs_file.c | 32 +++++++++++++++----------------- fs/9p/vfs_inode.c | 47 +++++++++++++++++++---------------------------- 6 files changed, 50 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/9p/9p.c b/fs/9p/9p.c index ea2cf9692ff4..81027bc1f099 100644 --- a/fs/9p/9p.c +++ b/fs/9p/9p.c @@ -334,8 +334,8 @@ v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, */ int -v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, - u32 perm, u8 mode, struct v9fs_fcall **rcp) +v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm, + u8 mode, char *extension, struct v9fs_fcall **rcp) { int ret; struct v9fs_fcall *tc; @@ -343,7 +343,9 @@ v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", fid, name, perm, mode); - tc = v9fs_create_tcreate(fid, name, perm, mode); + tc = v9fs_create_tcreate(fid, name, perm, mode, extension, + v9ses->extended); + if (!IS_ERR(tc)) { ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); kfree(tc); diff --git a/fs/9p/9p.h b/fs/9p/9p.h index 1df9e69b794b..2bb89b4005a1 100644 --- a/fs/9p/9p.h +++ b/fs/9p/9p.h @@ -235,6 +235,7 @@ struct Tcreate { struct v9fs_str name; u32 perm; u8 mode; + struct v9fs_str extension; }; struct Rcreate { @@ -364,7 +365,7 @@ int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcall); int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, - u32 perm, u8 mode, struct v9fs_fcall **rcall); + u32 perm, u8 mode, char *extension, struct v9fs_fcall **rcall); int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count, struct v9fs_fcall **rcall); diff --git a/fs/9p/conv.c b/fs/9p/conv.c index bba817142465..b129079e5f32 100644 --- a/fs/9p/conv.c +++ b/fs/9p/conv.c @@ -666,7 +666,8 @@ struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode) return fc; } -struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode) +struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, + char *extension, int extended) { int size; struct v9fs_fcall *fc; @@ -674,6 +675,9 @@ struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode) struct cbuf *bufp = &buffer; size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */ + if (extended && extension!=NULL) + size += 2 + strlen(extension); /* extension[s] */ + fc = v9fs_create_common(bufp, size, TCREATE); if (IS_ERR(fc)) goto error; @@ -682,6 +686,8 @@ struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode) v9fs_put_str(bufp, name, &fc->params.tcreate.name); v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm); v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode); + if (extended) + v9fs_put_str(bufp, extension, &fc->params.tcreate.extension); if (buf_check_overflow(bufp)) { kfree(fc); diff --git a/fs/9p/conv.h b/fs/9p/conv.h index f5896628dae4..03cacdda7891 100644 --- a/fs/9p/conv.h +++ b/fs/9p/conv.h @@ -39,7 +39,8 @@ struct v9fs_fcall *v9fs_create_tflush(u16 oldtag); struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname, char **wnames); struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode); -struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode); +struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char 
*name, u32 perm, u8 mode, + char *extension, int extended); struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count); struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count, const char __user *data); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index de3a129698da..1144d59d6469 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -69,29 +69,30 @@ int v9fs_file_open(struct inode *inode, struct file *file) fid = v9fs_get_idpool(&v9ses->fidpool); if (fid < 0) { - eprintk(KERN_WARNING, "newfid fails!\n"); - return -ENOSPC; - } + eprintk(KERN_WARNING, "newfid fails!\n"); + return -ENOSPC; + } err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, NULL); if (err < 0) { - dprintk(DEBUG_ERROR, "rewalk didn't work\n"); + dprintk(DEBUG_ERROR, "rewalk didn't work\n"); goto put_fid; } - vfid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); - if (vfid == NULL) { - dprintk(DEBUG_ERROR, "out of memory\n"); - goto clunk_fid; - } - - /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */ - /* translate open mode appropriately */ + /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */ + /* translate open mode appropriately */ omode = v9fs_uflags2omode(file->f_flags); err = v9fs_t_open(v9ses, fid, omode, &fcall); if (err < 0) { PRINT_FCALL_ERROR("open failed", fcall); - goto destroy_vfid; + goto clunk_fid; + } + + vfid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); + if (vfid == NULL) { + dprintk(DEBUG_ERROR, "out of memory\n"); + err = -ENOMEM; + goto clunk_fid; } file->private_data = vfid; @@ -106,15 +107,12 @@ int v9fs_file_open(struct inode *inode, struct file *file) return 0; -destroy_vfid: - v9fs_fid_destroy(vfid); - clunk_fid: v9fs_t_clunk(v9ses, fid); put_fid: v9fs_put_idpool(fid, &v9ses->fidpool); - kfree(fcall); + kfree(fcall); return err; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index e46bb5a82e1a..dc67f83d0dae 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -255,8 +255,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) } static int -v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, - u32 perm, u8 mode, u32 *fidp, struct v9fs_qid *qid, u32 *iounit) +v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm, + u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit) { u32 fid; int err; @@ -271,14 +271,14 @@ v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall); if (err < 0) { PRINT_FCALL_ERROR("clone error", fcall); - goto error; + goto put_fid; } kfree(fcall); - err = v9fs_t_create(v9ses, fid, name, perm, mode, &fcall); + err = v9fs_t_create(v9ses, fid, name, perm, mode, extension, &fcall); if (err < 0) { PRINT_FCALL_ERROR("create fails", fcall); - goto error; + goto clunk_fid; } if (iounit) @@ -293,7 +293,11 @@ v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, kfree(fcall); return 0; -error: +clunk_fid: + v9fs_t_clunk(v9ses, fid); + fid = V9FS_NOFID; + +put_fid: if (fid >= 0) v9fs_put_idpool(fid, &v9ses->fidpool); @@ -474,7 +478,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, flags = O_RDWR; err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, - perm, v9fs_uflags2omode(flags), &fid, &qid, &iounit); + perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit); if (err) goto error; @@ -550,7 +554,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) perm = unixmode2p9mode(v9ses, mode | S_IFDIR); err = v9fs_create(v9ses, dfid->fid, 
(char *) dentry->d_name.name, - perm, V9FS_OREAD, &fid, NULL, NULL); + perm, V9FS_OREAD, NULL, &fid, NULL, NULL); if (err) { dprintk(DEBUG_ERROR, "create error %d\n", err); @@ -1008,11 +1012,13 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) /* copy extension buffer into buffer */ if (fcall->params.rstat.stat.extension.len < buflen) - buflen = fcall->params.rstat.stat.extension.len; + buflen = fcall->params.rstat.stat.extension.len + 1; - memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1); + memmove(buffer, fcall->params.rstat.stat.extension.str, buflen - 1); buffer[buflen-1] = 0; + dprintk(DEBUG_ERROR, "%s -> %.*s (%s)\n", dentry->d_name.name, fcall->params.rstat.stat.extension.len, + fcall->params.rstat.stat.extension.str, buffer); retval = buflen; FreeFcall: @@ -1072,7 +1078,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) if (!link) link = ERR_PTR(-ENOMEM); else { - len = v9fs_readlink(dentry, link, strlen(link)); + len = v9fs_readlink(dentry, link, PATH_MAX); if (len < 0) { __putname(link); @@ -1109,10 +1115,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, struct v9fs_session_info *v9ses; struct v9fs_fid *dfid, *vfid; struct inode *inode; - struct v9fs_fcall *fcall; - struct v9fs_wstat wstat; - fcall = NULL; inode = NULL; vfid = NULL; v9ses = v9fs_inode2v9ses(dir); @@ -1125,7 +1128,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, } err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, - perm, V9FS_OREAD, &fid, NULL, NULL); + perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL); if (err) goto error; @@ -1148,23 +1151,11 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, goto error; } - /* issue a Twstat */ - v9fs_blank_wstat(&wstat); - wstat.muid = v9ses->name; - wstat.extension = (char *) extension; - err = v9fs_t_wstat(v9ses, vfid->fid, &wstat, &fcall); - if (err < 0) { - PRINT_FCALL_ERROR("wstat error", fcall); - goto error; - } - - kfree(fcall); dentry->d_op = &v9fs_dentry_operations; d_instantiate(dentry, inode); return 0; error: - kfree(fcall); if (vfid) v9fs_fid_destroy(vfid); @@ -1224,7 +1215,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, } name = __getname(); - sprintf(name, "hardlink(%d)\n", oldfid->fid); + sprintf(name, "%d\n", oldfid->fid); retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name); __putname(name); -- cgit v1.2.3 From c0291a05f8e6a72c9807b0e2a369ee82bec659c3 Mon Sep 17 00:00:00 2001 From: Eugene Teo Date: Sat, 25 Mar 2006 03:07:27 -0800 Subject: [PATCH] v9fs: fix vfs_inode dereference before NULL check __getname, which in turn will call kmem_cache_alloc, may return NULL. 
Coverity bug #977 Signed-off-by: Eugene Teo Signed-off-by: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/vfs_inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index dc67f83d0dae..4cbfb714c6e4 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1244,6 +1244,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) return -EINVAL; name = __getname(); + if (!name) + return -ENOMEM; /* build extension */ if (S_ISBLK(mode)) sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev)); -- cgit v1.2.3 From 42e8c509cfa3d92b3dcbfe95edf6be00e5d4b0eb Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Sat, 25 Mar 2006 03:07:28 -0800 Subject: [PATCH] v9fs: update license boilerplate Update license boilerplate to specify GPLv2 and remove the (at your option clause). This change was agreed to by all the copyright holders (approvals can be found on v9fs-developer mailing list). Signed-off-by: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/9p.c | 5 ++--- fs/9p/9p.h | 5 ++--- fs/9p/conv.c | 5 ++--- fs/9p/conv.h | 5 ++--- fs/9p/debug.h | 5 ++--- fs/9p/error.c | 5 ++--- fs/9p/error.h | 5 ++--- fs/9p/fcprint.c | 5 ++--- fs/9p/fid.c | 5 ++--- fs/9p/fid.h | 5 ++--- fs/9p/mux.c | 5 ++--- fs/9p/mux.h | 5 ++--- fs/9p/trans_fd.c | 6 ++---- fs/9p/transport.h | 5 ++--- fs/9p/v9fs.c | 5 ++--- fs/9p/v9fs.h | 5 ++--- fs/9p/v9fs_vfs.h | 5 ++--- fs/9p/vfs_addr.c | 5 ++--- fs/9p/vfs_dentry.c | 5 ++--- fs/9p/vfs_dir.c | 5 ++--- fs/9p/vfs_file.c | 5 ++--- fs/9p/vfs_inode.c | 5 ++--- fs/9p/vfs_super.c | 5 ++--- 23 files changed, 46 insertions(+), 70 deletions(-) (limited to 'fs') diff --git a/fs/9p/9p.c b/fs/9p/9p.c index 81027bc1f099..552de120bbd5 100644 --- a/fs/9p/9p.c +++ b/fs/9p/9p.c @@ -8,9 +8,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/9p.h b/fs/9p/9p.h index 2bb89b4005a1..94e2f92ab2e8 100644 --- a/fs/9p/9p.h +++ b/fs/9p/9p.h @@ -8,9 +8,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/conv.c b/fs/9p/conv.c index b129079e5f32..a767e05b60bf 100644 --- a/fs/9p/conv.c +++ b/fs/9p/conv.c @@ -8,9 +8,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
+ * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/conv.h b/fs/9p/conv.h index 03cacdda7891..dd5b6b1b610f 100644 --- a/fs/9p/conv.h +++ b/fs/9p/conv.h @@ -8,9 +8,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/debug.h b/fs/9p/debug.h index ff54a7b0b232..4228c0bb3c32 100644 --- a/fs/9p/debug.h +++ b/fs/9p/debug.h @@ -5,9 +5,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/error.c b/fs/9p/error.c index e4b6f8f38b6f..981fe8ecd780 100644 --- a/fs/9p/error.c +++ b/fs/9p/error.c @@ -11,9 +11,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/error.h b/fs/9p/error.h index a9794e85fe51..5f3ca522b316 100644 --- a/fs/9p/error.h +++ b/fs/9p/error.h @@ -12,9 +12,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c index fc8a98437b73..583e827baebd 100644 --- a/fs/9p/fcprint.c +++ b/fs/9p/fcprint.c @@ -6,9 +6,8 @@ * Copyright (C) 2005 by Latchesar Ionkov * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/fid.c b/fs/9p/fid.c index c4d13bf904d2..b7608af07ce8 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -4,9 +4,8 @@ * Copyright (C) 2005, 2006 by Eric Van Hensbergen * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 1fc2dd08d75a..aa974d6875c3 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -4,9 +4,8 @@ * Copyright (C) 2005 by Eric Van Hensbergen * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/mux.c b/fs/9p/mux.c index d16f4f488fda..3e5b124a7212 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -7,9 +7,8 @@ * Copyright (C) 2004-2005 by Latchesar Ionkov * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/mux.h b/fs/9p/mux.h index 17144fdfa11b..e90bfd32ea42 100644 --- a/fs/9p/mux.h +++ b/fs/9p/mux.h @@ -7,9 +7,8 @@ * Copyright (C) 2004 by Eric Van Hensbergen * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c index 8029844d6b28..94e0a7fd9fc2 100644 --- a/fs/9p/trans_fd.c +++ b/fs/9p/trans_fd.c @@ -7,12 +7,10 @@ * Copyright (C) 2004-2005 by Latchesar Ionkov * Copyright (C) 2004-2005 by Eric Van Hensbergen * Copyright (C) 1997-2002 by Ron Minnich - * Copyright (C) 1995, 1996 by Olaf Kirch * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
+ * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/transport.h b/fs/9p/transport.h index 91fcdb94b361..b38a4b8a41ce 100644 --- a/fs/9p/transport.h +++ b/fs/9p/transport.h @@ -7,9 +7,8 @@ * Copyright (C) 2004 by Eric Van Hensbergen * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index daf623cd61c5..b633433f9ced 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -7,9 +7,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 9f63ab8106dd..c134d104cb28 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -5,9 +5,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index a759278acaae..43c9f7de0314 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -5,9 +5,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 8100fb5171b7..efda46fb64d9 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -7,9 +7,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
+ * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 0cf2b840219d..062daa6000ab 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -7,9 +7,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index cd5eeb032d64..766f11f1215c 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -7,9 +7,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 1144d59d6469..59e744163407 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -7,9 +7,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 4cbfb714c6e4..133db366d306 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -7,9 +7,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index d05318fa684e..083ed5af8a1e 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -8,9 +8,8 @@ * Copyright (C) 2002 by Ron Minnich * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
+ * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of -- cgit v1.2.3 From 67543e508d74ad1a8e80290580c9d1440beba4d9 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Sat, 25 Mar 2006 03:07:29 -0800 Subject: [PATCH] 9p: fix name consistency problems There were a number of conflicting naming schemes used in the v9fs project. The directory was fs/9p, but MAINTAINERS and Documentation referred to v9fs. The module name itself was 9p2000, and the file system type was 9P. This patch attempts to clean that up, changing all references to 9p in order to match the directory name. We'll also start using 9p instead of v9fs as our patch prefix. There is also a minor consistency cleanup in the options changing the name option to uname in order to more closely match the Plan 9 options. Signed-off-by: Eric Van Hensbergevan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/9p.txt | 99 +++++++++ Documentation/filesystems/v9fs.txt | 99 --------- MAINTAINERS | 24 +-- fs/9p/9p.c | 431 ------------------------------------- fs/9p/Makefile | 6 +- fs/9p/fcall.c | 430 ++++++++++++++++++++++++++++++++++++ fs/9p/v9fs.c | 8 +- fs/9p/vfs_super.c | 2 +- 8 files changed, 549 insertions(+), 550 deletions(-) create mode 100644 Documentation/filesystems/9p.txt delete mode 100644 Documentation/filesystems/v9fs.txt delete mode 100644 fs/9p/9p.c create mode 100644 fs/9p/fcall.c (limited to 'fs') diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt new file mode 100644 index 000000000000..24c7a9c41f0d --- /dev/null +++ b/Documentation/filesystems/9p.txt @@ -0,0 +1,99 @@ + V9FS: 9P2000 for Linux + ====================== + +ABOUT +===== + +v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol. + +This software was originally developed by Ron Minnich +and Maya Gokhale . Additional development by Greg Watson + and most recently Eric Van Hensbergen + and Latchesar Ionkov . + +USAGE +===== + +For remote file server: + + mount -t 9P 10.10.1.2 /mnt/9 + +For Plan 9 From User Space applications (http://swtch.com/plan9) + + mount -t 9P `namespace`/acme /mnt/9 -o proto=unix,name=$USER + +OPTIONS +======= + + proto=name select an alternative transport. Valid options are + currently: + unix - specifying a named pipe mount point + tcp - specifying a normal TCP/IP connection + fd - used passed file descriptors for connection + (see rfdno and wfdno) + + name=name user name to attempt mount as on the remote server. The + server may override or ignore this value. Certain user + names may require authentication. + + aname=name aname specifies the file tree to access when the server is + offering several exported file systems. + + debug=n specifies debug level. The debug level is a bitmask. 
+ 0x01 = display verbose error messages + 0x02 = developer debug (DEBUG_CURRENT) + 0x04 = display 9P trace + 0x08 = display VFS trace + 0x10 = display Marshalling debug + 0x20 = display RPC debug + 0x40 = display transport debug + 0x80 = display allocation debug + + rfdno=n the file descriptor for reading with proto=fd + + wfdno=n the file descriptor for writing with proto=fd + + maxdata=n the number of bytes to use for 9P packet payload (msize) + + port=n port to connect to on the remote server + + noextend force legacy mode (no 9P2000.u semantics) + + uid attempt to mount as a particular uid + + gid attempt to mount with a particular gid + + afid security channel - used by Plan 9 authentication protocols + + nodevmap do not map special files - represent them as normal files. + This can be used to share devices/named pipes/sockets between + hosts. This functionality will be expanded in later versions. + +RESOURCES +========= + +The Linux version of the 9P server is now maintained under the npfs project +on sourceforge (http://sourceforge.net/projects/npfs). + +There are user and developer mailing lists available through the v9fs project +on sourceforge (http://sourceforge.net/projects/v9fs). + +News and other information is maintained on SWiK (http://swik.net/v9fs). + +Bug reports may be issued through the kernel.org bugzilla +(http://bugzilla.kernel.org) + +For more information on the Plan 9 Operating System check out +http://plan9.bell-labs.com/plan9 + +For information on Plan 9 from User Space (Plan 9 applications and libraries +ported to Linux/BSD/OSX/etc) check out http://swtch.com/plan9 + + +STATUS +====== + +The 2.6 kernel support is working on PPC and x86. + +PLEASE USE THE SOURCEFORGE BUG-TRACKER TO REPORT PROBLEMS. + diff --git a/Documentation/filesystems/v9fs.txt b/Documentation/filesystems/v9fs.txt deleted file mode 100644 index 24c7a9c41f0d..000000000000 --- a/Documentation/filesystems/v9fs.txt +++ /dev/null @@ -1,99 +0,0 @@ - V9FS: 9P2000 for Linux - ====================== - -ABOUT -===== - -v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol. - -This software was originally developed by Ron Minnich -and Maya Gokhale . Additional development by Greg Watson - and most recently Eric Van Hensbergen - and Latchesar Ionkov . - -USAGE -===== - -For remote file server: - - mount -t 9P 10.10.1.2 /mnt/9 - -For Plan 9 From User Space applications (http://swtch.com/plan9) - - mount -t 9P `namespace`/acme /mnt/9 -o proto=unix,name=$USER - -OPTIONS -======= - - proto=name select an alternative transport. Valid options are - currently: - unix - specifying a named pipe mount point - tcp - specifying a normal TCP/IP connection - fd - used passed file descriptors for connection - (see rfdno and wfdno) - - name=name user name to attempt mount as on the remote server. The - server may override or ignore this value. Certain user - names may require authentication. - - aname=name aname specifies the file tree to access when the server is - offering several exported file systems. - - debug=n specifies debug level. The debug level is a bitmask. 
- 0x01 = display verbose error messages - 0x02 = developer debug (DEBUG_CURRENT) - 0x04 = display 9P trace - 0x08 = display VFS trace - 0x10 = display Marshalling debug - 0x20 = display RPC debug - 0x40 = display transport debug - 0x80 = display allocation debug - - rfdno=n the file descriptor for reading with proto=fd - - wfdno=n the file descriptor for writing with proto=fd - - maxdata=n the number of bytes to use for 9P packet payload (msize) - - port=n port to connect to on the remote server - - noextend force legacy mode (no 9P2000.u semantics) - - uid attempt to mount as a particular uid - - gid attempt to mount with a particular gid - - afid security channel - used by Plan 9 authentication protocols - - nodevmap do not map special files - represent them as normal files. - This can be used to share devices/named pipes/sockets between - hosts. This functionality will be expanded in later versions. - -RESOURCES -========= - -The Linux version of the 9P server is now maintained under the npfs project -on sourceforge (http://sourceforge.net/projects/npfs). - -There are user and developer mailing lists available through the v9fs project -on sourceforge (http://sourceforge.net/projects/v9fs). - -News and other information is maintained on SWiK (http://swik.net/v9fs). - -Bug reports may be issued through the kernel.org bugzilla -(http://bugzilla.kernel.org) - -For more information on the Plan 9 Operating System check out -http://plan9.bell-labs.com/plan9 - -For information on Plan 9 from User Space (Plan 9 applications and libraries -ported to Linux/BSD/OSX/etc) check out http://swtch.com/plan9 - - -STATUS -====== - -The 2.6 kernel support is working on PPC and x86. - -PLEASE USE THE SOURCEFORGE BUG-TRACKER TO REPORT PROBLEMS. - diff --git a/MAINTAINERS b/MAINTAINERS index 5b6a0145505d..04877a3a20bd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -147,6 +147,18 @@ M: p_gortmaker@yahoo.com L: netdev@vger.kernel.org S: Maintained +9P FILE SYSTEM +P: Eric Van Hensbergen +M: ericvh@gmail.com +P: Ron Minnich +M: rminnich@lanl.gov +P: Latchesar Ionkov +M: lucho@ionkov.net +L: v9fs-developer@lists.sourceforge.net +W: http://v9fs.sf.net +T: git kernel.org:/pub/scm/linux/kernel/ericvh/v9fs.git +S: Maintained + A2232 SERIAL BOARD DRIVER P: Enver Haase M: ehaase@inf.fu-berlin.de @@ -2979,18 +2991,6 @@ L: rio500-users@lists.sourceforge.net W: http://rio500.sourceforge.net S: Maintained -V9FS FILE SYSTEM -P: Eric Van Hensbergen -M: ericvh@gmail.com -P: Ron Minnich -M: rminnich@lanl.gov -P: Latchesar Ionkov -M: lucho@ionkov.net -L: v9fs-developer@lists.sourceforge.net -W: http://v9fs.sf.net -T: git kernel.org:/pub/scm/linux/kernel/ericvh/v9fs-devel.git -S: Maintained - VIDEO FOR LINUX P: Mauro Carvalho Chehab M: mchehab@infradead.org diff --git a/fs/9p/9p.c b/fs/9p/9p.c deleted file mode 100644 index 552de120bbd5..000000000000 --- a/fs/9p/9p.c +++ /dev/null @@ -1,431 +0,0 @@ -/* - * linux/fs/9p/9p.c - * - * This file contains functions to perform synchronous 9P calls - * - * Copyright (C) 2004 by Latchesar Ionkov - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 2002 by Ron Minnich - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include -#include -#include -#include -#include - -#include "debug.h" -#include "v9fs.h" -#include "9p.h" -#include "conv.h" -#include "mux.h" - -/** - * v9fs_t_version - negotiate protocol parameters with sever - * @v9ses: 9P2000 session information - * @msize: requested max size packet - * @version: requested version.extension string - * @fcall: pointer to response fcall pointer - * - */ - -int -v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, - char *version, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version); - tc = v9fs_create_tversion(msize, version); - - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_attach - mount the server - * @v9ses: 9P2000 session information - * @uname: user name doing the attach - * @aname: remote name being attached to - * @fid: mount fid to attatch to root node - * @afid: authentication fid (in this case result key) - * @fcall: pointer to response fcall pointer - * - */ - -int -v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, - u32 fid, u32 afid, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall* tc; - - dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname, - aname, fid, afid); - - tc = v9fs_create_tattach(fid, afid, uname, aname); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc, - struct v9fs_fcall *rc, int err) -{ - int fid; - struct v9fs_session_info *v9ses; - - if (err) - return; - - fid = tc->params.tclunk.fid; - kfree(tc); - - if (!rc) - return; - - v9ses = a; - if (rc->id == RCLUNK) - v9fs_put_idpool(fid, &v9ses->fidpool); - - kfree(rc); -} - -/** - * v9fs_t_clunk - release a fid (finish a transaction) - * @v9ses: 9P2000 session information - * @fid: fid to release - * @fcall: pointer to response fcall pointer - * - */ - -int -v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid) -{ - int ret; - struct v9fs_fcall *tc, *rc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - rc = NULL; - tc = v9fs_create_tclunk(fid); - if (!IS_ERR(tc)) - ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - else - ret = PTR_ERR(tc); - - if (ret) - dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret); - - v9fs_t_clunk_cb(v9ses, tc, rc, ret); - return ret; -} - -#if 0 -/** - * v9fs_v9fs_t_flush - flush a pending transaction - * @v9ses: 9P2000 session information - * @tag: tag to release - * - */ - -int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "oldtag %d\n", oldtag); - - tc = v9fs_create_tflush(oldtag); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, NULL); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} -#endif /* 0 */ - -/** - * v9fs_t_stat - read a file's meta-data - * @v9ses: 9P2000 session information - * @fid: fid pointing to file or directory to get info about - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - 
dprintk(DEBUG_9P, "fid %d\n", fid); - - ret = -ENOMEM; - tc = v9fs_create_tstat(fid); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_wstat - write a file's meta-data - * @v9ses: 9P2000 session information - * @fid: fid pointing to file or directory to write info about - * @stat: metadata - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_wstat *wstat, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - tc = v9fs_create_twstat(fid, wstat, v9ses->extended); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_walk - walk a fid to a new file or directory - * @v9ses: 9P2000 session information - * @fid: fid to walk - * @newfid: new fid (for clone operations) - * @name: path to walk fid to - * @fcall: pointer to response fcall - * - */ - -/* TODO: support multiple walk */ - -int -v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, - char *name, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - int nwname; - - dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name); - - if (name) - nwname = 1; - else - nwname = 0; - - tc = v9fs_create_twalk(fid, newfid, nwname, &name); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_open - open a file - * - * @v9ses - 9P2000 session information - * @fid - fid to open - * @mode - mode to open file (R, RW, etc) - * @fcall - pointer to response fcall - * - */ - -int -v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, - struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode); - - tc = v9fs_create_topen(fid, mode); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_remove - remove a file or directory - * @v9ses: 9P2000 session information - * @fid: fid to remove - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - tc = v9fs_create_tremove(fid); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_create - create a file or directory - * @v9ses: 9P2000 session information - * @fid: fid to create - * @name: name of the file or directory to create - * @perm: permissions to create with - * @mode: mode to open file (R, RW, etc) - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm, - u8 mode, char *extension, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", - fid, name, perm, mode); - - tc = v9fs_create_tcreate(fid, name, perm, mode, extension, - v9ses->extended); - - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_read - read data - * @v9ses: 9P2000 session information - * @fid: fid to read from - * @offset: offset to start read at - * @count: how many bytes to 
read - * @fcall: pointer to response fcall (with data) - * - */ - -int -v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, - u32 count, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc, *rc; - - dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, - (long long unsigned) offset, count); - - tc = v9fs_create_tread(fid, offset, count); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - if (!ret) - ret = rc->params.rread.count; - if (rcp) - *rcp = rc; - else - kfree(rc); - - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_write - write data - * @v9ses: 9P2000 session information - * @fid: fid to write to - * @offset: offset to start write at - * @count: how many bytes to write - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count, - const char __user *data, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc, *rc; - - dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, - (long long unsigned) offset, count); - - tc = v9fs_create_twrite(fid, offset, count, data); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - - if (!ret) - ret = rc->params.rwrite.count; - if (rcp) - *rcp = rc; - else - kfree(rc); - - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - diff --git a/fs/9p/Makefile b/fs/9p/Makefile index 12d52421d58b..87897f84dfb6 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -1,9 +1,9 @@ -obj-$(CONFIG_9P_FS) := 9p2000.o +obj-$(CONFIG_9P_FS) := 9p.o -9p2000-objs := \ +9p-objs := \ trans_fd.o \ mux.o \ - 9p.o \ + fcall.o \ conv.o \ vfs_super.o \ vfs_inode.o \ diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c new file mode 100644 index 000000000000..71742ba150c4 --- /dev/null +++ b/fs/9p/fcall.c @@ -0,0 +1,430 @@ +/* + * linux/fs/9p/fcall.c + * + * This file contains functions to perform synchronous 9P calls + * + * Copyright (C) 2004 by Latchesar Ionkov + * Copyright (C) 2004 by Eric Van Hensbergen + * Copyright (C) 2002 by Ron Minnich + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include +#include +#include +#include +#include + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "conv.h" +#include "mux.h" + +/** + * v9fs_t_version - negotiate protocol parameters with sever + * @v9ses: 9P2000 session information + * @msize: requested max size packet + * @version: requested version.extension string + * @fcall: pointer to response fcall pointer + * + */ + +int +v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, + char *version, struct v9fs_fcall **rcp) +{ + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version); + tc = v9fs_create_tversion(msize, version); + + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +/** + * v9fs_t_attach - mount the server + * @v9ses: 9P2000 session information + * @uname: user name doing the attach + * @aname: remote name being attached to + * @fid: mount fid to attatch to root node + * @afid: authentication fid (in this case result key) + * @fcall: pointer to response fcall pointer + * + */ + +int +v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, + u32 fid, u32 afid, struct v9fs_fcall **rcp) +{ + int ret; + struct v9fs_fcall* tc; + + dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname, + aname, fid, afid); + + tc = v9fs_create_tattach(fid, afid, uname, aname); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc, + struct v9fs_fcall *rc, int err) +{ + int fid; + struct v9fs_session_info *v9ses; + + if (err) + return; + + fid = tc->params.tclunk.fid; + kfree(tc); + + if (!rc) + return; + + v9ses = a; + if (rc->id == RCLUNK) + v9fs_put_idpool(fid, &v9ses->fidpool); + + kfree(rc); +} + +/** + * v9fs_t_clunk - release a fid (finish a transaction) + * @v9ses: 9P2000 session information + * @fid: fid to release + * @fcall: pointer to response fcall pointer + * + */ + +int +v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid) +{ + int ret; + struct v9fs_fcall *tc, *rc; + + dprintk(DEBUG_9P, "fid %d\n", fid); + + rc = NULL; + tc = v9fs_create_tclunk(fid); + if (!IS_ERR(tc)) + ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); + else + ret = PTR_ERR(tc); + + if (ret) + dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret); + + v9fs_t_clunk_cb(v9ses, tc, rc, ret); + return ret; +} + +#if 0 +/** + * v9fs_v9fs_t_flush - flush a pending transaction + * @v9ses: 9P2000 session information + * @tag: tag to release + * + */ +int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag) +{ + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "oldtag %d\n", oldtag); + + tc = v9fs_create_tflush(oldtag); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, NULL); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} +#endif + +/** + * v9fs_t_stat - read a file's meta-data + * @v9ses: 9P2000 session information + * @fid: fid pointing to file or directory to get info about + * @fcall: pointer to response fcall + * + */ + +int +v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp) +{ + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "fid %d\n", fid); + + ret = -ENOMEM; + tc = 
v9fs_create_tstat(fid); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +/** + * v9fs_t_wstat - write a file's meta-data + * @v9ses: 9P2000 session information + * @fid: fid pointing to file or directory to write info about + * @stat: metadata + * @fcall: pointer to response fcall + * + */ + +int +v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, + struct v9fs_wstat *wstat, struct v9fs_fcall **rcp) +{ + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "fid %d\n", fid); + + tc = v9fs_create_twstat(fid, wstat, v9ses->extended); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +/** + * v9fs_t_walk - walk a fid to a new file or directory + * @v9ses: 9P2000 session information + * @fid: fid to walk + * @newfid: new fid (for clone operations) + * @name: path to walk fid to + * @fcall: pointer to response fcall + * + */ + +/* TODO: support multiple walk */ + +int +v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, + char *name, struct v9fs_fcall **rcp) +{ + int ret; + struct v9fs_fcall *tc; + int nwname; + + dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name); + + if (name) + nwname = 1; + else + nwname = 0; + + tc = v9fs_create_twalk(fid, newfid, nwname, &name); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +/** + * v9fs_t_open - open a file + * + * @v9ses - 9P2000 session information + * @fid - fid to open + * @mode - mode to open file (R, RW, etc) + * @fcall - pointer to response fcall + * + */ + +int +v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, + struct v9fs_fcall **rcp) +{ + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode); + + tc = v9fs_create_topen(fid, mode); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +/** + * v9fs_t_remove - remove a file or directory + * @v9ses: 9P2000 session information + * @fid: fid to remove + * @fcall: pointer to response fcall + * + */ + +int +v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, + struct v9fs_fcall **rcp) +{ + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "fid %d\n", fid); + + tc = v9fs_create_tremove(fid); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +/** + * v9fs_t_create - create a file or directory + * @v9ses: 9P2000 session information + * @fid: fid to create + * @name: name of the file or directory to create + * @perm: permissions to create with + * @mode: mode to open file (R, RW, etc) + * @fcall: pointer to response fcall + * + */ + +int +v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm, + u8 mode, char *extension, struct v9fs_fcall **rcp) +{ + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", + fid, name, perm, mode); + + tc = v9fs_create_tcreate(fid, name, perm, mode, extension, + v9ses->extended); + + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +/** + * v9fs_t_read - read data + * @v9ses: 9P2000 session information + * @fid: fid to read from + * @offset: offset to start read at + * @count: how many bytes to read + * @fcall: pointer to response fcall (with data) + * + */ + 
+int +v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, + u32 count, struct v9fs_fcall **rcp) +{ + int ret; + struct v9fs_fcall *tc, *rc; + + dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, + (long long unsigned) offset, count); + + tc = v9fs_create_tread(fid, offset, count); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); + if (!ret) + ret = rc->params.rread.count; + if (rcp) + *rcp = rc; + else + kfree(rc); + + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +/** + * v9fs_t_write - write data + * @v9ses: 9P2000 session information + * @fid: fid to write to + * @offset: offset to start write at + * @count: how many bytes to write + * @fcall: pointer to response fcall + * + */ + +int +v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count, + const char __user *data, struct v9fs_fcall **rcp) +{ + int ret; + struct v9fs_fcall *tc, *rc; + + dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, + (long long unsigned) offset, count); + + tc = v9fs_create_twrite(fid, offset, count, data); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); + + if (!ret) + ret = rc->params.rwrite.count; + if (rcp) + *rcp = rc; + else + kfree(rc); + + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index b633433f9ced..d37416eb5791 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -50,7 +50,7 @@ enum { Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug, Opt_rfdno, Opt_wfdno, /* String options */ - Opt_name, Opt_remotename, + Opt_uname, Opt_remotename, /* Options that take no arguments */ Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, /* Error token */ @@ -66,7 +66,7 @@ static match_table_t tokens = { {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, {Opt_debug, "debug=%x"}, - {Opt_name, "name=%s"}, + {Opt_uname, "uname=%s"}, {Opt_remotename, "aname=%s"}, {Opt_unix, "proto=unix"}, {Opt_tcp, "proto=tcp"}, @@ -115,7 +115,7 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses) if (!*p) continue; token = match_token(p, tokens, args); - if (token < Opt_name) { + if (token < Opt_uname) { if ((ret = match_int(&args[0], &option)) < 0) { dprintk(DEBUG_ERROR, "integer field, but no integer?\n"); @@ -157,7 +157,7 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses) case Opt_fd: v9ses->proto = PROTO_FD; break; - case Opt_name: + case Opt_uname: match_strcpy(v9ses->name, &args[0]); break; case Opt_remotename: diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 083ed5af8a1e..b0a0ae509c00 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -261,7 +261,7 @@ static struct super_operations v9fs_super_ops = { }; struct file_system_type v9fs_fs_type = { - .name = "9P", + .name = "9p", .get_sb = v9fs_get_sb, .kill_sb = v9fs_kill_super, .owner = THIS_MODULE, -- cgit v1.2.3 From 58bf6a2db2a4a1b41712674d9165510180259dec Mon Sep 17 00:00:00 2001 From: Kirk True Date: Sat, 25 Mar 2006 03:07:30 -0800 Subject: [PATCH] smbfs: Fix debug logging-only compilation error When SMBFS_DEBUG_VERBOSE is #define-d, the compile breaks: fs/smbfs/inode.c:217: error: aggregate value used where an integer was expected This is a simple matter of using the .tv_sec attribute of struct time_spec. 
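
To make the type mismatch concrete, here is a minimal userspace sketch (ordinary C with <time.h>, not the smbfs code itself): passing the whole struct timespec to a %ld conversion is rejected by the compiler, while passing its tv_sec member compiles and prints as intended.

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec mtime = { .tv_sec = 1143264000, .tv_nsec = 0 };

	/* printf("%ld\n", mtime); */	/* aggregate passed for %ld: compile error */
	printf("mtime = %ld\n", (long) mtime.tv_sec);	/* pass the scalar member */
	return 0;
}
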
Signed-off-by: Kirk True Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/smbfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 44ed1d418b46..fdeabc0a34f7 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -217,7 +217,7 @@ smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr) if (inode->i_mtime.tv_sec != last_time || inode->i_size != last_sz) { VERBOSE("%ld changed, old=%ld, new=%ld, oz=%ld, nz=%ld\n", inode->i_ino, - (long) last_time, (long) inode->i_mtime, + (long) last_time, (long) inode->i_mtime.tv_sec, (long) last_sz, (long) inode->i_size); if (!S_ISDIR(inode->i_mode)) -- cgit v1.2.3 From f348d70a324e15afc701a494f32ec468abb7d1eb Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Sat, 25 Mar 2006 03:07:39 -0800 Subject: [PATCH] POLLRDHUP/EPOLLRDHUP handling for half-closed devices notifications Implement the half-closed devices notification, by adding a new POLLRDHUP (and its alias EPOLLRDHUP) bit to the existing poll/select sets. Since there were concerns about changing the existing POLLHUP handling, which does not correctly report half-closed devices, this implementation leaves the current POLLHUP reporting unchanged and simply adds a new bit that is set in the few places where it makes sense. The same thing was discussed and conceptually agreed quite some time ago: http://lkml.org/lkml/2003/7/12/116 Since this new event bit is added to the existing Linux poll infrastructure, even the existing poll/select system calls will be able to use it. As for the existing POLLHUP handling, the patch leaves it as is. The pollrdhup-2.6.16.rc5-0.10.diff defines POLLRDHUP for all the existing archs and sets the bit in the six relevant files. The other attached diff is the simple change required to sys/epoll.h to add the EPOLLRDHUP definition. There is "a stupid program" to test POLLRDHUP delivery here: http://www.xmailserver.org/pollrdhup-test.c It tests poll(2), but since the delivery is the same, epoll(2) will work equally well. Signed-off-by: Davide Libenzi Cc: "David S.
Miller" Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 4 ++-- include/asm-alpha/poll.h | 2 ++ include/asm-arm/poll.h | 1 + include/asm-arm26/poll.h | 1 + include/asm-cris/poll.h | 1 + include/asm-frv/poll.h | 1 + include/asm-h8300/poll.h | 1 + include/asm-i386/poll.h | 1 + include/asm-ia64/poll.h | 1 + include/asm-m32r/poll.h | 1 + include/asm-m68k/poll.h | 1 + include/asm-mips/poll.h | 1 + include/asm-parisc/poll.h | 1 + include/asm-powerpc/poll.h | 1 + include/asm-s390/poll.h | 1 + include/asm-sh/poll.h | 1 + include/asm-sh64/poll.h | 1 + include/asm-sparc/poll.h | 1 + include/asm-sparc64/poll.h | 1 + include/asm-v850/poll.h | 1 + include/asm-x86_64/poll.h | 1 + include/asm-xtensa/poll.h | 1 + net/bluetooth/af_bluetooth.c | 3 +++ net/core/datagram.c | 2 ++ net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 2 +- net/sctp/socket.c | 2 ++ net/unix/af_unix.c | 2 ++ 28 files changed, 35 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1c2b16fda13a..a0f682cdd03e 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -599,7 +599,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) switch (op) { case EPOLL_CTL_ADD: if (!epi) { - epds.events |= POLLERR | POLLHUP; + epds.events |= POLLERR | POLLHUP | POLLRDHUP; error = ep_insert(ep, &epds, tfile, fd); } else @@ -613,7 +613,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) break; case EPOLL_CTL_MOD: if (epi) { - epds.events |= POLLERR | POLLHUP; + epds.events |= POLLERR | POLLHUP | POLLRDHUP; error = ep_modify(ep, epi, &epds); } else error = -ENOENT; diff --git a/include/asm-alpha/poll.h b/include/asm-alpha/poll.h index 34f333b762a0..95707182b3ed 100644 --- a/include/asm-alpha/poll.h +++ b/include/asm-alpha/poll.h @@ -13,6 +13,8 @@ #define POLLWRBAND (1 << 9) #define POLLMSG (1 << 10) #define POLLREMOVE (1 << 11) +#define POLLRDHUP (1 << 12) + struct pollfd { int fd; diff --git a/include/asm-arm/poll.h b/include/asm-arm/poll.h index 2744ca831f5d..5030b2b232a3 100644 --- a/include/asm-arm/poll.h +++ b/include/asm-arm/poll.h @@ -16,6 +16,7 @@ #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-arm26/poll.h b/include/asm-arm26/poll.h index fdfdab064a65..9ccb7f4190ca 100644 --- a/include/asm-arm26/poll.h +++ b/include/asm-arm26/poll.h @@ -15,6 +15,7 @@ #define POLLWRNORM 0x0100 #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-cris/poll.h b/include/asm-cris/poll.h index 1c0efc3e4be7..1b25d4cf498c 100644 --- a/include/asm-cris/poll.h +++ b/include/asm-cris/poll.h @@ -15,6 +15,7 @@ #define POLLWRBAND 512 #define POLLMSG 1024 #define POLLREMOVE 4096 +#define POLLRDHUP 8192 struct pollfd { int fd; diff --git a/include/asm-frv/poll.h b/include/asm-frv/poll.h index 8cbcd60e334f..c8fe8801d075 100644 --- a/include/asm-frv/poll.h +++ b/include/asm-frv/poll.h @@ -12,6 +12,7 @@ #define POLLRDBAND 128 #define POLLWRBAND 256 #define POLLMSG 0x0400 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-h8300/poll.h b/include/asm-h8300/poll.h index bf49ab8ad6da..fc52103b276a 100644 --- a/include/asm-h8300/poll.h +++ b/include/asm-h8300/poll.h @@ -12,6 +12,7 @@ #define POLLRDBAND 128 #define POLLWRBAND 256 #define POLLMSG 0x0400 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-i386/poll.h 
b/include/asm-i386/poll.h index aecc80a15d36..2cd4929abd40 100644 --- a/include/asm-i386/poll.h +++ b/include/asm-i386/poll.h @@ -16,6 +16,7 @@ #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-ia64/poll.h b/include/asm-ia64/poll.h index 160258a0528d..bcaf9f140242 100644 --- a/include/asm-ia64/poll.h +++ b/include/asm-ia64/poll.h @@ -21,6 +21,7 @@ #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-m32r/poll.h b/include/asm-m32r/poll.h index 43b7acf732d5..9e0e700e727c 100644 --- a/include/asm-m32r/poll.h +++ b/include/asm-m32r/poll.h @@ -21,6 +21,7 @@ #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-m68k/poll.h b/include/asm-m68k/poll.h index c4b69c4a87e1..0fb8843647f8 100644 --- a/include/asm-m68k/poll.h +++ b/include/asm-m68k/poll.h @@ -13,6 +13,7 @@ #define POLLWRBAND 256 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-mips/poll.h b/include/asm-mips/poll.h index a000f1f789e3..70881f8c5c50 100644 --- a/include/asm-mips/poll.h +++ b/include/asm-mips/poll.h @@ -17,6 +17,7 @@ /* These seem to be more or less nonstandard ... */ #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-parisc/poll.h b/include/asm-parisc/poll.h index 1c1da86934cf..20e4d03c74cb 100644 --- a/include/asm-parisc/poll.h +++ b/include/asm-parisc/poll.h @@ -16,6 +16,7 @@ #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-powerpc/poll.h b/include/asm-powerpc/poll.h index edd2054da86b..9c7d12631033 100644 --- a/include/asm-powerpc/poll.h +++ b/include/asm-powerpc/poll.h @@ -13,6 +13,7 @@ #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-s390/poll.h b/include/asm-s390/poll.h index e90a5ca42061..6f7f65ac7d27 100644 --- a/include/asm-s390/poll.h +++ b/include/asm-s390/poll.h @@ -24,6 +24,7 @@ #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-sh/poll.h b/include/asm-sh/poll.h index 52f95b9188dc..dbca9b32f4a6 100644 --- a/include/asm-sh/poll.h +++ b/include/asm-sh/poll.h @@ -16,6 +16,7 @@ #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-sh64/poll.h b/include/asm-sh64/poll.h index a420d14eb704..3a6cbad08d28 100644 --- a/include/asm-sh64/poll.h +++ b/include/asm-sh64/poll.h @@ -26,6 +26,7 @@ #define POLLWRNORM 0x0100 #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-sparc/poll.h b/include/asm-sparc/poll.h index 3ddcc6481f09..26f13fb35497 100644 --- a/include/asm-sparc/poll.h +++ b/include/asm-sparc/poll.h @@ -13,6 +13,7 @@ #define POLLWRBAND 256 #define POLLMSG 512 #define POLLREMOVE 1024 +#define POLLRDHUP 2048 struct pollfd { int fd; diff --git a/include/asm-sparc64/poll.h b/include/asm-sparc64/poll.h index 31b611aa7468..ab6b0d1bb4ad 100644 --- a/include/asm-sparc64/poll.h +++ b/include/asm-sparc64/poll.h @@ -13,6 +13,7 @@ 
#define POLLWRBAND 256 #define POLLMSG 512 #define POLLREMOVE 1024 +#define POLLRDHUP 2048 struct pollfd { int fd; diff --git a/include/asm-v850/poll.h b/include/asm-v850/poll.h index 0369562c7e15..c10176c2c28f 100644 --- a/include/asm-v850/poll.h +++ b/include/asm-v850/poll.h @@ -13,6 +13,7 @@ #define POLLWRBAND 0x0100 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-x86_64/poll.h b/include/asm-x86_64/poll.h index c43cbba31913..c0475a9d8bb8 100644 --- a/include/asm-x86_64/poll.h +++ b/include/asm-x86_64/poll.h @@ -16,6 +16,7 @@ #define POLLWRBAND 0x0200 #define POLLMSG 0x0400 #define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/include/asm-xtensa/poll.h b/include/asm-xtensa/poll.h index dffe447534e0..6fd94773e866 100644 --- a/include/asm-xtensa/poll.h +++ b/include/asm-xtensa/poll.h @@ -27,6 +27,7 @@ #define POLLMSG 0x0400 #define POLLREMOVE 0x0800 +#define POLLRDHUP 0x2000 struct pollfd { int fd; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index fb031fe9be9e..469eda0f0dfd 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -238,6 +238,9 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) mask |= POLLERR; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; + if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; diff --git a/net/core/datagram.c b/net/core/datagram.c index b8ce6bf81188..aecddcc30401 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -500,6 +500,8 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) mask |= POLLERR; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index d4b293e16283..1ff7328b0e17 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -350,7 +350,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) mask |= POLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLIN | POLLRDNORM; + mask |= POLLIN | POLLRDNORM | POLLRDHUP; /* Connected? */ if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4b0272c92d66..19ea5c0b094b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -365,7 +365,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) mask |= POLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLIN | POLLRDNORM; + mask |= POLLIN | POLLRDNORM | POLLRDHUP; /* Connected? */ if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0ea947eb6813..b6e4b89539b3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4894,6 +4894,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Is there any exceptional events? 
*/ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) mask |= POLLERR; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2b4cc2eea5b3..d901465ce013 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1878,6 +1878,8 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl mask |= POLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; /* readable? */ if (!skb_queue_empty(&sk->sk_receive_queue) || -- cgit v1.2.3 From 11b8448751ba114416c63899638a8e473ebd21e7 Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Sat, 25 Mar 2006 03:07:42 -0800 Subject: [PATCH] fix messages in fs/minix Believe it or not, but in fs/minix/*, the oldest filesystem in the kernel, something still can be fixed: printk("new_inode: bit already set"); "\n" is missing! While at it, I also removed periods from the end of error messages and made capitalization uniform. Also s/i-node/inode/, s/printk (/printk(/ Signed-ff-by: Denis Vlasenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/minix/bitmap.c | 10 +++++----- fs/minix/inode.c | 26 +++++++++++++------------- fs/minix/itree_v1.c | 4 ++-- fs/minix/itree_v2.c | 4 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index dc6a4e4abcdc..4a6abc49418e 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -56,7 +56,7 @@ void minix_free_block(struct inode * inode, int block) unsigned int bit,zone; if (block < sbi->s_firstdatazone || block >= sbi->s_nzones) { - printk("trying to free block not in datazone\n"); + printk("Trying to free block not in datazone\n"); return; } zone = block - sbi->s_firstdatazone + 1; @@ -124,7 +124,7 @@ minix_V1_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh) ino / MINIX_INODES_PER_BLOCK; *bh = sb_bread(sb, block); if (!*bh) { - printk("unable to read i-node block\n"); + printk("Unable to read inode block\n"); return NULL; } p = (void *)(*bh)->b_data; @@ -149,7 +149,7 @@ minix_V2_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh) ino / MINIX2_INODES_PER_BLOCK; *bh = sb_bread(sb, block); if (!*bh) { - printk("unable to read i-node block\n"); + printk("Unable to read inode block\n"); return NULL; } p = (void *)(*bh)->b_data; @@ -204,7 +204,7 @@ void minix_free_inode(struct inode * inode) bh = sbi->s_imap[ino >> 13]; lock_kernel(); if (!minix_test_and_clear_bit(ino & 8191, bh->b_data)) - printk("minix_free_inode: bit %lu already cleared.\n", ino); + printk("minix_free_inode: bit %lu already cleared\n", ino); unlock_kernel(); mark_buffer_dirty(bh); out: @@ -238,7 +238,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) return NULL; } if (minix_test_and_set_bit(j,bh->b_data)) { /* shouldn't happen */ - printk("new_inode: bit already set"); + printk("new_inode: bit already set\n"); unlock_kernel(); iput(inode); return NULL; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index d9ffc43fee59..2dcccf1d1b7f 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -127,11 +127,11 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) mark_buffer_dirty(sbi->s_sbh); if (!(sbi->s_mount_state & MINIX_VALID_FS)) - printk ("MINIX-fs warning: remounting unchecked fs, " - "running fsck is recommended.\n"); + printk("MINIX-fs warning: remounting unchecked fs, " + 
"running fsck is recommended\n"); else if ((sbi->s_mount_state & MINIX_ERROR_FS)) - printk ("MINIX-fs warning: remounting fs with errors, " - "running fsck is recommended.\n"); + printk("MINIX-fs warning: remounting fs with errors, " + "running fsck is recommended\n"); } return 0; } @@ -245,11 +245,11 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) mark_buffer_dirty(bh); } if (!(sbi->s_mount_state & MINIX_VALID_FS)) - printk ("MINIX-fs: mounting unchecked file system, " - "running fsck is recommended.\n"); + printk("MINIX-fs: mounting unchecked file system, " + "running fsck is recommended\n"); else if (sbi->s_mount_state & MINIX_ERROR_FS) - printk ("MINIX-fs: mounting file system with errors, " - "running fsck is recommended.\n"); + printk("MINIX-fs: mounting file system with errors, " + "running fsck is recommended\n"); return 0; out_iput: @@ -273,19 +273,19 @@ out_no_bitmap: out_no_map: if (!silent) - printk ("MINIX-fs: can't allocate map\n"); + printk("MINIX-fs: can't allocate map\n"); goto out_release; out_no_fs: if (!silent) - printk("VFS: Can't find a Minix or Minix V2 filesystem on device " - "%s.\n", s->s_id); + printk("VFS: Can't find a Minix or Minix V2 filesystem " + "on device %s\n", s->s_id); out_release: brelse(bh); goto out; out_bad_hblock: - printk("MINIX-fs: blocksize too small for device.\n"); + printk("MINIX-fs: blocksize too small for device\n"); goto out; out_bad_sb: @@ -524,7 +524,7 @@ int minix_sync_inode(struct inode * inode) sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { - printk ("IO error syncing minix inode [%s:%08lx]\n", + printk("IO error syncing minix inode [%s:%08lx]\n", inode->i_sb->s_id, inode->i_ino); err = -1; } diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c index ba06aef4aca1..656b1347a25b 100644 --- a/fs/minix/itree_v1.c +++ b/fs/minix/itree_v1.c @@ -25,9 +25,9 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) int n = 0; if (block < 0) { - printk("minix_bmap: block<0"); + printk("minix_bmap: block<0\n"); } else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) { - printk("minix_bmap: block>big"); + printk("minix_bmap: block>big\n"); } else if (block < 7) { offsets[n++] = block; } else if ((block -= 7) < 512) { diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index 3adc7675560f..9adcdc754e0f 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c @@ -25,9 +25,9 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) int n = 0; if (block < 0) { - printk("minix_bmap: block<0"); + printk("minix_bmap: block<0\n"); } else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) { - printk("minix_bmap: block>big"); + printk("minix_bmap: block>big\n"); } else if (block < 7) { offsets[n++] = block; } else if ((block -= 7) < 256) { -- cgit v1.2.3 From d25b9a1ff0741e71a46f37f45263b5ddcbc948c4 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sat, 25 Mar 2006 03:07:44 -0800 Subject: [PATCH] freeze_bdev() cleanup freeze_bdev() uses a fsync_super() without sync_blockdev(). This patch makes __fsync_super() and shares it. 
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 6d77ce9f54e5..3b3ab5281920 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -160,12 +160,7 @@ int sync_blockdev(struct block_device *bdev) } EXPORT_SYMBOL(sync_blockdev); -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_super(struct super_block *sb) +static void __fsync_super(struct super_block *sb) { sync_inodes_sb(sb, 0); DQUOT_SYNC(sb); @@ -177,7 +172,16 @@ int fsync_super(struct super_block *sb) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); sync_inodes_sb(sb, 1); +} +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. + */ +int fsync_super(struct super_block *sb) +{ + __fsync_super(sb); return sync_blockdev(sb->s_bdev); } @@ -216,19 +220,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) sb->s_frozen = SB_FREEZE_WRITE; smp_wmb(); - sync_inodes_sb(sb, 0); - DQUOT_SYNC(sb); - - lock_super(sb); - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - - sync_blockdev(sb->s_bdev); - sync_inodes_sb(sb, 1); + __fsync_super(sb); sb->s_frozen = SB_FREEZE_TRANS; smp_wmb(); -- cgit v1.2.3 From 4ffc84442572669727dc4fcd976582508eaf23e7 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sat, 25 Mar 2006 03:07:44 -0800 Subject: [PATCH] Move cond_resched() after iput() in sync_sb_inodes() In here, I think the following order is more cache-friendly. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 785c7213a54f..f3fbe2d030f4 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -381,8 +381,8 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) list_move(&inode->i_list, &sb->s_dirty); } spin_unlock(&inode_lock); - cond_resched(); iput(inode); + cond_resched(); spin_lock(&inode_lock); if (wbc->nr_to_write <= 0) break; -- cgit v1.2.3 From 2ab13460852e65c2ec0e77000baba5e859a6a2cf Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Sat, 25 Mar 2006 03:07:45 -0800 Subject: [PATCH] Reduce sched latency in shrink_dcache_sb() This patch reduces scheduling latency in shrink_dcache_sb() noticed during remounting of big partitions with many cached dentries. The same latency fix was applied to select_parent() long ago. 
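cond_resched_lock() is the key ingredient of this fix: when a reschedule is due it drops the given spinlock, yields the CPU, and re-takes the lock before returning, so a long prune loop no longer hogs the processor. A minimal sketch of the same pattern on a made-up list and lock (the struct, list, and lock names below are stand-ins, not dcache symbols):

    #include <linux/list.h>
    #include <linux/sched.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct victim {
            struct list_head lru;
            int pinned;
    };

    static LIST_HEAD(victim_list);
    static DEFINE_SPINLOCK(victim_lock);

    static void prune_victims(void)
    {
            struct victim *v;

            spin_lock(&victim_lock);
    repeat:
            list_for_each_entry(v, &victim_list, lru) {
                    if (v->pinned)
                            continue;
                    list_del(&v->lru);
                    kfree(v);
                    /* offer to drop the lock and schedule after each
                     * expensive step, then restart the walk */
                    cond_resched_lock(&victim_lock);
                    goto repeat;
            }
            spin_unlock(&victim_lock);
    }

Restarting the walk after every disposal is what makes the temporary unlock safe: nothing cached from the list survives across the point where the lock may have been dropped.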
Signed-off-by: Denis Lunev Signed-off-by: Pavel Emelianov Signed-off-by: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 0f7ec12d65ff..939584648504 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -489,6 +489,7 @@ repeat: continue; } prune_one_dentry(dentry); + cond_resched_lock(&dcache_lock); goto repeat; } spin_unlock(&dcache_lock); -- cgit v1.2.3 From c1f5a1944657ba6abe375e3bb2a3238a46849f70 Mon Sep 17 00:00:00 2001 From: Kirk True Date: Sat, 25 Mar 2006 03:07:47 -0800 Subject: [PATCH] ext3: Fix debug logging-only compilation error When EXT3FS_DEBUG is #define-d, the compile breaks due to #include file issues. Signed-off-by: Kirk True Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/bitmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c index cb16b4c5d5df..ce4f82b9e528 100644 --- a/fs/ext3/bitmap.c +++ b/fs/ext3/bitmap.c @@ -7,11 +7,11 @@ * Universite Pierre et Marie Curie (Paris VI) */ -#ifdef EXT3FS_DEBUG - #include +#include +#include -#include "ext3_fs.h" +#ifdef EXT3FS_DEBUG static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; -- cgit v1.2.3 From 3cdc409c169c9f2155151eea82cb9868e4d62788 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 25 Mar 2006 03:07:50 -0800 Subject: [PATCH] reiserfs/xattr_acl.c:reiserfs_get_acl(): make size an int The Coverity checker wasn't happy seeing a size_t compared with -ENODATA and -ENOSYS. Since the only place where size is set is through the result of reiserfs_xattr_get() which is an int, we could simply make size an int. Signed-off-by: Adrian Bunk Cc: Jeff Mahoney Cc: Chris Mason Cc: Hans Reiser Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/xattr_acl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index ab8894c3b9e5..58c418fbca2c 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -182,7 +182,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) { char *name, *value; struct posix_acl *acl, **p_acl; - size_t size; + int size; int retval; struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); @@ -206,7 +206,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) return posix_acl_dup(*p_acl); size = reiserfs_xattr_get(inode, name, NULL, 0); - if ((int)size < 0) { + if (size < 0) { if (size == -ENODATA || size == -ENOSYS) { *p_acl = ERR_PTR(-ENODATA); return NULL; -- cgit v1.2.3 From d5ee4ea8334368b7d284a7d82855f6f16ba599b4 Mon Sep 17 00:00:00 2001 From: Benoit Boissinot Date: Sat, 25 Mar 2006 03:07:54 -0800 Subject: [PATCH] indirect_print_item() warning fix fs/reiserfs/item_ops.c: In function 'indirect_print_item': fs/reiserfs/item_ops.c:278: warning: 'num' may be used uninitialized in this function (akpm: this is probably just gcc being dumb) Signed-off-by: Benoit Boissinot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/item_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index e237cd668e5b..7a88adbceef6 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c @@ -275,7 +275,7 @@ static void indirect_print_item(struct item_head *ih, char *item) int j; __le32 *unp; __u32 prev = INT_MAX; - int 
num; + int num = 0; unp = (__le32 *) item; -- cgit v1.2.3 From 7e53cac41da9ebb9be774220c1b2615182667c9d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 25 Mar 2006 03:07:57 -0800 Subject: [PATCH] Honour AOP_TRUNCATE_PAGE returns in page_symlink As prepare_write, commit_write and readpage are allowed to return AOP_TRUNCATE_PAGE, page_symlink should respond to them. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 1baf1b06fe47..712dfc77793b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2627,16 +2627,27 @@ int __page_symlink(struct inode *inode, const char *symname, int len, int err = -ENOMEM; char *kaddr; +retry: page = find_or_create_page(mapping, 0, gfp_mask); if (!page) goto fail; err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); + if (err == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto retry; + } if (err) goto fail_map; kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr, symname, len-1); kunmap_atomic(kaddr, KM_USER0); - mapping->a_ops->commit_write(NULL, page, 0, len-1); + err = mapping->a_ops->commit_write(NULL, page, 0, len-1); + if (err == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto retry; + } + if (err) + goto fail_map; /* * Notice that we are _not_ going to block here - end of page is * unmapped, so this will only try to map the rest of page, see @@ -2646,7 +2657,8 @@ int __page_symlink(struct inode *inode, const char *symname, int len, */ if (!PageUptodate(page)) { err = mapping->a_ops->readpage(NULL, page); - wait_on_page_locked(page); + if (err != AOP_TRUNCATED_PAGE) + wait_on_page_locked(page); } else { unlock_page(page); } -- cgit v1.2.3 From 76c67de460b3d00b7ab8a96bb18f07ca47d65fba Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Sat, 25 Mar 2006 03:07:58 -0800 Subject: [PATCH] Ext2 flags shouldn't report "nogrpid" If I mount ext2 "rw", I want it to say "rw", not "rw,nogrpid". I caught this writing an automated regression test script for the busybox mount command. The symptom is /dev/loop0 on /images/ext2.dir type ext2 (rw,nogrpid) instead of: /dev/loop0 on /images/ext2.dir type ext2 (rw) The behavior was introduced by git commit 8fc2751beb0941966d3a97b26544e8585e428c08. Signed-off-by: Rob Landley Cc: Mark Bellon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/super.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 268b73f5847c..7e30bae174ed 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -211,8 +211,6 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) if (sbi->s_mount_opt & EXT2_MOUNT_GRPID) seq_puts(seq, ",grpid"); - else - seq_puts(seq, ",nogrpid"); #if defined(CONFIG_QUOTA) if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA) -- cgit v1.2.3 From 57070d012cd425c3a71663528c56a436abd2d9da Mon Sep 17 00:00:00 2001 From: Peter Staubach Date: Sat, 25 Mar 2006 03:08:04 -0800 Subject: [PATCH] compat_sys_nfsservctl(): handle errors correctly Correct some error handling on the compat version of the nfsservctl() system. It was detecting errors while copying in the arguments from user space, but then attempting to use the arguments anyway. This didn't seem so good. 
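The shape of the fix is the usual one for multiplexer-style syscalls: record the copy-in status in one place and bail out before the (possibly uninitialized) argument is used. A hedged sketch with invented names (OP_FOO, OP_BAR, struct karg and do_op are illustrations, not the real nfsctl handlers):

    static long compat_do_op(int cmd, void __user *uarg, struct karg *k)
    {
            int err;

            switch (cmd) {
            case OP_FOO:
                    err = copy_from_user(&k->foo, uarg, sizeof(k->foo)) ? -EFAULT : 0;
                    break;
            case OP_BAR:
                    err = copy_from_user(&k->bar, uarg, sizeof(k->bar)) ? -EFAULT : 0;
                    break;
            default:
                    err = -EINVAL;
                    break;
            }

            if (err)        /* previously the code fell through and used k anyway */
                    return err;

            return do_op(cmd, k);   /* only reached with fully copied-in arguments */
    }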
Signed-off-by: Peter Staubach Cc: Trond Myklebust Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index 2a88477330fc..263990ae4096 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -2170,9 +2170,12 @@ asmlinkage long compat_sys_nfsservctl(int cmd, struct compat_nfsctl_arg __user * default: err = -EINVAL; - goto done; + break; } + if (err) + goto done; + oldfs = get_fs(); set_fs(KERNEL_DS); /* The __user pointer casts are valid because of the set_fs() */ -- cgit v1.2.3 From 11b0b5abb2097a63c1081d9b7e825b987b227972 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Sat, 25 Mar 2006 03:08:13 -0800 Subject: [PATCH] use kzalloc and kcalloc in core fs code Signed-off-by: Oliver Neukum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 3 +-- fs/binfmt_elf.c | 3 +-- fs/bio.c | 7 ++----- fs/char_dev.c | 7 ++----- fs/compat.c | 3 +-- fs/exec.c | 3 +-- fs/pipe.c | 3 +-- fs/super.c | 3 +-- 8 files changed, 10 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index aec2b1916d1b..e41e932ba489 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -122,10 +122,9 @@ static int aio_setup_ring(struct kioctx *ctx) info->nr = 0; info->ring_pages = info->internal_pages; if (nr_pages > AIO_RING_PAGES) { - info->ring_pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL); + info->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); if (!info->ring_pages) return -ENOMEM; - memset(info->ring_pages, 0, sizeof(struct page *) * nr_pages); } info->mmap_size = nr_pages * PAGE_SIZE; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index c2eac2a50bd2..61c21e7dc95d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1465,12 +1465,11 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) read_lock(&tasklist_lock); do_each_thread(g,p) if (current->mm == p->mm && current != p) { - tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC); + tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (!tmp) { read_unlock(&tasklist_lock); goto cleanup; } - memset(tmp, 0, sizeof(*tmp)); INIT_LIST_HEAD(&tmp->list); tmp->thread = p; list_add(&tmp->list, &thread_list); diff --git a/fs/bio.c b/fs/bio.c index 0a8c59cb68f5..73e664c01d30 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -636,12 +636,10 @@ static struct bio *__bio_map_user_iov(request_queue_t *q, return ERR_PTR(-ENOMEM); ret = -ENOMEM; - pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); + pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); if (!pages) goto out; - memset(pages, 0, nr_pages * sizeof(struct page *)); - for (i = 0; i < iov_count; i++) { unsigned long uaddr = (unsigned long)iov[i].iov_base; unsigned long len = iov[i].iov_len; @@ -1186,12 +1184,11 @@ void bioset_free(struct bio_set *bs) struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale) { - struct bio_set *bs = kmalloc(sizeof(*bs), GFP_KERNEL); + struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL); if (!bs) return NULL; - memset(bs, 0, sizeof(*bs)); bs->bio_pool = mempool_create(bio_pool_size, mempool_alloc_slab, mempool_free_slab, bio_slab); diff --git a/fs/char_dev.c b/fs/char_dev.c index 5c36345c9bf7..8c6eb04d31e2 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -146,12 +146,10 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, int ret = 0; int i; - cd = kmalloc(sizeof(struct char_device_struct), GFP_KERNEL); + cd = 
kzalloc(sizeof(struct char_device_struct), GFP_KERNEL); if (cd == NULL) return ERR_PTR(-ENOMEM); - memset(cd, 0, sizeof(struct char_device_struct)); - mutex_lock(&chrdevs_lock); /* temporary */ @@ -466,9 +464,8 @@ static struct kobj_type ktype_cdev_dynamic = { struct cdev *cdev_alloc(void) { - struct cdev *p = kmalloc(sizeof(struct cdev), GFP_KERNEL); + struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); if (p) { - memset(p, 0, sizeof(struct cdev)); p->kobj.ktype = &ktype_cdev_dynamic; INIT_LIST_HEAD(&p->list); kobject_init(&p->kobj); diff --git a/fs/compat.c b/fs/compat.c index 263990ae4096..ef5a0771592d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1476,10 +1476,9 @@ int compat_do_execve(char * filename, int i; retval = -ENOMEM; - bprm = kmalloc(sizeof(*bprm), GFP_KERNEL); + bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); if (!bprm) goto out_ret; - memset(bprm, 0, sizeof(*bprm)); file = open_exec(filename); retval = PTR_ERR(file); diff --git a/fs/exec.c b/fs/exec.c index d8c477a56257..995cba3c62b8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1143,10 +1143,9 @@ int do_execve(char * filename, int i; retval = -ENOMEM; - bprm = kmalloc(sizeof(*bprm), GFP_KERNEL); + bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); if (!bprm) goto out_ret; - memset(bprm, 0, sizeof(*bprm)); file = open_exec(filename); retval = PTR_ERR(file); diff --git a/fs/pipe.c b/fs/pipe.c index 8aada8e426f4..d976866a115b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -662,10 +662,9 @@ struct inode* pipe_new(struct inode* inode) { struct pipe_inode_info *info; - info = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); + info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); if (!info) goto fail_page; - memset(info, 0, sizeof(*info)); inode->i_pipe = info; init_waitqueue_head(PIPE_WAIT(*inode)); diff --git a/fs/super.c b/fs/super.c index 37554b876182..8743e9bbb297 100644 --- a/fs/super.c +++ b/fs/super.c @@ -55,11 +55,10 @@ DEFINE_SPINLOCK(sb_lock); */ static struct super_block *alloc_super(void) { - struct super_block *s = kmalloc(sizeof(struct super_block), GFP_USER); + struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static struct super_operations default_op; if (s) { - memset(s, 0, sizeof(struct super_block)); if (security_sb_alloc(s)) { kfree(s); s = NULL; -- cgit v1.2.3 From 0e6b3e5e97e2e8a25bcfc528dad94edf5220dfeb Mon Sep 17 00:00:00 2001 From: Phillip Susi Date: Sat, 25 Mar 2006 03:08:14 -0800 Subject: [PATCH] udf: fix uid/gid options and add uid/gid=ignore and forget options As Pekka Enberg pointed out, with the if still following the else, you can still get a null uid written to the disk if you specify a default uid= without uid=forget. In other words, if the desktop user is uid=1000 and the mount option uid=1000 is given ( which is done on ubuntu automatically and probably other distributions that use hal ), then if any other user besides uid 1000 owns a file then a 0 will be written to the media as the owning uid instead. 
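The uid=/gid= semantics that this patch (and the Documentation/filesystems/udf.txt hunk below) spell out can be summarized as two small decision rules. The helpers here are invented for illustration and are not the real fs/udf code; "default_uid" stands for the uid= mount option value:

    #include <linux/types.h>

    /* What the user sees for a file's owner: */
    static uid_t uid_shown_to_user(uid_t on_disk, uid_t default_uid, int uid_ignore)
    {
            if (uid_ignore || on_disk == (uid_t) -1)
                    return default_uid;     /* ignore overrides everything;
                                             * -1 ("nobody") maps to the default */
            return on_disk;
    }

    /* What gets written back to the media: */
    static uid_t uid_written_to_disk(uid_t in_core, int uid_forget)
    {
            if (uid_forget)
                    return (uid_t) -1;      /* stored as nobody; whoever mounts
                                             * the media next appears to own it */
            return in_core;
    }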
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/udf.txt | 14 ++++++++++++++ fs/udf/inode.c | 6 ++---- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt index e5213bc301f7..511b4230c053 100644 --- a/Documentation/filesystems/udf.txt +++ b/Documentation/filesystems/udf.txt @@ -26,6 +26,20 @@ The following mount options are supported: nostrict Unset strict conformance iocharset= Set the NLS character set +The uid= and gid= options need a bit more explaining. They will accept a +decimal numeric value which will be used as the default ID for that mount. +They will also accept the string "ignore" and "forget". For files on the disk +that are owned by nobody ( -1 ), they will instead look as if they are owned +by the default ID. The ignore option causes the default ID to override all +IDs on the disk, not just -1. The forget option causes all IDs to be written +to disk as -1, so when the media is later remounted, they will appear to be +owned by whatever default ID it is mounted with at that time. + +For typical desktop use of removable media, you should set the ID to that +of the interactively logged on user, and also specify both the forget and +ignore options. This way the interactive user will always see the files +on the disk as belonging to him. + The remaining are for debugging and disaster recovery: novrs Skip volume sequence recognition diff --git a/fs/udf/inode.c b/fs/udf/inode.c index d04cff2273b6..81e0e8459af1 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1341,13 +1341,11 @@ udf_update_inode(struct inode *inode, int do_sync) if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET)) fe->uid = cpu_to_le32(-1); - else if (inode->i_uid != UDF_SB(inode->i_sb)->s_uid) - fe->uid = cpu_to_le32(inode->i_uid); + else fe->uid = cpu_to_le32(inode->i_uid); if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET)) fe->gid = cpu_to_le32(-1); - else if (inode->i_gid != UDF_SB(inode->i_sb)->s_gid) - fe->gid = cpu_to_le32(inode->i_gid); + else fe->gid = cpu_to_le32(inode->i_gid); udfperms = ((inode->i_mode & S_IRWXO) ) | ((inode->i_mode & S_IRWXG) << 2) | -- cgit v1.2.3 From 174e27c607cfa3ebb92934d28c0fdfcf5ce6c3af Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Sat, 25 Mar 2006 03:08:16 -0800 Subject: [PATCH] direct-io: bug fix in dio handling write error There is a bug in direct-io in propagating a write error up to the higher I/O layer. When performing an async ODIRECT write to a block device, if a device error occurs (like a media error, or the disk being pulled), the error code is only propagated from the device driver to the DIO layer. The error code stops at finished_one_bio(). The async write, however, is supposed to have a corresponding AIO event with an appropriate return code (in this case -EIO). An application that waits on the async write event will hang forever, since that AIO event is lost (at least if the application did not use the timeout option of the io_getevents call; regardless, an AIO event is lost). The discovery of the above bug led to the discovery of a potential race window with dio->result. The fundamental problem is that dio->result is overloaded with dual use: an indicator of the fall-back path for a partial dio write, and an error indicator used in the I/O completion path. In the event of a device error, setting dio->result to -EIO clashes with the value used to track the partial write that activates the fall-back path.
It was also pointed out that it is impossible to use dio->result to track a partial write and at the same time to track an error returned from the device driver. Because direct_io_worker() can only determine whether it is a partial write at the end of io submission, while in mid-stream of that submission a return code could already be coming back from the driver, all the subsequent logic gets messed up. The proposed fix is to separate the error code returned by the IO completion path from the partial IO submit tracking. A new variable is added to the dio structure specifically to track the io error returned in the completion path. Signed-off-by: Ken Chen Acked-by: Zach Brown Acked-by: Suparna Bhattacharya Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/direct-io.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index 27f3e787faca..235ed8d1f11e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -129,6 +129,7 @@ struct dio { /* AIO related stuff */ struct kiocb *iocb; /* kiocb */ int is_async; /* is IO async ? */ + int io_error; /* IO error in completion path */ ssize_t result; /* IO result */ }; @@ -250,6 +251,10 @@ static void finished_one_bio(struct dio *dio) ((offset + transferred) > dio->i_size)) transferred = dio->i_size - offset; + /* check for error in completion path */ + if (dio->io_error) + transferred = dio->io_error; + dio_complete(dio, offset, transferred); /* Complete AIO later if falling back to buffered i/o */ @@ -406,7 +411,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) int page_no; if (!uptodate) - dio->result = -EIO; + dio->io_error = -EIO; if (dio->is_async && dio->rw == READ) { bio_check_pages_dirty(bio); /* transfers ownership */ @@ -971,6 +976,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, dio->next_block_for_io = -1; dio->page_errors = 0; + dio->io_error = 0; dio->result = 0; dio->iocb = iocb; dio->i_size = i_size_read(inode); -- cgit v1.2.3 From 55148548124e3e52e8921f1cb0e325111ef9cbb1 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Sat, 25 Mar 2006 03:08:22 -0800 Subject: [PATCH] remove needless check in binfmt_elf.c Local variable i is unsigned int and thus cannot be negative. (akpm: unsigneds shouldn't be called `i'. This value cannot possibly be negative anyway). Signed-off-by: Carsten Otte Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 61c21e7dc95d..4349113881fb 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1334,7 +1334,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, i = p->state ? ffz(~p->state) + 1 : 0; psinfo->pr_state = i; - psinfo->pr_sname = (i < 0 || i > 5) ? '.' : "RSDTZW"[i]; + psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i]; psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; -- cgit v1.2.3 From 6cc6b1226b71132a1d6e95449d78e051f1f3b506 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Sat, 25 Mar 2006 03:08:23 -0800 Subject: [PATCH] remove needless check in fs/read_write.c nr_segs is unsigned long and thus cannot be negative. We checked against 0 a few lines before.
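The reasoning is purely about unsigned arithmetic: an unsigned variable is never less than zero, and a "negative" value passed in from user space wraps to a huge positive number that the remaining upper-bound check rejects anyway. A stand-alone illustration (not the fs/read_write.c code):

    #include <stdio.h>

    int main(void)
    {
            unsigned long nr_segs = (unsigned long) -3;     /* "negative" input */

            printf("%d\n", nr_segs <= 0);   /* 0: only ever true when nr_segs == 0 */
            printf("%lu\n", nr_segs);       /* huge value, caught by a > MAX check */
            return 0;
    }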
Signed-off-by: Carsten Otte Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/read_write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index 3f7a1a62165f..34b1bf259efd 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -470,7 +470,7 @@ static ssize_t do_readv_writev(int type, struct file *file, * verify all the pointers */ ret = -EINVAL; - if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) + if (nr_segs > UIO_MAXIOV) goto out; if (!file->f_op) goto out; -- cgit v1.2.3 From 1ad3dcc09c88c6e01d7624398c591ff3aee22fbe Mon Sep 17 00:00:00 2001 From: Luke Yang Date: Sat, 25 Mar 2006 03:08:24 -0800 Subject: [PATCH] flat binary loader doesn't check fd table full In binfmt_flat.c, the flat binary loader should check file descriptor table and install the fd on the file. Convert the function to single-exit and fix this bug. Signed-off-by: "Luke Yang" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_flat.c | 73 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 108d56bbd0d0..69f44dcdb0b4 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -426,6 +427,8 @@ static int load_flat_file(struct linux_binprm * bprm, int i, rev, relocs = 0; loff_t fpos; unsigned long start_code, end_code; + int ret; + int exec_fileno; hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */ inode = bprm->file->f_dentry->d_inode; @@ -450,7 +453,8 @@ static int load_flat_file(struct linux_binprm * bprm, */ if (strncmp(hdr->magic, "#!", 2)) printk("BINFMT_FLAT: bad header magic\n"); - return -ENOEXEC; + ret = -ENOEXEC; + goto err; } if (flags & FLAT_FLAG_KTRACE) @@ -458,14 +462,16 @@ static int load_flat_file(struct linux_binprm * bprm, if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) { printk("BINFMT_FLAT: bad flat file version 0x%x (supported 0x%x and 0x%x)\n", rev, FLAT_VERSION, OLD_FLAT_VERSION); - return -ENOEXEC; + ret = -ENOEXEC; + goto err; } /* Don't allow old format executables to use shared libraries */ if (rev == OLD_FLAT_VERSION && id != 0) { printk("BINFMT_FLAT: shared libraries are not available before rev 0x%x\n", (int) FLAT_VERSION); - return -ENOEXEC; + ret = -ENOEXEC; + goto err; } /* @@ -478,7 +484,8 @@ static int load_flat_file(struct linux_binprm * bprm, #ifndef CONFIG_BINFMT_ZFLAT if (flags & (FLAT_FLAG_GZIP|FLAT_FLAG_GZDATA)) { printk("Support for ZFLAT executables is not enabled.\n"); - return -ENOEXEC; + ret = -ENOEXEC; + goto err; } #endif @@ -490,14 +497,27 @@ static int load_flat_file(struct linux_binprm * bprm, rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; if (rlim >= RLIM_INFINITY) rlim = ~0; - if (data_len + bss_len > rlim) - return -ENOMEM; + if (data_len + bss_len > rlim) { + ret = -ENOMEM; + goto err; + } + + /* check file descriptor */ + exec_fileno = get_unused_fd(); + if (exec_fileno < 0) { + ret = -EMFILE; + goto err; + } + get_file(bprm->file); + fd_install(exec_fileno, bprm->file); /* Flush all traces of the currently running executable */ if (id == 0) { result = flush_old_exec(bprm); - if (result) - return result; + if (result) { + ret = result; + goto err_close; + } /* OK, This is the point of no return */ set_personality(PER_LINUX); @@ -527,7 +547,8 @@ static int load_flat_file(struct linux_binprm * bprm, if (!textpos) textpos = (unsigned long) -ENOMEM; 
printk("Unable to mmap process text, errno %d\n", (int)-textpos); - return(textpos); + ret = textpos; + goto err_close; } down_write(¤t->mm->mmap_sem); @@ -542,7 +563,8 @@ static int load_flat_file(struct linux_binprm * bprm, printk("Unable to allocate RAM for process data, errno %d\n", (int)-datapos); do_munmap(current->mm, textpos, text_len); - return realdatastart; + ret = realdatastart; + goto err_close; } datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); @@ -564,7 +586,8 @@ static int load_flat_file(struct linux_binprm * bprm, printk("Unable to read data+bss, errno %d\n", (int)-result); do_munmap(current->mm, textpos, text_len); do_munmap(current->mm, realdatastart, data_len + extra); - return result; + ret = result; + goto err_close; } reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len)); @@ -582,7 +605,8 @@ static int load_flat_file(struct linux_binprm * bprm, textpos = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process text/data, errno %d\n", (int)-textpos); - return(textpos); + ret = textpos; + goto err_close; } realdatastart = textpos + ntohl(hdr->data_start); @@ -627,7 +651,8 @@ static int load_flat_file(struct linux_binprm * bprm, printk("Unable to read code+data+bss, errno %d\n",(int)-result); do_munmap(current->mm, textpos, text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long)); - return result; + ret = result; + goto err_close; } } @@ -690,8 +715,10 @@ static int load_flat_file(struct linux_binprm * bprm, unsigned long addr; if (*rp) { addr = calc_reloc(*rp, libinfo, id, 0); - if (addr == RELOC_FAILED) - return -ENOEXEC; + if (addr == RELOC_FAILED) { + ret = -ENOEXEC; + goto err_close; + } *rp = addr; } } @@ -718,8 +745,10 @@ static int load_flat_file(struct linux_binprm * bprm, relval = ntohl(reloc[i]); addr = flat_get_relocate_addr(relval); rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1); - if (rp == (unsigned long *)RELOC_FAILED) - return -ENOEXEC; + if (rp == (unsigned long *)RELOC_FAILED) { + ret = -ENOEXEC; + goto err_close; + } /* Get the pointer's value. */ addr = flat_get_addr_from_rp(rp, relval, flags); @@ -731,8 +760,10 @@ static int load_flat_file(struct linux_binprm * bprm, if ((flags & FLAT_FLAG_GOTPIC) == 0) addr = ntohl(addr); addr = calc_reloc(addr, libinfo, id, 0); - if (addr == RELOC_FAILED) - return -ENOEXEC; + if (addr == RELOC_FAILED) { + ret = -ENOEXEC; + goto err_close; + } /* Write back the relocated pointer. */ flat_put_addr_at_rp(rp, addr, relval); @@ -752,6 +783,10 @@ static int load_flat_file(struct linux_binprm * bprm, stack_len); return 0; +err_close: + sys_close(exec_fileno); +err: + return ret; } -- cgit v1.2.3 From 913bd906019514579b3c7ec5ab9c463e89207a57 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:29:09 +0100 Subject: [PATCH] x86_64: Increase the variability of the process stack on 64bit architectures 8MB is not really very random, use 1GB (or more with larger page sizes) instead. Also use the low bits of the random generator output now instead of throwing them away. Only enabled on x86-64 right now. 
Other architectures need to add a suitable STACK_RND_MASK Cc: mingo@elte.hu Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 13 +++++++++---- include/asm-x86_64/elf.h | 4 ++++ 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 4349113881fb..537893a16014 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -500,17 +500,22 @@ out: #define INTERPRETER_AOUT 1 #define INTERPRETER_ELF 2 +#ifndef STACK_RND_MASK +#define STACK_RND_MASK 0x7ff /* with 4K pages 8MB of VA */ +#endif static unsigned long randomize_stack_top(unsigned long stack_top) { unsigned int random_variable = 0; - if (current->flags & PF_RANDOMIZE) - random_variable = get_random_int() % (8*1024*1024); + if (current->flags & PF_RANDOMIZE) { + random_variable = get_random_int() & STACK_RND_MASK; + random_variable <<= PAGE_SHIFT; + } #ifdef CONFIG_STACK_GROWSUP - return PAGE_ALIGN(stack_top + random_variable); + return PAGE_ALIGN(stack_top) + random_variable; #else - return PAGE_ALIGN(stack_top - random_variable); + return PAGE_ALIGN(stack_top) - random_variable; #endif } diff --git a/include/asm-x86_64/elf.h b/include/asm-x86_64/elf.h index 43862cd6a569..c98633af07d2 100644 --- a/include/asm-x86_64/elf.h +++ b/include/asm-x86_64/elf.h @@ -8,6 +8,7 @@ #include #include #include +#include /* x86-64 relocation types */ #define R_X86_64_NONE 0 /* No reloc */ @@ -157,6 +158,9 @@ extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); #define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs) #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs) +/* 1GB for 64bit, 8MB for 32bit */ +#define STACK_RND_MASK (is_compat_task() ? 0x7ff : 0x3fffff) + #endif #endif -- cgit v1.2.3 From e827f92355e1eeec2d227d3bd3350d04042a011e Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 26 Mar 2006 18:24:46 +0200 Subject: BUG_ON() Conversion in fs/buffer.c this changes if() BUG(); constructs to BUG_ON() which is cleaner and can better optimized away Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/buffer.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 3b3ab5281920..4342ab0ad99a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -796,8 +796,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) if (!mapping->assoc_mapping) { mapping->assoc_mapping = buffer_mapping; } else { - if (mapping->assoc_mapping != buffer_mapping) - BUG(); + BUG_ON(mapping->assoc_mapping != buffer_mapping); } if (list_empty(&bh->b_assoc_buffers)) { spin_lock(&buffer_mapping->private_lock); @@ -1114,8 +1113,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, if (!page) return NULL; - if (!PageLocked(page)) - BUG(); + BUG_ON(!PageLocked(page)); if (page_has_buffers(page)) { bh = page_buffers(page); @@ -1522,8 +1520,7 @@ void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset) { bh->b_page = page; - if (offset >= PAGE_SIZE) - BUG(); + BUG_ON(offset >= PAGE_SIZE); if (PageHighMem(page)) /* * This catches illegal uses and preserves the offset: -- cgit v1.2.3 From 28133c7b2b9bbdf8a8765a319e818c1652f38c1f Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 26 Mar 2006 18:25:39 +0200 Subject: BUG_ON() Conversion in fs/dcache.c this changes if() BUG(); constructs to BUG_ON() which is cleaner and can better optimized away Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- 
fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 939584648504..0c78f05819df 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -798,7 +798,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) void d_instantiate(struct dentry *entry, struct inode * inode) { - if (!list_empty(&entry->d_alias)) BUG(); + BUG_ON(!list_empty(&entry->d_alias)); spin_lock(&dcache_lock); if (inode) list_add(&entry->d_alias, &inode->i_dentry); -- cgit v1.2.3 From 4d4ef9abe34a472fbfc9ab75cfde0d58bc342c44 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 26 Mar 2006 18:26:51 +0200 Subject: BUG_ON() Conversion in fs/hfs/ this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can better optimized away. Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/hfs/bnode.c | 9 +++------ fs/hfs/btree.c | 3 +-- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index a7a7d77f3fd3..1e44dcfe49c4 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -306,8 +306,7 @@ void hfs_bnode_unhash(struct hfs_bnode *node) for (p = &node->tree->node_hash[hfs_bnode_hash(node->this)]; *p && *p != node; p = &(*p)->next_hash) ; - if (!*p) - BUG(); + BUG_ON(!*p); *p = node->next_hash; node->tree->node_hash_cnt--; } @@ -415,8 +414,7 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num) spin_lock(&tree->hash_lock); node = hfs_bnode_findhash(tree, num); spin_unlock(&tree->hash_lock); - if (node) - BUG(); + BUG_ON(node); node = __hfs_bnode_create(tree, num); if (!node) return ERR_PTR(-ENOMEM); @@ -459,8 +457,7 @@ void hfs_bnode_put(struct hfs_bnode *node) dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); - if (!atomic_read(&node->refcnt)) - BUG(); + BUG_ON(!atomic_read(&node->refcnt)); if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) return; for (i = 0; i < tree->pages_per_bnode; i++) { diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 7bb11edd1488..d20131ce4b95 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -36,8 +36,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke tree->inode = iget_locked(sb, id); if (!tree->inode) goto free_tree; - if (!(tree->inode->i_state & I_NEW)) - BUG(); + BUG_ON(!(tree->inode->i_state & I_NEW)); { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; HFS_I(tree->inode)->flags = 0; -- cgit v1.2.3 From 309be53da60dc24b73f3f0bceab8f0707c05371f Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 26 Mar 2006 18:27:41 +0200 Subject: BUG_ON() Conversion in fs/ext2/ this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can better optimized away. 
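For reference, the conversion in this series is mechanical; a tiny sketch of the before/after shape on a hypothetical err flag (not a specific call site):

    #include <linux/kernel.h>       /* BUG(), BUG_ON() in this tree */

    static void check_old(int err)
    {
            if (err)                /* old style: open-coded test */
                    BUG();
    }

    static void check_new(int err)
    {
            BUG_ON(err);            /* same test; the condition is wrapped in
                                     * unlikely(), as the changelog notes */
    }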
Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/ext2/dir.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index b3dbd716cd3a..0165388c425c 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -416,8 +416,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, lock_page(page); err = page->mapping->a_ops->prepare_write(NULL, page, from, to); - if (err) - BUG(); + BUG_ON(err); de->inode = cpu_to_le32(inode->i_ino); ext2_set_de_type (de, inode); err = ext2_commit_chunk(page, from, to); @@ -554,8 +553,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) from = (char*)pde - (char*)page_address(page); lock_page(page); err = mapping->a_ops->prepare_write(NULL, page, from, to); - if (err) - BUG(); + BUG_ON(err); if (pde) pde->rec_len = cpu_to_le16(to-from); dir->inode = 0; -- cgit v1.2.3 From 64a07bd82ed526d813b64b0957543eef55bdf9c0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sun, 26 Mar 2006 01:36:55 -0800 Subject: [PATCH] protect remove_proc_entry It has been discovered that the remove_proc_entry has a race in the removing of entries in the proc file system that are siblings. There's no protection around the traversing and removing of elements that belong in the same subdirectory. This subdirectory list is protected in other areas by the BKL. So the BKL was at first used to protect this area too, but unfortunately, remove_proc_entry may be called with spinlocks held. The BKL may schedule, so this was not a solution. The final solution was to add a new global spin lock to protect this list, called proc_subdir_lock. This lock now protects the list in remove_proc_entry, and I also went around looking for other areas that this list is modified and added this protection there too. Care must be taken since these locations call several functions that may also schedule. Since I don't see any location that these functions that modify the subdirectory list are called by interrupts, the irqsave/restore versions of the spin lock was _not_ used. 
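The pattern described here is a plain spinlock dedicated to one sibling list, taken only around list walks and updates, and released before anything that can sleep or copy to user space (such as the filldir callback). Below is a hedged sketch of the same pattern with stand-in names (struct node, sibling_list and sibling_lock are not the real procfs symbols):

    #include <linux/list.h>
    #include <linux/spinlock.h>
    #include <linux/string.h>

    struct node {
            struct list_head siblings;
            const char *name;
    };

    static LIST_HEAD(sibling_list);
    static DEFINE_SPINLOCK(sibling_lock);   /* protects sibling_list only */

    static struct node *lookup_sibling(const char *name)
    {
            struct node *n, *found = NULL;

            spin_lock(&sibling_lock);
            list_for_each_entry(n, &sibling_list, siblings) {
                    if (!strcmp(n->name, name)) {
                            found = n;
                            break;
                    }
            }
            spin_unlock(&sibling_lock);
            return found;   /* object lifetime is a separate problem from
                             * list consistency, just as with proc entries */
    }

A spinlock rather than a sleeping lock is the right tool here precisely because, as the changelog says, remove_proc_entry() can be called with other spinlocks held, so the protection must never sleep.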
Signed-off-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 32 +++++++++++++++++++++++++++++--- fs/proc/proc_devtree.c | 2 ++ include/linux/proc_fs.h | 3 +++ 3 files changed, 34 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 20e5c4509a43..47b7a20d45eb 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "internal.h" @@ -29,6 +30,8 @@ static ssize_t proc_file_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos); static loff_t proc_file_lseek(struct file *, loff_t, int); +DEFINE_SPINLOCK(proc_subdir_lock); + int proc_match(int len, const char *name, struct proc_dir_entry *de) { if (de->namelen != len) @@ -277,7 +280,9 @@ static int xlate_proc_name(const char *name, const char *cp = name, *next; struct proc_dir_entry *de; int len; + int rtn = 0; + spin_lock(&proc_subdir_lock); de = &proc_root; while (1) { next = strchr(cp, '/'); @@ -289,13 +294,17 @@ static int xlate_proc_name(const char *name, if (proc_match(len, cp, de)) break; } - if (!de) - return -ENOENT; + if (!de) { + rtn = -ENOENT; + goto out; + } cp += len + 1; } *residual = cp; *ret = de; - return 0; +out: + spin_unlock(&proc_subdir_lock); + return rtn; } static DEFINE_IDR(proc_inum_idr); @@ -380,6 +389,7 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam int error = -ENOENT; lock_kernel(); + spin_lock(&proc_subdir_lock); de = PDE(dir); if (de) { for (de = de->subdir; de ; de = de->next) { @@ -388,12 +398,15 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino = de->low_ino; + spin_unlock(&proc_subdir_lock); error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); + spin_lock(&proc_subdir_lock); break; } } } + spin_unlock(&proc_subdir_lock); unlock_kernel(); if (inode) { @@ -447,11 +460,13 @@ int proc_readdir(struct file * filp, filp->f_pos++; /* fall through */ default: + spin_lock(&proc_subdir_lock); de = de->subdir; i -= 2; for (;;) { if (!de) { ret = 1; + spin_unlock(&proc_subdir_lock); goto out; } if (!i) @@ -461,12 +476,16 @@ int proc_readdir(struct file * filp, } do { + /* filldir passes info to user space */ + spin_unlock(&proc_subdir_lock); if (filldir(dirent, de->name, de->namelen, filp->f_pos, de->low_ino, de->mode >> 12) < 0) goto out; + spin_lock(&proc_subdir_lock); filp->f_pos++; de = de->next; } while (de); + spin_unlock(&proc_subdir_lock); } ret = 1; out: unlock_kernel(); @@ -500,9 +519,13 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp if (i == 0) return -EAGAIN; dp->low_ino = i; + + spin_lock(&proc_subdir_lock); dp->next = dir->subdir; dp->parent = dir; dir->subdir = dp; + spin_unlock(&proc_subdir_lock); + if (S_ISDIR(dp->mode)) { if (dp->proc_iops == NULL) { dp->proc_fops = &proc_dir_operations; @@ -694,6 +717,8 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) if (!parent && xlate_proc_name(name, &parent, &fn) != 0) goto out; len = strlen(fn); + + spin_lock(&proc_subdir_lock); for (p = &parent->subdir; *p; p=&(*p)->next ) { if (!proc_match(len, fn, *p)) continue; @@ -714,6 +739,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) } break; } + spin_unlock(&proc_subdir_lock); out: return; } diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 
9bdd077d6f55..596b4b4f1cc8 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -136,9 +136,11 @@ void proc_device_tree_add_node(struct device_node *np, * properties are quite unimportant for us though, thus we * simply "skip" them here, but we do have to check. */ + spin_lock(&proc_subdir_lock); for (ent = de->subdir; ent != NULL; ent = ent->next) if (!strcmp(ent->name, pp->name)) break; + spin_unlock(&proc_subdir_lock); if (ent != NULL) { printk(KERN_WARNING "device-tree: property \"%s\" name" " conflicts with node in %s\n", pp->name, diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index aa6322d45198..6b12b0f661b4 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -4,6 +4,7 @@ #include #include #include +#include #include /* @@ -92,6 +93,8 @@ extern struct proc_dir_entry *proc_bus; extern struct proc_dir_entry *proc_root_driver; extern struct proc_dir_entry *proc_root_kcore; +extern spinlock_t proc_subdir_lock; + extern void proc_root_init(void); extern void proc_misc_init(void); -- cgit v1.2.3 From 353ab6e97b8f209dbecc9f650f1f84e3da2a7bb1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Mar 2006 01:37:12 -0800 Subject: [PATCH] sem2mutex: fs/ Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Ingo Molnar Cc: Eric Van Hensbergen Cc: Robert Love Cc: Thomas Gleixner Cc: David Woodhouse Cc: Neil Brown Cc: Trond Myklebust Cc: Dave Kleikamp Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cramfs/inode.c | 31 ++++++++++++++++--------------- fs/dcookies.c | 19 ++++++++++--------- fs/jffs2/compr_zlib.c | 19 ++++++++++--------- fs/jfs/jfs_logmgr.c | 27 ++++++++++++++------------- fs/lockd/host.c | 19 ++++++++++--------- fs/lockd/svc.c | 17 +++++++++-------- fs/lockd/svcsubs.c | 17 +++++++++-------- fs/nfs/callback.c | 11 ++++++----- fs/nfsd/nfs4state.c | 9 +++++---- fs/partitions/devfs.c | 12 ++++++------ fs/super.c | 7 ++++--- 11 files changed, 99 insertions(+), 89 deletions(-) (limited to 'fs') diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 8ad52f5bf255..acc1b2c10a86 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -31,7 +32,7 @@ static struct inode_operations cramfs_dir_inode_operations; static struct file_operations cramfs_directory_operations; static struct address_space_operations cramfs_aops; -static DECLARE_MUTEX(read_mutex); +static DEFINE_MUTEX(read_mutex); /* These two macros may change in future, to provide better st_ino @@ -250,20 +251,20 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent) memset(sbi, 0, sizeof(struct cramfs_sb_info)); /* Invalidate the read buffers on mount: think disk change.. 
*/ - down(&read_mutex); + mutex_lock(&read_mutex); for (i = 0; i < READ_BUFFERS; i++) buffer_blocknr[i] = -1; /* Read the first block and get the superblock from it */ memcpy(&super, cramfs_read(sb, 0, sizeof(super)), sizeof(super)); - up(&read_mutex); + mutex_unlock(&read_mutex); /* Do sanity checks on the superblock */ if (super.magic != CRAMFS_MAGIC) { /* check at 512 byte offset */ - down(&read_mutex); + mutex_lock(&read_mutex); memcpy(&super, cramfs_read(sb, 512, sizeof(super)), sizeof(super)); - up(&read_mutex); + mutex_unlock(&read_mutex); if (super.magic != CRAMFS_MAGIC) { if (!silent) printk(KERN_ERR "cramfs: wrong magic\n"); @@ -366,7 +367,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) mode_t mode; int namelen, error; - down(&read_mutex); + mutex_lock(&read_mutex); de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+256); name = (char *)(de+1); @@ -379,7 +380,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) memcpy(buf, name, namelen); ino = CRAMINO(de); mode = de->mode; - up(&read_mutex); + mutex_unlock(&read_mutex); nextoffset = offset + sizeof(*de) + namelen; for (;;) { if (!namelen) { @@ -410,7 +411,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s unsigned int offset = 0; int sorted; - down(&read_mutex); + mutex_lock(&read_mutex); sorted = CRAMFS_SB(dir->i_sb)->flags & CRAMFS_FLAG_SORTED_DIRS; while (offset < dir->i_size) { struct cramfs_inode *de; @@ -433,7 +434,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s for (;;) { if (!namelen) { - up(&read_mutex); + mutex_unlock(&read_mutex); return ERR_PTR(-EIO); } if (name[namelen-1]) @@ -447,7 +448,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s continue; if (!retval) { struct cramfs_inode entry = *de; - up(&read_mutex); + mutex_unlock(&read_mutex); d_add(dentry, get_cramfs_inode(dir->i_sb, &entry)); return NULL; } @@ -455,7 +456,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s if (sorted) break; } - up(&read_mutex); + mutex_unlock(&read_mutex); d_add(dentry, NULL); return NULL; } @@ -474,21 +475,21 @@ static int cramfs_readpage(struct file *file, struct page * page) u32 start_offset, compr_len; start_offset = OFFSET(inode) + maxblock*4; - down(&read_mutex); + mutex_lock(&read_mutex); if (page->index) start_offset = *(u32 *) cramfs_read(sb, blkptr_offset-4, 4); compr_len = (*(u32 *) cramfs_read(sb, blkptr_offset, 4) - start_offset); - up(&read_mutex); + mutex_unlock(&read_mutex); pgdata = kmap(page); if (compr_len == 0) ; /* hole */ else { - down(&read_mutex); + mutex_lock(&read_mutex); bytes_filled = cramfs_uncompress_block(pgdata, PAGE_CACHE_SIZE, cramfs_read(sb, start_offset, compr_len), compr_len); - up(&read_mutex); + mutex_unlock(&read_mutex); } } else pgdata = kmap(page); diff --git a/fs/dcookies.c b/fs/dcookies.c index f8274a8f83bd..ef758cfa5565 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -23,6 +23,7 @@ #include #include #include +#include #include /* The dcookies are allocated from a kmem_cache and @@ -36,7 +37,7 @@ struct dcookie_struct { }; static LIST_HEAD(dcookie_users); -static DECLARE_MUTEX(dcookie_sem); +static DEFINE_MUTEX(dcookie_mutex); static kmem_cache_t * dcookie_cache; static struct list_head * dcookie_hashtable; static size_t hash_size; @@ -114,7 +115,7 @@ int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt, int err = 0; struct dcookie_struct * dcs; - 
down(&dcookie_sem); + mutex_lock(&dcookie_mutex); if (!is_live()) { err = -EINVAL; @@ -134,7 +135,7 @@ int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt, *cookie = dcookie_value(dcs); out: - up(&dcookie_sem); + mutex_unlock(&dcookie_mutex); return err; } @@ -157,7 +158,7 @@ asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user * buf, size_t len) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - down(&dcookie_sem); + mutex_lock(&dcookie_mutex); if (!is_live()) { err = -EINVAL; @@ -192,7 +193,7 @@ asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user * buf, size_t len) out_free: kfree(kbuf); out: - up(&dcookie_sem); + mutex_unlock(&dcookie_mutex); return err; } @@ -290,7 +291,7 @@ struct dcookie_user * dcookie_register(void) { struct dcookie_user * user; - down(&dcookie_sem); + mutex_lock(&dcookie_mutex); user = kmalloc(sizeof(struct dcookie_user), GFP_KERNEL); if (!user) @@ -302,7 +303,7 @@ struct dcookie_user * dcookie_register(void) list_add(&user->next, &dcookie_users); out: - up(&dcookie_sem); + mutex_unlock(&dcookie_mutex); return user; out_free: kfree(user); @@ -313,7 +314,7 @@ out_free: void dcookie_unregister(struct dcookie_user * user) { - down(&dcookie_sem); + mutex_lock(&dcookie_mutex); list_del(&user->next); kfree(user); @@ -321,7 +322,7 @@ void dcookie_unregister(struct dcookie_user * user) if (!is_live()) dcookie_exit(); - up(&dcookie_sem); + mutex_unlock(&dcookie_mutex); } EXPORT_SYMBOL_GPL(dcookie_register); diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c index 4db8be8e90cc..5c63e0cdcf4c 100644 --- a/fs/jffs2/compr_zlib.c +++ b/fs/jffs2/compr_zlib.c @@ -33,13 +33,14 @@ */ #define STREAM_END_SPACE 12 -static DECLARE_MUTEX(deflate_sem); -static DECLARE_MUTEX(inflate_sem); +static DEFINE_MUTEX(deflate_mutex); +static DEFINE_MUTEX(inflate_mutex); static z_stream inf_strm, def_strm; #ifdef __KERNEL__ /* Linux-only */ #include #include +#include static int __init alloc_workspaces(void) { @@ -79,11 +80,11 @@ static int jffs2_zlib_compress(unsigned char *data_in, if (*dstlen <= STREAM_END_SPACE) return -1; - down(&deflate_sem); + mutex_lock(&deflate_mutex); if (Z_OK != zlib_deflateInit(&def_strm, 3)) { printk(KERN_WARNING "deflateInit failed\n"); - up(&deflate_sem); + mutex_unlock(&deflate_mutex); return -1; } @@ -104,7 +105,7 @@ static int jffs2_zlib_compress(unsigned char *data_in, if (ret != Z_OK) { D1(printk(KERN_DEBUG "deflate in loop returned %d\n", ret)); zlib_deflateEnd(&def_strm); - up(&deflate_sem); + mutex_unlock(&deflate_mutex); return -1; } } @@ -133,7 +134,7 @@ static int jffs2_zlib_compress(unsigned char *data_in, *sourcelen = def_strm.total_in; ret = 0; out: - up(&deflate_sem); + mutex_unlock(&deflate_mutex); return ret; } @@ -145,7 +146,7 @@ static int jffs2_zlib_decompress(unsigned char *data_in, int ret; int wbits = MAX_WBITS; - down(&inflate_sem); + mutex_lock(&inflate_mutex); inf_strm.next_in = data_in; inf_strm.avail_in = srclen; @@ -173,7 +174,7 @@ static int jffs2_zlib_decompress(unsigned char *data_in, if (Z_OK != zlib_inflateInit2(&inf_strm, wbits)) { printk(KERN_WARNING "inflateInit failed\n"); - up(&inflate_sem); + mutex_unlock(&inflate_mutex); return 1; } @@ -183,7 +184,7 @@ static int jffs2_zlib_decompress(unsigned char *data_in, printk(KERN_NOTICE "inflate returned %d\n", ret); } zlib_inflateEnd(&inf_strm); - up(&inflate_sem); + mutex_unlock(&inflate_mutex); return 0; } diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 0b348b13b551..3315f0b1fbc0 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ 
-69,6 +69,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" @@ -165,7 +166,7 @@ do { \ */ static LIST_HEAD(jfs_external_logs); static struct jfs_log *dummy_log = NULL; -static DECLARE_MUTEX(jfs_log_sem); +static DEFINE_MUTEX(jfs_log_mutex); /* * forward references @@ -1085,20 +1086,20 @@ int lmLogOpen(struct super_block *sb) if (sbi->mntflag & JFS_INLINELOG) return open_inline_log(sb); - down(&jfs_log_sem); + mutex_lock(&jfs_log_mutex); list_for_each_entry(log, &jfs_external_logs, journal_list) { if (log->bdev->bd_dev == sbi->logdev) { if (memcmp(log->uuid, sbi->loguuid, sizeof(log->uuid))) { jfs_warn("wrong uuid on JFS journal\n"); - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return -EINVAL; } /* * add file system to log active file system list */ if ((rc = lmLogFileSystem(log, sbi, 1))) { - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return rc; } goto journal_found; @@ -1106,7 +1107,7 @@ int lmLogOpen(struct super_block *sb) } if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) { - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return -ENOMEM; } INIT_LIST_HEAD(&log->sb_list); @@ -1151,7 +1152,7 @@ journal_found: sbi->log = log; LOG_UNLOCK(log); - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return 0; /* @@ -1168,7 +1169,7 @@ journal_found: blkdev_put(bdev); free: /* free log descriptor */ - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); kfree(log); jfs_warn("lmLogOpen: exit(%d)", rc); @@ -1212,11 +1213,11 @@ static int open_dummy_log(struct super_block *sb) { int rc; - down(&jfs_log_sem); + mutex_lock(&jfs_log_mutex); if (!dummy_log) { dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL); if (!dummy_log) { - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return -ENOMEM; } INIT_LIST_HEAD(&dummy_log->sb_list); @@ -1229,7 +1230,7 @@ static int open_dummy_log(struct super_block *sb) if (rc) { kfree(dummy_log); dummy_log = NULL; - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return rc; } } @@ -1238,7 +1239,7 @@ static int open_dummy_log(struct super_block *sb) list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list); JFS_SBI(sb)->log = dummy_log; LOG_UNLOCK(dummy_log); - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return 0; } @@ -1466,7 +1467,7 @@ int lmLogClose(struct super_block *sb) jfs_info("lmLogClose: log:0x%p", log); - down(&jfs_log_sem); + mutex_lock(&jfs_log_mutex); LOG_LOCK(log); list_del(&sbi->log_list); LOG_UNLOCK(log); @@ -1516,7 +1517,7 @@ int lmLogClose(struct super_block *sb) kfree(log); out: - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); jfs_info("lmLogClose: exit(%d)", rc); return rc; } diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 112ebf8b8dfe..729ac427d359 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -16,6 +16,7 @@ #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_HOSTCACHE @@ -30,7 +31,7 @@ static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH]; static unsigned long next_gc; static int nrhosts; -static DECLARE_MUTEX(nlm_host_sema); +static DEFINE_MUTEX(nlm_host_mutex); static void nlm_gc_hosts(void); @@ -71,7 +72,7 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, hash = NLM_ADDRHASH(sin->sin_addr.s_addr); /* Lock hash table */ - down(&nlm_host_sema); + mutex_lock(&nlm_host_mutex); if (time_after_eq(jiffies, next_gc)) nlm_gc_hosts(); @@ -91,7 +92,7 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, nlm_hosts[hash] = host; } nlm_get_host(host); - up(&nlm_host_sema); + mutex_unlock(&nlm_host_mutex); return host; } } @@ 
-130,7 +131,7 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, next_gc = 0; nohost: - up(&nlm_host_sema); + mutex_unlock(&nlm_host_mutex); return host; } @@ -141,19 +142,19 @@ nlm_find_client(void) * and return it */ int hash; - down(&nlm_host_sema); + mutex_lock(&nlm_host_mutex); for (hash = 0 ; hash < NLM_HOST_NRHASH; hash++) { struct nlm_host *host, **hp; for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) { if (host->h_server && host->h_killed == 0) { nlm_get_host(host); - up(&nlm_host_sema); + mutex_unlock(&nlm_host_mutex); return host; } } } - up(&nlm_host_sema); + mutex_unlock(&nlm_host_mutex); return NULL; } @@ -265,7 +266,7 @@ nlm_shutdown_hosts(void) int i; dprintk("lockd: shutting down host module\n"); - down(&nlm_host_sema); + mutex_lock(&nlm_host_mutex); /* First, make all hosts eligible for gc */ dprintk("lockd: nuking all hosts...\n"); @@ -276,7 +277,7 @@ nlm_shutdown_hosts(void) /* Then, perform a garbage collection pass */ nlm_gc_hosts(); - up(&nlm_host_sema); + mutex_unlock(&nlm_host_mutex); /* complain if any hosts are left */ if (nrhosts) { diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 5e85bde6c123..fd56c8872f34 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -43,13 +44,13 @@ static struct svc_program nlmsvc_program; struct nlmsvc_binding * nlmsvc_ops; EXPORT_SYMBOL(nlmsvc_ops); -static DECLARE_MUTEX(nlmsvc_sema); +static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; static pid_t nlmsvc_pid; int nlmsvc_grace_period; unsigned long nlmsvc_timeout; -static DECLARE_MUTEX_LOCKED(lockd_start); +static DECLARE_COMPLETION(lockd_start_done); static DECLARE_WAIT_QUEUE_HEAD(lockd_exit); /* @@ -112,7 +113,7 @@ lockd(struct svc_rqst *rqstp) * Let our maker know we're running. */ nlmsvc_pid = current->pid; - up(&lockd_start); + complete(&lockd_start_done); daemonize("lockd"); @@ -215,7 +216,7 @@ lockd_up(void) struct svc_serv * serv; int error = 0; - down(&nlmsvc_sema); + mutex_lock(&nlmsvc_mutex); /* * Unconditionally increment the user count ... this is * the number of clients who _want_ a lockd process. 
@@ -263,7 +264,7 @@ lockd_up(void) "lockd_up: create thread failed, error=%d\n", error); goto destroy_and_out; } - down(&lockd_start); + wait_for_completion(&lockd_start_done); /* * Note: svc_serv structures have an initial use count of 1, @@ -272,7 +273,7 @@ lockd_up(void) destroy_and_out: svc_destroy(serv); out: - up(&nlmsvc_sema); + mutex_unlock(&nlmsvc_mutex); return error; } EXPORT_SYMBOL(lockd_up); @@ -285,7 +286,7 @@ lockd_down(void) { static int warned; - down(&nlmsvc_sema); + mutex_lock(&nlmsvc_mutex); if (nlmsvc_users) { if (--nlmsvc_users) goto out; @@ -315,7 +316,7 @@ lockd_down(void) recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); out: - up(&nlmsvc_sema); + mutex_unlock(&nlmsvc_mutex); } EXPORT_SYMBOL(lockd_down); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index c7a6e3ae44d6..a570e5c8a930 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -28,7 +29,7 @@ #define FILE_HASH_BITS 5 #define FILE_NRHASH (1<f_next) if (!nfs_compare_fh(&file->f_handle, f)) @@ -130,7 +131,7 @@ found: nfserr = 0; out_unlock: - up(&nlm_file_sema); + mutex_unlock(&nlm_file_mutex); return nfserr; out_free: @@ -239,14 +240,14 @@ nlm_traverse_files(struct nlm_host *host, int action) struct nlm_file *file, **fp; int i; - down(&nlm_file_sema); + mutex_lock(&nlm_file_mutex); for (i = 0; i < FILE_NRHASH; i++) { fp = nlm_files + i; while ((file = *fp) != NULL) { /* Traverse locks, blocks and shares of this file * and update file->f_locks count */ if (nlm_inspect_file(host, file, action)) { - up(&nlm_file_sema); + mutex_unlock(&nlm_file_mutex); return 1; } @@ -261,7 +262,7 @@ nlm_traverse_files(struct nlm_host *host, int action) } } } - up(&nlm_file_sema); + mutex_unlock(&nlm_file_mutex); return 0; } @@ -281,7 +282,7 @@ nlm_release_file(struct nlm_file *file) file, file->f_count); /* Lock file table */ - down(&nlm_file_sema); + mutex_lock(&nlm_file_mutex); /* If there are no more locks etc, delete the file */ if(--file->f_count == 0) { @@ -289,7 +290,7 @@ nlm_release_file(struct nlm_file *file) nlm_delete_file(file); } - up(&nlm_file_sema); + mutex_unlock(&nlm_file_mutex); } /* diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 99d2cfbce863..90c95adc8c1b 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -31,7 +32,7 @@ struct nfs_callback_data { }; static struct nfs_callback_data nfs_callback_info; -static DECLARE_MUTEX(nfs_callback_sema); +static DEFINE_MUTEX(nfs_callback_mutex); static struct svc_program nfs4_callback_program; unsigned int nfs_callback_set_tcpport; @@ -95,7 +96,7 @@ int nfs_callback_up(void) int ret = 0; lock_kernel(); - down(&nfs_callback_sema); + mutex_lock(&nfs_callback_mutex); if (nfs_callback_info.users++ || nfs_callback_info.pid != 0) goto out; init_completion(&nfs_callback_info.started); @@ -121,7 +122,7 @@ int nfs_callback_up(void) nfs_callback_info.serv = serv; wait_for_completion(&nfs_callback_info.started); out: - up(&nfs_callback_sema); + mutex_unlock(&nfs_callback_mutex); unlock_kernel(); return ret; out_destroy: @@ -139,7 +140,7 @@ int nfs_callback_down(void) int ret = 0; lock_kernel(); - down(&nfs_callback_sema); + mutex_lock(&nfs_callback_mutex); nfs_callback_info.users--; do { if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0) @@ -147,7 +148,7 @@ int nfs_callback_down(void) if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0) break; } while 
(wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); - up(&nfs_callback_sema); + mutex_unlock(&nfs_callback_mutex); unlock_kernel(); return ret; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f6ab762bea99..c7b87e92f91b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -49,6 +49,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -77,11 +78,11 @@ static void nfs4_set_recdir(char *recdir); /* Locking: * - * client_sema: + * client_mutex: * protects clientid_hashtbl[], clientstr_hashtbl[], * unconfstr_hashtbl[], uncofid_hashtbl[]. */ -static DECLARE_MUTEX(client_sema); +static DEFINE_MUTEX(client_mutex); static kmem_cache_t *stateowner_slab = NULL; static kmem_cache_t *file_slab = NULL; @@ -91,13 +92,13 @@ static kmem_cache_t *deleg_slab = NULL; void nfs4_lock_state(void) { - down(&client_sema); + mutex_lock(&client_mutex); } void nfs4_unlock_state(void) { - up(&client_sema); + mutex_unlock(&client_mutex); } static inline u32 diff --git a/fs/partitions/devfs.c b/fs/partitions/devfs.c index 87f50444fd39..3f0a780c9cec 100644 --- a/fs/partitions/devfs.c +++ b/fs/partitions/devfs.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include struct unique_numspace { @@ -16,7 +16,7 @@ struct unique_numspace { struct semaphore mutex; }; -static DECLARE_MUTEX(numspace_mutex); +static DEFINE_MUTEX(numspace_mutex); static int expand_numspace(struct unique_numspace *s) { @@ -48,7 +48,7 @@ static int alloc_unique_number(struct unique_numspace *s) { int rval = 0; - down(&numspace_mutex); + mutex_lock(&numspace_mutex); if (s->num_free < 1) rval = expand_numspace(s); if (!rval) { @@ -56,7 +56,7 @@ static int alloc_unique_number(struct unique_numspace *s) --s->num_free; __set_bit(rval, s->bits); } - up(&numspace_mutex); + mutex_unlock(&numspace_mutex); return rval; } @@ -66,11 +66,11 @@ static void dealloc_unique_number(struct unique_numspace *s, int number) int old_val; if (number >= 0) { - down(&numspace_mutex); + mutex_lock(&numspace_mutex); old_val = __test_and_clear_bit(number, s->bits); if (old_val) ++s->num_free; - up(&numspace_mutex); + mutex_unlock(&numspace_mutex); } } diff --git a/fs/super.c b/fs/super.c index 8743e9bbb297..a66f66bb8049 100644 --- a/fs/super.c +++ b/fs/super.c @@ -37,6 +37,7 @@ #include /* for the emergency remount stuff */ #include #include +#include #include @@ -380,9 +381,9 @@ restart: void sync_filesystems(int wait) { struct super_block *sb; - static DECLARE_MUTEX(mutex); + static DEFINE_MUTEX(mutex); - down(&mutex); /* Could be down_interruptible */ + mutex_lock(&mutex); /* Could be down_interruptible */ spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { if (!sb->s_op->sync_fs) @@ -411,7 +412,7 @@ restart: goto restart; } spin_unlock(&sb_lock); - up(&mutex); + mutex_unlock(&mutex); } /** -- cgit v1.2.3 From 3978d7179d3849848df8a37dd0a5acc20bcb8750 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 26 Mar 2006 01:37:17 -0800 Subject: [PATCH] Make address_space_operations->sync_page return void The only user ignores the return value, and the only instanace (block_sync_page) always returns 0... 
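Put differently, the patch only narrows the prototype; callers lose nothing they could act on. A sketch of the calling side is below (run_sync_page() is an illustrative wrapper written for this description, not the exact code in mm/filemap.c):

	/* before: int  (*sync_page)(struct page *);  the result was always discarded */
	/* after:  void (*sync_page)(struct page *);                                   */
	static void run_sync_page(struct page *page)
	{
		struct address_space *mapping = page_mapping(page);

		if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
			mapping->a_ops->sync_page(page);	/* nothing left to check */
	}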
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 3 +-- fs/cifs/file.c | 6 ++++-- include/linux/buffer_head.h | 2 +- include/linux/fs.h | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 3b3ab5281920..0b9456fd074f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3007,7 +3007,7 @@ out: } EXPORT_SYMBOL(try_to_free_buffers); -int block_sync_page(struct page *page) +void block_sync_page(struct page *page) { struct address_space *mapping; @@ -3015,7 +3015,6 @@ int block_sync_page(struct page *page) mapping = page_mapping(page); if (mapping) blk_run_backing_dev(mapping->backing_dev_info, page); - return 0; } /* diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 165d67426381..fb49aef1f2ec 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1339,7 +1339,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) return rc; } -/* static int cifs_sync_page(struct page *page) +/* static void cifs_sync_page(struct page *page) { struct address_space *mapping; struct inode *inode; @@ -1353,16 +1353,18 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) return 0; inode = mapping->host; if (!inode) - return 0; */ + return; */ /* fill in rpages then result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */ /* cFYI(1, ("rpages is %d for sync page of Index %ld ", rpages, index)); +#if 0 if (rc < 0) return rc; return 0; +#endif } */ /* diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9f159baf153f..27dd97b3fce9 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -200,7 +200,7 @@ int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, int generic_cont_expand(struct inode *inode, loff_t size); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int block_sync_page(struct page *); +void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5adf32b90f36..972435d4df5c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -350,7 +350,7 @@ struct writeback_control; struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); - int (*sync_page)(struct page *); + void (*sync_page)(struct page *); /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); -- cgit v1.2.3 From 2ff28e22bdb8727fbc7d7889807bc5a73aae56c5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 26 Mar 2006 01:37:18 -0800 Subject: [PATCH] Make address_space_operations->invalidatepage return void The return value of this function is never used, so let's be honest and declare it as void. Some places where invalidatepage returned 0, I have inserted comments suggesting a BUG_ON. 
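A minimal ->invalidatepage implementation under the new signature looks like this (examplefs is a placeholder name; real filesystems such as ext3, JFS and reiserfs do more work here, as the hunks below show):

	static void examplefs_invalidatepage(struct page *page, unsigned long offset)
	{
		BUG_ON(!PageLocked(page));
		if (page_has_buffers(page))
			block_invalidatepage(page, offset);	/* also returns void now */
	}

Anything the old int return could have reported is now either handled internally or turned into one of the BUG_ON()/comment suggestions mentioned above.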
[akpm@osdl.org: JBD BUG fix] [akpm@osdl.org: rework for git-nfs] [akpm@osdl.org: don't go BUG in block_invalidate_page()] Signed-off-by: Neil Brown Acked-by: Dave Kleikamp Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/file.c | 6 +++--- fs/buffer.c | 18 ++++++++---------- fs/ext3/inode.c | 4 ++-- fs/jbd/transaction.c | 13 +++++-------- fs/jfs/jfs_metapage.c | 7 +++---- fs/nfs/file.c | 3 +-- fs/reiserfs/inode.c | 8 +++++--- fs/xfs/linux-2.6/xfs_aops.c | 4 ++-- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 2 +- include/linux/jbd.h | 2 +- 11 files changed, 33 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/afs/file.c b/fs/afs/file.c index 150b19227922..7bb716887e29 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -28,7 +28,7 @@ static int afs_file_release(struct inode *inode, struct file *file); #endif static int afs_file_readpage(struct file *file, struct page *page); -static int afs_file_invalidatepage(struct page *page, unsigned long offset); +static void afs_file_invalidatepage(struct page *page, unsigned long offset); static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); struct inode_operations afs_file_inode_operations = { @@ -212,7 +212,7 @@ int afs_cache_get_page_cookie(struct page *page, /* * invalidate part or all of a page */ -static int afs_file_invalidatepage(struct page *page, unsigned long offset) +static void afs_file_invalidatepage(struct page *page, unsigned long offset) { int ret = 1; @@ -238,11 +238,11 @@ static int afs_file_invalidatepage(struct page *page, unsigned long offset) if (!PageWriteback(page)) ret = page->mapping->a_ops->releasepage(page, 0); + /* possibly should BUG_ON(!ret); - neilb */ } } _leave(" = %d", ret); - return ret; } /* end afs_file_invalidatepage() */ /*****************************************************************************/ diff --git a/fs/buffer.c b/fs/buffer.c index 0b9456fd074f..f25f58096428 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1593,11 +1593,10 @@ EXPORT_SYMBOL(try_to_release_page); * point. Because the caller is about to free (and possibly reuse) those * blocks on-disk. */ -int block_invalidatepage(struct page *page, unsigned long offset) +void block_invalidatepage(struct page *page, unsigned long offset) { struct buffer_head *head, *bh, *next; unsigned int curr_off = 0; - int ret = 1; BUG_ON(!PageLocked(page)); if (!page_has_buffers(page)) @@ -1624,19 +1623,18 @@ int block_invalidatepage(struct page *page, unsigned long offset) * so real IO is not possible anymore. */ if (offset == 0) - ret = try_to_release_page(page, 0); + try_to_release_page(page, 0); out: - return ret; + return; } EXPORT_SYMBOL(block_invalidatepage); -int do_invalidatepage(struct page *page, unsigned long offset) +void do_invalidatepage(struct page *page, unsigned long offset) { - int (*invalidatepage)(struct page *, unsigned long); - invalidatepage = page->mapping->a_ops->invalidatepage; - if (invalidatepage == NULL) - invalidatepage = block_invalidatepage; - return (*invalidatepage)(page, offset); + void (*invalidatepage)(struct page *, unsigned long); + invalidatepage = page->mapping->a_ops->invalidatepage ? 
: + block_invalidatepage; + (*invalidatepage)(page, offset); } /* diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2c361377e0a5..76e22c9c9c6c 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1430,7 +1430,7 @@ ext3_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); } -static int ext3_invalidatepage(struct page *page, unsigned long offset) +static void ext3_invalidatepage(struct page *page, unsigned long offset) { journal_t *journal = EXT3_JOURNAL(page->mapping->host); @@ -1440,7 +1440,7 @@ static int ext3_invalidatepage(struct page *page, unsigned long offset) if (offset == 0) ClearPageChecked(page); - return journal_invalidatepage(journal, page, offset); + journal_invalidatepage(journal, page, offset); } static int ext3_releasepage(struct page *page, gfp_t wait) diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index ada31fa272e3..c609f5034fcd 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1873,16 +1873,15 @@ zap_buffer_unlocked: } /** - * int journal_invalidatepage() + * void journal_invalidatepage() * @journal: journal to use for flush... * @page: page to flush * @offset: length of page to invalidate. * * Reap page buffers containing data after offset in page. * - * Return non-zero if the page's buffers were successfully reaped. */ -int journal_invalidatepage(journal_t *journal, +void journal_invalidatepage(journal_t *journal, struct page *page, unsigned long offset) { @@ -1893,7 +1892,7 @@ int journal_invalidatepage(journal_t *journal, if (!PageLocked(page)) BUG(); if (!page_has_buffers(page)) - return 1; + return; /* We will potentially be playing with lists other than just the * data lists (especially for journaled data mode), so be @@ -1916,11 +1915,9 @@ int journal_invalidatepage(journal_t *journal, } while (bh != head); if (!offset) { - if (!may_free || !try_to_free_buffers(page)) - return 0; - J_ASSERT(!page_has_buffers(page)); + if (may_free && try_to_free_buffers(page)) + J_ASSERT(!page_has_buffers(page)); } - return 1; } /* diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 5fbaeaadccd3..8508043849f3 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -578,14 +578,13 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) return 0; } -static int metapage_invalidatepage(struct page *page, unsigned long offset) +static void metapage_invalidatepage(struct page *page, unsigned long offset) { BUG_ON(offset); - if (PageWriteback(page)) - return 0; + BUG_ON(PageWriteback(page)); - return metapage_releasepage(page, 0); + metapage_releasepage(page, 0); } struct address_space_operations jfs_metapage_aops = { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5263b2864a44..dee49a0cb995 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -318,10 +318,9 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse return status; } -static int nfs_invalidate_page(struct page *page, unsigned long offset) +static void nfs_invalidate_page(struct page *page, unsigned long offset) { /* FIXME: we really should cancel any unstarted writes on this page */ - return 1; } static int nfs_release_page(struct page *page, gfp_t gfp) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index d60f6238c66a..62e18c19b446 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2793,7 +2793,7 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) } /* clm -- taken from fs/buffer.c:block_invalidate_page */ 
-static int reiserfs_invalidatepage(struct page *page, unsigned long offset) +static void reiserfs_invalidatepage(struct page *page, unsigned long offset) { struct buffer_head *head, *bh, *next; struct inode *inode = page->mapping->host; @@ -2832,10 +2832,12 @@ static int reiserfs_invalidatepage(struct page *page, unsigned long offset) * The get_block cached value has been unconditionally invalidated, * so real IO is not possible anymore. */ - if (!offset && ret) + if (!offset && ret) { ret = try_to_release_page(page, 0); + /* maybe should BUG_ON(!ret); - neilb */ + } out: - return ret; + return; } static int reiserfs_set_page_dirty(struct page *page) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 97fc056130eb..4f2476f188b0 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1442,14 +1442,14 @@ xfs_vm_readpages( return mpage_readpages(mapping, pages, nr_pages, xfs_get_block); } -STATIC int +STATIC void xfs_vm_invalidatepage( struct page *page, unsigned long offset) { xfs_page_trace(XFS_INVALIDPAGE_ENTER, page->mapping->host, page, offset); - return block_invalidatepage(page, offset); + block_invalidatepage(page, offset); } struct address_space_operations xfs_address_space_operations = { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 27dd97b3fce9..da917ed096a3 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -189,8 +189,8 @@ extern int buffer_heads_over_limit; * address_spaces. */ int try_to_release_page(struct page * page, gfp_t gfp_mask); -int block_invalidatepage(struct page *page, unsigned long offset); -int do_invalidatepage(struct page *page, unsigned long offset); +void block_invalidatepage(struct page *page, unsigned long offset); +void do_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int block_read_full_page(struct page*, get_block_t*); diff --git a/include/linux/fs.h b/include/linux/fs.h index 972435d4df5c..9674679525f9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -369,7 +369,7 @@ struct address_space_operations { int (*commit_write)(struct file *, struct page *, unsigned, unsigned); /* Unfortunately this kludge is needed for FIBMAP. 
Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); - int (*invalidatepage) (struct page *, unsigned long); + void (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, gfp_t); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 4fc7dffd66ef..6a425e370cb3 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -895,7 +895,7 @@ extern int journal_dirty_metadata (handle_t *, struct buffer_head *); extern void journal_release_buffer (handle_t *, struct buffer_head *); extern int journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); -extern int journal_invalidatepage(journal_t *, +extern void journal_invalidatepage(journal_t *, struct page *, unsigned long); extern int journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int journal_stop(handle_t *); -- cgit v1.2.3 From fa3536cc144c1298f2ed9416c33f3b77fa2cd37a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 26 Mar 2006 01:37:24 -0800 Subject: [PATCH] Use __read_mostly on some hot fs variables I discovered on oprofile hunting on a SMP platform that dentry lookups were slowed down because d_hash_mask, d_hash_shift and dentry_hashtable were in a cache line that contained inodes_stat. So each time inodes_stats is changed by a cpu, other cpus have to refill their cache line. This patch moves some variables to the __read_mostly section, in order to avoid false sharing. RCU dentry lookups can go full speed. Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/bio.c | 4 ++-- fs/block_dev.c | 4 ++-- fs/dcache.c | 14 +++++++------- fs/dcookies.c | 6 +++--- fs/dnotify.c | 4 ++-- fs/eventpoll.c | 6 +++--- fs/fcntl.c | 4 ++-- fs/inode.c | 8 ++++---- fs/inotify.c | 12 ++++++------ fs/locks.c | 2 +- fs/namespace.c | 4 ++-- fs/pipe.c | 2 +- 12 files changed, 35 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 73e664c01d30..49db9286a3b4 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -30,7 +30,7 @@ #define BIO_POOL_SIZE 256 -static kmem_cache_t *bio_slab; +static kmem_cache_t *bio_slab __read_mostly; #define BIOVEC_NR_POOLS 6 @@ -39,7 +39,7 @@ static kmem_cache_t *bio_slab; * basically we just need to survive */ #define BIO_SPLIT_ENTRIES 8 -mempool_t *bio_split_pool; +mempool_t *bio_split_pool __read_mostly; struct biovec_slab { int nr_vecs; diff --git a/fs/block_dev.c b/fs/block_dev.c index 573fc8e0b67a..9a451a9ffad4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -234,7 +234,7 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock); -static kmem_cache_t * bdev_cachep; +static kmem_cache_t * bdev_cachep __read_mostly; static struct inode *bdev_alloc_inode(struct super_block *sb) { @@ -308,7 +308,7 @@ static struct file_system_type bd_type = { .kill_sb = kill_anon_super, }; -static struct vfsmount *bd_mnt; +static struct vfsmount *bd_mnt __read_mostly; struct super_block *blockdev_superblock; void __init bdev_cache_init(void) diff --git a/fs/dcache.c b/fs/dcache.c index 939584648504..aaca5e7970bc 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -36,7 +36,7 @@ /* #define DCACHE_DEBUG 1 */ -int sysctl_vfs_cache_pressure = 100; +int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp 
DEFINE_SPINLOCK(dcache_lock); @@ -44,7 +44,7 @@ static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; EXPORT_SYMBOL(dcache_lock); -static kmem_cache_t *dentry_cache; +static kmem_cache_t *dentry_cache __read_mostly; #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) @@ -59,9 +59,9 @@ static kmem_cache_t *dentry_cache; #define D_HASHBITS d_hash_shift #define D_HASHMASK d_hash_mask -static unsigned int d_hash_mask; -static unsigned int d_hash_shift; -static struct hlist_head *dentry_hashtable; +static unsigned int d_hash_mask __read_mostly; +static unsigned int d_hash_shift __read_mostly; +static struct hlist_head *dentry_hashtable __read_mostly; static LIST_HEAD(dentry_unused); /* Statistics gathering. */ @@ -1719,10 +1719,10 @@ static void __init dcache_init(unsigned long mempages) } /* SLAB cache for __getname() consumers */ -kmem_cache_t *names_cachep; +kmem_cache_t *names_cachep __read_mostly; /* SLAB cache for file structures */ -kmem_cache_t *filp_cachep; +kmem_cache_t *filp_cachep __read_mostly; EXPORT_SYMBOL(d_genocide); diff --git a/fs/dcookies.c b/fs/dcookies.c index ef758cfa5565..8749339bf4f6 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -38,9 +38,9 @@ struct dcookie_struct { static LIST_HEAD(dcookie_users); static DEFINE_MUTEX(dcookie_mutex); -static kmem_cache_t * dcookie_cache; -static struct list_head * dcookie_hashtable; -static size_t hash_size; +static kmem_cache_t *dcookie_cache __read_mostly; +static struct list_head *dcookie_hashtable __read_mostly; +static size_t hash_size __read_mostly; static inline int is_live(void) { diff --git a/fs/dnotify.c b/fs/dnotify.c index f3b540dd5d11..f932591df5a4 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -21,9 +21,9 @@ #include #include -int dir_notify_enable = 1; +int dir_notify_enable __read_mostly = 1; -static kmem_cache_t *dn_cache; +static kmem_cache_t *dn_cache __read_mostly; static void redo_inode_mask(struct inode *inode) { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a0f682cdd03e..e067a06c6464 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -281,13 +281,13 @@ static struct mutex epmutex; static struct poll_safewake psw; /* Slab cache used to allocate "struct epitem" */ -static kmem_cache_t *epi_cache; +static kmem_cache_t *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ -static kmem_cache_t *pwq_cache; +static kmem_cache_t *pwq_cache __read_mostly; /* Virtual fs used to allocate inodes for eventpoll files */ -static struct vfsmount *eventpoll_mnt; +static struct vfsmount *eventpoll_mnt __read_mostly; /* File callbacks that implement the eventpoll file behaviour */ static struct file_operations eventpoll_fops = { diff --git a/fs/fcntl.c b/fs/fcntl.c index 03c789560fb8..2a2479196f96 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -412,7 +412,7 @@ out: /* Table to convert sigio signal codes into poll band bitmaps */ -static long band_table[NSIGPOLL] = { +static const long band_table[NSIGPOLL] = { POLLIN | POLLRDNORM, /* POLL_IN */ POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */ POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */ @@ -531,7 +531,7 @@ int send_sigurg(struct fown_struct *fown) } static DEFINE_RWLOCK(fasync_lock); -static kmem_cache_t *fasync_cache; +static kmem_cache_t *fasync_cache __read_mostly; /* * fasync_helper() is used by some character device drivers (mainly mice) diff --git a/fs/inode.c b/fs/inode.c index 85da11044adc..1fddf2803af8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -56,8 +56,8 @@ #define I_HASHBITS 
i_hash_shift #define I_HASHMASK i_hash_mask -static unsigned int i_hash_mask; -static unsigned int i_hash_shift; +static unsigned int i_hash_mask __read_mostly; +static unsigned int i_hash_shift __read_mostly; /* * Each inode can be on two separate lists. One is @@ -73,7 +73,7 @@ static unsigned int i_hash_shift; LIST_HEAD(inode_in_use); LIST_HEAD(inode_unused); -static struct hlist_head *inode_hashtable; +static struct hlist_head *inode_hashtable __read_mostly; /* * A simple spinlock to protect the list manipulations. @@ -98,7 +98,7 @@ static DEFINE_MUTEX(iprune_mutex); */ struct inodes_stat_t inodes_stat; -static kmem_cache_t * inode_cachep; +static kmem_cache_t * inode_cachep __read_mostly; static struct inode *alloc_inode(struct super_block *sb) { diff --git a/fs/inotify.c b/fs/inotify.c index a61e93e17853..f48a3dae0712 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -39,15 +39,15 @@ static atomic_t inotify_cookie; -static kmem_cache_t *watch_cachep; -static kmem_cache_t *event_cachep; +static kmem_cache_t *watch_cachep __read_mostly; +static kmem_cache_t *event_cachep __read_mostly; -static struct vfsmount *inotify_mnt; +static struct vfsmount *inotify_mnt __read_mostly; /* these are configurable via /proc/sys/fs/inotify/ */ -int inotify_max_user_instances; -int inotify_max_user_watches; -int inotify_max_queued_events; +int inotify_max_user_instances __read_mostly; +int inotify_max_user_watches __read_mostly; +int inotify_max_queued_events __read_mostly; /* * Lock ordering: diff --git a/fs/locks.c b/fs/locks.c index 56f996e98bbc..709450a7b89d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -142,7 +142,7 @@ int lease_break_time = 45; static LIST_HEAD(file_lock_list); static LIST_HEAD(blocked_list); -static kmem_cache_t *filelock_cache; +static kmem_cache_t *filelock_cache __read_mostly; /* Allocate an empty lock structure. */ static struct file_lock *locks_alloc_lock(void) diff --git a/fs/namespace.c b/fs/namespace.c index 71e75bcf4d28..e069a4c5e389 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -43,9 +43,9 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); static int event; -static struct list_head *mount_hashtable; +static struct list_head *mount_hashtable __read_mostly; static int hash_mask __read_mostly, hash_bits __read_mostly; -static kmem_cache_t *mnt_cache; +static kmem_cache_t *mnt_cache __read_mostly; static struct rw_semaphore namespace_sem; /* /sys/fs */ diff --git a/fs/pipe.c b/fs/pipe.c index d976866a115b..4384c9290943 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -675,7 +675,7 @@ fail_page: return NULL; } -static struct vfsmount *pipe_mnt; +static struct vfsmount *pipe_mnt __read_mostly; static int pipefs_delete_dentry(struct dentry *dentry) { return 1; -- cgit v1.2.3 From 6dc0fe8f8b40854982929e4f24d8c65115769b60 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 26 Mar 2006 01:37:24 -0800 Subject: [PATCH] VFS,fs/locks.c: cleanup locks_insert_block BUG instead of handling a case that should never happen. Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/locks.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 709450a7b89d..4badf6a0e7b6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -533,12 +533,7 @@ static void locks_delete_block(struct file_lock *waiter) static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) { - if (!list_empty(&waiter->fl_block)) { - printk(KERN_ERR "locks_insert_block: removing duplicated lock " - "(pid=%d %Ld-%Ld type=%d)\n", waiter->fl_pid, - waiter->fl_start, waiter->fl_end, waiter->fl_type); - __locks_delete_block(waiter); - } + BUG_ON(!list_empty(&waiter->fl_block)); list_add_tail(&waiter->fl_block, &blocker->fl_block); waiter->fl_next = blocker; if (IS_POSIX(blocker)) -- cgit v1.2.3 From 5842add2f3b519111b6401f3a35862bd00a3aa7e Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Sun, 26 Mar 2006 01:37:26 -0800 Subject: [PATCH] VFS,fs/locks.c,NFSD4: add race_free posix_lock_file_conf() interface Lockd and the NFSv4 server both exercise a race condition where posix_test_lock() is called either before or after posix_lock_file() to deal with a denied lock request due to a conflicting lock. Remove the race condition for the NFSv4 server by adding a new conflicting lock parameter to __posix_lock_file() , changing the name to __posix_lock_file_conf(). Keep posix_lock_file() interface, add posix_lock_conf() interface, both call __posix_lock_file_conf(). [akpm@osdl.org: Put the EXPORT_SYMBOL() where it belongs] Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/locks.c | 32 ++++++++++++++++++++++++-------- include/linux/fs.h | 1 + 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 4badf6a0e7b6..4d9e71d43e7e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -792,9 +792,7 @@ out: return error; } -EXPORT_SYMBOL(posix_lock_file); - -static int __posix_lock_file(struct inode *inode, struct file_lock *request) +static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock) { struct file_lock *fl; struct file_lock *new_fl, *new_fl2; @@ -818,6 +816,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request) continue; if (!posix_locks_conflict(request, fl)) continue; + if (conflock) + locks_copy_lock(conflock, fl); error = -EAGAIN; if (!(request->fl_flags & FL_SLEEP)) goto out; @@ -987,8 +987,24 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request) */ int posix_lock_file(struct file *filp, struct file_lock *fl) { - return __posix_lock_file(filp->f_dentry->d_inode, fl); + return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, NULL); +} +EXPORT_SYMBOL(posix_lock_file); + +/** + * posix_lock_file_conf - Apply a POSIX-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * @conflock: Place to return a copy of the conflicting lock, if found. + * + * Except for the conflock parameter, acts just like posix_lock_file. 
+ */ +int posix_lock_file_conf(struct file *filp, struct file_lock *fl, + struct file_lock *conflock) +{ + return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, conflock); } +EXPORT_SYMBOL(posix_lock_file_conf); /** * posix_lock_file_wait - Apply a POSIX-style lock to a file @@ -1004,7 +1020,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) int error; might_sleep (); for (;;) { - error = __posix_lock_file(filp->f_dentry->d_inode, fl); + error = posix_lock_file(filp, fl); if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1076,7 +1092,7 @@ int locks_mandatory_area(int read_write, struct inode *inode, fl.fl_end = offset + count - 1; for (;;) { - error = __posix_lock_file(inode, &fl); + error = __posix_lock_file_conf(inode, &fl, NULL); if (error != -EAGAIN) break; if (!(fl.fl_flags & FL_SLEEP)) @@ -1689,7 +1705,7 @@ again: error = filp->f_op->lock(filp, cmd, file_lock); else { for (;;) { - error = __posix_lock_file(inode, file_lock); + error = posix_lock_file(filp, file_lock); if ((error != -EAGAIN) || (cmd == F_SETLK)) break; error = wait_event_interruptible(file_lock->fl_wait, @@ -1832,7 +1848,7 @@ again: error = filp->f_op->lock(filp, cmd, file_lock); else { for (;;) { - error = __posix_lock_file(inode, file_lock); + error = posix_lock_file(filp, file_lock); if ((error != -EAGAIN) || (cmd == F_SETLK64)) break; error = wait_event_interruptible(file_lock->fl_wait, diff --git a/include/linux/fs.h b/include/linux/fs.h index 9674679525f9..ab67181a5a55 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -763,6 +763,7 @@ extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *); +extern int posix_lock_file_conf(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); -- cgit v1.2.3 From eb76b3fda1f7c2aa2d1523b36835048a15e5e5d2 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Sun, 26 Mar 2006 01:37:26 -0800 Subject: [PATCH] NFSD4: return conflict lock without races Update the NFSv4 server to use the new posix_lock_file_conf() interface. Remove unnecessary (and race-prone) posix_test_file() calls. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c7b87e92f91b..47ec112b266c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2750,37 +2750,31 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock * Note: locks.c uses the BKL to protect the inode's lock list. 
*/ - status = posix_lock_file(filp, &file_lock); - dprintk("NFSD: nfsd4_lock: posix_lock_file status %d\n",status); + /* XXX?: Just to divert the locks_release_private at the start of + * locks_copy_lock: */ + conflock.fl_ops = NULL; + conflock.fl_lmops = NULL; + status = posix_lock_file_conf(filp, &file_lock, &conflock); + dprintk("NFSD: nfsd4_lock: posix_lock_file_conf status %d\n",status); switch (-status) { case 0: /* success! */ update_stateid(&lock_stp->st_stateid); memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid, sizeof(stateid_t)); - goto out; - case (EAGAIN): - goto conflicting_lock; + break; + case (EAGAIN): /* conflock holds conflicting lock */ + status = nfserr_denied; + dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); + nfs4_set_lock_denied(&conflock, &lock->lk_denied); + break; case (EDEADLK): status = nfserr_deadlock; - dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status); - goto out; + break; default: - status = nfserrno(status); - dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status); - goto out; - } - -conflicting_lock: - dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); - status = nfserr_denied; - /* XXX There is a race here. Future patch needed to provide - * an atomic posix_lock_and_test_file - */ - if (!posix_test_lock(filp, &file_lock, &conflock)) { - status = nfserr_serverfault; - goto out; + dprintk("NFSD: nfsd4_lock: posix_lock_file_conf() failed! status %d\n",status); + status = nfserr_resource; + break; } - nfs4_set_lock_denied(&conflock, &lock->lk_denied); out: if (status && lock->lk_is_new && lock_sop) release_stateowner(lock_sop); -- cgit v1.2.3 From 0eaae62abaa1ad1f231932b6cdd9fb1b91df6651 Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Sun, 26 Mar 2006 01:37:47 -0800 Subject: [PATCH] mempool: use common mempool kmalloc allocator This patch changes several mempool users, all of which are basically just wrappers around kmalloc(), to use the common mempool_kmalloc/kfree, rather than their own wrapper function, removing a bunch of duplicated code. 
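The conversion pattern, reduced to its essentials (struct foo, foo_alloc(), foo_free() and FOO_POOL_SIZE are placeholders standing in for the per-driver wrappers deleted below):

	/* before: every caller carried its own trivial kmalloc/kfree callbacks */
	static void *foo_alloc(gfp_t gfp_mask, void *data)
	{
		return kmalloc(sizeof(struct foo), gfp_mask);
	}

	static void foo_free(void *element, void *data)
	{
		kfree(element);
	}

	static mempool_t *foo_create_pool_old(void)
	{
		return mempool_create(FOO_POOL_SIZE, foo_alloc, foo_free, NULL);
	}

	/* after: the common kmalloc-backed helper replaces all of the above */
	static mempool_t *foo_create_pool_new(void)
	{
		return mempool_create_kmalloc_pool(FOO_POOL_SIZE, sizeof(struct foo));
	}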
Signed-off-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/pktcdvd.c | 26 +++---------------- drivers/md/bitmap.c | 14 ++--------- drivers/md/dm-io.c | 13 ++-------- drivers/md/dm-raid1.c | 14 ++--------- drivers/s390/scsi/zfcp_aux.c | 60 +++++++++++++------------------------------- drivers/scsi/lpfc/lpfc_mem.c | 22 +++------------- fs/bio.c | 14 ++--------- 7 files changed, 34 insertions(+), 129 deletions(-) (limited to 'fs') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 1d261f985f31..a04f60693c39 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -230,16 +230,6 @@ static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets) return 1; } -static void *pkt_rb_alloc(gfp_t gfp_mask, void *data) -{ - return kmalloc(sizeof(struct pkt_rb_node), gfp_mask); -} - -static void pkt_rb_free(void *ptr, void *data) -{ - kfree(ptr); -} - static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node) { struct rb_node *n = rb_next(&node->rb_node); @@ -2073,16 +2063,6 @@ static int pkt_close(struct inode *inode, struct file *file) } -static void *psd_pool_alloc(gfp_t gfp_mask, void *data) -{ - return kmalloc(sizeof(struct packet_stacked_data), gfp_mask); -} - -static void psd_pool_free(void *ptr, void *data) -{ - kfree(ptr); -} - static int pkt_end_io_read_cloned(struct bio *bio, unsigned int bytes_done, int err) { struct packet_stacked_data *psd = bio->bi_private; @@ -2475,7 +2455,8 @@ static int pkt_setup_dev(struct pkt_ctrl_command *ctrl_cmd) if (!pd) return ret; - pd->rb_pool = mempool_create(PKT_RB_POOL_SIZE, pkt_rb_alloc, pkt_rb_free, NULL); + pd->rb_pool = mempool_create_kmalloc_pool(PKT_RB_POOL_SIZE, + sizeof(struct pkt_rb_node)); if (!pd->rb_pool) goto out_mem; @@ -2639,7 +2620,8 @@ static int __init pkt_init(void) { int ret; - psd_pool = mempool_create(PSD_POOL_SIZE, psd_pool_alloc, psd_pool_free, NULL); + psd_pool = mempool_create_kmalloc_pool(PSD_POOL_SIZE, + sizeof(struct packet_stacked_data)); if (!psd_pool) return -ENOMEM; diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index e1c18aa1d712..f8ffaee20ff8 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -89,16 +89,6 @@ int bitmap_active(struct bitmap *bitmap) } #define WRITE_POOL_SIZE 256 -/* mempool for queueing pending writes on the bitmap file */ -static void *write_pool_alloc(gfp_t gfp_flags, void *data) -{ - return kmalloc(sizeof(struct page_list), gfp_flags); -} - -static void write_pool_free(void *ptr, void *data) -{ - kfree(ptr); -} /* * just a placeholder - calls kmalloc for bitmap pages @@ -1564,8 +1554,8 @@ int bitmap_create(mddev_t *mddev) spin_lock_init(&bitmap->write_lock); INIT_LIST_HEAD(&bitmap->complete_pages); init_waitqueue_head(&bitmap->write_wait); - bitmap->write_pool = mempool_create(WRITE_POOL_SIZE, write_pool_alloc, - write_pool_free, NULL); + bitmap->write_pool = mempool_create_kmalloc_pool(WRITE_POOL_SIZE, + sizeof(struct page_list)); err = -ENOMEM; if (!bitmap->write_pool) goto error; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 4809b209fbb1..da663d2ff552 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -32,16 +32,6 @@ struct io { static unsigned _num_ios; static mempool_t *_io_pool; -static void *alloc_io(gfp_t gfp_mask, void *pool_data) -{ - return kmalloc(sizeof(struct io), gfp_mask); -} - -static void free_io(void *element, void *pool_data) -{ - kfree(element); -} - static unsigned int pages_to_ios(unsigned int pages) { return 4 * pages; /* too many ? 
*/ @@ -65,7 +55,8 @@ static int resize_pool(unsigned int new_ios) } else { /* create new pool */ - _io_pool = mempool_create(new_ios, alloc_io, free_io, NULL); + _io_pool = mempool_create_kmalloc_pool(new_ios, + sizeof(struct io)); if (!_io_pool) return -ENOMEM; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 6cfa8d435d55..4e90f231fbfb 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -122,16 +122,6 @@ static inline sector_t region_to_sector(struct region_hash *rh, region_t region) /* FIXME move this */ static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); -static void *region_alloc(gfp_t gfp_mask, void *pool_data) -{ - return kmalloc(sizeof(struct region), gfp_mask); -} - -static void region_free(void *element, void *pool_data) -{ - kfree(element); -} - #define MIN_REGIONS 64 #define MAX_RECOVERY 1 static int rh_init(struct region_hash *rh, struct mirror_set *ms, @@ -173,8 +163,8 @@ static int rh_init(struct region_hash *rh, struct mirror_set *ms, INIT_LIST_HEAD(&rh->quiesced_regions); INIT_LIST_HEAD(&rh->recovered_regions); - rh->region_pool = mempool_create(MIN_REGIONS, region_alloc, - region_free, NULL); + rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, + sizeof(struct region)); if (!rh->region_pool) { vfree(rh->buckets); rh->buckets = NULL; diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 95b92f317b6f..395cfc6a344f 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -829,18 +829,6 @@ zfcp_unit_dequeue(struct zfcp_unit *unit) device_unregister(&unit->sysfs_device); } -static void * -zfcp_mempool_alloc(gfp_t gfp_mask, void *size) -{ - return kmalloc((size_t) size, gfp_mask); -} - -static void -zfcp_mempool_free(void *element, void *size) -{ - kfree(element); -} - /* * Allocates a combined QTCB/fsf_req buffer for erp actions and fcp/SCSI * commands. 
@@ -853,51 +841,39 @@ static int zfcp_allocate_low_mem_buffers(struct zfcp_adapter *adapter) { adapter->pool.fsf_req_erp = - mempool_create(ZFCP_POOL_FSF_REQ_ERP_NR, - zfcp_mempool_alloc, zfcp_mempool_free, (void *) - sizeof(struct zfcp_fsf_req_pool_element)); - - if (NULL == adapter->pool.fsf_req_erp) + mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_ERP_NR, + sizeof(struct zfcp_fsf_req_pool_element)); + if (!adapter->pool.fsf_req_erp) return -ENOMEM; adapter->pool.fsf_req_scsi = - mempool_create(ZFCP_POOL_FSF_REQ_SCSI_NR, - zfcp_mempool_alloc, zfcp_mempool_free, (void *) - sizeof(struct zfcp_fsf_req_pool_element)); - - if (NULL == adapter->pool.fsf_req_scsi) + mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_SCSI_NR, + sizeof(struct zfcp_fsf_req_pool_element)); + if (!adapter->pool.fsf_req_scsi) return -ENOMEM; adapter->pool.fsf_req_abort = - mempool_create(ZFCP_POOL_FSF_REQ_ABORT_NR, - zfcp_mempool_alloc, zfcp_mempool_free, (void *) - sizeof(struct zfcp_fsf_req_pool_element)); - - if (NULL == adapter->pool.fsf_req_abort) + mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_ABORT_NR, + sizeof(struct zfcp_fsf_req_pool_element)); + if (!adapter->pool.fsf_req_abort) return -ENOMEM; adapter->pool.fsf_req_status_read = - mempool_create(ZFCP_POOL_STATUS_READ_NR, - zfcp_mempool_alloc, zfcp_mempool_free, - (void *) sizeof(struct zfcp_fsf_req)); - - if (NULL == adapter->pool.fsf_req_status_read) + mempool_create_kmalloc_pool(ZFCP_POOL_STATUS_READ_NR, + sizeof(struct zfcp_fsf_req)); + if (!adapter->pool.fsf_req_status_read) return -ENOMEM; adapter->pool.data_status_read = - mempool_create(ZFCP_POOL_STATUS_READ_NR, - zfcp_mempool_alloc, zfcp_mempool_free, - (void *) sizeof(struct fsf_status_read_buffer)); - - if (NULL == adapter->pool.data_status_read) + mempool_create_kmalloc_pool(ZFCP_POOL_STATUS_READ_NR, + sizeof(struct fsf_status_read_buffer)); + if (!adapter->pool.data_status_read) return -ENOMEM; adapter->pool.data_gid_pn = - mempool_create(ZFCP_POOL_DATA_GID_PN_NR, - zfcp_mempool_alloc, zfcp_mempool_free, (void *) - sizeof(struct zfcp_gid_pn_data)); - - if (NULL == adapter->pool.data_gid_pn) + mempool_create_kmalloc_pool(ZFCP_POOL_DATA_GID_PN_NR, + sizeof(struct zfcp_gid_pn_data)); + if (!adapter->pool.data_gid_pn) return -ENOMEM; return 0; diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 352df47bcaca..07017658ac56 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -38,18 +38,6 @@ #define LPFC_MBUF_POOL_SIZE 64 /* max elements in MBUF safety pool */ #define LPFC_MEM_POOL_SIZE 64 /* max elem in non-DMA safety pool */ -static void * -lpfc_pool_kmalloc(gfp_t gfp_flags, void *data) -{ - return kmalloc((unsigned long)data, gfp_flags); -} - -static void -lpfc_pool_kfree(void *obj, void *data) -{ - kfree(obj); -} - int lpfc_mem_alloc(struct lpfc_hba * phba) { @@ -79,15 +67,13 @@ lpfc_mem_alloc(struct lpfc_hba * phba) pool->current_count++; } - phba->mbox_mem_pool = mempool_create(LPFC_MEM_POOL_SIZE, - lpfc_pool_kmalloc, lpfc_pool_kfree, - (void *)(unsigned long)sizeof(LPFC_MBOXQ_t)); + phba->mbox_mem_pool = mempool_create_kmalloc_pool(LPFC_MEM_POOL_SIZE, + sizeof(LPFC_MBOXQ_t)); if (!phba->mbox_mem_pool) goto fail_free_mbuf_pool; - phba->nlp_mem_pool = mempool_create(LPFC_MEM_POOL_SIZE, - lpfc_pool_kmalloc, lpfc_pool_kfree, - (void *)(unsigned long)sizeof(struct lpfc_nodelist)); + phba->nlp_mem_pool = mempool_create_kmalloc_pool(LPFC_MEM_POOL_SIZE, + sizeof(struct lpfc_nodelist)); if (!phba->nlp_mem_pool) goto fail_free_mbox_pool; diff --git 
a/fs/bio.c b/fs/bio.c index 49db9286a3b4..377046d82945 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1125,16 +1125,6 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) return bp; } -static void *bio_pair_alloc(gfp_t gfp_flags, void *data) -{ - return kmalloc(sizeof(struct bio_pair), gfp_flags); -} - -static void bio_pair_free(void *bp, void *data) -{ - kfree(bp); -} - /* * create memory pools for biovec's in a bio_set. @@ -1254,8 +1244,8 @@ static int __init init_bio(void) if (!fs_bio_set) panic("bio: can't allocate bios\n"); - bio_split_pool = mempool_create(BIO_SPLIT_ENTRIES, - bio_pair_alloc, bio_pair_free, NULL); + bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES, + sizeof(struct bio_pair)); if (!bio_split_pool) panic("bio: can't create split pool\n"); -- cgit v1.2.3 From 93d2341c750cda0df48a6cc67b35fe25f1ec47df Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Sun, 26 Mar 2006 01:37:50 -0800 Subject: [PATCH] mempool: use mempool_create_slab_pool() Modify well over a dozen mempool users to call mempool_create_slab_pool() rather than calling mempool_create() with extra arguments, saving about 30 lines of code and increasing readability. Signed-off-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/cfq-iosched.c | 2 +- drivers/block/aoe/aoeblk.c | 4 +--- drivers/md/dm-crypt.c | 3 +-- drivers/md/dm-mpath.c | 3 +-- drivers/md/dm-snap.c | 3 +-- drivers/md/dm.c | 6 ++---- drivers/md/kcopyd.c | 3 +-- drivers/message/i2o/i2o_block.c | 7 +++---- drivers/scsi/iscsi_tcp.c | 4 ++-- drivers/scsi/qla2xxx/qla_os.c | 3 +-- drivers/scsi/scsi_lib.c | 5 ++--- fs/bio.c | 7 ++----- fs/cifs/cifsfs.c | 18 ++++++------------ fs/jfs/jfs_metapage.c | 4 ++-- fs/nfs/read.c | 6 ++---- fs/nfs/write.c | 12 ++++-------- fs/xfs/linux-2.6/xfs_super.c | 5 ++--- include/linux/i2o.h | 4 +--- net/sunrpc/sched.c | 12 ++++-------- 19 files changed, 39 insertions(+), 72 deletions(-) (limited to 'fs') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c4a0d5d8d7f0..bde40a6ae665 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2191,7 +2191,7 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) if (!cfqd->cfq_hash) goto out_cfqhash; - cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool); + cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool); if (!cfqd->crq_pool) goto out_crqpool; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 32fea55fac48..393b86a3dbf8 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -211,9 +211,7 @@ aoeblk_gdalloc(void *vp) return; } - d->bufpool = mempool_create(MIN_BUFS, - mempool_alloc_slab, mempool_free_slab, - buf_pool_cache); + d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache); if (d->bufpool == NULL) { printk(KERN_ERR "aoe: aoeblk_gdalloc: cannot allocate bufpool " "for %ld.%ld\n", d->aoemajor, d->aoeminor); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d88b8eda3903..259e86f26549 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -616,8 +616,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } } - cc->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab, - mempool_free_slab, _crypt_io_pool); + cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); if (!cc->io_pool) { ti->error = PFX "Cannot allocate crypt io mempool"; goto bad3; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 
f72a82fb9434..1816f30678ed 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -179,8 +179,7 @@ static struct multipath *alloc_multipath(void) m->queue_io = 1; INIT_WORK(&m->process_queued_ios, process_queued_ios, m); INIT_WORK(&m->trigger_event, trigger_event, m); - m->mpio_pool = mempool_create(MIN_IOS, mempool_alloc_slab, - mempool_free_slab, _mpio_cache); + m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); if (!m->mpio_pool) { kfree(m); return NULL; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index f3759dd7828e..7401540086df 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1174,8 +1174,7 @@ static int __init dm_snapshot_init(void) goto bad4; } - pending_pool = mempool_create(128, mempool_alloc_slab, - mempool_free_slab, pending_cache); + pending_pool = mempool_create_slab_pool(128, pending_cache); if (!pending_pool) { DMERR("Couldn't create pending pool."); r = -ENOMEM; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8c82373f7ff3..a64798ef481e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -823,13 +823,11 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent) md->queue->unplug_fn = dm_unplug_all; md->queue->issue_flush_fn = dm_flush_all; - md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab, - mempool_free_slab, _io_cache); + md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); if (!md->io_pool) goto bad2; - md->tio_pool = mempool_create(MIN_IOS, mempool_alloc_slab, - mempool_free_slab, _tio_cache); + md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache); if (!md->tio_pool) goto bad3; diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index 0d54e8b7d9de..9dcb2c8a3853 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -227,8 +227,7 @@ static int jobs_init(void) if (!_job_cache) return -ENOMEM; - _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab, - mempool_free_slab, _job_cache); + _job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); if (!_job_pool) { kmem_cache_destroy(_job_cache); return -ENOMEM; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index b09fb6307153..7d4c5497785b 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -1179,10 +1179,9 @@ static int __init i2o_block_init(void) goto exit; } - i2o_blk_req_pool.pool = mempool_create(I2O_BLOCK_REQ_MEMPOOL_SIZE, - mempool_alloc_slab, - mempool_free_slab, - i2o_blk_req_pool.slab); + i2o_blk_req_pool.pool = + mempool_create_slab_pool(I2O_BLOCK_REQ_MEMPOOL_SIZE, + i2o_blk_req_pool.slab); if (!i2o_blk_req_pool.pool) { osm_err("can't init request mempool\n"); rc = -ENOMEM; diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 7b82ff090d42..2068b66822b7 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -3200,8 +3200,8 @@ iscsi_r2tpool_alloc(struct iscsi_session *session) * Data-Out PDU's within R2T-sequence can be quite big; * using mempool */ - ctask->datapool = mempool_create(ISCSI_DTASK_DEFAULT_MAX, - mempool_alloc_slab, mempool_free_slab, taskcache); + ctask->datapool = mempool_create_slab_pool(ISCSI_DTASK_DEFAULT_MAX, + taskcache); if (ctask->datapool == NULL) { kfifo_free(ctask->r2tqueue); iscsi_pool_free(&ctask->r2tpool, (void**)ctask->r2ts); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 029bbf461bb2..017729c59a49 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2154,8 +2154,7 @@ qla2x00_allocate_sp_pool(scsi_qla_host_t *ha) 
int rval; rval = QLA_SUCCESS; - ha->srb_mempool = mempool_create(SRB_MIN_REQ, mempool_alloc_slab, - mempool_free_slab, srb_cachep); + ha->srb_mempool = mempool_create_slab_pool(SRB_MIN_REQ, srb_cachep); if (ha->srb_mempool == NULL) { qla_printk(KERN_INFO, ha, "Unable to allocate SRB mempool.\n"); rval = QLA_FUNCTION_FAILED; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ede158d08d9d..8f010a314a3d 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1787,9 +1787,8 @@ int __init scsi_init_queue(void) sgp->name); } - sgp->pool = mempool_create(SG_MEMPOOL_SIZE, - mempool_alloc_slab, mempool_free_slab, - sgp->slab); + sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE, + sgp->slab); if (!sgp->pool) { printk(KERN_ERR "SCSI: can't init sg mempool %s\n", sgp->name); diff --git a/fs/bio.c b/fs/bio.c index 377046d82945..eb8fbc53f2cd 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1141,8 +1141,7 @@ static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale) if (i >= scale) pool_entries >>= 1; - *bvp = mempool_create(pool_entries, mempool_alloc_slab, - mempool_free_slab, bp->slab); + *bvp = mempool_create_slab_pool(pool_entries, bp->slab); if (!*bvp) return -ENOMEM; } @@ -1179,9 +1178,7 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale) if (!bs) return NULL; - bs->bio_pool = mempool_create(bio_pool_size, mempool_alloc_slab, - mempool_free_slab, bio_slab); - + bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab); if (!bs->bio_pool) goto bad; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 221b3334b737..6b99b51d6694 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -738,10 +738,8 @@ cifs_init_request_bufs(void) cERROR(1,("cifs_min_rcv set to maximum (64)")); } - cifs_req_poolp = mempool_create(cifs_min_rcv, - mempool_alloc_slab, - mempool_free_slab, - cifs_req_cachep); + cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv, + cifs_req_cachep); if(cifs_req_poolp == NULL) { kmem_cache_destroy(cifs_req_cachep); @@ -771,10 +769,8 @@ cifs_init_request_bufs(void) cFYI(1,("cifs_min_small set to maximum (256)")); } - cifs_sm_req_poolp = mempool_create(cifs_min_small, - mempool_alloc_slab, - mempool_free_slab, - cifs_sm_req_cachep); + cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small, + cifs_sm_req_cachep); if(cifs_sm_req_poolp == NULL) { mempool_destroy(cifs_req_poolp); @@ -808,10 +804,8 @@ cifs_init_mids(void) if (cifs_mid_cachep == NULL) return -ENOMEM; - cifs_mid_poolp = mempool_create(3 /* a reasonable min simultan opers */, - mempool_alloc_slab, - mempool_free_slab, - cifs_mid_cachep); + /* 3 is a reasonable minimum number of simultaneous operations */ + cifs_mid_poolp = mempool_create_slab_pool(3, cifs_mid_cachep); if(cifs_mid_poolp == NULL) { kmem_cache_destroy(cifs_mid_cachep); return -ENOMEM; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 8508043849f3..f28696f235c4 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -220,8 +220,8 @@ int __init metapage_init(void) if (metapage_cache == NULL) return -ENOMEM; - metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mempool_alloc_slab, - mempool_free_slab, metapage_cache); + metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES, + metapage_cache); if (metapage_mempool == NULL) { kmem_cache_destroy(metapage_cache); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 3961524fd4ab..624ca7146b6b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -663,10 +663,8 @@ int nfs_init_readpagecache(void) if 
(nfs_rdata_cachep == NULL) return -ENOMEM; - nfs_rdata_mempool = mempool_create(MIN_POOL_READ, - mempool_alloc_slab, - mempool_free_slab, - nfs_rdata_cachep); + nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ, + nfs_rdata_cachep); if (nfs_rdata_mempool == NULL) return -ENOMEM; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3f5225404c97..4cfada2cc09f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1521,17 +1521,13 @@ int nfs_init_writepagecache(void) if (nfs_wdata_cachep == NULL) return -ENOMEM; - nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE, - mempool_alloc_slab, - mempool_free_slab, - nfs_wdata_cachep); + nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, + nfs_wdata_cachep); if (nfs_wdata_mempool == NULL) return -ENOMEM; - nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT, - mempool_alloc_slab, - mempool_free_slab, - nfs_wdata_cachep); + nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, + nfs_wdata_cachep); if (nfs_commit_mempool == NULL) return -ENOMEM; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 8355faf8ffde..1884300417e3 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -375,9 +375,8 @@ xfs_init_zones(void) if (!xfs_ioend_zone) goto out_destroy_vnode_zone; - xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE, - mempool_alloc_slab, mempool_free_slab, - xfs_ioend_zone); + xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, + xfs_ioend_zone); if (!xfs_ioend_pool) goto out_free_ioend_zone; return 0; diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 5a9d8c599171..dd7d627bf66f 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -950,9 +950,7 @@ static inline int i2o_pool_alloc(struct i2o_pool *pool, const char *name, if (!pool->slab) goto free_name; - pool->mempool = - mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, - pool->slab); + pool->mempool = mempool_create_slab_pool(min_nr, pool->slab); if (!pool->mempool) goto free_slab; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b9969b91a9f7..5c3eee768504 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1167,16 +1167,12 @@ rpc_init_mempool(void) NULL, NULL); if (!rpc_buffer_slabp) goto err_nomem; - rpc_task_mempool = mempool_create(RPC_TASK_POOLSIZE, - mempool_alloc_slab, - mempool_free_slab, - rpc_task_slabp); + rpc_task_mempool = mempool_create_slab_pool(RPC_TASK_POOLSIZE, + rpc_task_slabp); if (!rpc_task_mempool) goto err_nomem; - rpc_buffer_mempool = mempool_create(RPC_BUFFER_POOLSIZE, - mempool_alloc_slab, - mempool_free_slab, - rpc_buffer_slabp); + rpc_buffer_mempool = mempool_create_slab_pool(RPC_BUFFER_POOLSIZE, + rpc_buffer_slabp); if (!rpc_buffer_mempool) goto err_nomem; return 0; -- cgit v1.2.3 From 5515eff811cb807f0d3221a6e8cc223c7850d205 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 26 Mar 2006 01:37:53 -0800 Subject: [PATCH] 2tb-files-add-blkcnt_t-fixes Cc: Takashi Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/inode.c | 6 +++--- fs/cifs/readdir.c | 8 ++++---- fs/ocfs2/journal.c | 8 +++++--- fs/ocfs2/namei.c | 5 +++-- 4 files changed, 15 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ff93a9f81d1c..598eec9778f6 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -163,9 +163,9 @@ int cifs_get_inode_info_unix(struct inode **pinode, if (num_of_bytes < end_of_file) cFYI(1, ("allocation size less than end of file")); - cFYI(1, - ("Size %ld and blocks %ld", - 
(unsigned long) inode->i_size, inode->i_blocks)); + cFYI(1, ("Size %ld and blocks %llu", + (unsigned long) inode->i_size, + (unsigned long long)inode->i_blocks)); if (S_ISREG(inode->i_mode)) { cFYI(1, ("File inode")); inode->i_op = &cifs_file_inode_ops; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index edb3b6eb34bc..488bd0d81dcf 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -197,10 +197,10 @@ static void fill_in_inode(struct inode *tmp_inode, if (allocation_size < end_of_file) cFYI(1, ("May be sparse file, allocation less than file size")); - cFYI(1, - ("File Size %ld and blocks %ld and blocksize %ld", - (unsigned long)tmp_inode->i_size, tmp_inode->i_blocks, - tmp_inode->i_blksize)); + cFYI(1, ("File Size %ld and blocks %llu and blocksize %ld", + (unsigned long)tmp_inode->i_size, + (unsigned long long)tmp_inode->i_blocks, + tmp_inode->i_blksize)); if (S_ISREG(tmp_inode->i_mode)) { cFYI(1, ("File inode")); tmp_inode->i_op = &cifs_file_inode_ops; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index ae3440ca083c..90641929a437 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -582,7 +582,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) } mlog(0, "inode->i_size = %lld\n", inode->i_size); - mlog(0, "inode->i_blocks = %lu\n", inode->i_blocks); + mlog(0, "inode->i_blocks = %llu\n", + (unsigned long long)inode->i_blocks); mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); /* call the kernels journal init function now */ @@ -850,8 +851,9 @@ static int ocfs2_force_read_journal(struct inode *inode) memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); - mlog(0, "Force reading %lu blocks\n", - (inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9))); + mlog(0, "Force reading %llu blocks\n", + (unsigned long long)(inode->i_blocks >> + (inode->i_sb->s_blocksize_bits - 9))); v_blkno = 0; while (v_blkno < diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 274f61d0cda9..0673862c8bdd 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1444,8 +1444,9 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb, * write i_size + 1 bytes. */ blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits; - mlog_entry("i_blocks = %lu, i_size = %llu, blocks = %d\n", - inode->i_blocks, i_size_read(inode), blocks); + mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n", + (unsigned long long)inode->i_blocks, + i_size_read(inode), blocks); /* Sanity check -- make sure we're going to fit. */ if (bytes_left > -- cgit v1.2.3 From 89747d369d34e333b9b60f10f333a0b727b4e4e2 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Sun, 26 Mar 2006 01:37:55 -0800 Subject: [PATCH] ext3_get_blocks: Mapping multiple blocks at a once Currently ext3_get_block() only maps or allocates one block at a time. This is quite inefficient for sequential IO workload. I have posted a early implements a simply multiple block map and allocation with current ext3. The basic idea is allocating the 1st block in the existing way, and attempting to allocate the next adjacent blocks on a best effort basis. More description about the implementation could be found here: http://marc.theaimsgroup.com/?l=ext2-devel&m=112162230003522&w=2 The following the latest version of the patch: break the original patch into 5 patches, re-worked some logicals, and fixed some bugs. 
The break ups are: [patch 1] Adding map multiple blocks at a time in ext3_get_blocks() [patch 2] Extend ext3_get_blocks() to support multiple block allocation [patch 3] Implement multiple block allocation in ext3-try-to-allocate (called via ext3_new_block()). [patch 4] Proper accounting updates in ext3_new_blocks() [patch 5] Adjust reservation window size properly (by the given number of blocks to allocate) before block allocation to increase the possibility of allocating multiple blocks in a single call. Tests done so far includes fsx,tiobench and dbench. The following numbers collected from Direct IO tests (1G file creation/read) shows the system time have been greatly reduced (more than 50% on my 8 cpu system) with the patches. 1G file DIO write: 2.6.15 2.6.15+patches real 0m31.275s 0m31.161s user 0m0.000s 0m0.000s sys 0m3.384s 0m0.564s 1G file DIO read: 2.6.15 2.6.15+patches real 0m30.733s 0m30.624s user 0m0.000s 0m0.004s sys 0m0.748s 0m0.380s Some previous test we did on buffered IO with using multiple blocks allocation and delayed allocation shows noticeable improvement on throughput and system time. This patch: Add support of mapping multiple blocks in one call. This is useful for DIO reads and re-writes (where blocks are already allocated), also is in line with Christoph's proposal of using getblocks() in mpage_readpage() or mpage_readpages(). Signed-off-by: Mingming Cao Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/dir.c | 5 ++- fs/ext3/inode.c | 105 +++++++++++++++++++++++++++++++++++------------- include/linux/ext3_fs.h | 6 +-- 3 files changed, 82 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 773459164bb2..38bd3f6ec147 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -131,8 +131,9 @@ static int ext3_readdir(struct file * filp, struct buffer_head *bh = NULL; map_bh.b_state = 0; - err = ext3_get_block_handle(NULL, inode, blk, &map_bh, 0, 0); - if (!err) { + err = ext3_get_blocks_handle(NULL, inode, blk, 1, + &map_bh, 0, 0); + if (err > 0) { page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, &filp->f_ra, filp, diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 76e22c9c9c6c..fcfb10f77120 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -330,7 +330,7 @@ static int ext3_block_to_path(struct inode *inode, ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big"); } if (boundary) - *boundary = (i_block & (ptrs - 1)) == (final - 1); + *boundary = final - 1 - (i_block & (ptrs - 1)); return n; } @@ -669,11 +669,15 @@ err_out: * akpm: `handle' can be NULL if create == 0. * * The BKL may not be held on entry here. Be sure to take it early. + * return > 0, # of blocks mapped or allocated. + * return = 0, if plain lookup failed. + * return < 0, error case. 
*/ int -ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create, int extend_disksize) +ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, + unsigned long maxblocks, struct buffer_head *bh_result, + int create, int extend_disksize) { int err = -EIO; int offsets[4]; @@ -681,11 +685,15 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, Indirect *partial; unsigned long goal; int left; - int boundary = 0; - const int depth = ext3_block_to_path(inode, iblock, offsets, &boundary); + int blocks_to_boundary = 0; + int depth; struct ext3_inode_info *ei = EXT3_I(inode); + int count = 0; + unsigned long first_block = 0; + J_ASSERT(handle != NULL || create == 0); + depth = ext3_block_to_path(inode, iblock, offsets, &blocks_to_boundary); if (depth == 0) goto out; @@ -694,8 +702,31 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, /* Simplest case - block found, no allocation needed */ if (!partial) { + first_block = chain[depth - 1].key; clear_buffer_new(bh_result); - goto got_it; + count++; + /*map more blocks*/ + while (count < maxblocks && count <= blocks_to_boundary) { + if (!verify_chain(chain, partial)) { + /* + * Indirect block might be removed by + * truncate while we were reading it. + * Handling of that case: forget what we've + * got now. Flag the err as EAGAIN, so it + * will reread. + */ + err = -EAGAIN; + count = 0; + break; + } + if (le32_to_cpu(*(chain[depth-1].p+count) == + (first_block + count))) + count++; + else + break; + } + if (err != -EAGAIN) + goto got_it; } /* Next simple case - plain lookup or failed read of indirect block */ @@ -723,6 +754,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, } partial = ext3_get_branch(inode, depth, offsets, chain, &err); if (!partial) { + count++; mutex_unlock(&ei->truncate_mutex); if (err) goto cleanup; @@ -772,8 +804,9 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, set_buffer_new(bh_result); got_it: map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); - if (boundary) + if (blocks_to_boundary == 0) set_buffer_boundary(bh_result); + err = count; /* Clean up and exit */ partial = chain + depth - 1; /* the whole chain */ cleanup: @@ -787,21 +820,6 @@ out: return err; } -static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - handle_t *handle = NULL; - int ret; - - if (create) { - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } - ret = ext3_get_block_handle(handle, inode, iblock, - bh_result, create, 1); - return ret; -} - #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) static int @@ -812,9 +830,12 @@ ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock, handle_t *handle = journal_current_handle(); int ret = 0; - if (!handle) + if (!create) goto get_block; /* A read */ + if (max_blocks == 1) + goto get_block; /* A single block get */ + if (handle->h_transaction->t_state == T_LOCKED) { /* * Huge direct-io writes can hold off commits for long @@ -841,13 +862,31 @@ ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock, } get_block: - if (ret == 0) - ret = ext3_get_block_handle(handle, inode, iblock, - bh_result, create, 0); - bh_result->b_size = (1 << inode->i_blkbits); + if (ret == 0) { + ret = ext3_get_blocks_handle(handle, inode, iblock, + max_blocks, bh_result, create, 0); + if (ret > 0) { + bh_result->b_size = (ret 
<< inode->i_blkbits); + ret = 0; + } + } return ret; } +static int ext3_get_blocks(struct inode *inode, sector_t iblock, + unsigned long maxblocks, struct buffer_head *bh_result, + int create) +{ + return ext3_direct_io_get_blocks(inode, iblock, maxblocks, + bh_result, create); +} + +static int ext3_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + return ext3_get_blocks(inode, iblock, 1, bh_result, create); +} + /* * `handle' can be NULL if create is zero */ @@ -862,8 +901,16 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode, dummy.b_state = 0; dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); - *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { + err = ext3_get_blocks_handle(handle, inode, block, 1, + &dummy, create, 1); + if (err == 1) { + err = 0; + } else if (err >= 0) { + WARN_ON(1); + err = -EIO; + } + *errp = err; + if (!err && buffer_mapped(&dummy)) { struct buffer_head *bh; bh = sb_getblk(inode->i_sb, dummy.b_blocknr); if (!bh) { diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index e7239f2f97a1..0adadd85fa66 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -775,9 +775,9 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned); int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -int ext3_get_block_handle(handle_t *handle, struct inode *inode, - sector_t iblock, struct buffer_head *bh_result, int create, - int extend_disksize); +int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, + sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, + int create, int extend_disksize); extern void ext3_read_inode (struct inode *); extern int ext3_write_inode (struct inode *, int); -- cgit v1.2.3 From b47b24781c59565f45acd765dc995a752d561e96 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Sun, 26 Mar 2006 01:37:56 -0800 Subject: [PATCH] ext3_get_blocks: multiple block allocation Add support for multiple block allocation in ext3-get-blocks(). Look up the disk block mapping and count the total number of blocks to allocate, then pass it to ext3_new_block(), where the real block allocation is performed. Once multiple blocks are allocated, prepare the branch with those just allocated blocks info and finally splice the whole branch into the block mapping tree. Signed-off-by: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 270 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 193 insertions(+), 77 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index fcfb10f77120..34e5b0dc9168 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -235,16 +235,6 @@ no_delete: clear_inode(inode); /* We must guarantee clearing of inode... 
*/ } -static int ext3_alloc_block (handle_t *handle, - struct inode * inode, unsigned long goal, int *err) -{ - unsigned long result; - - result = ext3_new_block(handle, inode, goal, err); - return result; -} - - typedef struct { __le32 *p; __le32 key; @@ -476,15 +466,115 @@ static unsigned long ext3_find_goal(struct inode *inode, long block, return ext3_find_near(inode, partial); } +/** + * ext3_blks_to_allocate: Look up the block map and count the number + * of direct blocks need to be allocated for the given branch. + * + * @branch: chain of indirect blocks + * @k: number of blocks need for indirect blocks + * @blks: number of data blocks to be mapped. + * @blocks_to_boundary: the offset in the indirect block + * + * return the total number of blocks to be allocate, including the + * direct and indirect blocks. + */ +static int +ext3_blks_to_allocate(Indirect * branch, int k, unsigned long blks, + int blocks_to_boundary) +{ + unsigned long count = 0; + + /* + * Simple case, [t,d]Indirect block(s) has not allocated yet + * then it's clear blocks on that path have not allocated + */ + if (k > 0) { + /* right now don't hanel cross boundary allocation */ + if (blks < blocks_to_boundary + 1) + count += blks; + else + count += blocks_to_boundary + 1; + return count; + } + + count++; + while (count < blks && count <= blocks_to_boundary && + le32_to_cpu(*(branch[0].p + count)) == 0) { + count++; + } + return count; +} + +/** + * ext3_alloc_blocks: multiple allocate blocks needed for a branch + * @indirect_blks: the number of blocks need to allocate for indirect + * blocks + * + * @new_blocks: on return it will store the new block numbers for + * the indirect blocks(if needed) and the first direct block, + * @blks: on return it will store the total number of allocated + * direct blocks + */ +static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, + unsigned long goal, int indirect_blks, int blks, + unsigned long long new_blocks[4], int *err) +{ + int target, i; + unsigned long count = 0; + int index = 0; + unsigned long current_block = 0; + int ret = 0; + + /* + * Here we try to allocate the requested multiple blocks at once, + * on a best-effort basis. + * To build a branch, we should allocate blocks for + * the indirect blocks(if not allocated yet), and at least + * the first direct block of this branch. 
That's the + * minimum number of blocks need to allocate(required) + */ + target = blks + indirect_blks; + + while (1) { + count = target; + /* allocating blocks for indirect blocks and direct blocks */ + current_block = ext3_new_blocks(handle, inode, goal, &count, err); + if (*err) + goto failed_out; + + target -= count; + /* allocate blocks for indirect blocks */ + while (index < indirect_blks && count) { + new_blocks[index++] = current_block++; + count--; + } + + if (count > 0) + break; + } + + /* save the new block number for the first direct block */ + new_blocks[index] = current_block; + + /* total number of blocks allocated for direct blocks */ + ret = count; + *err = 0; + return ret; +failed_out: + for (i = 0; i i_sb->s_blocksize; - int n = 0, keys = 0; + int i, n = 0; int err = 0; - int i; - int parent = ext3_alloc_block(handle, inode, goal, &err); - - branch[0].key = cpu_to_le32(parent); - if (parent) { - for (n = 1; n < num; n++) { - struct buffer_head *bh; - /* Allocate the next block */ - int nr = ext3_alloc_block(handle, inode, parent, &err); - if (!nr) - break; - branch[n].key = cpu_to_le32(nr); + struct buffer_head *bh; + int num; + unsigned long long new_blocks[4]; + unsigned long long current_block; - /* - * Get buffer_head for parent block, zero it out - * and set the pointer to new one, then send - * parent to disk. - */ - bh = sb_getblk(inode->i_sb, parent); - if (!bh) - break; - keys = n+1; - branch[n].bh = bh; - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - err = ext3_journal_get_create_access(handle, bh); - if (err) { - unlock_buffer(bh); - brelse(bh); - break; - } + num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, + *blks, new_blocks, &err); + if (err) + return err; - memset(bh->b_data, 0, blocksize); - branch[n].p = (__le32*) bh->b_data + offsets[n]; - *branch[n].p = branch[n].key; - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); + branch[0].key = cpu_to_le32(new_blocks[0]); + /* + * metadata blocks and data blocks are allocated. + */ + for (n = 1; n <= indirect_blks; n++) { + /* + * Get buffer_head for parent block, zero it out + * and set the pointer to new one, then send + * parent to disk. 
+ */ + bh = sb_getblk(inode->i_sb, new_blocks[n-1]); + branch[n].bh = bh; + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + err = ext3_journal_get_create_access(handle, bh); + if (err) { unlock_buffer(bh); + brelse(bh); + goto failed; + } - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh); - if (err) - break; - - parent = nr; + memset(bh->b_data, 0, blocksize); + branch[n].p = (__le32 *) bh->b_data + offsets[n]; + branch[n].key = cpu_to_le32(new_blocks[n]); + *branch[n].p = branch[n].key; + if ( n == indirect_blks) { + current_block = new_blocks[n]; + /* + * End of chain, update the last new metablock of + * the chain to point to the new allocated + * data blocks numbers + */ + for (i=1; i < num; i++) + *(branch[n].p + i) = cpu_to_le32(++current_block); } - } - if (n == num) - return 0; + BUFFER_TRACE(bh, "marking uptodate"); + set_buffer_uptodate(bh); + unlock_buffer(bh); + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh); + if (err) + goto failed; + } + *blks = num; + return err; +failed: /* Allocation failed, free what we already allocated */ - for (i = 1; i < keys; i++) { + for (i = 1; i <= n ; i++) { BUFFER_TRACE(branch[i].bh, "call journal_forget"); ext3_journal_forget(handle, branch[i].bh); } - for (i = 0; i < keys; i++) - ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); + for (i = 0; i i_blocks, etc.). In case of success we end up with the full @@ -586,12 +685,12 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode, */ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, - Indirect chain[4], Indirect *where, int num) + Indirect *where, int num, int blks) { int i; int err = 0; struct ext3_block_alloc_info *block_i = EXT3_I(inode)->i_block_alloc_info; - + unsigned long current_block; /* * If we're splicing into a [td]indirect block (as opposed to the * inode) then we need to get write access to the [td]indirect block @@ -606,6 +705,13 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, /* That's it */ *where->p = where->key; + /* update host bufferhead or inode to point to + * more just allocated direct blocks blocks */ + if (num == 0 && blks > 1) { + current_block = le32_to_cpu(where->key + 1); + for (i = 1; i < blks; i++) + *(where->p + i ) = cpu_to_le32(current_block++); + } /* * update the most recently allocated logical & physical block @@ -613,8 +719,8 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, * allocation */ if (block_i) { - block_i->last_alloc_logical_block = block; - block_i->last_alloc_physical_block = le32_to_cpu(where[num-1].key); + block_i->last_alloc_logical_block = block + blks - 1; + block_i->last_alloc_physical_block = le32_to_cpu(where[num].key + blks - 1); } /* We are done with atomic stuff, now do the rest of housekeeping */ @@ -647,10 +753,13 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, return err; err_out: - for (i = 1; i < num; i++) { + for (i = 1; i <= num; i++) { BUFFER_TRACE(where[i].bh, "call journal_forget"); ext3_journal_forget(handle, where[i].bh); + ext3_free_blocks(handle, inode, le32_to_cpu(where[i-1].key), 1); } + ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); + return err; } @@ -684,7 +793,7 @@ ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, Indirect chain[4]; Indirect *partial; unsigned long goal; - int 
left; + int indirect_blks; int blocks_to_boundary = 0; int depth; struct ext3_inode_info *ei = EXT3_I(inode); @@ -772,12 +881,19 @@ ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, goal = ext3_find_goal(inode, iblock, chain, partial); - left = (chain + depth) - partial; + /* the number of blocks need to allocate for [d,t]indirect blocks */ + indirect_blks = (chain + depth) - partial - 1; + /* + * Next look up the indirect map to count the totoal number of + * direct blocks to allocate for this branch. + */ + count = ext3_blks_to_allocate(partial, indirect_blks, + maxblocks, blocks_to_boundary); /* * Block out ext3_truncate while we alter the tree */ - err = ext3_alloc_branch(handle, inode, left, goal, + err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, offsets + (partial - chain), partial); /* @@ -788,8 +904,8 @@ ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, * may need to return -EAGAIN upwards in the worst case. --sct */ if (!err) - err = ext3_splice_branch(handle, inode, iblock, chain, - partial, left); + err = ext3_splice_branch(handle, inode, iblock, + partial, indirect_blks, count); /* * i_disksize growing is protected by truncate_mutex. Don't forget to * protect it if you're about to implement concurrent @@ -824,8 +940,8 @@ out: static int ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock, - unsigned long max_blocks, struct buffer_head *bh_result, - int create) + unsigned long max_blocks, + struct buffer_head *bh_result, int create) { handle_t *handle = journal_current_handle(); int ret = 0; -- cgit v1.2.3 From b54e41ec17ae91dce174eb5a3515e7af4a440d42 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Sun, 26 Mar 2006 01:37:57 -0800 Subject: [PATCH] ext3_get_blocks: support multiple blocks allocation in ext3_new_block() Change ext3_try_to_allocate() (called via ext3_new_blocks()) to try to allocate the requested number of blocks on a best effort basis: After allocated the first block, it will always attempt to allocate the next few(up to the requested size and not beyond the reservation window) adjacent blocks at the same time. 
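The strategy can be summarised with a small self-contained sketch (a plain in-memory bitmap and illustrative names, not the ext3 code itself): claim the first block, then keep claiming immediately adjacent free blocks until either the requested count or the window limit stops the loop.

#define NBLOCKS 128
static unsigned char block_used[NBLOCKS];	/* nonzero = already allocated */

static int try_claim(unsigned long blk)
{
	if (blk >= NBLOCKS || block_used[blk])
		return 0;
	block_used[blk] = 1;
	return 1;
}

/*
 * Claim up to *count blocks starting at goal, staying below limit.
 * Returns the first claimed block or -1; *count is rewritten with the
 * number of blocks actually obtained, which may be fewer than requested.
 */
static long claim_best_effort(unsigned long goal, unsigned long limit,
			      unsigned long *count)
{
	unsigned long num = 0;

	while (num < *count && goal + num < limit && try_claim(goal + num))
		num++;

	*count = num;
	return num ? (long)goal : -1;
}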
Signed-off-by: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/balloc.c | 46 ++++++++++++++++++++++++++++++++++++---------- include/linux/ext3_fs.h | 5 ++++- 2 files changed, 40 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 46623f77666b..bba4aeb4a708 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -653,9 +653,11 @@ claim_block(spinlock_t *lock, int block, struct buffer_head *bh) */ static int ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, - struct buffer_head *bitmap_bh, int goal, struct ext3_reserve_window *my_rsv) + struct buffer_head *bitmap_bh, int goal, + unsigned long *count, struct ext3_reserve_window *my_rsv) { int group_first_block, start, end; + unsigned long num = 0; /* we do allocation within the reservation window if we have a window */ if (my_rsv) { @@ -713,8 +715,18 @@ repeat: goto fail_access; goto repeat; } - return goal; + num++; + goal++; + while (num < *count && goal < end + && ext3_test_allocatable(goal, bitmap_bh) + && claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) { + num++; + goal++; + } + *count = num; + return goal - num; fail_access: + *count = num; return -1; } @@ -1024,11 +1036,12 @@ static int ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, unsigned int group, struct buffer_head *bitmap_bh, int goal, struct ext3_reserve_window_node * my_rsv, - int *errp) + unsigned long *count, int *errp) { unsigned long group_first_block; int ret = 0; int fatal; + unsigned long num = *count; *errp = 0; @@ -1051,7 +1064,8 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, * or last attempt to allocate a block with reservation turned on failed */ if (my_rsv == NULL ) { - ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL); + ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, + goal, count, NULL); goto out; } /* @@ -1093,11 +1107,13 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, || (my_rsv->rsv_end < group_first_block)) BUG(); ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, - &my_rsv->rsv_window); + &num, &my_rsv->rsv_window); if (ret >= 0) { - my_rsv->rsv_alloc_hit++; + my_rsv->rsv_alloc_hit += num; + *count = num; break; /* succeed */ } + num = *count; } out: if (ret >= 0) { @@ -1154,8 +1170,8 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries) * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. 
*/ -int ext3_new_block(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) +int ext3_new_blocks(handle_t *handle, struct inode *inode, + unsigned long goal, unsigned long *count, int *errp) { struct buffer_head *bitmap_bh = NULL; struct buffer_head *gdp_bh; @@ -1178,6 +1194,7 @@ int ext3_new_block(handle_t *handle, struct inode *inode, static int goal_hits, goal_attempts; #endif unsigned long ngroups; + unsigned long num = *count; *errp = -ENOSPC; sb = inode->i_sb; @@ -1244,7 +1261,7 @@ retry: if (!bitmap_bh) goto io_error; ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no, - bitmap_bh, ret_block, my_rsv, &fatal); + bitmap_bh, ret_block, my_rsv, &num, &fatal); if (fatal) goto out; if (ret_block >= 0) @@ -1281,7 +1298,7 @@ retry: if (!bitmap_bh) goto io_error; ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no, - bitmap_bh, -1, my_rsv, &fatal); + bitmap_bh, -1, my_rsv, &num, &fatal); if (fatal) goto out; if (ret_block >= 0) @@ -1388,6 +1405,7 @@ allocated: *errp = 0; brelse(bitmap_bh); + *count = num; return ret_block; io_error: @@ -1406,6 +1424,14 @@ out: return 0; } +int ext3_new_block(handle_t *handle, struct inode *inode, + unsigned long goal, int *errp) +{ + unsigned long count = 1; + + return ext3_new_blocks(handle, inode, goal, &count, errp); +} + unsigned long ext3_count_free_blocks(struct super_block *sb) { unsigned long desc_count; diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 0adadd85fa66..8bb4f842cded 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -36,7 +36,8 @@ struct statfs; * Define EXT3_RESERVATION to reserve data blocks for expanding files */ #define EXT3_DEFAULT_RESERVE_BLOCKS 8 -#define EXT3_MAX_RESERVE_BLOCKS 1024 +/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ +#define EXT3_MAX_RESERVE_BLOCKS 1027 #define EXT3_RESERVE_WINDOW_NOT_ALLOCATED 0 /* * Always enable hashed directories @@ -732,6 +733,8 @@ struct dir_private_info { extern int ext3_bg_has_super(struct super_block *sb, int group); extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); +extern int ext3_new_blocks (handle_t *, struct inode *, unsigned long, + unsigned long *, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, unsigned long); extern void ext3_free_blocks_sb (handle_t *, struct super_block *, -- cgit v1.2.3 From faa569763a7753e0a7cb8fd3919a62c0f7cc1e3c Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Sun, 26 Mar 2006 01:37:58 -0800 Subject: [PATCH] ext3_get_blocks: Adjust accounting info in ext3_new_blocks() Update accounting information (quota, boundary checks, free blocks number etc) in ext3_new_blocks(). Signed-off-by: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/balloc.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index bba4aeb4a708..7a2c7198b059 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1206,7 +1206,7 @@ int ext3_new_blocks(handle_t *handle, struct inode *inode, /* * Check quota for allocation of this block. 
*/ - if (DQUOT_ALLOC_BLOCK(inode, 1)) { + if (DQUOT_ALLOC_BLOCK(inode, num)) { *errp = -EDQUOT; return 0; } @@ -1333,13 +1333,15 @@ allocated: target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb) + le32_to_cpu(es->s_first_data_block); - if (target_block == le32_to_cpu(gdp->bg_block_bitmap) || - target_block == le32_to_cpu(gdp->bg_inode_bitmap) || + if (in_range(le32_to_cpu(gdp->bg_block_bitmap), target_block, num) || + in_range(le32_to_cpu(gdp->bg_inode_bitmap), target_block, num) || in_range(target_block, le32_to_cpu(gdp->bg_inode_table), + EXT3_SB(sb)->s_itb_per_group) || + in_range(target_block + num - 1, le32_to_cpu(gdp->bg_inode_table), EXT3_SB(sb)->s_itb_per_group)) ext3_error(sb, "ext3_new_block", "Allocating block in system zone - " - "block = %u", target_block); + "blocks from %u, length %lu", target_block, num); performed_allocation = 1; @@ -1358,10 +1360,14 @@ allocated: jbd_lock_bh_state(bitmap_bh); spin_lock(sb_bgl_lock(sbi, group_no)); if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { - if (ext3_test_bit(ret_block, - bh2jh(bitmap_bh)->b_committed_data)) { - printk("%s: block was unexpectedly set in " - "b_committed_data\n", __FUNCTION__); + int i; + + for (i = 0; i < num; i++) { + if (ext3_test_bit(ret_block, + bh2jh(bitmap_bh)->b_committed_data)) { + printk("%s: block was unexpectedly set in " + "b_committed_data\n", __FUNCTION__); + } } } ext3_debug("found bit %d\n", ret_block); @@ -1372,7 +1378,7 @@ allocated: /* ret_block was blockgroup-relative. Now it becomes fs-relative */ ret_block = target_block; - if (ret_block >= le32_to_cpu(es->s_blocks_count)) { + if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { ext3_error(sb, "ext3_new_block", "block(%d) >= blocks count(%d) - " "block_group = %d, es == %p ", ret_block, @@ -1390,9 +1396,9 @@ allocated: spin_lock(sb_bgl_lock(sbi, group_no)); gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - num); spin_unlock(sb_bgl_lock(sbi, group_no)); - percpu_counter_mod(&sbi->s_freeblocks_counter, -1); + percpu_counter_mod(&sbi->s_freeblocks_counter, -num); BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); err = ext3_journal_dirty_metadata(handle, gdp_bh); @@ -1405,6 +1411,7 @@ allocated: *errp = 0; brelse(bitmap_bh); + DQUOT_FREE_BLOCK(inode, *count-num); *count = num; return ret_block; @@ -1419,7 +1426,7 @@ out: * Undo the block allocation */ if (!performed_allocation) - DQUOT_FREE_BLOCK(inode, 1); + DQUOT_FREE_BLOCK(inode, *count); brelse(bitmap_bh); return 0; } -- cgit v1.2.3 From d48589bfad0e60bff0aa3f9e4875983f607bd44b Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Sun, 26 Mar 2006 01:37:59 -0800 Subject: [PATCH] ext3_get_blocks: Adjust reservation window size for mblocks Optimize the block reservation and the multiple block allocation: with the knowledge of the total number of blocks ahead, set or adjust the reservation window size properly (based on the number of blocks needed) before block allocation happens: if there isn't any reservation yet, make sure the reservation window equals to or greater than the number of blocks needed, before create an reservation window; if a reservation window is already exists, try to extends the window size to match the number of blocks to allocate. This could increase the possibility of completing multiple blocks allocation in a single request, as blocks are only allocated in the range of the inode's reservation window. 
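A rough sketch of the sizing policy follows, using an illustrative struct rsv_window in place of ext3_reserve_window_node (the names and the "empty window" convention here are assumptions, not kernel code): grow the goal size for a window that has not been created yet, or extend an existing window rightward without running into its neighbour.

/* Illustrative stand-in for ext3_reserve_window_node. */
struct rsv_window {
	unsigned long start;		/* first block covered, inclusive */
	unsigned long end;		/* last block covered, inclusive */
	unsigned long goal_size;	/* preferred size for the next window */
};

static int window_is_empty(const struct rsv_window *rsv)
{
	return rsv->end < rsv->start;	/* sketch convention for "no window yet" */
}

/*
 * Before allocating blocks_wanted blocks at goal: make sure a window that
 * is about to be created is at least that big, or try to extend an
 * existing window toward, but never into, the next window, which starts
 * at next_start.
 */
static void size_window_for_request(struct rsv_window *rsv,
				    unsigned long goal,
				    unsigned long blocks_wanted,
				    unsigned long next_start)
{
	if (window_is_empty(rsv)) {
		if (rsv->goal_size < blocks_wanted)
			rsv->goal_size = blocks_wanted;
		return;
	}

	/* room left between goal and the end of the current window */
	if (rsv->end - goal + 1 < blocks_wanted) {
		unsigned long shortfall = blocks_wanted - (rsv->end - goal + 1);

		if (rsv->end + shortfall < next_start)
			rsv->end += shortfall;
		else
			rsv->end = next_start - 1;	/* stop at the neighbour */
	}
}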
Signed-off-by: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/balloc.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 7a2c7198b059..77927d6938f6 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1011,6 +1011,31 @@ retry: goto retry; } +static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, + struct super_block *sb, int size) +{ + struct ext3_reserve_window_node *next_rsv; + struct rb_node *next; + spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; + + if (!spin_trylock(rsv_lock)) + return; + + next = rb_next(&my_rsv->rsv_node); + + if (!next) + my_rsv->rsv_end += size; + else { + next_rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); + + if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) + my_rsv->rsv_end += size; + else + my_rsv->rsv_end = next_rsv->rsv_start - 1; + } + spin_unlock(rsv_lock); +} + /* * This is the main function used to allocate a new block and its reservation * window. @@ -1095,6 +1120,8 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, while (1) { if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || !goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) { + if (my_rsv->rsv_goal_size < *count) + my_rsv->rsv_goal_size = *count; ret = alloc_new_reservation(my_rsv, goal, sb, group, bitmap_bh); if (ret < 0) @@ -1102,7 +1129,10 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) goal = -1; - } + } else if (goal > 0 && (my_rsv->rsv_end-goal+1) < *count) + try_to_extend_reservation(my_rsv, sb, + *count-my_rsv->rsv_end + goal - 1); + if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) || (my_rsv->rsv_end < group_first_block)) BUG(); -- cgit v1.2.3 From 205f87f6b342444f722e4559d33318686f7df2ca Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:00 -0800 Subject: [PATCH] change buffer_head.b_size to size_t Increase the size of the buffer_head b_size field (only) for 64 bit platforms. Update some old and moldy comments in and around the structure as well. The b_size increase allows us to perform larger mappings and allocations for large I/O requests from userspace, which tie in with other changes allowing the get_block_t() interface to map multiple blocks at once. Signed-off-by: Nathan Scott Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 6 ++++-- fs/ocfs2/journal.c | 2 +- fs/reiserfs/prints.c | 2 +- include/linux/buffer_head.h | 19 +++++++++++-------- 4 files changed, 17 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index f25f58096428..e7a1461f4387 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -426,8 +426,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) if (all_mapped) { printk("__find_get_block_slow() failed. 
" "block=%llu, b_blocknr=%llu\n", - (unsigned long long)block, (unsigned long long)bh->b_blocknr); - printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size); + (unsigned long long)block, + (unsigned long long)bh->b_blocknr); + printk("b_state=0x%08lx, b_size=%zu\n", + bh->b_state, bh->b_size); printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits); } out_unlock: diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 90641929a437..6a610ae53583 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -377,7 +377,7 @@ int ocfs2_journal_access(struct ocfs2_journal_handle *handle, BUG_ON(!bh); BUG_ON(!(handle->flags & OCFS2_HANDLE_STARTED)); - mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %hu\n", + mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n", (unsigned long long)bh->b_blocknr, type, (type == OCFS2_JOURNAL_ACCESS_CREATE) ? "OCFS2_JOURNAL_ACCESS_CREATE" : diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 78b40621b88b..27bd3a1df2ad 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -143,7 +143,7 @@ static void sprintf_buffer_head(char *buf, struct buffer_head *bh) char b[BDEVNAME_SIZE]; sprintf(buf, - "dev %s, size %d, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", + "dev %s, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", bdevname(bh->b_bdev, b), bh->b_size, (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)), bh->b_state, bh->b_page, diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index da917ed096a3..464f068f8b16 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -46,25 +46,28 @@ struct address_space; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); /* - * Keep related fields in common cachelines. The most commonly accessed - * field (b_state) goes at the start so the compiler does not generate - * indexed addressing for it. + * Historically, a buffer_head was used to map a single block + * within a page, and of course as the unit of I/O through the + * filesystem and block layers. Nowadays the basic I/O unit + * is the bio, and buffer_heads are used for extracting block + * mappings (via a get_block_t call), for tracking state within + * a page (via a page_mapping) and for wrapping bio submission + * for backward compatibility reasons (e.g. submit_bh). */ struct buffer_head { - /* First cache line: */ unsigned long b_state; /* buffer state bitmap (see above) */ struct buffer_head *b_this_page;/* circular list of page's buffers */ struct page *b_page; /* the page this bh is mapped to */ - atomic_t b_count; /* users using this block */ - u32 b_size; /* block size */ - sector_t b_blocknr; /* block number */ - char *b_data; /* pointer to data block */ + sector_t b_blocknr; /* start block number */ + size_t b_size; /* size of mapping */ + char *b_data; /* pointer to data within the page */ struct block_device *b_bdev; bh_end_io_t *b_end_io; /* I/O completion */ void *b_private; /* reserved for b_end_io */ struct list_head b_assoc_buffers; /* associated with another mapping */ + atomic_t b_count; /* users using this buffer_head */ }; /* -- cgit v1.2.3 From b0cf2321c6599138f860517745503691556d8453 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:00 -0800 Subject: [PATCH] pass b_size to ->get_block() Pass amount of disk needs to be mapped to get_block(). This way one can modify the fs ->get_block() functions to map multiple blocks at the same time. 
[akpm@osdl.org: performance tweak] [akpm@osdl.org: remove unneeded assignments] Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 9 ++++++++- fs/mpage.c | 2 ++ include/linux/buffer_head.h | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index e7a1461f4387..a507b58550f1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1738,6 +1738,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, sector_t block; sector_t last_block; struct buffer_head *bh, *head; + const unsigned blocksize = 1 << inode->i_blkbits; int nr_underway = 0; BUG_ON(!PageLocked(page)); @@ -1745,7 +1746,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; if (!page_has_buffers(page)) { - create_empty_buffers(page, 1 << inode->i_blkbits, + create_empty_buffers(page, blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); } @@ -1780,6 +1781,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, clear_buffer_dirty(bh); set_buffer_uptodate(bh); } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { + WARN_ON(bh->b_size != blocksize); err = get_block(inode, block, bh, 1); if (err) goto recover; @@ -1933,6 +1935,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (buffer_new(bh)) clear_buffer_new(bh); if (!buffer_mapped(bh)) { + WARN_ON(bh->b_size != blocksize); err = get_block(inode, block, bh, 1); if (err) break; @@ -2088,6 +2091,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block) fully_mapped = 0; if (iblock < lblock) { + WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); if (err) SetPageError(page); @@ -2409,6 +2413,7 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, create = 1; if (block_start >= to) create = 0; + map_bh.b_size = blocksize; ret = get_block(inode, block_in_file + block_in_page, &map_bh, create); if (ret) @@ -2669,6 +2674,7 @@ int block_truncate_page(struct address_space *mapping, err = 0; if (!buffer_mapped(bh)) { + WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); if (err) goto unlock; @@ -2755,6 +2761,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, struct inode *inode = mapping->host; tmp.b_state = 0; tmp.b_blocknr = 0; + tmp.b_size = 1 << inode->i_blkbits; get_block(inode, block, &tmp, 0); return tmp.b_blocknr; } diff --git a/fs/mpage.c b/fs/mpage.c index e431cb3878d6..7903b740cc11 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -192,6 +192,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, page_block++, block_in_file++) { bh.b_state = 0; if (block_in_file < last_block) { + bh.b_size = blocksize; if (get_block(inode, block_in_file, &bh, 0)) goto confused; } @@ -472,6 +473,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, for (page_block = 0; page_block < blocks_per_page; ) { map_bh.b_state = 0; + map_bh.b_size = 1 << blkbits; if (get_block(inode, block_in_file, &map_bh, 1)) goto confused; if (buffer_new(&map_bh)) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 464f068f8b16..fb7e9b7ccbe3 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -280,6 +280,7 @@ map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) set_buffer_mapped(bh); bh->b_bdev = sb->s_bdev; bh->b_blocknr = block; + bh->b_size = 
sb->s_blocksize; } /* -- cgit v1.2.3 From fa30bd058b746c0e2318a77ff8b4977faa924c2c Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:01 -0800 Subject: [PATCH] map multiple blocks for mpage_readpages() This patch changes mpage_readpages() and get_block() to get the disk mapping information for multiple blocks at the same time. b_size represents the amount of disk mapping that needs to mapped. On the successful get_block() b_size indicates the amount of disk mapping thats actually mapped. Only the filesystems who care to use this information and provide multiple disk blocks at a time can choose to do so. No changes are needed for the filesystems who wants to ignore this. [akpm@osdl.org: cleanups] Signed-off-by: Badari Pulavarty Cc: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jfs/inode.c | 3 +- fs/mpage.c | 104 ++++++++++++++++++++++++++++++++++++-------- fs/xfs/linux-2.6/xfs_aops.c | 5 ++- 3 files changed, 90 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 51a5fed90cca..7239ef339489 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -258,7 +258,8 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, static int jfs_get_block(struct inode *ip, sector_t lblock, struct buffer_head *bh_result, int create) { - return jfs_get_blocks(ip, lblock, 1, bh_result, create); + return jfs_get_blocks(ip, lblock, bh_result->b_size >> ip->i_blkbits, + bh_result, create); } static int jfs_writepage(struct page *page, struct writeback_control *wbc) diff --git a/fs/mpage.c b/fs/mpage.c index 7903b740cc11..9bf2eb30e6f4 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -163,9 +163,19 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) } while (page_bh != head); } +/* + * This is the worker routine which does all the work of mapping the disk + * blocks and constructs largest possible bios, submits them for IO if the + * blocks are not contiguous on the disk. + * + * We pass a buffer_head back and forth and use its buffer_mapped() flag to + * represent the validity of its disk mapping and to decide when to do the next + * get_block() call. + */ static struct bio * do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, - sector_t *last_block_in_bio, get_block_t get_block) + sector_t *last_block_in_bio, struct buffer_head *map_bh, + unsigned long *first_logical_block, get_block_t get_block) { struct inode *inode = page->mapping->host; const unsigned blkbits = inode->i_blkbits; @@ -173,34 +183,72 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, const unsigned blocksize = 1 << blkbits; sector_t block_in_file; sector_t last_block; + sector_t last_block_in_file; sector_t blocks[MAX_BUF_PER_PAGE]; unsigned page_block; unsigned first_hole = blocks_per_page; struct block_device *bdev = NULL; - struct buffer_head bh; int length; int fully_mapped = 1; + unsigned nblocks; + unsigned relative_block; if (page_has_buffers(page)) goto confused; block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); - last_block = (i_size_read(inode) + blocksize - 1) >> blkbits; + last_block = block_in_file + nr_pages * blocks_per_page; + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + if (last_block > last_block_in_file) + last_block = last_block_in_file; + page_block = 0; + + /* + * Map blocks using the result from the previous get_blocks call first. 
+ */ + nblocks = map_bh->b_size >> blkbits; + if (buffer_mapped(map_bh) && block_in_file > *first_logical_block && + block_in_file < (*first_logical_block + nblocks)) { + unsigned map_offset = block_in_file - *first_logical_block; + unsigned last = nblocks - map_offset; + + for (relative_block = 0; ; relative_block++) { + if (relative_block == last) { + clear_buffer_mapped(map_bh); + break; + } + if (page_block == blocks_per_page) + break; + blocks[page_block] = map_bh->b_blocknr + map_offset + + relative_block; + page_block++; + block_in_file++; + } + bdev = map_bh->b_bdev; + } + + /* + * Then do more get_blocks calls until we are done with this page. + */ + map_bh->b_page = page; + while (page_block < blocks_per_page) { + map_bh->b_state = 0; + map_bh->b_size = 0; - bh.b_page = page; - for (page_block = 0; page_block < blocks_per_page; - page_block++, block_in_file++) { - bh.b_state = 0; if (block_in_file < last_block) { - bh.b_size = blocksize; - if (get_block(inode, block_in_file, &bh, 0)) + map_bh->b_size = (last_block-block_in_file) << blkbits; + if (get_block(inode, block_in_file, map_bh, 0)) goto confused; + *first_logical_block = block_in_file; } - if (!buffer_mapped(&bh)) { + if (!buffer_mapped(map_bh)) { fully_mapped = 0; if (first_hole == blocks_per_page) first_hole = page_block; + page_block++; + block_in_file++; + clear_buffer_mapped(map_bh); continue; } @@ -210,8 +258,8 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, * we just collected from get_block into the page's buffers * so readpage doesn't have to repeat the get_block call */ - if (buffer_uptodate(&bh)) { - map_buffer_to_page(page, &bh, page_block); + if (buffer_uptodate(map_bh)) { + map_buffer_to_page(page, map_bh, page_block); goto confused; } @@ -219,10 +267,20 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, goto confused; /* hole -> non-hole */ /* Contiguous blocks? 
*/ - if (page_block && blocks[page_block-1] != bh.b_blocknr-1) + if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1) goto confused; - blocks[page_block] = bh.b_blocknr; - bdev = bh.b_bdev; + nblocks = map_bh->b_size >> blkbits; + for (relative_block = 0; ; relative_block++) { + if (relative_block == nblocks) { + clear_buffer_mapped(map_bh); + break; + } else if (page_block == blocks_per_page) + break; + blocks[page_block] = map_bh->b_blocknr+relative_block; + page_block++; + block_in_file++; + } + bdev = map_bh->b_bdev; } if (first_hole != blocks_per_page) { @@ -261,7 +319,7 @@ alloc_new: goto alloc_new; } - if (buffer_boundary(&bh) || (first_hole != blocks_per_page)) + if (buffer_boundary(map_bh) || (first_hole != blocks_per_page)) bio = mpage_bio_submit(READ, bio); else *last_block_in_bio = blocks[blocks_per_page - 1]; @@ -332,7 +390,10 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, unsigned page_idx; sector_t last_block_in_bio = 0; struct pagevec lru_pvec; + struct buffer_head map_bh; + unsigned long first_logical_block = 0; + clear_buffer_mapped(&map_bh); pagevec_init(&lru_pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); @@ -343,7 +404,9 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, page->index, GFP_KERNEL)) { bio = do_mpage_readpage(bio, page, nr_pages - page_idx, - &last_block_in_bio, get_block); + &last_block_in_bio, &map_bh, + &first_logical_block, + get_block); if (!pagevec_add(&lru_pvec, page)) __pagevec_lru_add(&lru_pvec); } else { @@ -365,9 +428,12 @@ int mpage_readpage(struct page *page, get_block_t get_block) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; + struct buffer_head map_bh; + unsigned long first_logical_block = 0; - bio = do_mpage_readpage(bio, page, 1, - &last_block_in_bio, get_block); + clear_buffer_mapped(&map_bh); + bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, + &map_bh, &first_logical_block, get_block); if (bio) mpage_bio_submit(READ, bio); return 0; diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 4f2476f188b0..a79b84f8b55c 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1310,8 +1310,9 @@ xfs_get_block( struct buffer_head *bh_result, int create) { - return __xfs_get_block(inode, iblock, 0, bh_result, - create, 0, BMAPI_WRITE); + return __xfs_get_block(inode, iblock, + bh_result->b_size >> inode->i_blkbits, + bh_result, create, 0, BMAPI_WRITE); } STATIC int -- cgit v1.2.3 From 1d8fa7a2b9a39d18727acc5c468e870df606c852 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:02 -0800 Subject: [PATCH] remove ->get_blocks() support Now that get_block() can handle mapping multiple disk blocks, no need to have ->get_blocks(). This patch removes fs specific ->get_blocks() added for DIO and makes it users use get_block() instead. 
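From the caller's side the contract is now: seed bh.b_size with the largest mapping you are prepared to accept, call get_block(), and read b_size back to learn how much was actually mapped. A hedged sketch of that calling convention, modelled on generic_block_bmap() above rather than copied from the patch (the function name and parameters are illustrative):

static void example_probe_mapping(struct inode *inode, get_block_t *get_block,
                                  sector_t start_block, unsigned want_blocks)
{
        struct buffer_head bh;
        int err;

        bh.b_state = 0;
        bh.b_blocknr = 0;
        /* request: map up to want_blocks blocks, without allocating */
        bh.b_size = (size_t)want_blocks << inode->i_blkbits;

        err = get_block(inode, start_block, &bh, 0);
        if (!err && buffer_mapped(&bh))
                /* grant: a contiguous run starting at bh.b_blocknr */
                printk("mapped %zu bytes at block %llu\n", bh.b_size,
                       (unsigned long long)bh.b_blocknr);
}

This is the shape of the direct-io and mpage callers above; filesystems that ignore b_size simply behave as before and map a single block per call.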
Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/block_dev.c | 3 ++- fs/direct-io.c | 27 ++++++++++++++------------- fs/ext2/inode.c | 14 +------------- fs/ext3/inode.c | 12 ++---------- fs/fat/inode.c | 2 +- fs/hfs/inode.c | 13 +------------ fs/hfsplus/inode.c | 13 +------------ fs/jfs/inode.c | 2 +- fs/ocfs2/aops.c | 2 +- fs/reiserfs/inode.c | 1 - fs/xfs/linux-2.6/xfs_aops.c | 6 +++--- include/linux/fs.h | 17 +++++++---------- 12 files changed, 34 insertions(+), 78 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 9a451a9ffad4..5983d42df015 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -131,9 +131,10 @@ blkdev_get_block(struct inode *inode, sector_t iblock, static int blkdev_get_blocks(struct inode *inode, sector_t iblock, - unsigned long max_blocks, struct buffer_head *bh, int create) + struct buffer_head *bh, int create) { sector_t end_block = max_block(I_BDEV(inode)); + unsigned long max_blocks = bh->b_size >> inode->i_blkbits; if ((iblock + max_blocks) > end_block) { max_blocks = end_block - iblock; diff --git a/fs/direct-io.c b/fs/direct-io.c index 235ed8d1f11e..9d1d2aa73e42 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -86,12 +86,12 @@ struct dio { unsigned first_block_in_page; /* doesn't change, Used only once */ int boundary; /* prev block is at a boundary */ int reap_counter; /* rate limit reaping */ - get_blocks_t *get_blocks; /* block mapping function */ + get_block_t *get_block; /* block mapping function */ dio_iodone_t *end_io; /* IO completion function */ sector_t final_block_in_bio; /* current final block in bio + 1 */ sector_t next_block_for_io; /* next block to be put under IO, in dio_blocks units */ - struct buffer_head map_bh; /* last get_blocks() result */ + struct buffer_head map_bh; /* last get_block() result */ /* * Deferred addition of a page to the dio. These variables are @@ -211,9 +211,9 @@ static struct page *dio_get_page(struct dio *dio) /* * Called when all DIO BIO I/O has been completed - let the filesystem - * know, if it registered an interest earlier via get_blocks. Pass the + * know, if it registered an interest earlier via get_block. Pass the * private field of the map buffer_head so that filesystems can use it - * to hold additional state between get_blocks calls and dio_complete. + * to hold additional state between get_block calls and dio_complete. */ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes) { @@ -493,7 +493,7 @@ static int dio_bio_reap(struct dio *dio) * The fs is allowed to map lots of blocks at once. If it wants to do that, * it uses the passed inode-relative block number as the file offset, as usual. * - * get_blocks() is passed the number of i_blkbits-sized blocks which direct_io + * get_block() is passed the number of i_blkbits-sized blocks which direct_io * has remaining to do. The fs should not map more than this number of blocks. * * If the fs has mapped a lot of blocks, it should populate bh->b_size to @@ -506,7 +506,7 @@ static int dio_bio_reap(struct dio *dio) * In the case of filesystem holes: the fs may return an arbitrarily-large * hole by returning an appropriate value in b_size and by clearing * buffer_mapped(). However the direct-io code will only process holes one - * block at a time - it will repeatedly call get_blocks() as it walks the hole. + * block at a time - it will repeatedly call get_block() as it walks the hole. 
*/ static int get_more_blocks(struct dio *dio) { @@ -548,7 +548,8 @@ static int get_more_blocks(struct dio *dio) * at a higher level for inside-i_size block-instantiating * writes. */ - ret = (*dio->get_blocks)(dio->inode, fs_startblk, fs_count, + map_bh->b_size = fs_count << dio->blkbits; + ret = (*dio->get_block)(dio->inode, fs_startblk, map_bh, create); } return ret; @@ -783,11 +784,11 @@ static void dio_zero_block(struct dio *dio, int end) * happily perform page-sized but 512-byte aligned IOs. It is important that * blockdev IO be able to have fine alignment and large sizes. * - * So what we do is to permit the ->get_blocks function to populate bh.b_size + * So what we do is to permit the ->get_block function to populate bh.b_size * with the size of IO which is permitted at this offset and this i_blkbits. * * For best results, the blockdev should be set up with 512-byte i_blkbits and - * it should set b_size to PAGE_SIZE or more inside get_blocks(). This gives + * it should set b_size to PAGE_SIZE or more inside get_block(). This gives * fine alignment but still allows this function to work in PAGE_SIZE units. */ static int do_direct_IO(struct dio *dio) @@ -947,7 +948,7 @@ out: static ssize_t direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs, - unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io, + unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, struct dio *dio) { unsigned long user_addr; @@ -969,7 +970,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, dio->boundary = 0; dio->reap_counter = 0; - dio->get_blocks = get_blocks; + dio->get_block = get_block; dio->end_io = end_io; dio->map_bh.b_private = NULL; dio->final_block_in_bio = -1; @@ -1177,7 +1178,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io, + unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, int dio_lock_type) { int seg; @@ -1273,7 +1274,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, (end > i_size_read(inode))); retval = direct_io_worker(rw, iocb, inode, iov, offset, - nr_segs, blkbits, get_blocks, end_io, dio); + nr_segs, blkbits, get_block, end_io, dio); if (rw == READ && dio_lock_type == DIO_LOCKING) release_i_mutex = 0; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index a717837f272e..04af9c45dce2 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -667,18 +667,6 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping,block,ext2_get_block); } -static int -ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, - struct buffer_head *bh_result, int create) -{ - int ret; - - ret = ext2_get_block(inode, iblock, bh_result, create); - if (ret == 0) - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -} - static ssize_t ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -687,7 +675,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct inode *inode = file->f_mapping->host; return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, ext2_get_blocks, NULL); + offset, nr_segs, ext2_get_block, NULL); } static int diff --git 
a/fs/ext3/inode.c b/fs/ext3/inode.c index 34e5b0dc9168..0cd126176bbb 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -940,11 +940,11 @@ out: static int ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock, - unsigned long max_blocks, struct buffer_head *bh_result, int create) { handle_t *handle = journal_current_handle(); int ret = 0; + unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; if (!create) goto get_block; /* A read */ @@ -989,18 +989,10 @@ get_block: return ret; } -static int ext3_get_blocks(struct inode *inode, sector_t iblock, - unsigned long maxblocks, struct buffer_head *bh_result, - int create) -{ - return ext3_direct_io_get_blocks(inode, iblock, maxblocks, - bh_result, create); -} - static int ext3_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - return ext3_get_blocks(inode, iblock, 1, bh_result, create); + return ext3_direct_io_get_blocks(inode, iblock, bh_result, create); } /* diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 297300fe81c2..404bfc9f7385 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -101,11 +101,11 @@ static int __fat_get_blocks(struct inode *inode, sector_t iblock, } static int fat_get_blocks(struct inode *inode, sector_t iblock, - unsigned long max_blocks, struct buffer_head *bh_result, int create) { struct super_block *sb = inode->i_sb; int err; + unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create); if (err) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 39fd85b9b916..2c564701724f 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -98,17 +98,6 @@ static int hfs_releasepage(struct page *page, gfp_t mask) return res ? try_to_free_buffers(page) : 0; } -static int hfs_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, - struct buffer_head *bh_result, int create) -{ - int ret; - - ret = hfs_get_block(inode, iblock, bh_result, create); - if (!ret) - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -} - static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { @@ -116,7 +105,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, struct inode *inode = file->f_dentry->d_inode->i_mapping->host; return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, hfs_get_blocks, NULL); + offset, nr_segs, hfs_get_block, NULL); } static int hfs_writepages(struct address_space *mapping, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 12ed2b7d046b..9fbe4d2aeece 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -93,17 +93,6 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) return res ? 
try_to_free_buffers(page) : 0; } -static int hfsplus_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, - struct buffer_head *bh_result, int create) -{ - int ret; - - ret = hfsplus_get_block(inode, iblock, bh_result, create); - if (!ret) - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -} - static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { @@ -111,7 +100,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, struct inode *inode = file->f_dentry->d_inode->i_mapping->host; return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, hfsplus_get_blocks, NULL); + offset, nr_segs, hfsplus_get_block, NULL); } static int hfsplus_writepages(struct address_space *mapping, diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 7239ef339489..04eb78f1252e 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -302,7 +302,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, struct inode *inode = file->f_mapping->host; return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, jfs_get_blocks, NULL); + offset, nr_segs, jfs_get_block, NULL); } struct address_space_operations jfs_aops = { diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index bf931ba1d364..0d858d0b25be 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -540,7 +540,6 @@ bail: * fs_count, map_bh, dio->rw == WRITE); */ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, - unsigned long max_blocks, struct buffer_head *bh_result, int create) { int ret; @@ -548,6 +547,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, u64 p_blkno; int contig_blocks; unsigned char blocksize_bits; + unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; if (!inode || !bh_result) { mlog(ML_ERROR, "inode or bh_result is null\n"); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 62e18c19b446..9857e50f85e7 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -466,7 +466,6 @@ static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, direct_IO request. 
*/ static int reiserfs_get_blocks_direct_io(struct inode *inode, sector_t iblock, - unsigned long max_blocks, struct buffer_head *bh_result, int create) { diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index a79b84f8b55c..c02f7c5b7462 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1319,12 +1319,12 @@ STATIC int xfs_get_blocks_direct( struct inode *inode, sector_t iblock, - unsigned long max_blocks, struct buffer_head *bh_result, int create) { - return __xfs_get_block(inode, iblock, max_blocks, bh_result, - create, 1, BMAPI_WRITE|BMAPI_DIRECT); + return __xfs_get_block(inode, iblock, + bh_result->b_size >> inode->i_blkbits, + bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT); } STATIC void diff --git a/include/linux/fs.h b/include/linux/fs.h index 155d29d5e5e4..9d9674946956 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -252,9 +252,6 @@ extern void __init files_init(unsigned long); struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); -typedef int (get_blocks_t)(struct inode *inode, sector_t iblock, - unsigned long max_blocks, - struct buffer_head *bh_result, int create); typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); @@ -1645,7 +1642,7 @@ static inline void do_generic_file_read(struct file * filp, loff_t *ppos, ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io, + unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, int lock_type); enum { @@ -1656,29 +1653,29 @@ enum { static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, + loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_blocks, end_io, DIO_LOCKING); + nr_segs, get_block, end_io, DIO_LOCKING); } static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, + loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_blocks, end_io, DIO_NO_LOCKING); + nr_segs, get_block, end_io, DIO_NO_LOCKING); } static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, + loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_blocks, end_io, DIO_OWN_LOCKING); + nr_segs, get_block, end_io, DIO_OWN_LOCKING); } extern struct file_operations generic_ro_fops; -- cgit v1.2.3 From d6859bfca8cbfe4105704e410b0afa50beabbbb9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 26 Mar 2006 01:38:03 -0800 Subject: [PATCH] ext3: cleanups and WARN_ON() - Clean up a few little layout things and comments. - Add a WARN_ON to a case which I was wondering about. - Tune up some inlines. 
Cc: Mingming Cao Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 239 +++++++++++++++++++++++++++----------------------------- 1 file changed, 114 insertions(+), 125 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 0cd126176bbb..e68587a7f366 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -44,16 +44,16 @@ static int ext3_writepage_trans_blocks(struct inode *inode); /* * Test whether an inode is a fast symlink. */ -static inline int ext3_inode_is_fast_symlink(struct inode *inode) +static int ext3_inode_is_fast_symlink(struct inode *inode) { int ea_blocks = EXT3_I(inode)->i_file_acl ? (inode->i_sb->s_blocksize >> 9) : 0; - return (S_ISLNK(inode->i_mode) && - inode->i_blocks - ea_blocks == 0); + return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); } -/* The ext3 forget function must perform a revoke if we are freeing data +/* + * The ext3 forget function must perform a revoke if we are freeing data * which has been journaled. Metadata (eg. indirect blocks) must be * revoked in all cases. * @@ -61,10 +61,8 @@ static inline int ext3_inode_is_fast_symlink(struct inode *inode) * but there may still be a record of it in the journal, and that record * still needs to be revoked. */ - -int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) +int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, + struct buffer_head *bh, int blocknr) { int err; @@ -104,10 +102,9 @@ int ext3_forget(handle_t *handle, int is_metadata, } /* - * Work out how many blocks we need to progress with the next chunk of a + * Work out how many blocks we need to proceed with the next chunk of a * truncate transaction. */ - static unsigned long blocks_for_truncate(struct inode *inode) { unsigned long needed; @@ -141,7 +138,6 @@ static unsigned long blocks_for_truncate(struct inode *inode) * extend fails, we need to propagate the failure up and restart the * transaction in the top-level truncate loop. --sct */ - static handle_t *start_transaction(struct inode *inode) { handle_t *result; @@ -194,9 +190,11 @@ void ext3_delete_inode (struct inode * inode) handle = start_transaction(inode); if (IS_ERR(handle)) { - /* If we're going to skip the normal cleanup, we still - * need to make sure that the in-core orphan linked list - * is properly cleaned up. */ + /* + * If we're going to skip the normal cleanup, we still need to + * make sure that the in-core orphan linked list is properly + * cleaned up. + */ ext3_orphan_del(NULL, inode); goto no_delete; } @@ -247,7 +245,7 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) p->bh = bh; } -static inline int verify_chain(Indirect *from, Indirect *to) +static int verify_chain(Indirect *from, Indirect *to) { while (from <= to && from->key == *from->p) from++; @@ -317,7 +315,7 @@ static int ext3_block_to_path(struct inode *inode, offsets[n++] = i_block & (ptrs - 1); final = ptrs; } else { - ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big"); + ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big"); } if (boundary) *boundary = final - 1 - (i_block & (ptrs - 1)); @@ -409,7 +407,6 @@ no_block: * * Caller must make sure that @ind is valid and will stay that way. 
*/ - static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) { struct ext3_inode_info *ei = EXT3_I(inode); @@ -419,17 +416,18 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) unsigned long colour; /* Try to find previous block */ - for (p = ind->p - 1; p >= start; p--) + for (p = ind->p - 1; p >= start; p--) { if (*p) return le32_to_cpu(*p); + } /* No such thing, so let's try location of indirect block */ if (ind->bh) return ind->bh->b_blocknr; /* - * It is going to be refered from inode itself? OK, just put it into - * the same cylinder group then. + * It is going to be referred to from the inode itself? OK, just put it + * into the same cylinder group then. */ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); @@ -453,7 +451,9 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) static unsigned long ext3_find_goal(struct inode *inode, long block, Indirect chain[4], Indirect *partial) { - struct ext3_block_alloc_info *block_i = EXT3_I(inode)->i_block_alloc_info; + struct ext3_block_alloc_info *block_i; + + block_i = EXT3_I(inode)->i_block_alloc_info; /* * try the heuristic for sequential allocation, @@ -466,6 +466,7 @@ static unsigned long ext3_find_goal(struct inode *inode, long block, return ext3_find_near(inode, partial); } + /** * ext3_blks_to_allocate: Look up the block map and count the number * of direct blocks need to be allocated for the given branch. @@ -478,8 +479,7 @@ static unsigned long ext3_find_goal(struct inode *inode, long block, * return the total number of blocks to be allocate, including the * direct and indirect blocks. */ -static int -ext3_blks_to_allocate(Indirect * branch, int k, unsigned long blks, +static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, int blocks_to_boundary) { unsigned long count = 0; @@ -489,7 +489,7 @@ ext3_blks_to_allocate(Indirect * branch, int k, unsigned long blks, * then it's clear blocks on that path have not allocated */ if (k > 0) { - /* right now don't hanel cross boundary allocation */ + /* right now we don't handle cross boundary allocation */ if (blks < blocks_to_boundary + 1) count += blks; else @@ -538,7 +538,7 @@ static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, while (1) { count = target; /* allocating blocks for indirect blocks and direct blocks */ - current_block = ext3_new_blocks(handle, inode, goal, &count, err); + current_block = ext3_new_blocks(handle,inode,goal,&count,err); if (*err) goto failed_out; @@ -591,7 +591,6 @@ failed_out: * ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain * as described above and return 0. */ - static int ext3_alloc_branch(handle_t *handle, struct inode *inode, int indirect_blks, int *blks, unsigned long goal, int *offsets, Indirect *branch) @@ -670,27 +669,28 @@ failed: } /** - * ext3_splice_branch - splice the allocated branch onto inode. - * @inode: owner - * @block: (logical) number of block we are adding - * @chain: chain of indirect blocks (with a missing link - see - * ext3_alloc_branch) - * @where: location of missing link - * @num: number of indirect blocks we are adding - * @blks: number of direct blocks we are adding - * - * This function fills the missing link and does all housekeeping needed in - * inode (->i_blocks, etc.). In case of success we end up with the full - * chain to new block and return 0. + * ext3_splice_branch - splice the allocated branch onto inode. 
+ * @inode: owner + * @block: (logical) number of block we are adding + * @chain: chain of indirect blocks (with a missing link - see + * ext3_alloc_branch) + * @where: location of missing link + * @num: number of indirect blocks we are adding + * @blks: number of direct blocks we are adding + * + * This function fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. */ - -static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, - Indirect *where, int num, int blks) +static int ext3_splice_branch(handle_t *handle, struct inode *inode, + long block, Indirect *where, int num, int blks) { int i; int err = 0; - struct ext3_block_alloc_info *block_i = EXT3_I(inode)->i_block_alloc_info; + struct ext3_block_alloc_info *block_i; unsigned long current_block; + + block_i = EXT3_I(inode)->i_block_alloc_info; /* * If we're splicing into a [td]indirect block (as opposed to the * inode) then we need to get write access to the [td]indirect block @@ -705,8 +705,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, /* That's it */ *where->p = where->key; - /* update host bufferhead or inode to point to - * more just allocated direct blocks blocks */ + + /* + * Update the host buffer_head or inode to point to more just allocated + * direct blocks blocks + */ if (num == 0 && blks > 1) { current_block = le32_to_cpu(where->key + 1); for (i = 1; i < blks; i++) @@ -720,7 +723,8 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, */ if (block_i) { block_i->last_alloc_logical_block = block + blks - 1; - block_i->last_alloc_physical_block = le32_to_cpu(where[num].key + blks - 1); + block_i->last_alloc_physical_block = + le32_to_cpu(where[num].key + blks - 1); } /* We are done with atomic stuff, now do the rest of housekeeping */ @@ -731,7 +735,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, /* had we spliced it onto indirect block? */ if (where->bh) { /* - * akpm: If we spliced it onto an indirect block, we haven't + * If we spliced it onto an indirect block, we haven't * altered the inode. Note however that if it is being spliced * onto an indirect block at the very end of the file (the * file is growing) then we *will* alter the inode to reflect @@ -756,7 +760,7 @@ err_out: for (i = 1; i <= num; i++) { BUFFER_TRACE(where[i].bh, "call journal_forget"); ext3_journal_forget(handle, where[i].bh); - ext3_free_blocks(handle, inode, le32_to_cpu(where[i-1].key), 1); + ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); } ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); @@ -775,17 +779,16 @@ err_out: * allocations is needed - we simply release blocks and do not touch anything * reachable from inode. * - * akpm: `handle' can be NULL if create == 0. + * `handle' can be NULL if create == 0. * * The BKL may not be held on entry here. Be sure to take it early. * return > 0, # of blocks mapped or allocated. * return = 0, if plain lookup failed. * return < 0, error case. 
*/ - -int -ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, - unsigned long maxblocks, struct buffer_head *bh_result, +int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, + sector_t iblock, unsigned long maxblocks, + struct buffer_head *bh_result, int create, int extend_disksize) { int err = -EIO; @@ -802,7 +805,7 @@ ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, J_ASSERT(handle != NULL || create == 0); - depth = ext3_block_to_path(inode, iblock, offsets, &blocks_to_boundary); + depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); if (depth == 0) goto out; @@ -998,8 +1001,8 @@ static int ext3_get_block(struct inode *inode, sector_t iblock, /* * `handle' can be NULL if create is zero */ -struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode, - long block, int create, int * errp) +struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, + long block, int create, int *errp) { struct buffer_head dummy; int fatal = 0, err; @@ -1029,17 +1032,18 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode, J_ASSERT(create != 0); J_ASSERT(handle != 0); - /* Now that we do not always journal data, we - should keep in mind whether this should - always journal the new buffer as metadata. - For now, regular file writes use - ext3_get_block instead, so it's not a - problem. */ + /* + * Now that we do not always journal data, we should + * keep in mind whether this should always journal the + * new buffer as metadata. For now, regular file + * writes use ext3_get_block instead, so it's not a + * problem. + */ lock_buffer(bh); BUFFER_TRACE(bh, "call get_create_access"); fatal = ext3_journal_get_create_access(handle, bh); if (!fatal && !buffer_uptodate(bh)) { - memset(bh->b_data, 0, inode->i_sb->s_blocksize); + memset(bh->b_data,0,inode->i_sb->s_blocksize); set_buffer_uptodate(bh); } unlock_buffer(bh); @@ -1061,7 +1065,7 @@ err: return NULL; } -struct buffer_head *ext3_bread(handle_t *handle, struct inode * inode, +struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, int block, int create, int *err) { struct buffer_head * bh; @@ -1137,9 +1141,8 @@ static int walk_page_buffers( handle_t *handle, * is elevated. We'll still have enough credits for the tiny quotafile * write. */ - -static int do_journal_get_write_access(handle_t *handle, - struct buffer_head *bh) +static int do_journal_get_write_access(handle_t *handle, + struct buffer_head *bh) { if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; @@ -1180,8 +1183,7 @@ out: return ret; } -int -ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) +int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) { int err = journal_dirty_data(handle, bh); if (err) @@ -1206,7 +1208,6 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh) * ext3 never places buffers on inode->i_mapping->private_list. metadata * buffers are managed internally. */ - static int ext3_ordered_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -1416,7 +1417,7 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) * we don't need to open a transaction here. 
*/ static int ext3_ordered_writepage(struct page *page, - struct writeback_control *wbc) + struct writeback_control *wbc) { struct inode *inode = page->mapping->host; struct buffer_head *page_bufs; @@ -1907,11 +1908,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q) * c) free the subtrees growing from the inode past the @chain[0]. * (no partially truncated stuff there). */ -static Indirect *ext3_find_shared(struct inode *inode, - int depth, - int offsets[4], - Indirect chain[4], - __le32 *top) +static Indirect *ext3_find_shared(struct inode *inode, int depth, + int offsets[4], Indirect chain[4], __le32 *top) { Indirect *partial, *p; int k, err; @@ -1950,8 +1948,7 @@ static Indirect *ext3_find_shared(struct inode *inode, } /* Writer: end */ - while(partial > p) - { + while(partial > p) { brelse(partial->bh); partial--; } @@ -1967,10 +1964,9 @@ no_top: * We release `count' blocks on disk, but (last - first) may be greater * than `count' because there can be holes in there. */ -static void -ext3_clear_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, - unsigned long block_to_free, unsigned long count, - __le32 *first, __le32 *last) +static void ext3_clear_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, unsigned long block_to_free, + unsigned long count, __le32 *first, __le32 *last) { __le32 *p; if (try_to_extend_transaction(handle, inode)) { @@ -2231,8 +2227,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, * that's fine - as long as they are linked from the inode, the post-crash * ext3_truncate() run will find them and release them. */ - -void ext3_truncate(struct inode * inode) +void ext3_truncate(struct inode *inode) { handle_t *handle; struct ext3_inode_info *ei = EXT3_I(inode); @@ -2356,29 +2351,26 @@ void ext3_truncate(struct inode * inode) do_indirects: /* Kill the remaining (whole) subtrees */ switch (offsets[0]) { - default: - nr = i_data[EXT3_IND_BLOCK]; - if (nr) { - ext3_free_branches(handle, inode, NULL, - &nr, &nr+1, 1); - i_data[EXT3_IND_BLOCK] = 0; - } - case EXT3_IND_BLOCK: - nr = i_data[EXT3_DIND_BLOCK]; - if (nr) { - ext3_free_branches(handle, inode, NULL, - &nr, &nr+1, 2); - i_data[EXT3_DIND_BLOCK] = 0; - } - case EXT3_DIND_BLOCK: - nr = i_data[EXT3_TIND_BLOCK]; - if (nr) { - ext3_free_branches(handle, inode, NULL, - &nr, &nr+1, 3); - i_data[EXT3_TIND_BLOCK] = 0; - } - case EXT3_TIND_BLOCK: - ; + default: + nr = i_data[EXT3_IND_BLOCK]; + if (nr) { + ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1); + i_data[EXT3_IND_BLOCK] = 0; + } + case EXT3_IND_BLOCK: + nr = i_data[EXT3_DIND_BLOCK]; + if (nr) { + ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2); + i_data[EXT3_DIND_BLOCK] = 0; + } + case EXT3_DIND_BLOCK: + nr = i_data[EXT3_TIND_BLOCK]; + if (nr) { + ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3); + i_data[EXT3_TIND_BLOCK] = 0; + } + case EXT3_TIND_BLOCK: + ; } ext3_discard_reservation(inode); @@ -2387,8 +2379,10 @@ do_indirects: inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, inode); - /* In a multi-transaction truncate, we only make the final - * transaction synchronous */ + /* + * In a multi-transaction truncate, we only make the final transaction + * synchronous + */ if (IS_SYNC(inode)) handle->h_sync = 1; out_stop: @@ -2414,20 +2408,16 @@ static unsigned long ext3_get_inode_block(struct super_block *sb, struct ext3_group_desc * gdp; - if ((ino != EXT3_ROOT_INO && - ino != EXT3_JOURNAL_INO && - ino != EXT3_RESIZE_INO && - ino < 
EXT3_FIRST_INO(sb)) || - ino > le32_to_cpu( - EXT3_SB(sb)->s_es->s_inodes_count)) { - ext3_error (sb, "ext3_get_inode_block", + if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO && + ino != EXT3_RESIZE_INO && ino < EXT3_FIRST_INO(sb)) || + ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) { + ext3_error(sb, "ext3_get_inode_block", "bad inode number: %lu", ino); return 0; } block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); if (block_group >= EXT3_SB(sb)->s_groups_count) { - ext3_error (sb, "ext3_get_inode_block", - "group >= groups count"); + ext3_error(sb,"ext3_get_inode_block","group >= groups count"); return 0; } smp_rmb(); @@ -2440,7 +2430,7 @@ static unsigned long ext3_get_inode_block(struct super_block *sb, return 0; } - gdp = (struct ext3_group_desc *) bh->b_data; + gdp = (struct ext3_group_desc *)bh->b_data; /* * Figure out the offset within the block group inode table */ @@ -2989,7 +2979,7 @@ err_out: /* - * akpm: how many blocks doth make a writepage()? + * How many blocks doth make a writepage()? * * With N blocks per page, it may be: * N data blocks @@ -3079,8 +3069,8 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode, } /* - * akpm: What we do here is to mark the in-core inode as clean - * with respect to inode dirtiness (it may still be data-dirty). + * What we do here is to mark the in-core inode as clean with respect to inode + * dirtiness (it may still be data-dirty). * This means that the in-core inode may be reaped by prune_icache * without having to perform any I/O. This is a very good thing, * because *any* task may call prune_icache - even ones which @@ -3112,7 +3102,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) } /* - * akpm: ext3_dirty_inode() is called from __mark_inode_dirty() + * ext3_dirty_inode() is called from __mark_inode_dirty() * * We're really interested in the case where a file is being extended. * i_size has been changed by generic_commit_write() and we thus need @@ -3148,7 +3138,7 @@ out: return; } -#ifdef AKPM +#if 0 /* * Bind an inode's backing buffer_head into this transaction, to prevent * it from being flushed to disk early. Unlike @@ -3156,8 +3146,7 @@ out: * returns no iloc structure, so the caller needs to repeat the iloc * lookup to mark the inode dirty later. */ -static inline int -ext3_pin_inode(handle_t *handle, struct inode *inode) +static int ext3_pin_inode(handle_t *handle, struct inode *inode) { struct ext3_iloc iloc; -- cgit v1.2.3 From f91a2ad2ed97099fb565e3336f8df0df717f2ba9 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:04 -0800 Subject: [PATCH] ext3: multi-block get_block() Mingming Cao recently added multi-block allocation support for ext3, currently used only by DIO. I added support to map multiple blocks for mpage_readpages(). This patch add support for ext3_get_block() to deal with multi-block mapping. Basically it renames ext3_direct_io_get_blocks() as ext3_get_block(). 
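With ext3_get_block() able to map a run of blocks per call, the read-ahead path benefits without further changes, because ext3 already feeds the same callback into the mpage code. A sketch of that hookup, shown here for illustration and intended to mirror what fs/ext3/inode.c already does rather than anything added by this patch:

static int ext3_readpages(struct file *file, struct address_space *mapping,
                          struct list_head *pages, unsigned nr_pages)
{
        return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);
}

Each pass through do_mpage_readpage() can now be satisfied with far fewer get_block() round trips when the file is contiguously allocated.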
Signed-off-by: Badari Pulavarty Cc: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index e68587a7f366..48ae0339af17 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -941,9 +941,8 @@ out: #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) -static int -ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) +static int ext3_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) { handle_t *handle = journal_current_handle(); int ret = 0; @@ -992,12 +991,6 @@ get_block: return ret; } -static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - return ext3_direct_io_get_blocks(inode, iblock, bh_result, create); -} - /* * `handle' can be NULL if create is zero */ @@ -1648,11 +1641,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, - ext3_direct_io_get_blocks, NULL); + ext3_get_block, NULL); /* - * Reacquire the handle: ext3_direct_io_get_block() can restart the - * transaction + * Reacquire the handle: ext3_get_block() can restart the transaction */ handle = journal_current_handle(); -- cgit v1.2.3 From a0e9285233a32edf267d27cd03fe0056951422cf Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:05 -0800 Subject: [PATCH] ext3: "nobh" writeback support for filesystems blocksize < pagesize There is no valid reason why we can't support "nobh" option for filesystems with blocksize != PAGESIZE. This patch lets them use "nobh" option for writeback mode for blocksize < pagesize. Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/super.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 86e443182de4..f8a5266ea1ff 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1678,12 +1678,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) } if (test_opt(sb, NOBH)) { - if (sb->s_blocksize_bits != PAGE_CACHE_SHIFT) { - printk(KERN_WARNING "EXT3-fs: Ignoring nobh option " - "since filesystem blocksize doesn't match " - "pagesize\n"); - clear_opt(sbi->s_mount_opt, NOBH); - } if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) { printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - " "its supported only with writeback mode\n"); -- cgit v1.2.3 From 4dee26b7e26eb3c78da8bf6c1c52ee5419145b28 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sun, 26 Mar 2006 01:38:10 -0800 Subject: [PATCH] hrtimers: remove it_real_value calculation from proc/*/stat Remove the it_real_value from /proc/*/stat, during 1.2.x was the last time it returned useful data (as it was directly maintained by the scheduler), now it's only a waste of time to calculate it. Return 0 instead. 
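Userspace that actually needs the remaining ITIMER_REAL time should query the kernel directly instead of parsing the itrealvalue field of /proc/<pid>/stat, which now always reads 0. A small userspace sketch, illustrative only and not part of the patch:

#include <stdio.h>
#include <sys/time.h>

static void print_itimer_remaining(void)
{
        struct itimerval it;

        /* it_value is the time left until the next SIGALRM delivery */
        if (getitimer(ITIMER_REAL, &it) == 0)
                printf("%ld.%06ld seconds remaining\n",
                       (long)it.it_value.tv_sec, (long)it.it_value.tv_usec);
}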
Signed-off-by: Roman Zippel Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 7eb1bd7f800c..7a76ad570230 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -330,7 +330,6 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) unsigned long min_flt = 0, maj_flt = 0; cputime_t cutime, cstime, utime, stime; unsigned long rsslim = 0; - DEFINE_KTIME(it_real_value); struct task_struct *t; char tcomm[sizeof(task->comm)]; @@ -386,7 +385,6 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) utime = cputime_add(utime, task->signal->utime); stime = cputime_add(stime, task->signal->stime); } - it_real_value = task->signal->real_timer.expires; } ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; read_unlock(&tasklist_lock); @@ -413,7 +411,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) start_time = nsec_to_clock_t(start_time); res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ -%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ +%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", task->pid, tcomm, @@ -435,7 +433,6 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) priority, nice, num_threads, - (long) ktime_to_clock_t(it_real_value), start_time, vsize, mm ? get_mm_rss(mm) : 0, -- cgit v1.2.3 From 05cfb614ddbf3181540ce09d44d96486f8ba8d6a Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sun, 26 Mar 2006 01:38:12 -0800 Subject: [PATCH] hrtimers: remove data field The nanosleep cleanup allows to remove the data field of hrtimer. The callback function can use container_of() to get it's own data. Since the hrtimer structure is anyway embedded in other structures, this adds no overhead. Signed-off-by: Roman Zippel Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/hrtimer.h | 5 +---- include/linux/sched.h | 1 + include/linux/timer.h | 3 ++- kernel/fork.c | 2 +- kernel/hrtimer.c | 12 +++++------- kernel/itimer.c | 15 +++++++-------- kernel/posix-timers.c | 9 ++++----- 8 files changed, 22 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 995cba3c62b8..c7397c46ad6d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -632,7 +632,7 @@ static int de_thread(struct task_struct *tsk) * synchronize with any firing (by calling del_timer_sync) * before we can safely let the old group leader die. */ - sig->real_timer.data = current; + sig->tsk = current; spin_unlock_irq(lock); if (hrtimer_cancel(&sig->real_timer)) hrtimer_restart(&sig->real_timer); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index f57cc7bd7008..93830158348e 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -45,9 +45,7 @@ struct hrtimer_base; * @expires: the absolute expiry time in the hrtimers internal * representation. The time is related to the clock on * which the timer is based. 
- * @state: state of the timer * @function: timer expiry callback function - * @data: argument for the callback function * @base: pointer to the timer base (per cpu and per clock) * * The hrtimer structure must be initialized by init_hrtimer_#CLOCKTYPE() @@ -55,8 +53,7 @@ struct hrtimer_base; struct hrtimer { struct rb_node node; ktime_t expires; - int (*function)(void *); - void *data; + int (*function)(struct hrtimer *); struct hrtimer_base *base; }; diff --git a/include/linux/sched.h b/include/linux/sched.h index e0054c1b9a09..036d14d2bf90 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -402,6 +402,7 @@ struct signal_struct { /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; + struct task_struct *tsk; ktime_t it_real_incr; /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ diff --git a/include/linux/timer.h b/include/linux/timer.h index ee5a09e806e8..b5caabca553c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -96,6 +96,7 @@ static inline void add_timer(struct timer_list *timer) extern void init_timers(void); extern void run_local_timers(void); -extern int it_real_fn(void *); +struct hrtimer; +extern int it_real_fn(struct hrtimer *); #endif diff --git a/kernel/fork.c b/kernel/fork.c index a02063903aaa..4bd6486aa67d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -848,7 +848,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL); sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; - sig->real_timer.data = tsk; + sig->tsk = tsk; sig->it_virt_expires = cputime_zero; sig->it_virt_incr = cputime_zero; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 44108de4f028..0237a556eb1f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -613,21 +613,19 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base) while ((node = base->first)) { struct hrtimer *timer; - int (*fn)(void *); + int (*fn)(struct hrtimer *); int restart; - void *data; timer = rb_entry(node, struct hrtimer, node); if (base->softirq_time.tv64 <= timer->expires.tv64) break; fn = timer->function; - data = timer->data; set_curr_timer(base, timer); __remove_hrtimer(timer, base); spin_unlock_irq(&base->lock); - restart = fn(data); + restart = fn(timer); spin_lock_irq(&base->lock); @@ -664,9 +662,10 @@ struct sleep_hrtimer { int expired; }; -static int nanosleep_wakeup(void *data) +static int nanosleep_wakeup(struct hrtimer *timer) { - struct sleep_hrtimer *t = data; + struct sleep_hrtimer *t = + container_of(timer, struct sleep_hrtimer, timer); t->expired = 1; wake_up_process(t->task); @@ -677,7 +676,6 @@ static int nanosleep_wakeup(void *data) static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode) { t->timer.function = nanosleep_wakeup; - t->timer.data = t; t->task = current; t->expired = 0; diff --git a/kernel/itimer.c b/kernel/itimer.c index af2ec6b4392c..204ed7939e75 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -128,17 +128,16 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value) /* * The timer is automagically restarted, when interval != 0 */ -int it_real_fn(void *data) +int it_real_fn(struct hrtimer *timer) { - struct task_struct *tsk = (struct task_struct *) data; + struct signal_struct *sig = + container_of(timer, struct signal_struct, real_timer); - send_group_sig_info(SIGALRM, SEND_SIG_PRIV, tsk); - - if (tsk->signal->it_real_incr.tv64 != 0) { - 
hrtimer_forward(&tsk->signal->real_timer, - tsk->signal->real_timer.base->softirq_time, - tsk->signal->it_real_incr); + send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk); + if (sig->it_real_incr.tv64 != 0) { + hrtimer_forward(timer, timer->base->softirq_time, + sig->it_real_incr); return HRTIMER_RESTART; } return HRTIMER_NORESTART; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 7c5f44787c8c..ac6dc8744429 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -145,7 +145,7 @@ static int common_timer_set(struct k_itimer *, int, struct itimerspec *, struct itimerspec *); static int common_timer_del(struct k_itimer *timer); -static int posix_timer_fn(void *data); +static int posix_timer_fn(struct hrtimer *data); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); @@ -334,14 +334,14 @@ EXPORT_SYMBOL_GPL(posix_timer_event); * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. */ -static int posix_timer_fn(void *data) +static int posix_timer_fn(struct hrtimer *timer) { - struct k_itimer *timr = data; - struct hrtimer *timer = &timr->it.real.timer; + struct k_itimer *timr; unsigned long flags; int si_private = 0; int ret = HRTIMER_NORESTART; + timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); if (timr->it.real.interval.tv64 != 0) @@ -725,7 +725,6 @@ common_timer_set(struct k_itimer *timr, int flags, mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL; hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); - timr->it.real.timer.data = timr; timr->it.real.timer.function = posix_timer_fn; timer->expires = timespec_to_ktime(new_setting->it_value); -- cgit v1.2.3 From b9a2838cc26c4c5369fbd2482acbc5ab60573479 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 26 Mar 2006 01:39:53 -0800 Subject: [PATCH] bitops: ntfs: remove generic_ffs() Now the only user who are using generic_ffs() is ntfs filesystem. This patch isolates generic_ffs() as ntfs_ffs() for ntfs. Signed-off-by: Akinobu Mita Cc: Anton Altaparmakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ntfs/logfile.c | 4 ++-- fs/ntfs/mft.c | 2 +- fs/ntfs/ntfs.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 0fd70295cca6..4af2ad1193ec 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -515,10 +515,10 @@ BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) log_page_size = PAGE_CACHE_SIZE; log_page_mask = log_page_size - 1; /* - * Use generic_ffs() instead of ffs() to enable the compiler to + * Use ntfs_ffs() instead of ffs() to enable the compiler to * optimize log_page_size and log_page_bits into constants. 
*/ - log_page_bits = generic_ffs(log_page_size) - 1; + log_page_bits = ntfs_ffs(log_page_size) - 1; size &= ~(s64)(log_page_size - 1); /* * Ensure the log file is big enough to store at least the two restart diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 4e72bc7afdf9..2438c00ec0ce 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -2670,7 +2670,7 @@ mft_rec_already_initialized: ni->name_len = 4; ni->itype.index.block_size = 4096; - ni->itype.index.block_size_bits = generic_ffs(4096) - 1; + ni->itype.index.block_size_bits = ntfs_ffs(4096) - 1; ni->itype.index.collation_rule = COLLATION_FILE_NAME; if (vol->cluster_size <= ni->itype.index.block_size) { ni->itype.index.vcn_size = vol->cluster_size; diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index 0624c8ef4d9c..166142960b53 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h @@ -132,4 +132,33 @@ extern int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins, /* From fs/ntfs/upcase.c */ extern ntfschar *generate_default_upcase(void); +static inline int ntfs_ffs(int x) +{ + int r = 1; + + if (!x) + return 0; + if (!(x & 0xffff)) { + x >>= 16; + r += 16; + } + if (!(x & 0xff)) { + x >>= 8; + r += 8; + } + if (!(x & 0xf)) { + x >>= 4; + r += 4; + } + if (!(x & 3)) { + x >>= 2; + r += 2; + } + if (!(x & 1)) { + x >>= 1; + r += 1; + } + return r; +} + #endif /* _LINUX_NTFS_H */ -- cgit v1.2.3 From 3be7d29fb9c02962b49be636b30ca9cadd0fda4b Mon Sep 17 00:00:00 2001 From: "Artem B. Bityuckiy" Date: Sun, 26 Mar 2006 18:58:31 +0200 Subject: Remove ugly debugging stuff Signed-off-by: Adrian Bunk --- fs/dcache.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 0c78f05819df..0778f49f993b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -34,7 +34,6 @@ #include #include -/* #define DCACHE_DEBUG 1 */ int sysctl_vfs_cache_pressure = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); @@ -603,10 +602,6 @@ resume: */ if (!list_empty(&dentry->d_subdirs)) { this_parent = dentry; -#ifdef DCACHE_DEBUG -printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, found); -#endif goto repeat; } } @@ -616,10 +611,6 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found); if (this_parent != parent) { next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; -#ifdef DCACHE_DEBUG -printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n", -this_parent->d_parent->d_name.name, this_parent->d_name.name, found); -#endif goto resume; } out: -- cgit v1.2.3 From 86c79cbcee7d617c5aaf9b4f7e6cc093397d3451 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:41 -0800 Subject: [PATCH] uml: fix hostfs stack corruption Noted by Oleg Drokin: We initialized an extra slot of struct kstatfs.spare, sometimes causing stack corruption. 
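The root cause is a hand-unrolled, element-by-element copy that ran one slot past the destination array. As a rough, illustrative sketch only (hypothetical names, not the hostfs code), a bounds-safe alternative copies no more spare words than the smaller array holds:

	#include <stddef.h>

	#define NELEM(a) (sizeof(a) / sizeof((a)[0]))

	/*
	 * Illustrative only: copy min(ndst, nsrc) spare words and zero-fill
	 * whatever is left in the destination.
	 */
	static void copy_spare(long *dst, size_t ndst, const long *src, size_t nsrc)
	{
		size_t i, n = ndst < nsrc ? ndst : nsrc;

		for (i = 0; i < n; i++)
			dst[i] = src[i];
		while (i < ndst)
			dst[i++] = 0;
	}

	/* e.g.: copy_spare(spare_out, n_spare_out, buf.f_spare, NELEM(buf.f_spare)); */

Called with each array's own element count (n_spare_out is a hypothetical length here), such a helper cannot write past either array regardless of how the two structures happen to be sized.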
Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Cc: Oleg Drokin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hostfs/hostfs_user.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index b97809deba66..23b7cee72123 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -360,7 +360,6 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out, spare_out[2] = buf.f_spare[2]; spare_out[3] = buf.f_spare[3]; spare_out[4] = buf.f_spare[4]; - spare_out[5] = buf.f_spare[5]; return(0); } -- cgit v1.2.3 From 718c604a28e35848754a65b839e4877ec34b2fca Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:42 -0800 Subject: [PATCH] autofs4: lookup white space cleanup Whitespace and formating changes to lookup code. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 62d8d4acb8bb..2c676bd44acd 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -296,20 +296,20 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f { struct super_block *sb = mnt->mnt_sb; struct autofs_sb_info *sbi = autofs4_sbi(sb); - struct autofs_info *de_info = autofs4_dentry_ino(dentry); + struct autofs_info *ino = autofs4_dentry_ino(dentry); int status = 0; /* Block on any pending expiry here; invalidate the dentry when expiration is done to trigger mount request with a new dentry */ - if (de_info && (de_info->flags & AUTOFS_INF_EXPIRING)) { + if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { DPRINTK("waiting for expire %p name=%.*s", dentry, dentry->d_name.len, dentry->d_name.name); status = autofs4_wait(sbi, dentry, NFY_NONE); - + DPRINTK("expire done status=%d", status); - + /* * If the directory still exists the mount request must * continue otherwise it can't be followed at the right @@ -323,18 +323,21 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f DPRINTK("dentry=%p %.*s ino=%p", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); - /* Wait for a pending mount, triggering one if there isn't one already */ + /* + * Wait for a pending mount, triggering one if there + * isn't one already + */ if (dentry->d_inode == NULL) { DPRINTK("waiting for mount name=%.*s", dentry->d_name.len, dentry->d_name.name); status = autofs4_wait(sbi, dentry, NFY_MOUNT); - + DPRINTK("mount done status=%d", status); if (status && dentry->d_inode) return 0; /* Try to get the kernel to invalidate this dentry */ - + /* Turn this into a real negative dentry? */ if (status == -ENOENT) { dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT; @@ -367,8 +370,10 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f } } - /* We don't update the usages for the autofs daemon itself, this - is necessary for recursive autofs mounts */ + /* + * We don't update the usages for the autofs daemon itself, this + * is necessary for recursive autofs mounts + */ if (!autofs4_oz_mode(sbi)) autofs4_update_usage(mnt, dentry); @@ -384,9 +389,9 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f * yet completely filled in, and revalidate has to delay such * lookups.. 
*/ -static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd) +static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode * dir = dentry->d_parent->d_inode; + struct inode *dir = dentry->d_parent->d_inode; struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); int oz_mode = autofs4_oz_mode(sbi); int flags = nd ? nd->flags : 0; @@ -462,12 +467,13 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s DPRINTK("name = %.*s", dentry->d_name.len, dentry->d_name.name); + /* File name too long to exist */ if (dentry->d_name.len > NAME_MAX) - return ERR_PTR(-ENAMETOOLONG);/* File name too long to exist */ + return ERR_PTR(-ENAMETOOLONG); sbi = autofs4_sbi(dir->i_sb); - oz_mode = autofs4_oz_mode(sbi); + DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", current->pid, process_group(current), sbi->catatonic, oz_mode); @@ -519,7 +525,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * doesn't do the right thing for all system calls, but it should * be OK for the operations we permit from an autofs. */ - if ( dentry->d_inode && d_unhashed(dentry) ) + if (dentry->d_inode && d_unhashed(dentry)) return ERR_PTR(-ENOENT); return NULL; -- cgit v1.2.3 From f360ce3be466d50de153b001b24561ca7593042b Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:43 -0800 Subject: [PATCH] autofs4: use libfs routines for readdir Change readdir routines to use the cursor based routines in libfs.c. This removes reliance on old readdir code from 2.4 and should improve efficiency of readdir in autofs4. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 126 +++++++++++++++++------------------------------------- 1 file changed, 40 insertions(+), 86 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2c676bd44acd..af9a4c6bbadf 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -30,7 +30,6 @@ static int autofs4_dir_close(struct inode *inode, struct file *file); static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir); static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); -static int autofs4_dcache_readdir(struct file *, void *, filldir_t); struct file_operations autofs4_root_operations = { .open = dcache_dir_open, @@ -82,7 +81,7 @@ static int autofs4_root_readdir(struct file *file, void *dirent, DPRINTK("needs_reghost = %d", sbi->needs_reghost); - return autofs4_dcache_readdir(file, dirent, filldir); + return dcache_readdir(file, dirent, filldir); } /* Update usage from here to top of tree, so that scan of @@ -103,75 +102,21 @@ static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry) spin_unlock(&dcache_lock); } -/* - * From 2.4 kernel readdir.c - */ -static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) -{ - int i; - struct dentry *dentry = filp->f_dentry; - - i = filp->f_pos; - switch (i) { - case 0: - if (filldir(dirent, ".", 1, i, dentry->d_inode->i_ino, DT_DIR) < 0) - break; - i++; - filp->f_pos++; - /* fallthrough */ - case 1: - if (filldir(dirent, "..", 2, i, dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) - break; - i++; - filp->f_pos++; - /* fallthrough */ - default: { - struct list_head *list; - int j = i-2; - - spin_lock(&dcache_lock); - list = dentry->d_subdirs.next; - - for (;;) { - 
if (list == &dentry->d_subdirs) { - spin_unlock(&dcache_lock); - return 0; - } - if (!j) - break; - j--; - list = list->next; - } - - while(1) { - struct dentry *de = list_entry(list, - struct dentry, d_u.d_child); - - if (!d_unhashed(de) && de->d_inode) { - spin_unlock(&dcache_lock); - if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0) - break; - spin_lock(&dcache_lock); - } - filp->f_pos++; - list = list->next; - if (list != &dentry->d_subdirs) - continue; - spin_unlock(&dcache_lock); - break; - } - } - } - return 0; -} - static int autofs4_dir_open(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_dentry; struct vfsmount *mnt = file->f_vfsmnt; struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct dentry *cursor; int status; + status = dcache_dir_open(inode, file); + if (status) + goto out; + + cursor = file->private_data; + cursor->d_fsdata = NULL; + DPRINTK("file=%p dentry=%p %.*s", file, dentry, dentry->d_name.len, dentry->d_name.name); @@ -180,12 +125,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) if (autofs4_ispending(dentry)) { DPRINTK("dentry busy"); - return -EBUSY; + dcache_dir_close(inode, file); + status = -EBUSY; + goto out; } + status = -ENOENT; if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) { struct nameidata nd; - int empty; + int empty, ret; /* In case there are stale directory dentrys from a failed mount */ spin_lock(&dcache_lock); @@ -195,13 +143,13 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) if (!empty) d_invalidate(dentry); - nd.dentry = dentry; - nd.mnt = mnt; nd.flags = LOOKUP_DIRECTORY; - status = (dentry->d_op->d_revalidate)(dentry, &nd); + ret = (dentry->d_op->d_revalidate)(dentry, &nd); - if (!status) - return -ENOENT; + if (!ret) { + dcache_dir_close(inode, file); + goto out; + } } if (d_mountpoint(dentry)) { @@ -212,25 +160,29 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { dput(fp_dentry); mntput(fp_mnt); - return -ENOENT; + dcache_dir_close(inode, file); + goto out; } fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); status = PTR_ERR(fp); if (IS_ERR(fp)) { - file->private_data = NULL; - return status; + dcache_dir_close(inode, file); + goto out; } - file->private_data = fp; + cursor->d_fsdata = fp; } -out: return 0; +out: + return status; } static int autofs4_dir_close(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_dentry; struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct dentry *cursor = file->private_data; + int status = 0; DPRINTK("file=%p dentry=%p %.*s", file, dentry, dentry->d_name.len, dentry->d_name.name); @@ -240,26 +192,28 @@ static int autofs4_dir_close(struct inode *inode, struct file *file) if (autofs4_ispending(dentry)) { DPRINTK("dentry busy"); - return -EBUSY; + status = -EBUSY; + goto out; } if (d_mountpoint(dentry)) { - struct file *fp = file->private_data; - - if (!fp) - return -ENOENT; - + struct file *fp = cursor->d_fsdata; + if (!fp) { + status = -ENOENT; + goto out; + } filp_close(fp, current->files); - file->private_data = NULL; } out: - return 0; + dcache_dir_close(inode, file); + return status; } static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir) { struct dentry *dentry = file->f_dentry; struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct dentry *cursor = file->private_data; int status; DPRINTK("file=%p 
dentry=%p %.*s", @@ -274,7 +228,7 @@ static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldi } if (d_mountpoint(dentry)) { - struct file *fp = file->private_data; + struct file *fp = cursor->d_fsdata; if (!fp) return -ENOENT; @@ -289,7 +243,7 @@ static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldi return status; } out: - return autofs4_dcache_readdir(file, dirent, filldir); + return dcache_readdir(file, dirent, filldir); } static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags) -- cgit v1.2.3 From 2d753e62b87ab2fc72bb4ff5153791d32ff9c08e Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:44 -0800 Subject: [PATCH] autofs4: can't mount due to mount point dir not empty Addresse a problem where stale dentrys stop mounts from happening. When a mount point directory is pre-created and a non-existent entry within it is requested a dentry ends up being created within the mount point directory which stops future mounts. The problem is solved by ignoring negative, unhashed dentrys in the mount point d_subdirs list. Additionally the apparent cacheing of -ENOENT returns from requests is removed. The test on d_time is a tautology and d_time is not initialised and has an unexpected value. In short it doesn't do what it's meant to. The cacheing of failed requests to the daemon is important and will be followed up later. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 8 -------- fs/autofs4/root.c | 9 ++++----- 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index f54c5b21f876..eea25934da62 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -41,14 +41,6 @@ #define AUTOFS_SUPER_MAGIC 0x0187 -/* - * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the - * kernel will keep the negative response cached for up to the time given - * here, although the time can be shorter if the kernel throws the dcache - * entry away. This probably should be settable from user space. - */ -#define AUTOFS_NEGATIVE_TIMEOUT (60*HZ) /* 1 minute */ - /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure. It holds a reference to the dentry, so dentries are never diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index af9a4c6bbadf..c7ff35774344 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -294,14 +294,13 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f /* Turn this into a real negative dentry? */ if (status == -ENOENT) { - dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT; spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - return 1; + return 0; } else if (status) { /* Return a negative dentry, but leave it "pending" */ - return 1; + return 0; } /* Trigger mount for path component or follow link */ } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) || @@ -360,13 +359,13 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) /* Negative dentry.. 
invalidate if "old" */ if (dentry->d_inode == NULL) - return (dentry->d_time - jiffies <= AUTOFS_NEGATIVE_TIMEOUT); + return 0; /* Check for a non-mountpoint directory with no contents */ spin_lock(&dcache_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && - list_empty(&dentry->d_subdirs)) { + simple_empty_nolock(dentry)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dcache_lock); -- cgit v1.2.3 From 1f5f2c3059ae8cc23cb3303daf324b06ba79517a Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:44 -0800 Subject: [PATCH] autofs4: expire code readability cleanup Change the names of the boolean functions autofs4_check_mount and autofs4_check_tree to autofs4_mount_busy and autofs4_tree_busy respectively and alters their return codes to suit in order to aid code readabilty. A couple of white space cleanups are included as well. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 54 ++++++++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index dc39589df165..bcc17f533699 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -16,7 +16,7 @@ static unsigned long now; -/* Check if a dentry can be expired return 1 if it can else return 0 */ +/* Check if a dentry can be expired */ static inline int autofs4_can_expire(struct dentry *dentry, unsigned long timeout, int do_now) { @@ -41,14 +41,13 @@ static inline int autofs4_can_expire(struct dentry *dentry, attempts if expire fails the first time */ ino->last_used = now; } - return 1; } -/* Check a mount point for busyness return 1 if not busy, otherwise */ -static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry) +/* Check a mount point for busyness */ +static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) { - int status = 0; + int status = 1; DPRINTK("dentry %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); @@ -65,7 +64,7 @@ static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry) /* The big question */ if (may_umount_tree(mnt) == 0) - status = 1; + status = 0; done: DPRINTK("returning = %d", status); mntput(mnt); @@ -75,30 +74,29 @@ done: /* Check a directory tree of mount points for busyness * The tree is not busy iff no mountpoints are busy - * Return 1 if the tree is busy or 0 otherwise */ -static int autofs4_check_tree(struct vfsmount *mnt, - struct dentry *top, - unsigned long timeout, - int do_now) +static int autofs4_tree_busy(struct vfsmount *mnt, + struct dentry *top, + unsigned long timeout, + int do_now) { struct dentry *this_parent = top; struct list_head *next; - DPRINTK("parent %p %.*s", + DPRINTK("top %p %.*s", top, (int)top->d_name.len, top->d_name.name); /* Negative dentry - give up */ if (!simple_positive(top)) - return 0; + return 1; /* Timeout of a tree mount is determined by its top dentry */ if (!autofs4_can_expire(top, timeout, do_now)) - return 0; + return 1; /* Is someone visiting anywhere in the tree ? 
*/ if (may_umount_tree(mnt)) - return 0; + return 1; spin_lock(&dcache_lock); repeat: @@ -126,9 +124,9 @@ resume: if (d_mountpoint(dentry)) { /* First busy => tree busy */ - if (!autofs4_check_mount(mnt, dentry)) { + if (autofs4_mount_busy(mnt, dentry)) { dput(dentry); - return 0; + return 1; } } @@ -144,7 +142,7 @@ resume: } spin_unlock(&dcache_lock); - return 1; + return 0; } static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, @@ -188,7 +186,7 @@ resume: goto cont; /* Can we umount this guy */ - if (autofs4_check_mount(mnt, dentry)) + if (!autofs4_mount_busy(mnt, dentry)) return dentry; } @@ -241,7 +239,7 @@ static struct dentry *autofs4_expire(struct super_block *sb, struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ - if ( !simple_positive(dentry) ) { + if (!simple_positive(dentry)) { next = next->next; continue; } @@ -259,21 +257,21 @@ static struct dentry *autofs4_expire(struct super_block *sb, goto next; /* Can we umount this guy */ - if (autofs4_check_mount(mnt, dentry)) { + if (!autofs4_mount_busy(mnt, dentry)) { expired = dentry; break; } goto next; } - if ( simple_empty(dentry) ) + if (simple_empty(dentry)) goto next; /* Case 2: tree mount, expire iff entire tree is not busy */ if (!exp_leaves) { /* Lock the tree as we must expire as a whole */ spin_lock(&sbi->fs_lock); - if (autofs4_check_tree(mnt, dentry, timeout, do_now)) { + if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { struct autofs_info *inf = autofs4_dentry_ino(dentry); /* Set this flag early to catch sys_chdir and the like */ @@ -297,7 +295,7 @@ next: next = next->next; } - if ( expired ) { + if (expired) { DPRINTK("returning %p %.*s", expired, (int)expired->d_name.len, expired->d_name.name); spin_lock(&dcache_lock); @@ -352,16 +350,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, return -EFAULT; if ((dentry = autofs4_expire(sb, mnt, sbi, do_now)) != NULL) { - struct autofs_info *de_info = autofs4_dentry_ino(dentry); + struct autofs_info *ino = autofs4_dentry_ino(dentry); /* This is synchronous because it makes the daemon a little easier */ - de_info->flags |= AUTOFS_INF_EXPIRING; + ino->flags |= AUTOFS_INF_EXPIRING; ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); - de_info->flags &= ~AUTOFS_INF_EXPIRING; + ino->flags &= ~AUTOFS_INF_EXPIRING; dput(dentry); } - + return ret; } -- cgit v1.2.3 From 1ce12bad85863478619688c0c7363f93a9e5edb8 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:45 -0800 Subject: [PATCH] autofs4: simplify expire tree traversal Simplify the expire tree traversal code by using a function from namespace.c to calculate the next entry in the top down tree traversals carried out during the expire operation. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 102 +++++++++++++++++++++------------------------------- 1 file changed, 40 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index bcc17f533699..165fe9e2d570 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -72,6 +72,27 @@ done: return status; } +/* + * Calculate next entry in top down tree traversal. + * From next_mnt in namespace.c - elegant. 
+ */ +static struct dentry *next_dentry(struct dentry *p, struct dentry *root) +{ + struct list_head *next = p->d_subdirs.next; + + if (next == &p->d_subdirs) { + while (1) { + if (p == root) + return NULL; + next = p->d_u.d_child.next; + if (next != &p->d_parent->d_subdirs) + break; + p = p->d_parent; + } + } + return list_entry(next, struct dentry, d_u.d_child); +} + /* Check a directory tree of mount points for busyness * The tree is not busy iff no mountpoints are busy */ @@ -80,8 +101,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, unsigned long timeout, int do_now) { - struct dentry *this_parent = top; - struct list_head *next; + struct dentry *p; DPRINTK("top %p %.*s", top, (int)top->d_name.len, top->d_name.name); @@ -99,49 +119,28 @@ static int autofs4_tree_busy(struct vfsmount *mnt, return 1; spin_lock(&dcache_lock); -repeat: - next = this_parent->d_subdirs.next; -resume: - while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); - + for (p = top; p; p = next_dentry(p, top)) { /* Negative dentry - give up */ - if (!simple_positive(dentry)) { - next = next->next; + if (!simple_positive(p)) continue; - } DPRINTK("dentry %p %.*s", - dentry, (int)dentry->d_name.len, dentry->d_name.name); - - if (!simple_empty_nolock(dentry)) { - this_parent = dentry; - goto repeat; - } + p, (int) p->d_name.len, p->d_name.name); - dentry = dget(dentry); + p = dget(p); spin_unlock(&dcache_lock); - if (d_mountpoint(dentry)) { + if (d_mountpoint(p)) { /* First busy => tree busy */ - if (autofs4_mount_busy(mnt, dentry)) { - dput(dentry); + if (autofs4_mount_busy(mnt, p)) { + dput(p); return 1; } } - - dput(dentry); + dput(p); spin_lock(&dcache_lock); - next = next->next; - } - - if (this_parent != top) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; - goto resume; } spin_unlock(&dcache_lock); - return 0; } @@ -150,59 +149,38 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, unsigned long timeout, int do_now) { - struct dentry *this_parent = parent; - struct list_head *next; + struct dentry *p; DPRINTK("parent %p %.*s", parent, (int)parent->d_name.len, parent->d_name.name); spin_lock(&dcache_lock); -repeat: - next = this_parent->d_subdirs.next; -resume: - while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); - + for (p = parent; p; p = next_dentry(p, parent)) { /* Negative dentry - give up */ - if (!simple_positive(dentry)) { - next = next->next; + if (!simple_positive(p)) continue; - } DPRINTK("dentry %p %.*s", - dentry, (int)dentry->d_name.len, dentry->d_name.name); + p, (int) p->d_name.len, p->d_name.name); - if (!list_empty(&dentry->d_subdirs)) { - this_parent = dentry; - goto repeat; - } - - dentry = dget(dentry); + p = dget(p); spin_unlock(&dcache_lock); - if (d_mountpoint(dentry)) { + if (d_mountpoint(p)) { /* Can we expire this guy */ - if (!autofs4_can_expire(dentry, timeout, do_now)) + if (!autofs4_can_expire(p, timeout, do_now)) goto cont; /* Can we umount this guy */ - if (!autofs4_mount_busy(mnt, dentry)) - return dentry; + if (!autofs4_mount_busy(mnt, p)) + return p; } cont: - dput(dentry); + dput(p); spin_lock(&dcache_lock); - next = next->next; - } - - if (this_parent != parent) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; - goto resume; } spin_unlock(&dcache_lock); - return NULL; } -- cgit v1.2.3 From 1aff3c8b0511b5bb54acf7859e0c6ec9ae7287a9 Mon Sep 17 00:00:00 2001 From: Ian Kent 
Date: Mon, 27 Mar 2006 01:14:46 -0800 Subject: [PATCH] autofs4: fix false negative return from expire Fix the case where an expire returns busy on a tree mount when it is in fact not busy. This case was overlooked when the patch to prevent the expiring away of "scaffolding" directories for tree mounts was applied. The problem arises when a tree of mounts is a member of a map with other keys. The current logic will not expire the tree if any other mount in the map is busy. The solution is to maintain a "minimum" use count for each autofs dentry and compare this to the actual dentry usage count during expire. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 1 + fs/autofs4/expire.c | 34 +++++++++++++++++++++++++--------- fs/autofs4/inode.c | 12 +++++++++++- fs/autofs4/root.c | 23 ++++++++++++++++++++++- 4 files changed, 59 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index eea25934da62..00da71d0f32a 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -55,6 +55,7 @@ struct autofs_info { struct autofs_sb_info *sbi; unsigned long last_used; + atomic_t count; mode_t mode; size_t size; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 165fe9e2d570..053d92a745b9 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -101,6 +101,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, unsigned long timeout, int do_now) { + struct autofs_info *ino; struct dentry *p; DPRINTK("top %p %.*s", @@ -110,14 +111,6 @@ static int autofs4_tree_busy(struct vfsmount *mnt, if (!simple_positive(top)) return 1; - /* Timeout of a tree mount is determined by its top dentry */ - if (!autofs4_can_expire(top, timeout, do_now)) - return 1; - - /* Is someone visiting anywhere in the tree ? */ - if (may_umount_tree(mnt)) - return 1; - spin_lock(&dcache_lock); for (p = top; p; p = next_dentry(p, top)) { /* Negative dentry - give up */ @@ -130,17 +123,40 @@ static int autofs4_tree_busy(struct vfsmount *mnt, p = dget(p); spin_unlock(&dcache_lock); + /* + * Is someone visiting anywhere in the subtree ? + * If there's no mount we need to check the usage + * count for the autofs dentry. 
+ */ + ino = autofs4_dentry_ino(p); if (d_mountpoint(p)) { - /* First busy => tree busy */ if (autofs4_mount_busy(mnt, p)) { dput(p); return 1; } + } else { + unsigned int ino_count = atomic_read(&ino->count); + + /* allow for dget above and top is already dgot */ + if (p == top) + ino_count += 2; + else + ino_count++; + + if (atomic_read(&p->d_count) > ino_count) { + dput(p); + return 1; + } } dput(p); spin_lock(&dcache_lock); } spin_unlock(&dcache_lock); + + /* Timeout of a tree mount is ultimately determined by its top dentry */ + if (!autofs4_can_expire(top, timeout, do_now)) + return 1; + return 0; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 1ad98d48e550..2335b1d6490f 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -46,6 +46,7 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, ino->size = 0; ino->last_used = jiffies; + atomic_set(&ino->count, 0); ino->sbi = sbi; @@ -64,10 +65,19 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, void autofs4_free_ino(struct autofs_info *ino) { + struct autofs_info *p_ino; + if (ino->dentry) { ino->dentry->d_fsdata = NULL; - if (ino->dentry->d_inode) + if (ino->dentry->d_inode) { + struct dentry *parent = ino->dentry->d_parent; + if (atomic_dec_and_test(&ino->count)) { + p_ino = autofs4_dentry_ino(parent); + if (p_ino && parent != ino->dentry) + atomic_dec(&p_ino->count); + } dput(ino->dentry); + } ino->dentry = NULL; } if (ino->free) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index c7ff35774344..d196712c4b94 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -490,6 +490,7 @@ static int autofs4_dir_symlink(struct inode *dir, { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct autofs_info *p_ino; struct inode *inode; char *cp; @@ -523,6 +524,10 @@ static int autofs4_dir_symlink(struct inode *dir, dentry->d_fsdata = ino; ino->dentry = dget(dentry); + atomic_inc(&ino->count); + p_ino = autofs4_dentry_ino(dentry->d_parent); + if (p_ino && dentry->d_parent != dentry) + atomic_inc(&p_ino->count); ino->inode = inode; dir->i_mtime = CURRENT_TIME; @@ -549,11 +554,17 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct autofs_info *p_ino; /* This allows root to remove symlinks */ if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) return -EACCES; + if (atomic_dec_and_test(&ino->count)) { + p_ino = autofs4_dentry_ino(dentry->d_parent); + if (p_ino && dentry->d_parent != dentry) + atomic_dec(&p_ino->count); + } dput(ino->dentry); dentry->d_inode->i_size = 0; @@ -570,6 +581,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct autofs_info *p_ino; if (!autofs4_oz_mode(sbi)) return -EACCES; @@ -584,8 +596,12 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); + if (atomic_dec_and_test(&ino->count)) { + p_ino = autofs4_dentry_ino(dentry->d_parent); + if (p_ino && dentry->d_parent != dentry) + atomic_dec(&p_ino->count); + } dput(ino->dentry); - dentry->d_inode->i_size = 0; dentry->d_inode->i_nlink = 0; @@ -599,6 +615,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info 
*ino = autofs4_dentry_ino(dentry); + struct autofs_info *p_ino; struct inode *inode; if ( !autofs4_oz_mode(sbi) ) @@ -621,6 +638,10 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) dentry->d_fsdata = ino; ino->dentry = dget(dentry); + atomic_inc(&ino->count); + p_ino = autofs4_dentry_ino(dentry->d_parent); + if (p_ino && dentry->d_parent != dentry) + atomic_inc(&p_ino->count); ino->inode = inode; dir->i_nlink++; dir->i_mtime = CURRENT_TIME; -- cgit v1.2.3 From e0a7aae94030b878601eb67686b696de4a3764f0 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:47 -0800 Subject: [PATCH] autofs4: expire mounts that hold no (extra) references only Alter the expire semantics that define how "busyness" is determined. Currently a last_used counter is updated on every revalidate from processes other than the mount owner process group. This patch changes that so that an expire candidate is busy only if it has a reference count greater than the expected minimum, such as when there is an open file or working directory in use. This method is the only way that busyness can be established for direct mounts within the new implementation. For consistency the expire semantic is made the same for all mounts. A side effect of the patch is that mounts which remain mounted unessessarily in the presence of some GUI programs that scan the filesystem should now expire. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 36 ++++++++++++++++++++++-------------- fs/autofs4/root.c | 4 ++++ 2 files changed, 26 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 053d92a745b9..6ae2fc8233ff 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -47,6 +47,7 @@ static inline int autofs4_can_expire(struct dentry *dentry, /* Check a mount point for busyness */ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) { + struct dentry *top = dentry; int status = 1; DPRINTK("dentry %p %.*s", @@ -62,9 +63,14 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) if (is_autofs4_dentry(dentry)) goto done; - /* The big question */ - if (may_umount_tree(mnt) == 0) - status = 0; + /* Update the expiry counter if fs is busy */ + if (may_umount_tree(mnt)) { + struct autofs_info *ino = autofs4_dentry_ino(top); + ino->last_used = jiffies; + goto done; + } + + status = 0; done: DPRINTK("returning = %d", status); mntput(mnt); @@ -101,7 +107,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, unsigned long timeout, int do_now) { - struct autofs_info *ino; + struct autofs_info *top_ino = autofs4_dentry_ino(top); struct dentry *p; DPRINTK("top %p %.*s", @@ -127,14 +133,16 @@ static int autofs4_tree_busy(struct vfsmount *mnt, * Is someone visiting anywhere in the subtree ? * If there's no mount we need to check the usage * count for the autofs dentry. + * If the fs is busy update the expiry counter. 
*/ - ino = autofs4_dentry_ino(p); if (d_mountpoint(p)) { if (autofs4_mount_busy(mnt, p)) { + top_ino->last_used = jiffies; dput(p); return 1; } } else { + struct autofs_info *ino = autofs4_dentry_ino(p); unsigned int ino_count = atomic_read(&ino->count); /* allow for dget above and top is already dgot */ @@ -144,6 +152,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, ino_count++; if (atomic_read(&p->d_count) > ino_count) { + top_ino->last_used = jiffies; dput(p); return 1; } @@ -183,14 +192,13 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, spin_unlock(&dcache_lock); if (d_mountpoint(p)) { - /* Can we expire this guy */ - if (!autofs4_can_expire(p, timeout, do_now)) + /* Can we umount this guy */ + if (autofs4_mount_busy(mnt, p)) goto cont; - /* Can we umount this guy */ - if (!autofs4_mount_busy(mnt, p)) + /* Can we expire this guy */ + if (autofs4_can_expire(p, timeout, do_now)) return p; - } cont: dput(p); @@ -246,12 +254,12 @@ static struct dentry *autofs4_expire(struct super_block *sb, DPRINTK("checking mountpoint %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); - /* Can we expire this guy */ - if (!autofs4_can_expire(dentry, timeout, do_now)) + /* Can we umount this guy */ + if (autofs4_mount_busy(mnt, dentry)) goto next; - /* Can we umount this guy */ - if (!autofs4_mount_busy(mnt, dentry)) { + /* Can we expire this guy */ + if (autofs4_can_expire(dentry, timeout, do_now)) { expired = dentry; break; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d196712c4b94..3a4a5b47575c 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -330,6 +330,10 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f if (!autofs4_oz_mode(sbi)) autofs4_update_usage(mnt, dentry); + /* Initialize expiry counter after successful mount */ + if (ino) + ino->last_used = jiffies; + spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); -- cgit v1.2.3 From 862b110f0132401e422783bf68521ade3dc67578 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:48 -0800 Subject: [PATCH] autofs4: remove update_atime unused function Remove the update of i_atime from autofs4 in favour of having VFS update it. i_atime is never used for expire in autofs4. 
Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 38 ++++---------------------------------- 1 file changed, 4 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 3a4a5b47575c..72dca3335e25 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -84,24 +84,6 @@ static int autofs4_root_readdir(struct file *file, void *dirent, return dcache_readdir(file, dirent, filldir); } -/* Update usage from here to top of tree, so that scan of - top-level directories will give a useful result */ -static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry) -{ - struct dentry *top = dentry->d_sb->s_root; - - spin_lock(&dcache_lock); - for(; dentry != top; dentry = dentry->d_parent) { - struct autofs_info *ino = autofs4_dentry_ino(dentry); - - if (ino) { - touch_atime(mnt, dentry); - ino->last_used = jiffies; - } - } - spin_unlock(&dcache_lock); -} - static int autofs4_dir_open(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_dentry; @@ -246,10 +228,9 @@ out: return dcache_readdir(file, dirent, filldir); } -static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags) +static int try_to_fill_dentry(struct dentry *dentry, int flags) { - struct super_block *sb = mnt->mnt_sb; - struct autofs_sb_info *sbi = autofs4_sbi(sb); + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); int status = 0; @@ -323,13 +304,6 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f } } - /* - * We don't update the usages for the autofs daemon itself, this - * is necessary for recursive autofs mounts - */ - if (!autofs4_oz_mode(sbi)) - autofs4_update_usage(mnt, dentry); - /* Initialize expiry counter after successful mount */ if (ino) ino->last_used = jiffies; @@ -357,7 +331,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) /* Pending dentry */ if (autofs4_ispending(dentry)) { if (!oz_mode) - status = try_to_fill_dentry(nd->mnt, dentry, flags); + status = try_to_fill_dentry(dentry, flags); return status; } @@ -374,15 +348,11 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dcache_lock); if (!oz_mode) - status = try_to_fill_dentry(nd->mnt, dentry, flags); + status = try_to_fill_dentry(dentry, flags); return status; } spin_unlock(&dcache_lock); - /* Update the usage list */ - if (!oz_mode) - autofs4_update_usage(nd->mnt, dentry); - return 1; } -- cgit v1.2.3 From d7c4a5f1080a61fd4a3a00ba5f741986f8fb71f0 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:49 -0800 Subject: [PATCH] autofs4: add a show mount options for proc filesystem Add show_options method to display autofs4 mount options in the proc filesystem. 
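For readers who have not used this hook before, the general shape of a 2.6-era show_options method is sketched below; the filesystem name and option fields are placeholders rather than the autofs4 ones, which appear in the patch itself. The method receives a seq_file and the vfsmount, and whatever it prints with seq_printf() is appended to that mount's line in /proc/mounts.

	#include <linux/fs.h>
	#include <linux/mount.h>
	#include <linux/seq_file.h>

	/* Hypothetical per-superblock info, for the sketch only. */
	struct examplefs_sb_info {
		int pipefd;
		unsigned long timeout;
	};

	static int examplefs_show_options(struct seq_file *m, struct vfsmount *mnt)
	{
		struct examplefs_sb_info *sbi = mnt->mnt_sb->s_fs_info;

		if (!sbi)
			return 0;

		seq_printf(m, ",fd=%d", sbi->pipefd);
		seq_printf(m, ",timeout=%lu", sbi->timeout);
		return 0;	/* non-zero would report an error to the caller */
	}

	static struct super_operations examplefs_sops = {
		.show_options	= examplefs_show_options,
	};

Registering the method in super_operations is all the filesystem needs to do; the VFS invokes it whenever the mount table is read.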
Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 3 +++ fs/autofs4/inode.c | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 00da71d0f32a..d82a019ff8ef 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -87,11 +87,14 @@ struct autofs_wait_queue { struct autofs_sb_info { u32 magic; struct dentry *root; + int pipefd; struct file *pipe; pid_t oz_pgrp; int catatonic; int version; int sub_version; + int min_proto; + int max_proto; unsigned long exp_timeout; int reghost_enabled; int needs_reghost; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 2335b1d6490f..d9a71dab40fc 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -163,9 +164,26 @@ static void autofs4_put_super(struct super_block *sb) DPRINTK("shutting down"); } +static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb); + + if (!sbi) + return 0; + + seq_printf(m, ",fd=%d", sbi->pipefd); + seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); + seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); + seq_printf(m, ",minproto=%d", sbi->min_proto); + seq_printf(m, ",maxproto=%d", sbi->max_proto); + + return 0; +} + static struct super_operations autofs4_sops = { .put_super = autofs4_put_super, .statfs = simple_statfs, + .show_options = autofs4_show_options, }; enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; @@ -261,7 +279,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) int pipefd; struct autofs_sb_info *sbi; struct autofs_info *ino; - int minproto, maxproto; sbi = (struct autofs_sb_info *) kmalloc(sizeof(*sbi), GFP_KERNEL); if ( !sbi ) @@ -273,12 +290,15 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) s->s_fs_info = sbi; sbi->magic = AUTOFS_SBI_MAGIC; sbi->root = NULL; + sbi->pipefd = -1; sbi->catatonic = 0; sbi->exp_timeout = 0; sbi->oz_pgrp = process_group(current); sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; + sbi->min_proto = 0; + sbi->max_proto = 0; mutex_init(&sbi->wq_mutex); spin_lock_init(&sbi->fs_lock); sbi->queues = NULL; @@ -311,22 +331,26 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, &sbi->oz_pgrp, - &minproto, &maxproto)) { + &sbi->min_proto, &sbi->max_proto)) { printk("autofs: called with bogus options\n"); goto fail_dput; } /* Couldn't this be tested earlier? */ - if (maxproto < AUTOFS_MIN_PROTO_VERSION || - minproto > AUTOFS_MAX_PROTO_VERSION) { + if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || + sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { printk("autofs: kernel does not match daemon version " "daemon (%d, %d) kernel (%d, %d)\n", - minproto, maxproto, + sbi->min_proto, sbi->max_proto, AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION); goto fail_dput; } - sbi->version = maxproto > AUTOFS_MAX_PROTO_VERSION ? 
AUTOFS_MAX_PROTO_VERSION : maxproto; + /* Establish highest kernel protocol version */ + if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION) + sbi->version = AUTOFS_MAX_PROTO_VERSION; + else + sbi->version = sbi->max_proto; sbi->sub_version = AUTOFS_PROTO_SUBVERSION; DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp); @@ -339,6 +363,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) if ( !pipe->f_op || !pipe->f_op->write ) goto fail_fput; sbi->pipe = pipe; + sbi->pipefd = pipefd; /* * Take a reference to the root dentry so we get a chance to -- cgit v1.2.3 From e77fbddf77c071a5735a4bc63a30649bd64baf14 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:49 -0800 Subject: [PATCH] autofs4: white space cleanup for waitq.c Whitespace and formating changes to waitq code. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/waitq.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index be78e9378c03..b0bb9d43bcd9 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -33,7 +33,7 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) sbi->catatonic = 1; wq = sbi->queues; sbi->queues = NULL; /* Erase all wait queues */ - while ( wq ) { + while (wq) { nwq = wq->next; wq->status = -ENOENT; /* Magic is gone - report failure */ kfree(wq->name); @@ -45,7 +45,6 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) fput(sbi->pipe); /* Close the pipe */ sbi->pipe = NULL; } - shrink_dcache_sb(sbi->sb); } @@ -165,7 +164,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, int len, status; /* In catatonic mode, we don't wait for nobody */ - if ( sbi->catatonic ) + if (sbi->catatonic) return -ENOENT; name = kmalloc(NAME_MAX + 1, GFP_KERNEL); @@ -190,7 +189,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, break; } - if ( !wq ) { + if (!wq) { /* Can't wait for an expire if there's no mount */ if (notify == NFY_NONE && !d_mountpoint(dentry)) { kfree(name); @@ -200,7 +199,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, /* Create a new wait queue */ wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); - if ( !wq ) { + if (!wq) { kfree(name); mutex_unlock(&sbi->wq_mutex); return -ENOMEM; @@ -240,14 +239,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, /* wq->name is NULL if and only if the lock is already released */ - if ( sbi->catatonic ) { + if (sbi->catatonic) { /* We might have slept, so check again for catatonic mode */ wq->status = -ENOENT; kfree(wq->name); wq->name = NULL; } - if ( wq->name ) { + if (wq->name) { /* Block all but "shutdown" signals while waiting */ sigset_t oldset; unsigned long irqflags; @@ -283,12 +282,12 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok struct autofs_wait_queue *wq, **wql; mutex_lock(&sbi->wq_mutex); - for ( wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next ) { - if ( wq->wait_queue_token == wait_queue_token ) + for (wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next) { + if (wq->wait_queue_token == wait_queue_token) break; } - if ( !wq ) { + if (!wq) { mutex_unlock(&sbi->wq_mutex); return -EINVAL; } -- cgit v1.2.3 From 90a59c7cf5dd68b41ffab61dd30eba1ef5746e66 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:50 -0800 Subject: [PATCH] autofs4: rename simple_empty_nolock function Rename the function simple_empty_nolock 
to __simple_empty in line with kernel naming conventions. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 2 +- fs/autofs4/root.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index d82a019ff8ef..bc1b0543d2b6 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -201,7 +201,7 @@ static inline int simple_positive(struct dentry *dentry) return dentry->d_inode && !d_unhashed(dentry); } -static inline int simple_empty_nolock(struct dentry *dentry) +static inline int __simple_empty(struct dentry *dentry) { struct dentry *child; int ret = 0; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 72dca3335e25..dcd4802a5d5f 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -343,7 +343,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) spin_lock(&dcache_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && - simple_empty_nolock(dentry)) { + __simple_empty(dentry)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dcache_lock); -- cgit v1.2.3 From e3474a8eb38e48dea6690d1fabd75f3c7fd2f93f Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:51 -0800 Subject: [PATCH] autofs4: change may_umount* functions to boolean Change the functions may_umount and may_umount_tree to boolean functions to aid code readability. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs/dirhash.c | 2 +- fs/autofs4/expire.c | 2 +- fs/autofs4/root.c | 2 +- fs/namespace.c | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index 5ccfcf26310d..3fded389d06b 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c @@ -92,7 +92,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, ; dput(dentry); - if ( may_umount(mnt) == 0 ) { + if ( may_umount(mnt) ) { mntput(mnt); DPRINTK(("autofs: signaling expire on %s\n", ent->name)); return ent; /* Expirable! 
*/ diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 6ae2fc8233ff..02a218fbde5f 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -64,7 +64,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) goto done; /* Update the expiry counter if fs is busy */ - if (may_umount_tree(mnt)) { + if (!may_umount_tree(mnt)) { struct autofs_info *ino = autofs4_dentry_ino(top); ino->last_used = jiffies; goto done; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index dcd4802a5d5f..26eb1f024866 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -699,7 +699,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) { int status = 0; - if (may_umount(mnt) == 0) + if (may_umount(mnt)) status = 1; DPRINTK("returning %d", status); diff --git a/fs/namespace.c b/fs/namespace.c index e069a4c5e389..bf478addb852 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -459,9 +459,9 @@ int may_umount_tree(struct vfsmount *mnt) spin_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) - return -EBUSY; + return 0; - return 0; + return 1; } EXPORT_SYMBOL(may_umount_tree); @@ -481,10 +481,10 @@ EXPORT_SYMBOL(may_umount_tree); */ int may_umount(struct vfsmount *mnt) { - int ret = 0; + int ret = 1; spin_lock(&vfsmount_lock); if (propagate_mount_busy(mnt, 2)) - ret = -EBUSY; + ret = 0; spin_unlock(&vfsmount_lock); return ret; } -- cgit v1.2.3 From 051d381259eb57d6074d02a6ba6e90e744f1a29f Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:53 -0800 Subject: [PATCH] autofs4: nameidata needs to be up to date for follow_link In order to be able to trigger a mount using the follow_link inode method the nameidata struct that is passed in needs to have the vfsmount of the autofs trigger not its parent. During a path walk if an autofs trigger is mounted on a dentry, when the follow_link method is called, the nameidata struct contains the vfsmount and mountpoint dentry of the parent mount while the dentry that is passed in is the root of the autofs trigger mount. I believe it is impossible to get the vfsmount of the trigger mount, within the follow_link method, when only the parent vfsmount and the root dentry of the trigger mount are known. This patch updates the nameidata struct on entry to __do_follow_link if it detects that it is out of date. It moves the path_to_nameidata to above __do_follow_link to facilitate calling it from there. The dput_path is moved as well as that seemed sensible. No changes are made to these two functions. 
Signed-off-by: Ian Kent Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 98dc2e134362..22f6e8d16aa8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -546,6 +546,22 @@ struct path { struct dentry *dentry; }; +static inline void dput_path(struct path *path, struct nameidata *nd) +{ + dput(path->dentry); + if (path->mnt != nd->mnt) + mntput(path->mnt); +} + +static inline void path_to_nameidata(struct path *path, struct nameidata *nd) +{ + dput(nd->dentry); + if (nd->mnt != path->mnt) + mntput(nd->mnt); + nd->mnt = path->mnt; + nd->dentry = path->dentry; +} + static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) { int error; @@ -555,8 +571,11 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata touch_atime(path->mnt, dentry); nd_set_link(nd, NULL); - if (path->mnt == nd->mnt) - mntget(path->mnt); + if (path->mnt != nd->mnt) { + path_to_nameidata(path, nd); + dget(dentry); + } + mntget(path->mnt); cookie = dentry->d_inode->i_op->follow_link(dentry, nd); error = PTR_ERR(cookie); if (!IS_ERR(cookie)) { @@ -573,22 +592,6 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata return error; } -static inline void dput_path(struct path *path, struct nameidata *nd) -{ - dput(path->dentry); - if (path->mnt != nd->mnt) - mntput(path->mnt); -} - -static inline void path_to_nameidata(struct path *path, struct nameidata *nd) -{ - dput(nd->dentry); - if (nd->mnt != path->mnt) - mntput(nd->mnt); - nd->mnt = path->mnt; - nd->dentry = path->dentry; -} - /* * This limits recursive symlink follows to 8, while * limiting consecutive symlinks to 40. -- cgit v1.2.3 From 34ca959cfc15cf09ad4da4f31ab034691e51af78 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:54 -0800 Subject: [PATCH] autofs4: add v5 follow_link mount trigger method This patch adds a follow_link inode method for the root of an autofs direct mount trigger. It also adds the corresponding mount options and updates the show_mount method. 
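In rough outline, the idea is that the root dentry of a direct mount carries a follow_link method, so simply walking onto it makes the VFS call back into the filesystem, which can then have the daemon mount something there before the walk continues. The sketch below shows only that general shape; apart from the follow_link signature (which matches the declaration added by this patch), the helpers are hypothetical and not the autofs4 implementation.

	#include <linux/dcache.h>
	#include <linux/fs.h>
	#include <linux/namei.h>

	/* Hypothetical helpers standing in for the real trigger machinery. */
	extern void trigger_mount_and_wait(struct dentry *dentry);
	extern void step_walk_onto_mount(struct nameidata *nd);

	static void *examplefs_follow_link(struct dentry *dentry, struct nameidata *nd)
	{
		/* Nothing mounted on the trigger yet: ask the daemon and wait. */
		if (!d_mountpoint(dentry))
			trigger_mount_and_wait(dentry);

		/* Move the in-progress walk onto whatever is now mounted here. */
		step_walk_onto_mount(nd);

		return NULL;	/* no cookie; nothing for ->put_link to release */
	}

	static struct inode_operations examplefs_trigger_root_iops = {
		.follow_link	= examplefs_follow_link,
	};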
Signed-off-by: Ian Kent Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 8 +++++++ fs/autofs4/inode.c | 52 +++++++++++++++++++++++++++++++++-------- fs/autofs4/root.c | 64 +++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 103 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index bc1b0543d2b6..ed388a1d8fc4 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -3,6 +3,7 @@ * linux/fs/autofs/autofs_i.h * * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved + * Copyright 2005-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -84,6 +85,10 @@ struct autofs_wait_queue { #define AUTOFS_SBI_MAGIC 0x6d4a556d +#define AUTOFS_TYP_INDIRECT 0x0001 +#define AUTOFS_TYP_DIRECT 0x0002 +#define AUTOFS_TYP_OFFSET 0x0004 + struct autofs_sb_info { u32 magic; struct dentry *root; @@ -96,6 +101,7 @@ struct autofs_sb_info { int min_proto; int max_proto; unsigned long exp_timeout; + unsigned int type; int reghost_enabled; int needs_reghost; struct super_block *sb; @@ -162,6 +168,8 @@ int autofs4_expire_multi(struct super_block *, struct vfsmount *, extern struct inode_operations autofs4_symlink_inode_operations; extern struct inode_operations autofs4_dir_inode_operations; extern struct inode_operations autofs4_root_inode_operations; +extern struct inode_operations autofs4_indirect_root_inode_operations; +extern struct inode_operations autofs4_direct_root_inode_operations; extern struct file_operations autofs4_dir_operations; extern struct file_operations autofs4_root_operations; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index d9a71dab40fc..3801bed94e45 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -3,6 +3,7 @@ * linux/fs/autofs/inode.c * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved + * Copyright 2005-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -177,6 +178,13 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",minproto=%d", sbi->min_proto); seq_printf(m, ",maxproto=%d", sbi->max_proto); + if (sbi->type & AUTOFS_TYP_OFFSET) + seq_printf(m, ",offset"); + else if (sbi->type & AUTOFS_TYP_DIRECT) + seq_printf(m, ",direct"); + else + seq_printf(m, ",indirect"); + return 0; } @@ -186,7 +194,8 @@ static struct super_operations autofs4_sops = { .show_options = autofs4_show_options, }; -enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; +enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, + Opt_indirect, Opt_direct, Opt_offset}; static match_table_t tokens = { {Opt_fd, "fd=%u"}, @@ -195,11 +204,15 @@ static match_table_t tokens = { {Opt_pgrp, "pgrp=%u"}, {Opt_minproto, "minproto=%u"}, {Opt_maxproto, "maxproto=%u"}, + {Opt_indirect, "indirect"}, + {Opt_direct, "direct"}, + {Opt_offset, "offset"}, {Opt_err, NULL} }; static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, - pid_t *pgrp, int *minproto, int *maxproto) + pid_t *pgrp, unsigned int *type, + int *minproto, int *maxproto) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -253,6 +266,15 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, return 1; *maxproto = option; break; + case Opt_indirect: + 
*type = AUTOFS_TYP_INDIRECT; + break; + case Opt_direct: + *type = AUTOFS_TYP_DIRECT; + break; + case Opt_offset: + *type = AUTOFS_TYP_DIRECT | AUTOFS_TYP_OFFSET; + break; default: return 1; } @@ -271,6 +293,11 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi) return ino; } +void autofs4_dentry_release(struct dentry *); +static struct dentry_operations autofs4_sb_dentry_operations = { + .d_release = autofs4_dentry_release, +}; + int autofs4_fill_super(struct super_block *s, void *data, int silent) { struct inode * root_inode; @@ -297,6 +324,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; + sbi->type = 0; sbi->min_proto = 0; sbi->max_proto = 0; mutex_init(&sbi->wq_mutex); @@ -315,27 +343,31 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) if (!ino) goto fail_free; root_inode = autofs4_get_inode(s, ino); - kfree(ino); if (!root_inode) - goto fail_free; + goto fail_ino; - root_inode->i_op = &autofs4_root_inode_operations; - root_inode->i_fop = &autofs4_root_operations; root = d_alloc_root(root_inode); - pipe = NULL; - if (!root) goto fail_iput; + pipe = NULL; + + root->d_op = &autofs4_sb_dentry_operations; + root->d_fsdata = ino; /* Can this call block? */ if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, - &sbi->oz_pgrp, + &sbi->oz_pgrp, &sbi->type, &sbi->min_proto, &sbi->max_proto)) { printk("autofs: called with bogus options\n"); goto fail_dput; } + root_inode->i_fop = &autofs4_root_operations; + root_inode->i_op = sbi->type & AUTOFS_TYP_DIRECT ? + &autofs4_direct_root_inode_operations : + &autofs4_indirect_root_inode_operations; + /* Couldn't this be tested earlier? */ if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { @@ -391,6 +423,8 @@ fail_dput: fail_iput: printk("autofs: get root dentry failed\n"); iput(root_inode); +fail_ino: + kfree(ino); fail_free: kfree(sbi); fail_unlock: diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 26eb1f024866..3f0048582248 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -4,7 +4,7 @@ * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 1999-2000 Jeremy Fitzhardinge - * Copyright 2001-2003 Ian Kent + * Copyright 2001-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -30,6 +30,7 @@ static int autofs4_dir_close(struct inode *inode, struct file *file); static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir); static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); +static void *autofs4_follow_link(struct dentry *, struct nameidata *); struct file_operations autofs4_root_operations = { .open = dcache_dir_open, @@ -46,7 +47,7 @@ struct file_operations autofs4_dir_operations = { .readdir = autofs4_dir_readdir, }; -struct inode_operations autofs4_root_inode_operations = { +struct inode_operations autofs4_indirect_root_inode_operations = { .lookup = autofs4_lookup, .unlink = autofs4_dir_unlink, .symlink = autofs4_dir_symlink, @@ -54,6 +55,11 @@ struct inode_operations autofs4_root_inode_operations = { .rmdir = autofs4_dir_rmdir, }; +struct inode_operations autofs4_direct_root_inode_operations = { + .lookup = autofs4_lookup, + .follow_link = autofs4_follow_link, +}; + struct 
inode_operations autofs4_dir_inode_operations = { .lookup = autofs4_lookup, .unlink = autofs4_dir_unlink, @@ -252,7 +258,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) */ status = d_invalidate(dentry); if (status != -EBUSY) - return 0; + return -ENOENT; } DPRINTK("dentry=%p %.*s ino=%p", @@ -271,17 +277,17 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) DPRINTK("mount done status=%d", status); if (status && dentry->d_inode) - return 0; /* Try to get the kernel to invalidate this dentry */ + return status; /* Try to get the kernel to invalidate this dentry */ /* Turn this into a real negative dentry? */ if (status == -ENOENT) { spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - return 0; + return status; } else if (status) { /* Return a negative dentry, but leave it "pending" */ - return 0; + return status; } /* Trigger mount for path component or follow link */ } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) || @@ -300,7 +306,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - return 0; + return status; } } @@ -311,7 +317,41 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - return 1; + return status; +} + +/* For autofs direct mounts the follow link triggers the mount */ +static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + int oz_mode = autofs4_oz_mode(sbi); + unsigned int lookup_type; + int status; + + DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d", + dentry, dentry->d_name.len, dentry->d_name.name, oz_mode, + nd->flags); + + /* If it's our master or we shouldn't trigger a mount we're done */ + lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY); + if (oz_mode || !lookup_type) + goto done; + + status = try_to_fill_dentry(dentry, 0); + if (status) + goto out_error; + + if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) { + status = -ENOENT; + goto out_error; + } + +done: + return NULL; + +out_error: + path_release(nd); + return ERR_PTR(status); } /* @@ -326,13 +366,13 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); int oz_mode = autofs4_oz_mode(sbi); int flags = nd ? nd->flags : 0; - int status = 1; + int status = 0; /* Pending dentry */ if (autofs4_ispending(dentry)) { if (!oz_mode) status = try_to_fill_dentry(dentry, flags); - return status; + return !status; } /* Negative dentry.. invalidate if "old" */ @@ -349,14 +389,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) spin_unlock(&dcache_lock); if (!oz_mode) status = try_to_fill_dentry(dentry, flags); - return status; + return !status; } spin_unlock(&dcache_lock); return 1; } -static void autofs4_dentry_release(struct dentry *de) +void autofs4_dentry_release(struct dentry *de) { struct autofs_info *inf; -- cgit v1.2.3 From 3a15e2ab5d6e79a79291734ac24f33d51c0ae389 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:55 -0800 Subject: [PATCH] autofs4: add v5 expire logic This patch adds expire logic for autofs direct mounts. 
Signed-off-by: Ian Kent Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 87 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 02a218fbde5f..8fd92eaf936d 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -4,7 +4,7 @@ * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 1999-2000 Jeremy Fitzhardinge - * Copyright 2001-2003 Ian Kent + * Copyright 2001-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -99,6 +99,39 @@ static struct dentry *next_dentry(struct dentry *p, struct dentry *root) return list_entry(next, struct dentry, d_u.d_child); } +/* + * Check a direct mount point for busyness. + * Direct mounts have similar expiry semantics to tree mounts. + * The tree is not busy iff no mountpoints are busy and there are no + * autofs submounts. + */ +static int autofs4_direct_busy(struct vfsmount *mnt, + struct dentry *top, + unsigned long timeout, + int do_now) +{ + DPRINTK("top %p %.*s", + top, (int) top->d_name.len, top->d_name.name); + + /* Not a mountpoint - give up */ + if (!d_mountpoint(top)) + return 1; + + /* If it's busy update the expiry counters */ + if (!may_umount_tree(mnt)) { + struct autofs_info *ino = autofs4_dentry_ino(top); + if (ino) + ino->last_used = jiffies; + return 1; + } + + /* Timeout of a direct mount is determined by its top dentry */ + if (!autofs4_can_expire(top, timeout, do_now)) + return 1; + + return 0; +} + /* Check a directory tree of mount points for busyness * The tree is not busy iff no mountpoints are busy */ @@ -208,16 +241,48 @@ cont: return NULL; } +/* Check if we can expire a direct mount (possibly a tree) */ +static struct dentry *autofs4_expire_direct(struct super_block *sb, + struct vfsmount *mnt, + struct autofs_sb_info *sbi, + int how) +{ + unsigned long timeout; + struct dentry *root = dget(sb->s_root); + int do_now = how & AUTOFS_EXP_IMMEDIATE; + + if (!sbi->exp_timeout || !root) + return NULL; + + now = jiffies; + timeout = sbi->exp_timeout; + + /* Lock the tree as we must expire as a whole */ + spin_lock(&sbi->fs_lock); + if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { + struct autofs_info *ino = autofs4_dentry_ino(root); + + /* Set this flag early to catch sys_chdir and the like */ + ino->flags |= AUTOFS_INF_EXPIRING; + spin_unlock(&sbi->fs_lock); + return root; + } + spin_unlock(&sbi->fs_lock); + dput(root); + + return NULL; +} + /* * Find an eligible tree to time-out * A tree is eligible if :- * - it is unused by any user process * - it has been unused for exp_timeout time */ -static struct dentry *autofs4_expire(struct super_block *sb, - struct vfsmount *mnt, - struct autofs_sb_info *sbi, - int how) +static struct dentry *autofs4_expire_indirect(struct super_block *sb, + struct vfsmount *mnt, + struct autofs_sb_info *sbi, + int how) { unsigned long timeout; struct dentry *root = sb->s_root; @@ -249,7 +314,12 @@ static struct dentry *autofs4_expire(struct super_block *sb, dentry = dget(dentry); spin_unlock(&dcache_lock); - /* Case 1: indirect mount or top level direct mount */ + /* + * Case 1: (i) indirect mount or top level pseudo direct mount + * (autofs-4.1). + * (ii) indirect mount with offset mount, check the "/" + * offset (autofs-5.0+). 
+ */ if (d_mountpoint(dentry)) { DPRINTK("checking mountpoint %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); @@ -283,7 +353,10 @@ static struct dentry *autofs4_expire(struct super_block *sb, break; } spin_unlock(&sbi->fs_lock); - /* Case 3: direct mount, expire individual leaves */ + /* + * Case 3: pseudo direct mount, expire individual leaves + * (autofs-4.1). + */ } else { expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); if (expired) { @@ -325,7 +398,7 @@ int autofs4_expire_run(struct super_block *sb, pkt.hdr.proto_version = sbi->version; pkt.hdr.type = autofs_ptype_expire; - if ((dentry = autofs4_expire(sb, mnt, sbi, 0)) == NULL) + if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL) return -EAGAIN; pkt.len = dentry->d_name.len; @@ -351,7 +424,12 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, if (arg && get_user(do_now, arg)) return -EFAULT; - if ((dentry = autofs4_expire(sb, mnt, sbi, do_now)) != NULL) { + if (sbi->type & AUTOFS_TYP_DIRECT) + dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); + else + dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); + + if (dentry) { struct autofs_info *ino = autofs4_dentry_ino(dentry); /* This is synchronous because it makes the daemon a -- cgit v1.2.3 From 5c0a32fc2cd0be912511199449a37a4a6f0f582d Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:55 -0800 Subject: [PATCH] autofs4: add new packet type for v5 communications This patch define a new autofs packet for autofs v5 and updates the waitq.c functions to handle the additional packet type. Signed-off-by: Ian Kent Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 23 +++++++++---- fs/autofs4/waitq.c | 86 +++++++++++++++++++++++++++++++++++++++++------- include/linux/auto_fs4.h | 51 +++++++++++++++++++++++++--- 3 files changed, 136 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index ed388a1d8fc4..37c8d909d1e9 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -77,6 +77,12 @@ struct autofs_wait_queue { int hash; int len; char *name; + u32 dev; + u64 ino; + uid_t uid; + gid_t gid; + pid_t pid; + pid_t tgid; /* This is for status reporting upon return */ int status; atomic_t notified; @@ -180,13 +186,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info /* Queue management functions */ -enum autofs_notify -{ - NFY_NONE, - NFY_MOUNT, - NFY_EXPIRE -}; - int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); void autofs4_catatonic_mode(struct autofs_sb_info *); @@ -204,6 +203,16 @@ static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **de return res; } +static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) +{ + return new_encode_dev(sbi->sb->s_dev); +} + +static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi) +{ + return sbi->sb->s_root->d_inode->i_ino; +} + static inline int simple_positive(struct dentry *dentry) { return dentry->d_inode && !d_unhashed(dentry); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index b0bb9d43bcd9..12da2c977b0a 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -3,7 +3,7 @@ * linux/fs/autofs/waitq.c * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved - * Copyright 2001-2003 Ian Kent + * Copyright 2001-2006 Ian Kent * * This file is part of the 
Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -97,7 +97,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pkt.hdr.proto_version = sbi->version; pkt.hdr.type = type; - if (type == autofs_ptype_missing) { + switch (type) { + /* Kernel protocol v4 missing and expire packets */ + case autofs_ptype_missing: + { struct autofs_packet_missing *mp = &pkt.missing; pktsz = sizeof(*mp); @@ -106,7 +109,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, mp->len = wq->len; memcpy(mp->name, wq->name, wq->len); mp->name[wq->len] = '\0'; - } else if (type == autofs_ptype_expire_multi) { + break; + } + case autofs_ptype_expire_multi: + { struct autofs_packet_expire_multi *ep = &pkt.expire_multi; pktsz = sizeof(*ep); @@ -115,7 +121,34 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, ep->len = wq->len; memcpy(ep->name, wq->name, wq->len); ep->name[wq->len] = '\0'; - } else { + break; + } + /* + * Kernel protocol v5 packet for handling indirect and direct + * mount missing and expire requests + */ + case autofs_ptype_missing_indirect: + case autofs_ptype_expire_indirect: + case autofs_ptype_missing_direct: + case autofs_ptype_expire_direct: + { + struct autofs_v5_packet *packet = &pkt.v5_packet; + + pktsz = sizeof(*packet); + + packet->wait_queue_token = wq->wait_queue_token; + packet->len = wq->len; + memcpy(packet->name, wq->name, wq->len); + packet->name[wq->len] = '\0'; + packet->dev = wq->dev; + packet->ino = wq->ino; + packet->uid = wq->uid; + packet->gid = wq->gid; + packet->pid = wq->pid; + packet->tgid = wq->tgid; + break; + } + default: printk("autofs4_notify_daemon: bad type %d!\n", type); return; } @@ -161,7 +194,9 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, { struct autofs_wait_queue *wq; char *name; - int len, status; + unsigned int len = 0; + unsigned int hash = 0; + int status; /* In catatonic mode, we don't wait for nobody */ if (sbi->catatonic) @@ -171,11 +206,17 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, if (!name) return -ENOMEM; - len = autofs4_getpath(sbi, dentry, &name); - if (!len) { - kfree(name); - return -ENOENT; + /* If this is a direct mount request create a dummy name */ + if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYP_DIRECT)) + len = sprintf(name, "%p", dentry); + else { + len = autofs4_getpath(sbi, dentry, &name); + if (!len) { + kfree(name); + return -ENOENT; + } } + hash = full_name_hash(name, len); if (mutex_lock_interruptible(&sbi->wq_mutex)) { kfree(name); @@ -211,9 +252,15 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->next = sbi->queues; sbi->queues = wq; init_waitqueue_head(&wq->queue); - wq->hash = dentry->d_name.hash; + wq->hash = hash; wq->name = name; wq->len = len; + wq->dev = autofs4_get_dev(sbi); + wq->ino = autofs4_get_ino(sbi); + wq->uid = current->uid; + wq->gid = current->gid; + wq->pid = current->pid; + wq->tgid = current->tgid; wq->status = -EINTR; /* Status return if interrupted */ atomic_set(&wq->wait_ctr, 2); atomic_set(&wq->notified, 1); @@ -227,8 +274,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, } if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) { - int type = (notify == NFY_MOUNT ? 
- autofs_ptype_missing : autofs_ptype_expire_multi); + int type; + + if (sbi->version < 5) { + if (notify == NFY_MOUNT) + type = autofs_ptype_missing; + else + type = autofs_ptype_expire_multi; + } else { + if (notify == NFY_MOUNT) + type = (sbi->type & AUTOFS_TYP_DIRECT) ? + autofs_ptype_missing_direct : + autofs_ptype_missing_indirect; + else + type = (sbi->type & AUTOFS_TYP_DIRECT) ? + autofs_ptype_expire_direct : + autofs_ptype_expire_indirect; + } DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index d998ddcf7288..0a6bc52ffe88 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -19,18 +19,37 @@ #undef AUTOFS_MIN_PROTO_VERSION #undef AUTOFS_MAX_PROTO_VERSION -#define AUTOFS_PROTO_VERSION 4 +#define AUTOFS_PROTO_VERSION 5 #define AUTOFS_MIN_PROTO_VERSION 3 -#define AUTOFS_MAX_PROTO_VERSION 4 +#define AUTOFS_MAX_PROTO_VERSION 5 -#define AUTOFS_PROTO_SUBVERSION 10 +#define AUTOFS_PROTO_SUBVERSION 0 /* Mask for expire behaviour */ #define AUTOFS_EXP_IMMEDIATE 1 #define AUTOFS_EXP_LEAVES 2 -/* New message type */ -#define autofs_ptype_expire_multi 2 /* Expire entry (umount request) */ +/* Daemon notification packet types */ +enum autofs_notify { + NFY_NONE, + NFY_MOUNT, + NFY_EXPIRE +}; + +/* Kernel protocol version 4 packet types */ + +/* Expire entry (umount request) */ +#define autofs_ptype_expire_multi 2 + +/* Kernel protocol version 5 packet types */ + +/* Indirect mount missing and expire requests. */ +#define autofs_ptype_missing_indirect 3 +#define autofs_ptype_expire_indirect 4 + +/* Direct mount missing and expire requests */ +#define autofs_ptype_missing_direct 5 +#define autofs_ptype_expire_direct 6 /* v4 multi expire (via pipe) */ struct autofs_packet_expire_multi { @@ -40,14 +59,36 @@ struct autofs_packet_expire_multi { char name[NAME_MAX+1]; }; +/* autofs v5 common packet struct */ +struct autofs_v5_packet { + struct autofs_packet_hdr hdr; + autofs_wqt_t wait_queue_token; + __u32 dev; + __u64 ino; + __u32 uid; + __u32 gid; + __u32 pid; + __u32 tgid; + __u32 len; + char name[NAME_MAX+1]; +}; + +typedef struct autofs_v5_packet autofs_packet_missing_indirect_t; +typedef struct autofs_v5_packet autofs_packet_expire_indirect_t; +typedef struct autofs_v5_packet autofs_packet_missing_direct_t; +typedef struct autofs_v5_packet autofs_packet_expire_direct_t; + union autofs_packet_union { struct autofs_packet_hdr hdr; struct autofs_packet_missing missing; struct autofs_packet_expire expire; struct autofs_packet_expire_multi expire_multi; + struct autofs_v5_packet v5_packet; }; #define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93,0x66,int) +#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI +#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI #define AUTOFS_IOC_PROTOSUBVER _IOR(0x93,0x67,int) #define AUTOFS_IOC_ASKREGHOST _IOR(0x93,0x68,int) #define AUTOFS_IOC_TOGGLEREGHOST _IOR(0x93,0x69,int) -- cgit v1.2.3 From 44d53eb041d901620b1090590a549a705767fd10 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:56 -0800 Subject: [PATCH] autofs4: change AUTOFS_TYP_* AUTOFS_TYPE_* Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 6 +++--- fs/autofs4/expire.c | 2 +- fs/autofs4/inode.c | 12 ++++++------ fs/autofs4/waitq.c | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h 
index 37c8d909d1e9..ff6239d57b4b 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -91,9 +91,9 @@ struct autofs_wait_queue { #define AUTOFS_SBI_MAGIC 0x6d4a556d -#define AUTOFS_TYP_INDIRECT 0x0001 -#define AUTOFS_TYP_DIRECT 0x0002 -#define AUTOFS_TYP_OFFSET 0x0004 +#define AUTOFS_TYPE_INDIRECT 0x0001 +#define AUTOFS_TYPE_DIRECT 0x0002 +#define AUTOFS_TYPE_OFFSET 0x0004 struct autofs_sb_info { u32 magic; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 8fd92eaf936d..08e33218a64a 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -424,7 +424,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, if (arg && get_user(do_now, arg)) return -EFAULT; - if (sbi->type & AUTOFS_TYP_DIRECT) + if (sbi->type & AUTOFS_TYPE_DIRECT) dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); else dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 3801bed94e45..943888905493 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -178,9 +178,9 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",minproto=%d", sbi->min_proto); seq_printf(m, ",maxproto=%d", sbi->max_proto); - if (sbi->type & AUTOFS_TYP_OFFSET) + if (sbi->type & AUTOFS_TYPE_OFFSET) seq_printf(m, ",offset"); - else if (sbi->type & AUTOFS_TYP_DIRECT) + else if (sbi->type & AUTOFS_TYPE_DIRECT) seq_printf(m, ",direct"); else seq_printf(m, ",indirect"); @@ -267,13 +267,13 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, *maxproto = option; break; case Opt_indirect: - *type = AUTOFS_TYP_INDIRECT; + *type = AUTOFS_TYPE_INDIRECT; break; case Opt_direct: - *type = AUTOFS_TYP_DIRECT; + *type = AUTOFS_TYPE_DIRECT; break; case Opt_offset: - *type = AUTOFS_TYP_DIRECT | AUTOFS_TYP_OFFSET; + *type = AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET; break; default: return 1; @@ -364,7 +364,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) } root_inode->i_fop = &autofs4_root_operations; - root_inode->i_op = sbi->type & AUTOFS_TYP_DIRECT ? + root_inode->i_op = sbi->type & AUTOFS_TYPE_DIRECT ? &autofs4_direct_root_inode_operations : &autofs4_indirect_root_inode_operations; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 12da2c977b0a..894d74671bd0 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -207,7 +207,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, return -ENOMEM; /* If this is a direct mount request create a dummy name */ - if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYP_DIRECT)) + if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) len = sprintf(name, "%p", dentry); else { len = autofs4_getpath(sbi, dentry, &name); @@ -283,11 +283,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, type = autofs_ptype_expire_multi; } else { if (notify == NFY_MOUNT) - type = (sbi->type & AUTOFS_TYP_DIRECT) ? + type = (sbi->type & AUTOFS_TYPE_DIRECT) ? autofs_ptype_missing_direct : autofs_ptype_missing_indirect; else - type = (sbi->type & AUTOFS_TYP_DIRECT) ? + type = (sbi->type & AUTOFS_TYPE_DIRECT) ? autofs_ptype_expire_direct : autofs_ptype_expire_indirect; } -- cgit v1.2.3 From 3370c74b2e147169d5bba6665e03d3281f5795ed Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 27 Mar 2006 01:14:57 -0800 Subject: [PATCH] Remove redundant check from autofs4_put_super We have to have a valid sbi here, or we'd have oopsed already. 
(There's a dereference of sbi->catatonic a few lines above) Coverity #740 Signed-off-by: Dave Jones Cc: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 943888905493..4eddee4e76fc 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -157,8 +157,7 @@ static void autofs4_put_super(struct super_block *sb) autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ /* Clean up and release dangling references */ - if (sbi) - autofs4_force_release(sbi); + autofs4_force_release(sbi); kfree(sbi); -- cgit v1.2.3 From 871f94344cea36b2ce91231f442f9f9298529712 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:58 -0800 Subject: [PATCH] autofs4: follow_link missing functionality This functionality is also need for operation of autofs v5 direct mounts. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 4 ---- fs/autofs4/root.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 44 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 08e33218a64a..b8ce02607d66 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -113,10 +113,6 @@ static int autofs4_direct_busy(struct vfsmount *mnt, DPRINTK("top %p %.*s", top, (int) top->d_name.len, top->d_name.name); - /* Not a mountpoint - give up */ - if (!d_mountpoint(top)) - return 1; - /* If it's busy update the expiry counters */ if (!may_umount_tree(mnt)) { struct autofs_info *ino = autofs4_dentry_ino(top); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 3f0048582248..c8fe43a475e2 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -57,6 +57,9 @@ struct inode_operations autofs4_indirect_root_inode_operations = { struct inode_operations autofs4_direct_root_inode_operations = { .lookup = autofs4_lookup, + .unlink = autofs4_dir_unlink, + .mkdir = autofs4_dir_mkdir, + .rmdir = autofs4_dir_rmdir, .follow_link = autofs4_follow_link, }; @@ -337,15 +340,50 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) if (oz_mode || !lookup_type) goto done; - status = try_to_fill_dentry(dentry, 0); - if (status) - goto out_error; + /* + * If a request is pending wait for it. + * If it's a mount then it won't be expired till at least + * a liitle later and if it's an expire then we might need + * to mount it again. + */ + if (autofs4_ispending(dentry)) { + DPRINTK("waiting for active request %p name=%.*s", + dentry, dentry->d_name.len, dentry->d_name.name); - if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) { - status = -ENOENT; - goto out_error; + status = autofs4_wait(sbi, dentry, NFY_NONE); + + DPRINTK("request done status=%d", status); } + /* + * If the dentry contains directories then it is an + * autofs multi-mount with no root mount offset. So + * don't try to mount it again. + */ + spin_lock(&dcache_lock); + if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { + spin_unlock(&dcache_lock); + + status = try_to_fill_dentry(dentry, 0); + if (status) + goto out_error; + + /* + * The mount succeeded but if there is no root mount + * it must be an autofs multi-mount with no root offset + * so we don't need to follow the mount. 
+ */ + if (d_mountpoint(dentry)) { + if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) { + status = -ENOENT; + goto out_error; + } + } + + goto done; + } + spin_unlock(&dcache_lock); + done: return NULL; -- cgit v1.2.3 From 3e7b19198003fc25b11838e709f17d4fa173b2d7 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:59 -0800 Subject: [PATCH] autofs4: atomic var underflow Fix accidental underflow of the atomic counter. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 2 +- fs/autofs4/waitq.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index ff6239d57b4b..617fd7b37447 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -85,7 +85,7 @@ struct autofs_wait_queue { pid_t tgid; /* This is for status reporting upon return */ int status; - atomic_t notified; + atomic_t notify; atomic_t wait_ctr; }; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 894d74671bd0..142ab6aa2aa1 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -263,7 +263,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->tgid = current->tgid; wq->status = -EINTR; /* Status return if interrupted */ atomic_set(&wq->wait_ctr, 2); - atomic_set(&wq->notified, 1); + atomic_set(&wq->notify, 1); mutex_unlock(&sbi->wq_mutex); } else { atomic_inc(&wq->wait_ctr); @@ -273,9 +273,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); } - if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) { + if (notify != NFY_NONE && atomic_read(&wq->notify)) { int type; + atomic_dec(&wq->notify); + if (sbi->version < 5) { if (notify == NFY_MOUNT) type = autofs_ptype_missing; -- cgit v1.2.3 From efc36aa5608f5717338747e152c23f2cfdb14697 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:14:59 -0800 Subject: [PATCH] knfsd: Change the store of auth_domains to not be a 'cache' The 'auth_domain's are simply handles on internal data structures. They do not cache information from user-space, and forcing them into the mold of a 'cache' misrepresents their true nature and causes confusion. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 5 +- include/linux/sunrpc/svcauth.h | 12 ++-- net/sunrpc/auth_gss/svcauth_gss.c | 14 ++--- net/sunrpc/sunrpc_syms.c | 4 +- net/sunrpc/svcauth.c | 122 +++++++++++--------------------------- net/sunrpc/svcauth_unix.c | 69 ++++++++++----------- 6 files changed, 81 insertions(+), 145 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 417ec02df44f..ac0997731fce 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -242,7 +242,7 @@ static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b) static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item) { - cache_get(&item->ek_client->h); + kref_get(&item->ek_client->ref); new->ek_client = item->ek_client; new->ek_fsidtype = item->ek_fsidtype; new->ek_fsid[0] = item->ek_fsid[0]; @@ -474,7 +474,7 @@ static inline int svc_export_match(struct svc_export *a, struct svc_export *b) } static inline void svc_export_init(struct svc_export *new, struct svc_export *item) { - cache_get(&item->ex_client->h); + kref_get(&item->ex_client->ref); new->ex_client = item->ex_client; new->ex_dentry = dget(item->ex_dentry); new->ex_mnt = mntget(item->ex_mnt); @@ -1129,7 +1129,6 @@ exp_delclient(struct nfsctl_client *ncp) */ if (dom) { err = auth_unix_forget_old(dom); - dom->h.expiry_time = get_seconds(); auth_domain_put(dom); } diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index c119ce7cbd22..2fe2087edd66 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -45,9 +45,10 @@ struct svc_rqst; /* forward decl */ * of ip addresses to the given client. */ struct auth_domain { - struct cache_head h; + struct kref ref; + struct hlist_node hash; char *name; - int flavour; + struct auth_ops *flavour; }; /* @@ -86,6 +87,9 @@ struct auth_domain { * * domain_release() * This call releases a domain. + * set_client() + * Givens a pending request (struct svc_rqst), finds and assigns + * an appropriate 'auth_domain' as the client. 
*/ struct auth_ops { char * name; @@ -117,7 +121,7 @@ extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom); -extern struct auth_domain *auth_domain_lookup(struct auth_domain *item, int set); +extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); extern struct auth_domain *auth_unix_lookup(struct in_addr addr); extern int auth_unix_forget_old(struct auth_domain *dom); @@ -160,8 +164,6 @@ static inline unsigned long hash_mem(char *buf, int length, int bits) return hash >> (BITS_PER_LONG - bits); } -extern struct cache_detail auth_domain_cache, ip_map_cache; - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_H_ */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 23632d84d8d7..6b073c2e6930 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -645,6 +645,8 @@ find_gss_auth_domain(struct gss_ctx *ctx, u32 svc) return auth_domain_find(name); } +static struct auth_ops svcauthops_gss; + int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) { @@ -655,20 +657,18 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) new = kmalloc(sizeof(*new), GFP_KERNEL); if (!new) goto out; - cache_init(&new->h.h); + kref_init(&new->h.ref); new->h.name = kmalloc(strlen(name) + 1, GFP_KERNEL); if (!new->h.name) goto out_free_dom; strcpy(new->h.name, name); - new->h.flavour = RPC_AUTH_GSS; + new->h.flavour = &svcauthops_gss; new->pseudoflavor = pseudoflavor; - new->h.h.expiry_time = NEVER; - test = auth_domain_lookup(&new->h, 1); - if (test == &new->h) { - BUG_ON(atomic_dec_and_test(&new->h.h.refcnt)); - } else { /* XXX Duplicate registration? */ + test = auth_domain_lookup(name, &new->h); + if (test != &new->h) { /* XXX Duplicate registration? */ auth_domain_put(&new->h); + /* dangling ref-count... */ goto out; } return 0; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 9f7373203592..40401196e7de 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -142,6 +142,7 @@ EXPORT_SYMBOL(nlm_debug); extern int register_rpc_pipefs(void); extern void unregister_rpc_pipefs(void); +extern struct cache_detail ip_map_cache; static int __init init_sunrpc(void) @@ -158,7 +159,6 @@ init_sunrpc(void) #ifdef CONFIG_PROC_FS rpc_proc_init(); #endif - cache_register(&auth_domain_cache); cache_register(&ip_map_cache); out: return err; @@ -169,8 +169,6 @@ cleanup_sunrpc(void) { unregister_rpc_pipefs(); rpc_destroy_mempool(); - if (cache_unregister(&auth_domain_cache)) - printk(KERN_ERR "sunrpc: failed to unregister auth_domain cache\n"); if (cache_unregister(&ip_map_cache)) printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); #ifdef RPC_DEBUG diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index dda4f0c63511..5b28c6176806 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -106,112 +106,56 @@ svc_auth_unregister(rpc_authflavor_t flavor) EXPORT_SYMBOL(svc_auth_unregister); /************************************************** - * cache for domain name to auth_domain - * Entries are only added by flavours which will normally - * have a structure that 'inherits' from auth_domain. - * e.g. 
when an IP -> domainname is given to auth_unix, - * and the domain name doesn't exist, it will create a - * auth_unix_domain and add it to this hash table. - * If it finds the name does exist, but isn't AUTH_UNIX, - * it will complain. + * 'auth_domains' are stored in a hash table indexed by name. + * When the last reference to an 'auth_domain' is dropped, + * the object is unhashed and freed. + * If auth_domain_lookup fails to find an entry, it will return + * it's second argument 'new'. If this is non-null, it will + * have been atomically linked into the table. */ -/* - * Auth auth_domain cache is somewhat different to other caches, - * largely because the entries are possibly of different types: - * each auth flavour has it's own type. - * One consequence of this that DefineCacheLookup cannot - * allocate a new structure as it cannot know the size. - * Notice that the "INIT" code fragment is quite different - * from other caches. When auth_domain_lookup might be - * creating a new domain, the new domain is passed in - * complete and it is used as-is rather than being copied into - * another structure. - */ #define DN_HASHBITS 6 #define DN_HASHMAX (1<flavour]->domain_release(dom); -} - - -struct cache_detail auth_domain_cache = { - .owner = THIS_MODULE, - .hash_size = DN_HASHMAX, - .hash_table = auth_domain_table, - .name = "auth.domain", - .cache_put = auth_domain_drop, -}; +static struct hlist_head auth_domain_table[DN_HASHMAX]; +static spinlock_t auth_domain_lock = SPIN_LOCK_UNLOCKED; void auth_domain_put(struct auth_domain *dom) { - auth_domain_drop(&dom->h, &auth_domain_cache); -} - -static inline int auth_domain_hash(struct auth_domain *item) -{ - return hash_str(item->name, DN_HASHBITS); -} -static inline int auth_domain_match(struct auth_domain *tmp, struct auth_domain *item) -{ - return strcmp(tmp->name, item->name) == 0; + if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) { + hlist_del(&dom->hash); + dom->flavour->domain_release(dom); + } } struct auth_domain * -auth_domain_lookup(struct auth_domain *item, int set) +auth_domain_lookup(char *name, struct auth_domain *new) { - struct auth_domain *tmp = NULL; - struct cache_head **hp, **head; - head = &auth_domain_cache.hash_table[auth_domain_hash(item)]; - - if (set) - write_lock(&auth_domain_cache.hash_lock); - else - read_lock(&auth_domain_cache.hash_lock); - for (hp=head; *hp != NULL; hp = &tmp->h.next) { - tmp = container_of(*hp, struct auth_domain, h); - if (!auth_domain_match(tmp, item)) - continue; - if (!set) { - cache_get(&tmp->h); - goto out_noset; + struct auth_domain *hp; + struct hlist_head *head; + struct hlist_node *np; + + head = &auth_domain_table[hash_str(name, DN_HASHBITS)]; + + spin_lock(&auth_domain_lock); + + hlist_for_each_entry(hp, np, head, hash) { + if (strcmp(hp->name, name)==0) { + kref_get(&hp->ref); + spin_unlock(&auth_domain_lock); + return hp; } - *hp = tmp->h.next; - tmp->h.next = NULL; - auth_domain_drop(&tmp->h, &auth_domain_cache); - goto out_set; } - /* Didn't find anything */ - if (!set) - goto out_nada; - auth_domain_cache.entries++; -out_set: - item->h.next = *head; - *head = &item->h; - cache_get(&item->h); - write_unlock(&auth_domain_cache.hash_lock); - cache_fresh(&auth_domain_cache, &item->h, item->h.expiry_time); - cache_get(&item->h); - return item; -out_nada: - tmp = NULL; -out_noset: - read_unlock(&auth_domain_cache.hash_lock); - return tmp; + if (new) { + hlist_add_head(&new->hash, head); + kref_get(&new->ref); + } + spin_unlock(&auth_domain_lock); + return new; } 
struct auth_domain *auth_domain_find(char *name) { - struct auth_domain *rv, ad; - - ad.name = name; - rv = auth_domain_lookup(&ad, 0); - return rv; + return auth_domain_lookup(name, NULL); } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 3e6c694bbad1..17e8b2a3130c 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -27,41 +27,35 @@ struct unix_domain { /* other stuff later */ }; +extern struct auth_ops svcauth_unix; + struct auth_domain *unix_domain_find(char *name) { - struct auth_domain *rv, ud; - struct unix_domain *new; - - ud.name = name; - - rv = auth_domain_lookup(&ud, 0); - - foundit: - if (rv && rv->flavour != RPC_AUTH_UNIX) { - auth_domain_put(rv); - return NULL; - } - if (rv) - return rv; - - new = kmalloc(sizeof(*new), GFP_KERNEL); - if (new == NULL) - return NULL; - cache_init(&new->h.h); - new->h.name = kstrdup(name, GFP_KERNEL); - new->h.flavour = RPC_AUTH_UNIX; - new->addr_changes = 0; - new->h.h.expiry_time = NEVER; - - rv = auth_domain_lookup(&new->h, 2); - if (rv == &new->h) { - if (atomic_dec_and_test(&new->h.h.refcnt)) BUG(); - } else { - auth_domain_put(&new->h); - goto foundit; + struct auth_domain *rv; + struct unix_domain *new = NULL; + + rv = auth_domain_lookup(name, NULL); + while(1) { + if (rv != &new->h) { + if (new) auth_domain_put(&new->h); + return rv; + } + if (rv && rv->flavour != &svcauth_unix) { + auth_domain_put(rv); + return NULL; + } + if (rv) + return rv; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new == NULL) + return NULL; + kref_init(&new->h.ref); + new->h.name = kstrdup(name, GFP_KERNEL); + new->h.flavour = &svcauth_unix; + new->addr_changes = 0; + rv = auth_domain_lookup(name, &new->h); } - - return rv; } static void svcauth_unix_domain_release(struct auth_domain *dom) @@ -130,7 +124,7 @@ static inline void ip_map_init(struct ip_map *new, struct ip_map *item) } static inline void ip_map_update(struct ip_map *new, struct ip_map *item) { - cache_get(&item->m_client->h.h); + kref_get(&item->m_client->h.ref); new->m_client = item->m_client; new->m_add_change = item->m_add_change; } @@ -272,7 +266,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) struct unix_domain *udom; struct ip_map ip, *ipmp; - if (dom->flavour != RPC_AUTH_UNIX) + if (dom->flavour != &svcauth_unix) return -EINVAL; udom = container_of(dom, struct unix_domain, h); strcpy(ip.m_class, "nfsd"); @@ -295,7 +289,7 @@ int auth_unix_forget_old(struct auth_domain *dom) { struct unix_domain *udom; - if (dom->flavour != RPC_AUTH_UNIX) + if (dom->flavour != &svcauth_unix) return -EINVAL; udom = container_of(dom, struct unix_domain, h); udom->addr_changes++; @@ -323,7 +317,7 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) rv = NULL; } else { rv = &ipm->m_client->h; - cache_get(&rv->h); + kref_get(&rv->ref); } ip_map_put(&ipm->h, &ip_map_cache); return rv; @@ -332,7 +326,6 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) void svcauth_unix_purge(void) { cache_purge(&ip_map_cache); - cache_purge(&auth_domain_cache); } static int @@ -361,7 +354,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) return SVC_DENIED; case 0: rqstp->rq_client = &ipm->m_client->h; - cache_get(&rqstp->rq_client->h); + kref_get(&rqstp->rq_client->ref); ip_map_put(&ipm->h, &ip_map_cache); break; } -- cgit v1.2.3 From eab7e2e647c348b418e8715ecaca0177e1b473c7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:00 -0800 Subject: [PATCH] knfsd: Break the hard linkage from svc_expkey to svc_export Current 
svc_expkey holds a pointer to the svc_export structure, so updates to that structure have to be in-place, which is a wart on the whole cache infrastruct. So we break that linkage and just do a second lookup. If this became a performance issue, it would be possible to put a direct link back in which was only used conditionally. i.e. when an object is replaced in the cache, we set a flag in the old object. When dereferencing the link from svc_expkey, if the flag is set, we drop the reference and do a fresh lookup. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 60 ++++++++++++++++++++++++++++++--------------- include/linux/nfsd/export.h | 20 +++------------ 2 files changed, 44 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ac0997731fce..587829ed651c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -73,8 +73,10 @@ void expkey_put(struct cache_head *item, struct cache_detail *cd) if (cache_put(item, cd)) { struct svc_expkey *key = container_of(item, struct svc_expkey, h); if (test_bit(CACHE_VALID, &item->flags) && - !test_bit(CACHE_NEGATIVE, &item->flags)) - exp_put(key->ek_export); + !test_bit(CACHE_NEGATIVE, &item->flags)) { + dput(key->ek_dentry); + mntput(key->ek_mnt); + } auth_domain_put(key->ek_client); kfree(key); } @@ -164,26 +166,18 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) } else { struct nameidata nd; struct svc_expkey *ek; - struct svc_export *exp; err = path_lookup(buf, 0, &nd); if (err) goto out; dprintk("Found the path %s\n", buf); - exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL); - - err = -ENOENT; - if (!exp) - goto out_nd; - key.ek_export = exp; - dprintk("And found export\n"); + key.ek_mnt = nd.mnt; + key.ek_dentry = nd.dentry; ek = svc_expkey_lookup(&key, 1); if (ek) expkey_put(&ek->h, &svc_expkey_cache); - exp_put(exp); err = 0; - out_nd: path_release(&nd); } cache_flush(); @@ -214,7 +208,7 @@ static int expkey_show(struct seq_file *m, if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) { seq_printf(m, " "); - seq_path(m, ek->ek_export->ex_mnt, ek->ek_export->ex_dentry, "\\ \t\n"); + seq_path(m, ek->ek_mnt, ek->ek_dentry, "\\ \t\n"); } seq_printf(m, "\n"); return 0; @@ -252,8 +246,8 @@ static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *it static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item) { - cache_get(&item->ek_export->h); - new->ek_export = item->ek_export; + new->ek_mnt = mntget(item->ek_mnt); + new->ek_dentry = dget(item->ek_dentry); } static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */ @@ -519,7 +513,8 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, key.ek_client = clp; key.ek_fsidtype = fsid_type; memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - key.ek_export = exp; + key.ek_mnt = exp->ex_mnt; + key.ek_dentry = exp->ex_dentry; key.h.expiry_time = NEVER; key.h.flags = 0; @@ -741,8 +736,8 @@ exp_export(struct nfsctl_export *nxp) if ((nxp->ex_flags & NFSEXP_FSID) && (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) && !IS_ERR(fsid_key) && - fsid_key->ek_export && - fsid_key->ek_export != exp) + fsid_key->ek_mnt && + (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) ) goto finish; if (exp) { @@ -912,6 +907,24 @@ out: return err; } +struct svc_export * +exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, + struct cache_req *reqp) +{ + struct svc_export *exp; + 
struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); + if (!ek || IS_ERR(ek)) + return ERR_PTR(PTR_ERR(ek)); + + exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); + expkey_put(&ek->h, &svc_expkey_cache); + + if (!exp || IS_ERR(exp)) + return ERR_PTR(PTR_ERR(exp)); + return exp; +} + + /* * Called when we need the filehandle for the root of the pseudofs, * for a given NFSv4 client. The root is defined to be the @@ -922,6 +935,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, struct cache_req *creq) { struct svc_expkey *fsid_key; + struct svc_export *exp; int rv; u32 fsidv[2]; @@ -933,8 +947,14 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, if (!fsid_key || IS_ERR(fsid_key)) return nfserr_perm; - rv = fh_compose(fhp, fsid_key->ek_export, - fsid_key->ek_export->ex_dentry, NULL); + exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq); + if (exp == NULL) + rv = nfserr_perm; + else if (IS_ERR(exp)) + rv = nfserrno(PTR_ERR(exp)); + else + rv = fh_compose(fhp, exp, + fsid_key->ek_dentry, NULL); expkey_put(&fsid_key->h, &svc_expkey_cache); return rv; } diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 6bad4766d3d9..d52e0b7ad37b 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -67,7 +67,8 @@ struct svc_expkey { int ek_fsidtype; u32 ek_fsid[3]; - struct svc_export * ek_export; + struct vfsmount * ek_mnt; + struct dentry * ek_dentry; }; #define EX_SECURE(exp) (!((exp)->ex_flags & NFSEXP_INSECURE_PORT)) @@ -114,22 +115,9 @@ static inline void exp_get(struct svc_export *exp) { cache_get(&exp->h); } -static inline struct svc_export * +extern struct svc_export * exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, - struct cache_req *reqp) -{ - struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); - if (ek && !IS_ERR(ek)) { - struct svc_export *exp = ek->ek_export; - int err; - exp_get(exp); - expkey_put(&ek->h, &svc_expkey_cache); - if ((err = cache_check(&svc_export_cache, &exp->h, reqp))) - exp = ERR_PTR(err); - return exp; - } else - return ERR_PTR(PTR_ERR(ek)); -} + struct cache_req *reqp); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 7d317f2c9f1e9dcf4f632fa98f91d1d4a36c4cae Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:01 -0800 Subject: [PATCH] knfsd: Get rid of 'inplace' sunrpc caches These were an unnecessary wart. Also only have one 'DefineSimpleCache..' instead of two. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 4 ++-- fs/nfsd/nfs4idmap.c | 10 ++-------- include/linux/sunrpc/cache.h | 28 +++++++++++----------------- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++-- net/sunrpc/svcauth_unix.c | 2 +- 5 files changed, 18 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 587829ed651c..c591761a1ad6 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -250,7 +250,7 @@ static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey * new->ek_dentry = dget(item->ek_dentry); } -static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */ +static DefineSimpleCacheLookup(svc_expkey, svc_expkey) #define EXPORT_HASHBITS 8 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) @@ -482,7 +482,7 @@ static inline void svc_export_update(struct svc_export *new, struct svc_export * new->ex_fsid = item->ex_fsid; } -static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */ +static DefineSimpleCacheLookup(svc_export, svc_export) struct svc_expkey * diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 13369650cdf9..dea690aa8bb5 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -76,12 +76,6 @@ struct ent { char authname[IDMAP_NAMESZ]; }; -#define DefineSimpleCacheLookupMap(STRUCT, FUNC) \ - DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ - (struct STRUCT *item, int set), /*no setup */, \ - & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ - STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0) - /* Common entry handling */ #define ENT_HASHBITS 8 @@ -264,7 +258,7 @@ out: return error; } -static DefineSimpleCacheLookupMap(ent, idtoname); +static DefineSimpleCacheLookup(ent, idtoname); /* * Name -> ID cache @@ -390,7 +384,7 @@ out: return (error); } -static DefineSimpleCacheLookupMap(ent, nametoid); +static DefineSimpleCacheLookup(ent, nametoid); /* * Exported API diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index c4e3ea7cf154..405ac14e509a 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -133,14 +133,11 @@ struct cache_deferred_req { * If "set" == 0 : * If an entry is found, it is returned * If no entry is found, a new non-VALID entry is created. - * If "set" == 1 and INPLACE == 0 : + * If "set" == 1 : * If no entry is found a new one is inserted with data from "template" * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE * If a CACHE_VALID entry is found, a new entry is swapped in with data * from "template" - * If set == 1, and INPLACE == 1 : - * As above, except that if a CACHE_VALID entry is found, we UPDATE in place - * instead of swapping in a new entry. * * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not * run but insteead CACHE_NEGATIVE is set in any new item. @@ -159,13 +156,8 @@ struct cache_deferred_req { * TEST tests if "tmp" matches "item" * INIT copies key information from "item" to "new" * UPDATE copies content information from "item" to "tmp" - * INPLACE is true if updates can happen inplace rather than allocating a new structure - * - * WARNING: any substantial changes to this must be reflected in - * net/sunrpc/svcauth.c(auth_domain_lookup) - * which is a similar routine that is open-coded. 
*/ -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE,INPLACE) \ +#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE) \ RTN *FNAME ARGS \ { \ RTN *tmp, *new=NULL; \ @@ -179,13 +171,13 @@ RTN *FNAME ARGS \ tmp = container_of(*hp, RTN, MEMBER); \ if (TEST) { /* found a match */ \ \ - if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ + if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ break; \ \ if (new) \ {INIT;} \ if (set) { \ - if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ + if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ { /* need to swap in new */ \ RTN *t2; \ \ @@ -206,7 +198,7 @@ RTN *FNAME ARGS \ else read_unlock(&(DETAIL)->hash_lock); \ if (set) \ cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \ - if (set && !INPLACE && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ + if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL); \ return tmp; \ } \ @@ -239,10 +231,12 @@ RTN *FNAME ARGS \ return NULL; \ } -#define DefineSimpleCacheLookup(STRUCT,INPLACE) \ - DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), /*no setup */, \ - & STRUCT##_cache, STRUCT##_hash(item), STRUCT##_match(item, tmp),\ - STRUCT##_init(new, item), STRUCT##_update(tmp, item),INPLACE) +#define DefineSimpleCacheLookup(STRUCT, FUNC) \ + DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ + (struct STRUCT *item, int set), /*no setup */, \ + & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ + STRUCT##_init(new, item), STRUCT##_update(tmp, item)) + #define cache_for_each(pos, detail, index, member) \ for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 6b073c2e6930..aadb4e8d6aa7 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -259,7 +259,7 @@ static struct cache_detail rsi_cache = { .cache_parse = rsi_parse, }; -static DefineSimpleCacheLookup(rsi, 0) +static DefineSimpleCacheLookup(rsi, rsi) /* * The rpcsec_context cache is used to store a context that is @@ -446,7 +446,7 @@ static struct cache_detail rsc_cache = { .cache_parse = rsc_parse, }; -static DefineSimpleCacheLookup(rsc, 0); +static DefineSimpleCacheLookup(rsc, rsc); static struct rsc * gss_svc_searchbyctx(struct xdr_netobj *handle) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 17e8b2a3130c..7ddf068b5b25 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -258,7 +258,7 @@ struct cache_detail ip_map_cache = { .cache_show = ip_map_show, }; -static DefineSimpleCacheLookup(ip_map, 0) +static DefineSimpleCacheLookup(ip_map, ip_map) int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) -- cgit v1.2.3 From 4f7774c3a0558526c0faaf525581f2029480b313 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:03 -0800 Subject: [PATCH] knfsd: Use new cache_lookup for svc_export Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 125 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 88 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c591761a1ad6..2ebd77d758bc 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -258,16 +258,6 @@ static DefineSimpleCacheLookup(svc_expkey, svc_expkey) static 
struct cache_head *export_table[EXPORT_HASHMAX]; -static inline int svc_export_hash(struct svc_export *item) -{ - int rv; - - rv = hash_ptr(item->ex_client, EXPORT_HASHBITS); - rv ^= hash_ptr(item->ex_dentry, EXPORT_HASHBITS); - rv ^= hash_ptr(item->ex_mnt, EXPORT_HASHBITS); - return rv; -} - void svc_export_put(struct cache_head *item, struct cache_detail *cd) { if (cache_put(item, cd)) { @@ -298,7 +288,8 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static struct svc_export *svc_export_lookup(struct svc_export *, int); +struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); +static struct svc_export *svc_export_lookup(struct svc_export *); static int check_export(struct inode *inode, int flags) { @@ -411,11 +402,16 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (err) goto out; } - expp = svc_export_lookup(&exp, 1); + expp = svc_export_lookup(&exp); if (expp) - exp_put(expp); - err = 0; + expp = svc_export_update(&exp, expp); + else + err = -ENOMEM; cache_flush(); + if (expp == NULL) + err = -ENOMEM; + else + exp_put(expp); out: if (nd.dentry) path_release(&nd); @@ -449,40 +445,95 @@ static int svc_export_show(struct seq_file *m, seq_puts(m, ")\n"); return 0; } -struct cache_detail svc_export_cache = { - .owner = THIS_MODULE, - .hash_size = EXPORT_HASHMAX, - .hash_table = export_table, - .name = "nfsd.export", - .cache_put = svc_export_put, - .cache_request = svc_export_request, - .cache_parse = svc_export_parse, - .cache_show = svc_export_show, -}; - -static inline int svc_export_match(struct svc_export *a, struct svc_export *b) +static int svc_export_match(struct cache_head *a, struct cache_head *b) { - return a->ex_client == b->ex_client && - a->ex_dentry == b->ex_dentry && - a->ex_mnt == b->ex_mnt; + struct svc_export *orig = container_of(a, struct svc_export, h); + struct svc_export *new = container_of(b, struct svc_export, h); + return orig->ex_client == new->ex_client && + orig->ex_dentry == new->ex_dentry && + orig->ex_mnt == new->ex_mnt; } -static inline void svc_export_init(struct svc_export *new, struct svc_export *item) + +static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) { + struct svc_export *new = container_of(cnew, struct svc_export, h); + struct svc_export *item = container_of(citem, struct svc_export, h); + kref_get(&item->ex_client->ref); new->ex_client = item->ex_client; new->ex_dentry = dget(item->ex_dentry); new->ex_mnt = mntget(item->ex_mnt); } -static inline void svc_export_update(struct svc_export *new, struct svc_export *item) +static void export_update(struct cache_head *cnew, struct cache_head *citem) { + struct svc_export *new = container_of(cnew, struct svc_export, h); + struct svc_export *item = container_of(citem, struct svc_export, h); + new->ex_flags = item->ex_flags; new->ex_anon_uid = item->ex_anon_uid; new->ex_anon_gid = item->ex_anon_gid; new->ex_fsid = item->ex_fsid; } -static DefineSimpleCacheLookup(svc_export, svc_export) +static struct cache_head *svc_export_alloc(void) +{ + struct svc_export *i = kmalloc(sizeof(*i), GFP_KERNEL); + if (i) + return &i->h; + else + return NULL; +} + +struct cache_detail svc_export_cache = { + .owner = THIS_MODULE, + .hash_size = EXPORT_HASHMAX, + .hash_table = export_table, + .name = "nfsd.export", + .cache_put = svc_export_put, + .cache_request = svc_export_request, + .cache_parse = svc_export_parse, + .cache_show = svc_export_show, + .match = svc_export_match, + .init = svc_export_init, 
+ .update = export_update, + .alloc = svc_export_alloc, +}; + +static struct svc_export * +svc_export_lookup(struct svc_export *exp) +{ + struct cache_head *ch; + int hash; + hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS); + hash ^= hash_ptr(exp->ex_dentry, EXPORT_HASHBITS); + hash ^= hash_ptr(exp->ex_mnt, EXPORT_HASHBITS); + + ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h, + hash); + if (ch) + return container_of(ch, struct svc_export, h); + else + return NULL; +} + +struct svc_export * +svc_export_update(struct svc_export *new, struct svc_export *old) +{ + struct cache_head *ch; + int hash; + hash = hash_ptr(old->ex_client, EXPORT_HASHBITS); + hash ^= hash_ptr(old->ex_dentry, EXPORT_HASHBITS); + hash ^= hash_ptr(old->ex_mnt, EXPORT_HASHBITS); + + ch = sunrpc_cache_update(&svc_export_cache, &new->h, + &old->h, + hash); + if (ch) + return container_of(ch, struct svc_export, h); + else + return NULL; +} struct svc_expkey * @@ -568,7 +619,7 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, key.ex_mnt = mnt; key.ex_dentry = dentry; - exp = svc_export_lookup(&key, 0); + exp = svc_export_lookup(&key); if (exp != NULL) switch (cache_check(&svc_export_cache, &exp->h, reqp)) { case 0: break; @@ -770,13 +821,13 @@ exp_export(struct nfsctl_export *nxp) new.ex_anon_gid = nxp->ex_anon_gid; new.ex_fsid = nxp->ex_dev; - exp = svc_export_lookup(&new, 1); + exp = svc_export_lookup(&new); + if (exp) + exp = svc_export_update(&new, exp); - if (exp == NULL) + if (!exp) goto finish; - err = 0; - if (exp_hash(clp, exp) || exp_fsid_hash(clp, exp)) { /* failed to create at least one index */ -- cgit v1.2.3 From 8d270f7f4cdab792d7263926ac59f747258735ee Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:04 -0800 Subject: [PATCH] knfsd: Use new cache_lookup for svc_expkey cache Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 136 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 99 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 2ebd77d758bc..abd68965822f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -57,17 +57,6 @@ static int exp_verify_string(char *cp, int max); #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) static struct cache_head *expkey_table[EXPKEY_HASHMAX]; -static inline int svc_expkey_hash(struct svc_expkey *item) -{ - int hash = item->ek_fsidtype; - char * cp = (char*)item->ek_fsid; - int len = key_len(item->ek_fsidtype); - - hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); - hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS); - return hash & EXPKEY_HASHMASK; -} - void expkey_put(struct cache_head *item, struct cache_detail *cd) { if (cache_put(item, cd)) { @@ -97,7 +86,8 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *, int); +static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); +static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client fsidtype fsid [path] */ @@ -108,6 +98,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) int fsidtype; char *ep; struct svc_expkey key; + struct svc_expkey *ek; if (mesg[mlen-1] != '\n') return -EINVAL; @@ -152,20 +143,25 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) key.ek_fsidtype = fsidtype; 
memcpy(key.ek_fsid, buf, len); + ek = svc_expkey_lookup(&key); + err = -ENOMEM; + if (!ek) + goto out; + /* now we want a pathname, or empty meaning NEGATIVE */ + err = -EINVAL; if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0) goto out; dprintk("Path seems to be <%s>\n", buf); err = 0; if (len == 0) { - struct svc_expkey *ek; set_bit(CACHE_NEGATIVE, &key.h.flags); - ek = svc_expkey_lookup(&key, 1); + ek = svc_expkey_update(&key, ek); if (ek) expkey_put(&ek->h, &svc_expkey_cache); + else err = -ENOMEM; } else { struct nameidata nd; - struct svc_expkey *ek; err = path_lookup(buf, 0, &nd); if (err) goto out; @@ -174,10 +170,11 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) key.ek_mnt = nd.mnt; key.ek_dentry = nd.dentry; - ek = svc_expkey_lookup(&key, 1); + ek = svc_expkey_update(&key, ek); if (ek) expkey_put(&ek->h, &svc_expkey_cache); - err = 0; + else + err = -ENOMEM; path_release(&nd); } cache_flush(); @@ -213,29 +210,25 @@ static int expkey_show(struct seq_file *m, seq_printf(m, "\n"); return 0; } - -struct cache_detail svc_expkey_cache = { - .owner = THIS_MODULE, - .hash_size = EXPKEY_HASHMAX, - .hash_table = expkey_table, - .name = "nfsd.fh", - .cache_put = expkey_put, - .cache_request = expkey_request, - .cache_parse = expkey_parse, - .cache_show = expkey_show, -}; -static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b) +static inline int expkey_match (struct cache_head *a, struct cache_head *b) { - if (a->ek_fsidtype != b->ek_fsidtype || - a->ek_client != b->ek_client || - memcmp(a->ek_fsid, b->ek_fsid, key_len(a->ek_fsidtype)) != 0) + struct svc_expkey *orig = container_of(a, struct svc_expkey, h); + struct svc_expkey *new = container_of(b, struct svc_expkey, h); + + if (orig->ek_fsidtype != new->ek_fsidtype || + orig->ek_client != new->ek_client || + memcmp(orig->ek_fsid, new->ek_fsid, key_len(orig->ek_fsidtype)) != 0) return 0; return 1; } -static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item) +static inline void expkey_init(struct cache_head *cnew, + struct cache_head *citem) { + struct svc_expkey *new = container_of(cnew, struct svc_expkey, h); + struct svc_expkey *item = container_of(citem, struct svc_expkey, h); + kref_get(&item->ek_client->ref); new->ek_client = item->ek_client; new->ek_fsidtype = item->ek_fsidtype; @@ -244,13 +237,80 @@ static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *it new->ek_fsid[2] = item->ek_fsid[2]; } -static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item) +static inline void expkey_update(struct cache_head *cnew, + struct cache_head *citem) { + struct svc_expkey *new = container_of(cnew, struct svc_expkey, h); + struct svc_expkey *item = container_of(citem, struct svc_expkey, h); + new->ek_mnt = mntget(item->ek_mnt); new->ek_dentry = dget(item->ek_dentry); } -static DefineSimpleCacheLookup(svc_expkey, svc_expkey) +static struct cache_head *expkey_alloc(void) +{ + struct svc_expkey *i = kmalloc(sizeof(*i), GFP_KERNEL); + if (i) + return &i->h; + else + return NULL; +} + +struct cache_detail svc_expkey_cache = { + .owner = THIS_MODULE, + .hash_size = EXPKEY_HASHMAX, + .hash_table = expkey_table, + .name = "nfsd.fh", + .cache_put = expkey_put, + .cache_request = expkey_request, + .cache_parse = expkey_parse, + .cache_show = expkey_show, + .match = expkey_match, + .init = expkey_init, + .update = expkey_update, + .alloc = expkey_alloc, +}; + +static struct svc_expkey * +svc_expkey_lookup(struct svc_expkey *item) +{ + 
struct cache_head *ch; + int hash = item->ek_fsidtype; + char * cp = (char*)item->ek_fsid; + int len = key_len(item->ek_fsidtype); + + hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); + hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS); + hash &= EXPKEY_HASHMASK; + + ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h, + hash); + if (ch) + return container_of(ch, struct svc_expkey, h); + else + return NULL; +} + +static struct svc_expkey * +svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) +{ + struct cache_head *ch; + int hash = new->ek_fsidtype; + char * cp = (char*)new->ek_fsid; + int len = key_len(new->ek_fsidtype); + + hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); + hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS); + hash &= EXPKEY_HASHMASK; + + ch = sunrpc_cache_update(&svc_expkey_cache, &new->h, + &old->h, hash); + if (ch) + return container_of(ch, struct svc_expkey, h); + else + return NULL; +} + #define EXPORT_HASHBITS 8 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) @@ -549,7 +609,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) key.ek_fsidtype = fsid_type; memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - ek = svc_expkey_lookup(&key, 0); + ek = svc_expkey_lookup(&key); if (ek != NULL) if ((err = cache_check(&svc_expkey_cache, &ek->h, reqp))) ek = ERR_PTR(err); @@ -569,7 +629,9 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, key.h.expiry_time = NEVER; key.h.flags = 0; - ek = svc_expkey_lookup(&key, 1); + ek = svc_expkey_lookup(&key); + if (ek) + ek = svc_expkey_update(&key,ek); if (ek) { expkey_put(&ek->h, &svc_expkey_cache); return 0; -- cgit v1.2.3 From f9ecc921b5b5e135050e7f22fef873c249aee3fc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:06 -0800 Subject: [PATCH] knfsd: Use new cache code for name/id lookup caches Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4idmap.c | 126 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 103 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index dea690aa8bb5..75cfbb68b205 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -82,9 +82,12 @@ struct ent { #define ENT_HASHMAX (1 << ENT_HASHBITS) #define ENT_HASHMASK (ENT_HASHMAX - 1) -static inline void -ent_init(struct ent *new, struct ent *itm) +static void +ent_init(struct cache_head *cnew, struct cache_head *citm) { + struct ent *new = container_of(cnew, struct ent, h); + struct ent *itm = container_of(citm, struct ent, h); + new->id = itm->id; new->type = itm->type; @@ -92,12 +95,6 @@ ent_init(struct ent *new, struct ent *itm) strlcpy(new->authname, itm->authname, sizeof(new->name)); } -static inline void -ent_update(struct ent *new, struct ent *itm) -{ - ent_init(new, itm); -} - static void ent_put(struct cache_head *ch, struct cache_detail *cd) { @@ -107,6 +104,16 @@ ent_put(struct cache_head *ch, struct cache_detail *cd) } } +static struct cache_head * +ent_alloc(void) +{ + struct ent *e = kmalloc(sizeof(*e), GFP_KERNEL); + if (e) + return &e->h; + else + return NULL; +} + /* * ID -> Name cache */ @@ -143,9 +150,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, (*bpp)[-1] = '\n'; } -static inline int -idtoname_match(struct ent *a, struct ent *b) +static int +idtoname_match(struct cache_head *ca, struct cache_head *cb) { + struct ent *a = container_of(ca, struct ent, h); + struct ent *b = container_of(cb, struct ent, h); + return 
(a->id == b->id && a->type == b->type && strcmp(a->authname, b->authname) == 0); } @@ -178,7 +188,8 @@ warn_no_idmapd(struct cache_detail *detail) static int idtoname_parse(struct cache_detail *, char *, int); -static struct ent *idtoname_lookup(struct ent *, int); +static struct ent *idtoname_lookup(struct ent *); +static struct ent *idtoname_update(struct ent *, struct ent *); static struct cache_detail idtoname_cache = { .owner = THIS_MODULE, @@ -190,6 +201,10 @@ static struct cache_detail idtoname_cache = { .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, + .match = idtoname_match, + .init = ent_init, + .update = ent_init, + .alloc = ent_alloc, }; int @@ -232,6 +247,11 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) if (ent.h.expiry_time == 0) goto out; + error = -ENOMEM; + res = idtoname_lookup(&ent); + if (!res) + goto out; + /* Name */ error = qword_get(&buf, buf1, PAGE_SIZE); if (error == -EINVAL) @@ -246,7 +266,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) memcpy(ent.name, buf1, sizeof(ent.name)); } error = -ENOMEM; - if ((res = idtoname_lookup(&ent, 1)) == NULL) + res = idtoname_update(&ent, res); + if (res == NULL) goto out; ent_put(&res->h, &idtoname_cache); @@ -258,7 +279,31 @@ out: return error; } -static DefineSimpleCacheLookup(ent, idtoname); + +static struct ent * +idtoname_lookup(struct ent *item) +{ + struct cache_head *ch = sunrpc_cache_lookup(&idtoname_cache, + &item->h, + idtoname_hash(item)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} + +static struct ent * +idtoname_update(struct ent *new, struct ent *old) +{ + struct cache_head *ch = sunrpc_cache_update(&idtoname_cache, + &new->h, &old->h, + idtoname_hash(new)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} + /* * Name -> ID cache @@ -285,9 +330,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, (*bpp)[-1] = '\n'; } -static inline int -nametoid_match(struct ent *a, struct ent *b) +static int +nametoid_match(struct cache_head *ca, struct cache_head *cb) { + struct ent *a = container_of(ca, struct ent, h); + struct ent *b = container_of(cb, struct ent, h); + return (a->type == b->type && strcmp(a->name, b->name) == 0 && strcmp(a->authname, b->authname) == 0); } @@ -311,7 +359,8 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) return 0; } -static struct ent *nametoid_lookup(struct ent *, int); +static struct ent *nametoid_lookup(struct ent *); +static struct ent *nametoid_update(struct ent *, struct ent *); static int nametoid_parse(struct cache_detail *, char *, int); static struct cache_detail nametoid_cache = { @@ -324,6 +373,10 @@ static struct cache_detail nametoid_cache = { .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, + .match = nametoid_match, + .init = ent_init, + .update = ent_init, + .alloc = ent_alloc, }; static int @@ -373,7 +426,11 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) set_bit(CACHE_NEGATIVE, &ent.h.flags); error = -ENOMEM; - if ((res = nametoid_lookup(&ent, 1)) == NULL) + res = nametoid_lookup(&ent); + if (res == NULL) + goto out; + res = nametoid_update(&ent, res); + if (res == NULL) goto out; ent_put(&res->h, &nametoid_cache); @@ -384,7 +441,30 @@ out: return (error); } -static DefineSimpleCacheLookup(ent, nametoid); + +static struct ent * +nametoid_lookup(struct ent *item) +{ + struct cache_head *ch = 
sunrpc_cache_lookup(&nametoid_cache, + &item->h, + nametoid_hash(item)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} + +static struct ent * +nametoid_update(struct ent *new, struct ent *old) +{ + struct cache_head *ch = sunrpc_cache_update(&nametoid_cache, + &new->h, &old->h, + nametoid_hash(new)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} /* * Exported API @@ -452,24 +532,24 @@ idmap_defer(struct cache_req *req) } static inline int -do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *, int), struct ent *key, +do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key, struct cache_detail *detail, struct ent **item, struct idmap_defer_req *mdr) { - *item = lookup_fn(key, 0); + *item = lookup_fn(key); if (!*item) return -ENOMEM; return cache_check(detail, &(*item)->h, &mdr->req); } static inline int -do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *, int), +do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), struct ent *key, struct cache_detail *detail, struct ent **item) { int ret = -ENOMEM; - *item = lookup_fn(key, 0); + *item = lookup_fn(key); if (!*item) goto out_err; ret = -ETIMEDOUT; @@ -490,7 +570,7 @@ out_err: static int idmap_lookup(struct svc_rqst *rqstp, - struct ent *(*lookup_fn)(struct ent *, int), struct ent *key, + struct ent *(*lookup_fn)(struct ent *), struct ent *key, struct cache_detail *detail, struct ent **item) { struct idmap_defer_req *mdr; -- cgit v1.2.3 From baab935ff3bdac20c558809da0d8e8f761840219 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:09 -0800 Subject: [PATCH] knfsd: Convert sunrpc_cache to use krefs .. it makes some of the code nicer. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 51 ++++++++++++++++++--------------------- fs/nfsd/nfs4idmap.c | 18 ++++++-------- fs/nfsd/nfsfh.c | 2 +- include/linux/nfsd/export.h | 4 +-- include/linux/sunrpc/cache.h | 13 +++++----- net/sunrpc/auth_gss/svcauth_gss.c | 28 +++++++++------------ net/sunrpc/cache.c | 20 +++++++-------- net/sunrpc/svcauth_unix.c | 20 +++++++-------- 8 files changed, 72 insertions(+), 84 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index abd68965822f..cc811a1094cb 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -57,18 +57,17 @@ static int exp_verify_string(char *cp, int max); #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) static struct cache_head *expkey_table[EXPKEY_HASHMAX]; -void expkey_put(struct cache_head *item, struct cache_detail *cd) +void expkey_put(struct kref *ref) { - if (cache_put(item, cd)) { - struct svc_expkey *key = container_of(item, struct svc_expkey, h); - if (test_bit(CACHE_VALID, &item->flags) && - !test_bit(CACHE_NEGATIVE, &item->flags)) { - dput(key->ek_dentry); - mntput(key->ek_mnt); - } - auth_domain_put(key->ek_client); - kfree(key); + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + + if (test_bit(CACHE_VALID, &key->h.flags) && + !test_bit(CACHE_NEGATIVE, &key->h.flags)) { + dput(key->ek_dentry); + mntput(key->ek_mnt); } + auth_domain_put(key->ek_client); + kfree(key); } static void expkey_request(struct cache_detail *cd, @@ -158,7 +157,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) set_bit(CACHE_NEGATIVE, &key.h.flags); ek = svc_expkey_update(&key, ek); if (ek) - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); else err = -ENOMEM; } else { struct nameidata nd; @@ 
-172,7 +171,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) ek = svc_expkey_update(&key, ek); if (ek) - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); else err = -ENOMEM; path_release(&nd); @@ -318,15 +317,13 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) static struct cache_head *export_table[EXPORT_HASHMAX]; -void svc_export_put(struct cache_head *item, struct cache_detail *cd) +static void svc_export_put(struct kref *ref) { - if (cache_put(item, cd)) { - struct svc_export *exp = container_of(item, struct svc_export, h); - dput(exp->ex_dentry); - mntput(exp->ex_mnt); - auth_domain_put(exp->ex_client); - kfree(exp); - } + struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + dput(exp->ex_dentry); + mntput(exp->ex_mnt); + auth_domain_put(exp->ex_client); + kfree(exp); } static void svc_export_request(struct cache_detail *cd, @@ -633,7 +630,7 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, if (ek) ek = svc_expkey_update(&key,ek); if (ek) { - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); return 0; } return -ENOMEM; @@ -762,7 +759,7 @@ static void exp_fsid_unhash(struct svc_export *exp) ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); if (ek && !IS_ERR(ek)) { ek->h.expiry_time = get_seconds()-1; - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); } svc_expkey_cache.nextcheck = get_seconds(); } @@ -800,7 +797,7 @@ static void exp_unhash(struct svc_export *exp) ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); if (ek && !IS_ERR(ek)) { ek->h.expiry_time = get_seconds()-1; - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); } svc_expkey_cache.nextcheck = get_seconds(); } @@ -902,7 +899,7 @@ finish: if (exp) exp_put(exp); if (fsid_key && !IS_ERR(fsid_key)) - expkey_put(&fsid_key->h, &svc_expkey_cache); + cache_put(&fsid_key->h, &svc_expkey_cache); if (clp) auth_domain_put(clp); path_release(&nd); @@ -1030,7 +1027,7 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, return ERR_PTR(PTR_ERR(ek)); exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); if (!exp || IS_ERR(exp)) return ERR_PTR(PTR_ERR(exp)); @@ -1068,7 +1065,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, else rv = fh_compose(fhp, exp, fsid_key->ek_dentry, NULL); - expkey_put(&fsid_key->h, &svc_expkey_cache); + cache_put(&fsid_key->h, &svc_expkey_cache); return rv; } @@ -1187,7 +1184,7 @@ static int e_show(struct seq_file *m, void *p) cache_get(&exp->h); if (cache_check(&svc_export_cache, &exp->h, NULL)) return 0; - if (cache_put(&exp->h, &svc_export_cache)) BUG(); + cache_put(&exp->h, &svc_export_cache); return svc_export_show(m, &svc_export_cache, cp); } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 75cfbb68b205..4b6aa60dfceb 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -96,12 +96,10 @@ ent_init(struct cache_head *cnew, struct cache_head *citm) } static void -ent_put(struct cache_head *ch, struct cache_detail *cd) +ent_put(struct kref *ref) { - if (cache_put(ch, cd)) { - struct ent *map = container_of(ch, struct ent, h); - kfree(map); - } + struct ent *map = container_of(ref, struct ent, h.ref); + kfree(map); } static struct cache_head * @@ -270,7 +268,7 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) if (res == NULL) goto out; - 
ent_put(&res->h, &idtoname_cache); + cache_put(&res->h, &idtoname_cache); error = 0; out: @@ -433,7 +431,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) if (res == NULL) goto out; - ent_put(&res->h, &nametoid_cache); + cache_put(&res->h, &nametoid_cache); error = 0; out: kfree(buf1); @@ -562,7 +560,7 @@ do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), goto out_put; return 0; out_put: - ent_put(&(*item)->h, detail); + cache_put(&(*item)->h, detail); out_err: *item = NULL; return ret; @@ -613,7 +611,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen if (ret) return ret; *id = item->id; - ent_put(&item->h, &nametoid_cache); + cache_put(&item->h, &nametoid_cache); return 0; } @@ -635,7 +633,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) ret = strlen(item->name); BUG_ON(ret > IDMAP_NAMESZ); memcpy(name, item->name, ret); - ent_put(&item->h, &idtoname_cache); + cache_put(&item->h, &idtoname_cache); return ret; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 7a3e397b4ed3..3f2ec2e6d06c 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -506,7 +506,7 @@ fh_put(struct svc_fh *fhp) nfsd_nr_put++; } if (exp) { - svc_export_put(&exp->h, &svc_export_cache); + cache_put(&exp->h, &svc_export_cache); fhp->fh_export = NULL; } return; diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index d52e0b7ad37b..a6c08a47b25c 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -102,13 +102,11 @@ int exp_rootfh(struct auth_domain *, int exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); int nfserrno(int errno); -extern void expkey_put(struct cache_head *item, struct cache_detail *cd); -extern void svc_export_put(struct cache_head *item, struct cache_detail *cd); extern struct cache_detail svc_export_cache, svc_expkey_cache; static inline void exp_put(struct svc_export *exp) { - svc_export_put(&exp->h, &svc_export_cache); + cache_put(&exp->h, &svc_export_cache); } static inline void exp_get(struct svc_export *exp) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index a37fead1873b..ad3f5cbdb770 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -50,7 +50,7 @@ struct cache_head { time_t last_refresh; /* If CACHE_PENDING, this is when upcall * was sent, else this is when update was received */ - atomic_t refcnt; + struct kref ref; unsigned long flags; }; #define CACHE_VALID 0 /* Entry contains valid data */ @@ -68,8 +68,7 @@ struct cache_detail { atomic_t inuse; /* active user-space update or lookup */ char *name; - void (*cache_put)(struct cache_head *, - struct cache_detail*); + void (*cache_put)(struct kref *); void (*cache_request)(struct cache_detail *cd, struct cache_head *h, @@ -151,17 +150,17 @@ extern void cache_clean_deferred(void *owner); static inline struct cache_head *cache_get(struct cache_head *h) { - atomic_inc(&h->refcnt); + kref_get(&h->ref); return h; } -static inline int cache_put(struct cache_head *h, struct cache_detail *cd) +static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { - if (atomic_read(&h->refcnt) <= 2 && + if (atomic_read(&h->ref.refcount) <= 2 && h->expiry_time < cd->nextcheck) cd->nextcheck = h->expiry_time; - return atomic_dec_and_test(&h->refcnt); + kref_put(&h->ref, cd->cache_put); } extern void cache_init(struct cache_head *h); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 
380152603d1e..4d7eb9e704da 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -89,13 +89,11 @@ static void rsi_free(struct rsi *rsii) kfree(rsii->out_token.data); } -static void rsi_put(struct cache_head *item, struct cache_detail *cd) +static void rsi_put(struct kref *ref) { - struct rsi *rsii = container_of(item, struct rsi, h); - if (cache_put(item, cd)) { - rsi_free(rsii); - kfree(rsii); - } + struct rsi *rsii = container_of(ref, struct rsi, h.ref); + rsi_free(rsii); + kfree(rsii); } static inline int rsi_hash(struct rsi *item) @@ -267,7 +265,7 @@ static int rsi_parse(struct cache_detail *cd, out: rsi_free(&rsii); if (rsip) - rsi_put(&rsip->h, &rsi_cache); + cache_put(&rsip->h, &rsi_cache); else status = -ENOMEM; return status; @@ -357,14 +355,12 @@ static void rsc_free(struct rsc *rsci) put_group_info(rsci->cred.cr_group_info); } -static void rsc_put(struct cache_head *item, struct cache_detail *cd) +static void rsc_put(struct kref *ref) { - struct rsc *rsci = container_of(item, struct rsc, h); + struct rsc *rsci = container_of(ref, struct rsc, h.ref); - if (cache_put(item, cd)) { - rsc_free(rsci); - kfree(rsci); - } + rsc_free(rsci); + kfree(rsci); } static inline int @@ -509,7 +505,7 @@ static int rsc_parse(struct cache_detail *cd, out: rsc_free(&rsci); if (rscp) - rsc_put(&rscp->h, &rsc_cache); + cache_put(&rscp->h, &rsc_cache); else status = -ENOMEM; return status; @@ -1076,7 +1072,7 @@ drop: ret = SVC_DROP; out: if (rsci) - rsc_put(&rsci->h, &rsc_cache); + cache_put(&rsci->h, &rsc_cache); return ret; } @@ -1168,7 +1164,7 @@ out_err: put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; if (gsd->rsci) - rsc_put(&gsd->rsci->h, &rsc_cache); + cache_put(&gsd->rsci->h, &rsc_cache); gsd->rsci = NULL; return stat; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index edcda4fd88e8..dd81e5928172 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -42,7 +42,7 @@ void cache_init(struct cache_head *h) time_t now = get_seconds(); h->next = NULL; h->flags = 0; - atomic_set(&h->refcnt, 1); + kref_init(&h->ref); h->expiry_time = now + CACHE_NEW_EXPIRY; h->last_refresh = now; } @@ -81,7 +81,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, if (detail->match(tmp, key)) { cache_get(tmp); write_unlock(&detail->hash_lock); - detail->cache_put(new, detail); + cache_put(new, detail); return tmp; } } @@ -145,7 +145,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, /* We need to insert a new entry */ tmp = detail->alloc(); if (!tmp) { - detail->cache_put(old, detail); + cache_put(old, detail); return NULL; } cache_init(tmp); @@ -165,7 +165,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, write_unlock(&detail->hash_lock); cache_fresh_unlocked(tmp, detail, is_new); cache_fresh_unlocked(old, detail, 0); - detail->cache_put(old, detail); + cache_put(old, detail); return tmp; } EXPORT_SYMBOL(sunrpc_cache_update); @@ -234,7 +234,7 @@ int cache_check(struct cache_detail *detail, cache_defer_req(rqstp, h); if (rv) - detail->cache_put(h, detail); + cache_put(h, detail); return rv; } @@ -431,7 +431,7 @@ static int cache_clean(void) if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) queue_loose(current_detail, ch); - if (atomic_read(&ch->refcnt) == 1) + if (atomic_read(&ch->ref.refcount) == 1) break; } if (ch) { @@ -446,7 +446,7 @@ static int cache_clean(void) current_index ++; spin_unlock(&cache_list_lock); if (ch) - d->cache_put(ch, d); + cache_put(ch, d); } else 
spin_unlock(&cache_list_lock); @@ -723,7 +723,7 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) !test_bit(CACHE_PENDING, &rq->item->flags)) { list_del(&rq->q.list); spin_unlock(&queue_lock); - cd->cache_put(rq->item, cd); + cache_put(rq->item, cd); kfree(rq->buf); kfree(rq); } else @@ -906,7 +906,7 @@ static void queue_loose(struct cache_detail *detail, struct cache_head *ch) continue; list_del(&cr->q.list); spin_unlock(&queue_lock); - detail->cache_put(cr->item, detail); + cache_put(cr->item, detail); kfree(cr->buf); kfree(cr); return; @@ -1192,7 +1192,7 @@ static int c_show(struct seq_file *m, void *p) ifdebug(CACHE) seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", - cp->expiry_time, atomic_read(&cp->refcnt), cp->flags); + cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 7e38621a20b7..11020c0b7db5 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -84,15 +84,15 @@ struct ip_map { }; static struct cache_head *ip_table[IP_HASHMAX]; -static void ip_map_put(struct cache_head *item, struct cache_detail *cd) +static void ip_map_put(struct kref *kref) { + struct cache_head *item = container_of(kref, struct cache_head, ref); struct ip_map *im = container_of(item, struct ip_map,h); - if (cache_put(item, cd)) { - if (test_bit(CACHE_VALID, &item->flags) && - !test_bit(CACHE_NEGATIVE, &item->flags)) - auth_domain_put(&im->m_client->h); - kfree(im); - } + + if (test_bit(CACHE_VALID, &item->flags) && + !test_bit(CACHE_NEGATIVE, &item->flags)) + auth_domain_put(&im->m_client->h); + kfree(im); } #if IP_HASHBITS == 8 @@ -315,7 +315,7 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex hash_ip((unsigned long)ipm->m_addr.s_addr)); if (!ch) return -ENOMEM; - ip_map_put(ch, &ip_map_cache); + cache_put(ch, &ip_map_cache); return 0; } @@ -369,7 +369,7 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) rv = &ipm->m_client->h; kref_get(&rv->ref); } - ip_map_put(&ipm->h, &ip_map_cache); + cache_put(&ipm->h, &ip_map_cache); return rv; } @@ -403,7 +403,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) case 0: rqstp->rq_client = &ipm->m_client->h; kref_get(&rqstp->rq_client->ref); - ip_map_put(&ipm->h, &ip_map_cache); + cache_put(&ipm->h, &ip_map_cache); break; } return SVC_OK; -- cgit v1.2.3 From 74cae61ab45f19a3e8c4d9f53c0e94df129c7915 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 27 Mar 2006 01:15:10 -0800 Subject: [PATCH] fs/nfsd/export.c,net/sunrpc/cache.c: make needlessly global code static We can now make some code static. 
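(Illustrative sketch, not part of the patch: the conversion relies on a small C detail — when a formerly-extern object such as svc_expkey_cache is made static but its initialized definition sits below its first user, a static forward declaration has to be added higher up, exactly as the hunk below does. A minimal stand-alone example with invented example_* names:)

#include <linux/errno.h>
#include <linux/sunrpc/cache.h>

/*
 * Invented example of the pattern: example_cache used to be extern;
 * making it static needs a forward declaration because the initialized
 * definition appears after its first use.
 */
static struct cache_detail example_cache;

static int example_parse(struct cache_detail *cd, char *mesg, int mlen)
{
	/* The forward declaration lets this function name the cache. */
	return (cd == &example_cache) ? 0 : -EINVAL;
}

static struct cache_detail example_cache = {
	.name        = "example",
	.cache_parse = example_parse,
};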
Signed-off-by: Adrian Bunk Cc: Neil Brown Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 13 ++++++++----- include/linux/nfsd/export.h | 5 +---- include/linux/sunrpc/cache.h | 1 - net/sunrpc/cache.c | 2 +- net/sunrpc/sunrpc_syms.c | 1 - 5 files changed, 10 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index cc811a1094cb..c340be0a3f59 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -57,7 +57,7 @@ static int exp_verify_string(char *cp, int max); #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) static struct cache_head *expkey_table[EXPKEY_HASHMAX]; -void expkey_put(struct kref *ref) +static void expkey_put(struct kref *ref) { struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); @@ -87,6 +87,8 @@ static void expkey_request(struct cache_detail *cd, static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); +static struct cache_detail svc_expkey_cache; + static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client fsidtype fsid [path] */ @@ -255,7 +257,7 @@ static struct cache_head *expkey_alloc(void) return NULL; } -struct cache_detail svc_expkey_cache = { +static struct cache_detail svc_expkey_cache = { .owner = THIS_MODULE, .hash_size = EXPKEY_HASHMAX, .hash_table = expkey_table, @@ -345,7 +347,8 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); +static struct svc_export *svc_export_update(struct svc_export *new, + struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); static int check_export(struct inode *inode, int flags) @@ -574,7 +577,7 @@ svc_export_lookup(struct svc_export *exp) return NULL; } -struct svc_export * +static struct svc_export * svc_export_update(struct svc_export *new, struct svc_export *old) { struct cache_head *ch; @@ -593,7 +596,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old) } -struct svc_expkey * +static struct svc_expkey * exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) { struct svc_expkey key, *ek; diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index a6c08a47b25c..d2a8abb5011a 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -86,9 +86,6 @@ void nfsd_export_shutdown(void); void nfsd_export_flush(void); void exp_readlock(void); void exp_readunlock(void); -struct svc_expkey * exp_find_key(struct auth_domain *clp, - int fsid_type, u32 *fsidv, - struct cache_req *reqp); struct svc_export * exp_get_by_name(struct auth_domain *clp, struct vfsmount *mnt, struct dentry *dentry, @@ -102,7 +99,7 @@ int exp_rootfh(struct auth_domain *, int exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); int nfserrno(int errno); -extern struct cache_detail svc_export_cache, svc_expkey_cache; +extern struct cache_detail svc_export_cache; static inline void exp_put(struct svc_export *exp) { diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ad3f5cbdb770..b5612c958cce 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -163,7 +163,6 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) kref_put(&h->ref, cd->cache_put); } -extern void cache_init(struct cache_head *h); extern int 
cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index dd81e5928172..3ac4193a78ed 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -37,7 +37,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); -void cache_init(struct cache_head *h) +static void cache_init(struct cache_head *h) { time_t now = get_seconds(); h->next = NULL; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 69b8238f3d10..769114f0f886 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -105,7 +105,6 @@ EXPORT_SYMBOL(auth_unix_lookup); EXPORT_SYMBOL(cache_check); EXPORT_SYMBOL(cache_flush); EXPORT_SYMBOL(cache_purge); -EXPORT_SYMBOL(cache_init); EXPORT_SYMBOL(cache_register); EXPORT_SYMBOL(cache_unregister); EXPORT_SYMBOL(qword_add); -- cgit v1.2.3 From ec936fc563715a9e2b2e363eb060655b49529325 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:59 -0800 Subject: [PATCH] for_each_online_pgdat: renaming for_each_pgdat Replace for_each_pgdat() with for_each_online_pgdat(). Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/pgtable.c | 2 +- arch/ia64/mm/discontig.c | 4 ++-- arch/ia64/mm/init.c | 2 +- arch/m32r/mm/init.c | 2 +- arch/powerpc/mm/mem.c | 4 ++-- arch/x86_64/mm/init.c | 2 +- fs/buffer.c | 2 +- mm/page_alloc.c | 6 +++--- mm/vmscan.c | 6 +++--- 9 files changed, 15 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 9db3242103be..2889567e21a1 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -36,7 +36,7 @@ void show_mem(void) printk(KERN_INFO "Mem-info:\n"); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 2f5e44862e91..384f1d7dce96 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -386,7 +386,7 @@ static void __init pgdat_insert(pg_data_t *pgdat) { pg_data_t *prev = NULL, *next; - for_each_pgdat(next) + for_each_online_pgdat(next) if (pgdat->node_id < next->node_id) break; else @@ -560,7 +560,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index ff4f31fcd330..2ef1151cde90 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -600,7 +600,7 @@ mem_init (void) kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); kclist_add(&kcore_kernel, _stext, _end - _stext); - for_each_pgdat(pgdat) + for_each_online_pgdat(pgdat) if (pgdat->bdata->node_bootmem_map) totalram_pages += free_all_bootmem_node(pgdat); diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index c9e7dad860b7..2e0fe199ce38 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -47,7 +47,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); - 
for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long flags; pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index badac10d700c..5e435a9c3431 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -195,7 +195,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long flags; pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; i++) { @@ -351,7 +351,7 @@ void __init mem_init(void) max_mapnr = max_pfn; totalram_pages += free_all_bootmem(); #endif - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { if (!pfn_valid(pgdat->node_start_pfn + i)) continue; diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index b04415625442..e5f7f1c34462 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -72,7 +72,7 @@ void show_mem(void) show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pfn_to_page(pgdat->node_start_pfn + i); total++; diff --git a/fs/buffer.c b/fs/buffer.c index d597758dd129..23f1f3a68077 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -493,7 +493,7 @@ static void free_more_memory(void) wakeup_pdflush(1024); yield(); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; if (*zones) try_to_free_pages(zones, GFP_NOFS); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8dc8f2735d22..ccc3713dd407 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1201,7 +1201,7 @@ unsigned int nr_free_highpages (void) pg_data_t *pgdat; unsigned int pages = 0; - for_each_pgdat(pgdat) + for_each_online_pgdat(pgdat) pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; return pages; @@ -1343,7 +1343,7 @@ void get_zone_counts(unsigned long *active, *active = 0; *inactive = 0; *free = 0; - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long l, m, n; __get_zone_counts(&l, &m, &n, pgdat); *active += l; @@ -2482,7 +2482,7 @@ static void setup_per_zone_lowmem_reserve(void) struct pglist_data *pgdat; int j, idx; - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long present_pages = zone->present_pages; diff --git a/mm/vmscan.c b/mm/vmscan.c index 78865c849f8f..acdf001d6941 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1305,7 +1305,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) current->reclaim_state = &reclaim_state; repeat: - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long freed; freed = balance_pgdat(pgdat, nr_to_free, 0); @@ -1335,7 +1335,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, cpumask_t mask; if (action == CPU_ONLINE) { - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { mask = node_to_cpumask(pgdat->node_id); if (any_online_cpu(mask) != NR_CPUS) /* One of our CPUs online: restore mask */ @@ -1351,7 +1351,7 @@ static int __init kswapd_init(void) pg_data_t *pgdat; swap_setup(); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { pid_t pid; pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL); -- cgit v1.2.3 From 6a4d44c1f1108d6c9e8850e8cf166aaba0e56eae 
Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Mon, 27 Mar 2006 01:17:55 -0800 Subject: [PATCH] dm/md dependency tree in sysfs: holders/slaves subdirectory Creating "slaves" and "holders" directories in /sys/block/ and creating "holders" directory under /sys/block// Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/partitions/check.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/genhd.h | 7 +++++++ 2 files changed, 43 insertions(+) (limited to 'fs') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f924f459bdb8..60523cea7136 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -297,6 +297,30 @@ struct kobj_type ktype_part = { .sysfs_ops = &part_sysfs_ops, }; +#ifdef CONFIG_SYSFS +static inline void partition_sysfs_add_subdir(struct hd_struct *p) +{ + struct kobject *k; + + k = kobject_get(&p->kobj); + p->holder_dir = kobject_add_dir(k, "holders"); + kobject_put(k); +} + +static inline void disk_sysfs_add_subdirs(struct gendisk *disk) +{ + struct kobject *k; + + k = kobject_get(&disk->kobj); + disk->holder_dir = kobject_add_dir(k, "holders"); + disk->slave_dir = kobject_add_dir(k, "slaves"); + kobject_put(k); +} +#else +#define partition_sysfs_add_subdir(x) do { } while (0) +#define disk_sysfs_add_subdirs(x) do { } while (0) +#endif + void delete_partition(struct gendisk *disk, int part) { struct hd_struct *p = disk->part[part-1]; @@ -310,6 +334,10 @@ void delete_partition(struct gendisk *disk, int part) p->ios[0] = p->ios[1] = 0; p->sectors[0] = p->sectors[1] = 0; devfs_remove("%s/part%d", disk->devfs_name, part); +#ifdef CONFIG_SYSFS + if (p->holder_dir) + kobject_unregister(p->holder_dir); +#endif kobject_unregister(&p->kobj); } @@ -337,6 +365,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len) p->kobj.parent = &disk->kobj; p->kobj.ktype = &ktype_part; kobject_register(&p->kobj); + partition_sysfs_add_subdir(p); disk->part[part-1] = p; } @@ -383,6 +412,7 @@ void register_disk(struct gendisk *disk) if ((err = kobject_add(&disk->kobj))) return; disk_sysfs_symlinks(disk); + disk_sysfs_add_subdirs(disk); kobject_uevent(&disk->kobj, KOBJ_ADD); /* No minors to use for partitions */ @@ -483,6 +513,12 @@ void del_gendisk(struct gendisk *disk) devfs_remove_disk(disk); +#ifdef CONFIG_SYSFS + if (disk->holder_dir) + kobject_unregister(disk->holder_dir); + if (disk->slave_dir) + kobject_unregister(disk->slave_dir); +#endif if (disk->driverfs_dev) { char *disk_name = make_block_name(disk); sysfs_remove_link(&disk->kobj, "device"); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fd647fde5ec1..eea61cc8fac1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -78,6 +78,9 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; struct kobject kobj; +#ifdef CONFIG_SYSFS + struct kobject *holder_dir; +#endif unsigned ios[2], sectors[2]; /* READs and WRITEs */ int policy, partno; }; @@ -114,6 +117,10 @@ struct gendisk { int number; /* more of the same */ struct device *driverfs_dev; struct kobject kobj; +#ifdef CONFIG_SYSFS + struct kobject *holder_dir; + struct kobject *slave_dir; +#endif struct timer_rand_state *random; int policy; -- cgit v1.2.3 From 100873687d81d4ce7b1299b447d33e87ba1e9583 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 27 Mar 2006 01:17:56 -0800 Subject: [PATCH] dm-md-dependency-tree-in-sysfs-holders-slaves-subdirectory-tidy Remove all the CONFIG_SYSFS stuff. 
That's supposed to all be implemented up in header files. Yes, the CONFIG_SYSFS=n data structures will be a little larger than necessary, but that's a tradeoff we can decide to make. Cc: Jun'ichi Nomura Cc: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/partitions/check.c | 9 --------- include/linux/genhd.h | 4 ---- 2 files changed, 13 deletions(-) (limited to 'fs') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 60523cea7136..af0cb4b9e784 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -297,7 +297,6 @@ struct kobj_type ktype_part = { .sysfs_ops = &part_sysfs_ops, }; -#ifdef CONFIG_SYSFS static inline void partition_sysfs_add_subdir(struct hd_struct *p) { struct kobject *k; @@ -316,10 +315,6 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk) disk->slave_dir = kobject_add_dir(k, "slaves"); kobject_put(k); } -#else -#define partition_sysfs_add_subdir(x) do { } while (0) -#define disk_sysfs_add_subdirs(x) do { } while (0) -#endif void delete_partition(struct gendisk *disk, int part) { @@ -334,10 +329,8 @@ void delete_partition(struct gendisk *disk, int part) p->ios[0] = p->ios[1] = 0; p->sectors[0] = p->sectors[1] = 0; devfs_remove("%s/part%d", disk->devfs_name, part); -#ifdef CONFIG_SYSFS if (p->holder_dir) kobject_unregister(p->holder_dir); -#endif kobject_unregister(&p->kobj); } @@ -513,12 +506,10 @@ void del_gendisk(struct gendisk *disk) devfs_remove_disk(disk); -#ifdef CONFIG_SYSFS if (disk->holder_dir) kobject_unregister(disk->holder_dir); if (disk->slave_dir) kobject_unregister(disk->slave_dir); -#endif if (disk->driverfs_dev) { char *disk_name = make_block_name(disk); sysfs_remove_link(&disk->kobj, "device"); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index eea61cc8fac1..bd7db861041e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -78,9 +78,7 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; struct kobject kobj; -#ifdef CONFIG_SYSFS struct kobject *holder_dir; -#endif unsigned ios[2], sectors[2]; /* READs and WRITEs */ int policy, partno; }; @@ -117,10 +115,8 @@ struct gendisk { int number; /* more of the same */ struct device *driverfs_dev; struct kobject kobj; -#ifdef CONFIG_SYSFS struct kobject *holder_dir; struct kobject *slave_dir; -#endif struct timer_rand_state *random; int policy; -- cgit v1.2.3 From 641dc636b0475582e48584340b774bd1e90d40d9 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Mon, 27 Mar 2006 01:17:57 -0800 Subject: [PATCH] dm/md dependency tree in sysfs: bd_claim_by_kobject Adding bd_claim_by_kobject() function which takes kobject as additional signature of holder device and creates sysfs symlinks between holder device and claimed device. bd_release_from_kobject() is a counterpart of bd_claim_by_kobject. 
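(Illustrative only, not part of the patch: callers are expected to go through the bd_claim_by_disk()/bd_release_from_disk() wrappers added below rather than the kobject-level functions. A minimal sketch of how a hypothetical holder driver — invented example_* names — would pair them, assuming it already owns a struct gendisk and has opened the block device it wants to claim:)

#include <linux/fs.h>
#include <linux/genhd.h>

/* Claim "slave" on behalf of "holder_disk"; on success this also creates
 * the holders/slaves symlinks described above. */
static int example_bind(struct gendisk *holder_disk,
			struct block_device *slave, void *holder)
{
	return bd_claim_by_disk(slave, holder, holder_disk);
}

/* Drop the claim and remove the symlinks again. */
static void example_unbind(struct gendisk *holder_disk,
			   struct block_device *slave)
{
	bd_release_from_disk(slave, holder_disk);
}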
Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/block_dev.c | 297 +++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 10 ++ 2 files changed, 307 insertions(+) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 5983d42df015..3f36df7e037c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -266,6 +266,9 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) mutex_init(&bdev->bd_mount_mutex); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); +#ifdef CONFIG_SYSFS + INIT_LIST_HEAD(&bdev->bd_holder_list); +#endif inode_init_once(&ei->vfs_inode); } } @@ -490,6 +493,300 @@ void bd_release(struct block_device *bdev) EXPORT_SYMBOL(bd_release); +#ifdef CONFIG_SYSFS +/* + * Functions for bd_claim_by_kobject / bd_release_from_kobject + * + * If a kobject is passed to bd_claim_by_kobject() + * and the kobject has a parent directory, + * following symlinks are created: + * o from the kobject to the claimed bdev + * o from "holders" directory of the bdev to the parent of the kobject + * bd_release_from_kobject() removes these symlinks. + * + * Example: + * If /dev/dm-0 maps to /dev/sda, kobject corresponding to + * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then: + * /sys/block/dm-0/slaves/sda --> /sys/block/sda + * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 + */ + +static struct kobject *bdev_get_kobj(struct block_device *bdev) +{ + if (bdev->bd_contains != bdev) + return kobject_get(&bdev->bd_part->kobj); + else + return kobject_get(&bdev->bd_disk->kobj); +} + +static struct kobject *bdev_get_holder(struct block_device *bdev) +{ + if (bdev->bd_contains != bdev) + return kobject_get(bdev->bd_part->holder_dir); + else + return kobject_get(bdev->bd_disk->holder_dir); +} + +static void add_symlink(struct kobject *from, struct kobject *to) +{ + if (!from || !to) + return; + sysfs_create_link(from, to, kobject_name(to)); +} + +static void del_symlink(struct kobject *from, struct kobject *to) +{ + if (!from || !to) + return; + sysfs_remove_link(from, kobject_name(to)); +} + +/* + * 'struct bd_holder' contains pointers to kobjects symlinked by + * bd_claim_by_kobject. + * It's connected to bd_holder_list which is protected by bdev->bd_sem. + */ +struct bd_holder { + struct list_head list; /* chain of holders of the bdev */ + int count; /* references from the holder */ + struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */ + struct kobject *hdev; /* e.g. "/block/dm-0" */ + struct kobject *hdir; /* e.g. "/block/sda/holders" */ + struct kobject *sdev; /* e.g. "/block/sda" */ +}; + +/* + * Get references of related kobjects at once. + * Returns 1 on success. 0 on failure. + * + * Should call bd_holder_release_dirs() after successful use. + */ +static int bd_holder_grab_dirs(struct block_device *bdev, + struct bd_holder *bo) +{ + if (!bdev || !bo) + return 0; + + bo->sdir = kobject_get(bo->sdir); + if (!bo->sdir) + return 0; + + bo->hdev = kobject_get(bo->sdir->parent); + if (!bo->hdev) + goto fail_put_sdir; + + bo->sdev = bdev_get_kobj(bdev); + if (!bo->sdev) + goto fail_put_hdev; + + bo->hdir = bdev_get_holder(bdev); + if (!bo->hdir) + goto fail_put_sdev; + + return 1; + +fail_put_sdev: + kobject_put(bo->sdev); +fail_put_hdev: + kobject_put(bo->hdev); +fail_put_sdir: + kobject_put(bo->sdir); + + return 0; +} + +/* Put references of related kobjects at once. 
*/ +static void bd_holder_release_dirs(struct bd_holder *bo) +{ + kobject_put(bo->hdir); + kobject_put(bo->sdev); + kobject_put(bo->hdev); + kobject_put(bo->sdir); +} + +static struct bd_holder *alloc_bd_holder(struct kobject *kobj) +{ + struct bd_holder *bo; + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return NULL; + + bo->count = 1; + bo->sdir = kobj; + + return bo; +} + +static void free_bd_holder(struct bd_holder *bo) +{ + kfree(bo); +} + +/** + * add_bd_holder - create sysfs symlinks for bd_claim() relationship + * + * @bdev: block device to be bd_claimed + * @bo: preallocated and initialized by alloc_bd_holder() + * + * If there is no matching entry with @bo in @bdev->bd_holder_list, + * add @bo to the list, create symlinks. + * + * Returns 1 if @bo was added to the list. + * Returns 0 if @bo wasn't used by any reason and should be freed. + */ +static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) +{ + struct bd_holder *tmp; + + if (!bo) + return 0; + + list_for_each_entry(tmp, &bdev->bd_holder_list, list) { + if (tmp->sdir == bo->sdir) { + tmp->count++; + return 0; + } + } + + if (!bd_holder_grab_dirs(bdev, bo)) + return 0; + + add_symlink(bo->sdir, bo->sdev); + add_symlink(bo->hdir, bo->hdev); + list_add_tail(&bo->list, &bdev->bd_holder_list); + return 1; +} + +/** + * del_bd_holder - delete sysfs symlinks for bd_claim() relationship + * + * @bdev: block device to be bd_claimed + * @kobj: holder's kobject + * + * If there is matching entry with @kobj in @bdev->bd_holder_list + * and no other bd_claim() from the same kobject, + * remove the struct bd_holder from the list, delete symlinks for it. + * + * Returns a pointer to the struct bd_holder when it's removed from the list + * and ready to be freed. + * Returns NULL if matching claim isn't found or there is other bd_claim() + * by the same kobject. + */ +static struct bd_holder *del_bd_holder(struct block_device *bdev, + struct kobject *kobj) +{ + struct bd_holder *bo; + + list_for_each_entry(bo, &bdev->bd_holder_list, list) { + if (bo->sdir == kobj) { + bo->count--; + BUG_ON(bo->count < 0); + if (!bo->count) { + list_del(&bo->list); + del_symlink(bo->sdir, bo->sdev); + del_symlink(bo->hdir, bo->hdev); + bd_holder_release_dirs(bo); + return bo; + } + break; + } + } + + return NULL; +} + +/** + * bd_claim_by_kobject - bd_claim() with additional kobject signature + * + * @bdev: block device to be claimed + * @holder: holder's signature + * @kobj: holder's kobject + * + * Do bd_claim() and if it succeeds, create sysfs symlinks between + * the bdev and the holder's kobject. + * Use bd_release_from_kobject() when relesing the claimed bdev. + * + * Returns 0 on success. (same as bd_claim()) + * Returns errno on failure. + */ +static int bd_claim_by_kobject(struct block_device *bdev, void *holder, + struct kobject *kobj) +{ + int res; + struct bd_holder *bo; + + if (!kobj) + return -EINVAL; + + bo = alloc_bd_holder(kobj); + if (!bo) + return -ENOMEM; + + down(&bdev->bd_sem); + res = bd_claim(bdev, holder); + if (res || !add_bd_holder(bdev, bo)) + free_bd_holder(bo); + up(&bdev->bd_sem); + + return res; +} + +/** + * bd_release_from_kobject - bd_release() with additional kobject signature + * + * @bdev: block device to be released + * @kobj: holder's kobject + * + * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject(). 
+ */ +static void bd_release_from_kobject(struct block_device *bdev, + struct kobject *kobj) +{ + struct bd_holder *bo; + + if (!kobj) + return; + + down(&bdev->bd_sem); + bd_release(bdev); + if ((bo = del_bd_holder(bdev, kobj))) + free_bd_holder(bo); + up(&bdev->bd_sem); +} + +/** + * bd_claim_by_disk - wrapper function for bd_claim_by_kobject() + * + * @bdev: block device to be claimed + * @holder: holder's signature + * @disk: holder's gendisk + * + * Call bd_claim_by_kobject() with getting @disk->slave_dir. + */ +int bd_claim_by_disk(struct block_device *bdev, void *holder, + struct gendisk *disk) +{ + return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir)); +} +EXPORT_SYMBOL_GPL(bd_claim_by_disk); + +/** + * bd_release_from_disk - wrapper function for bd_release_from_kobject() + * + * @bdev: block device to be claimed + * @disk: holder's gendisk + * + * Call bd_release_from_kobject() and put @disk->slave_dir. + */ +void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk) +{ + bd_release_from_kobject(bdev, disk->slave_dir); + kobject_put(disk->slave_dir); +} +EXPORT_SYMBOL_GPL(bd_release_from_disk); +#endif + /* * Tries to open block device by device number. Use it ONLY if you * really do not have anything better - i.e. when you are behind a diff --git a/include/linux/fs.h b/include/linux/fs.h index 9d9674946956..680d913350e7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -410,6 +410,9 @@ struct block_device { struct list_head bd_inodes; void * bd_holder; int bd_holders; +#ifdef CONFIG_SYSFS + struct list_head bd_holder_list; +#endif struct block_device * bd_contains; unsigned bd_block_size; struct hd_struct * bd_part; @@ -1399,6 +1402,13 @@ extern int blkdev_get(struct block_device *, mode_t, unsigned); extern int blkdev_put(struct block_device *); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); +#ifdef CONFIG_SYSFS +extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); +extern void bd_release_from_disk(struct block_device *, struct gendisk *); +#else +#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) +#define bd_release_from_disk(bdev, disk) bd_release(bdev) +#endif /* fs/char_dev.c */ extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); -- cgit v1.2.3 From b4cf1b72eec0e197257a5b07dc9ec53552cdd123 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Mon, 27 Mar 2006 01:18:00 -0800 Subject: [PATCH] dm/md dependency tree in sysfs: convert bd_sem to bd_mutex Convert bd_sem to bd_mutex Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/block_dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 3f36df7e037c..17c76182f389 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -723,11 +723,11 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, if (!bo) return -ENOMEM; - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); res = bd_claim(bdev, holder); if (res || !add_bd_holder(bdev, bo)) free_bd_holder(bo); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return res; } @@ -748,11 +748,11 @@ static void bd_release_from_kobject(struct block_device *bdev, if (!kobj) return; - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); bd_release(bdev); if ((bo = del_bd_holder(bdev, kobj))) free_bd_holder(bo); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); } /** -- 
cgit v1.2.3 From 5149fa47ec90eb5e79e28f3a7fbcf29421524817 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 27 Mar 2006 14:26:26 +1100 Subject: [PATCH] powerpc: Cope with duplicate node & property names in /proc/device-tree Various dodgy firmware might give us nodes and/or properties in the device tree with conflicting names. That's generally ok, except for when we export the device tree via /proc, so check when we're creating the proc device tree and munge names accordingly. Tested on a faked device tree with kexec, would be good if someone with actual bogus firmware could try it, but just for completeness. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- fs/proc/proc_devtree.c | 103 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 80 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 596b4b4f1cc8..abdf068bc27f 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -52,7 +52,8 @@ static int property_read_proc(char *page, char **start, off_t off, * Add a property to a node */ static struct proc_dir_entry * -__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp) +__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp, + const char *name) { struct proc_dir_entry *ent; @@ -60,14 +61,14 @@ __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp) * Unfortunately proc_register puts each new entry * at the beginning of the list. So we rearrange them. */ - ent = create_proc_read_entry(pp->name, - strncmp(pp->name, "security-", 9) + ent = create_proc_read_entry(name, + strncmp(name, "security-", 9) ? S_IRUGO : S_IRUSR, de, property_read_proc, pp); if (ent == NULL) return NULL; - if (!strncmp(pp->name, "security-", 9)) + if (!strncmp(name, "security-", 9)) ent->size = 0; /* don't leak number of password chars */ else ent->size = pp->length; @@ -78,7 +79,7 @@ __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp) void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop) { - __proc_device_tree_add_prop(pde, prop); + __proc_device_tree_add_prop(pde, prop, prop->name); } void proc_device_tree_remove_prop(struct proc_dir_entry *pde, @@ -105,6 +106,69 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde, } } +/* + * Various dodgy firmware might give us nodes and/or properties with + * conflicting names. That's generally ok, except for exporting via /proc, + * so munge names here to ensure they're unique. + */ + +static int duplicate_name(struct proc_dir_entry *de, const char *name) +{ + struct proc_dir_entry *ent; + int found = 0; + + spin_lock(&proc_subdir_lock); + + for (ent = de->subdir; ent != NULL; ent = ent->next) { + if (strcmp(ent->name, name) == 0) { + found = 1; + break; + } + } + + spin_unlock(&proc_subdir_lock); + + return found; +} + +static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de, + const char *name) +{ + char *fixed_name; + int fixup_len = strlen(name) + 2 + 1; /* name + #x + \0 */ + int i = 1, size; + +realloc: + fixed_name = kmalloc(fixup_len, GFP_KERNEL); + if (fixed_name == NULL) { + printk(KERN_ERR "device-tree: Out of memory trying to fixup " + "name \"%s\"\n", name); + return name; + } + +retry: + size = snprintf(fixed_name, fixup_len, "%s#%d", name, i); + size++; /* account for NULL */ + + if (size > fixup_len) { + /* We ran out of space, free and reallocate. 
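fixup_len is bumped to the size snprintf() said it needed.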
*/ + kfree(fixed_name); + fixup_len = size; + goto realloc; + } + + if (duplicate_name(de, fixed_name)) { + /* Multiple duplicates. Retry with a different offset. */ + i++; + goto retry; + } + + printk(KERN_WARNING "device-tree: Duplicate name in %s, " + "renamed to \"%s\"\n", np->full_name, fixed_name); + + return fixed_name; +} + /* * Process a node, adding entries for its children and its properties. */ @@ -118,37 +182,30 @@ void proc_device_tree_add_node(struct device_node *np, set_node_proc_entry(np, de); for (child = NULL; (child = of_get_next_child(np, child));) { + /* Use everything after the last slash, or the full name */ p = strrchr(child->full_name, '/'); if (!p) p = child->full_name; else ++p; + + if (duplicate_name(de, p)) + p = fixup_name(np, de, p); + ent = proc_mkdir(p, de); if (ent == 0) break; proc_device_tree_add_node(child, ent); } of_node_put(child); + for (pp = np->properties; pp != 0; pp = pp->next) { - /* - * Yet another Apple device-tree bogosity: on some machines, - * they have properties & nodes with the same name. Those - * properties are quite unimportant for us though, thus we - * simply "skip" them here, but we do have to check. - */ - spin_lock(&proc_subdir_lock); - for (ent = de->subdir; ent != NULL; ent = ent->next) - if (!strcmp(ent->name, pp->name)) - break; - spin_unlock(&proc_subdir_lock); - if (ent != NULL) { - printk(KERN_WARNING "device-tree: property \"%s\" name" - " conflicts with node in %s\n", pp->name, - np->full_name); - continue; - } + p = pp->name; + + if (duplicate_name(de, p)) + p = fixup_name(np, de, p); - ent = __proc_device_tree_add_prop(de, pp); + ent = __proc_device_tree_add_prop(de, pp, p); if (ent == 0) break; } -- cgit v1.2.3 From e8222502ee6157e2713da9e0792c21f4ad458d50 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 28 Mar 2006 23:15:54 +1100 Subject: [PATCH] powerpc: Kill _machine and hard-coded platform numbers This removes statically assigned platform numbers and reworks the powerpc platform probe code to use a better mechanism. With this, board support files can simply declare a new machine type with a macro, and implement a probe() function that uses the flattened device-tree to detect if they apply for a given machine. We now have a machine_is() macro that replaces the comparisons of _machine with the various PLATFORM_* constants. This commit also changes various drivers to use the new macro instead of looking at _machine. 
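For reference, a board port under the new scheme looks roughly like the sketch below; the board name, compatible string and setup hook are invented for illustration, while of_get_flat_dt_root(), of_flat_dt_is_compatible() and the define_machine() macro are the helpers introduced by this patch:

	#include <asm/machdep.h>
	#include <asm/prom.h>

	static void __init myboard_setup_arch(void)
	{
		/* board specific setup would go here */
	}

	/* Called very early, MMU is off, device-tree isn't unflattened */
	static int __init myboard_probe(void)
	{
		unsigned long root = of_get_flat_dt_root();

		/* match against the flattened device-tree */
		return of_flat_dt_is_compatible(root, "acme,myboard");
	}

	define_machine(myboard) {
		.name		= "MyBoard",
		.probe		= myboard_probe,
		.setup_arch	= myboard_setup_arch,
	};

Run-time checks elsewhere in the kernel then read machine_is(myboard) instead of comparing _machine against a PLATFORM_* constant.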
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- Documentation/powerpc/booting-without-of.txt | 5 + arch/powerpc/kernel/asm-offsets.c | 2 - arch/powerpc/kernel/nvram_64.c | 4 +- arch/powerpc/kernel/pci_32.c | 4 +- arch/powerpc/kernel/proc_ppc64.c | 3 +- arch/powerpc/kernel/prom.c | 56 ++-- arch/powerpc/kernel/prom_init.c | 51 ++-- arch/powerpc/kernel/rtas-proc.c | 2 +- arch/powerpc/kernel/rtas.c | 3 +- arch/powerpc/kernel/setup-common.c | 52 +++- arch/powerpc/kernel/setup_32.c | 58 +--- arch/powerpc/kernel/setup_64.c | 56 +--- arch/powerpc/kernel/traps.c | 35 +-- arch/powerpc/kernel/vdso.c | 9 +- arch/powerpc/kernel/vmlinux.lds.S | 381 +++++++++++++-------------- arch/powerpc/mm/hash_utils_64.c | 7 +- arch/powerpc/platforms/cell/setup.c | 11 +- arch/powerpc/platforms/chrp/setup.c | 13 +- arch/powerpc/platforms/iseries/setup.c | 9 +- arch/powerpc/platforms/maple/setup.c | 8 +- arch/powerpc/platforms/powermac/bootx_init.c | 4 +- arch/powerpc/platforms/powermac/feature.c | 2 +- arch/powerpc/platforms/powermac/low_i2c.c | 3 + arch/powerpc/platforms/powermac/nvram.c | 2 +- arch/powerpc/platforms/powermac/pci.c | 5 +- arch/powerpc/platforms/powermac/pfunc_base.c | 2 + arch/powerpc/platforms/powermac/setup.c | 72 ++--- arch/powerpc/platforms/powermac/time.c | 4 +- arch/powerpc/platforms/pseries/eeh.c | 2 +- arch/powerpc/platforms/pseries/pci.c | 2 +- arch/powerpc/platforms/pseries/pci_dlpar.c | 1 + arch/powerpc/platforms/pseries/reconfig.c | 5 +- arch/powerpc/platforms/pseries/rtasd.c | 3 +- arch/powerpc/platforms/pseries/setup.c | 39 ++- arch/ppc/platforms/prep_setup.c | 12 +- drivers/char/generic_nvram.c | 5 +- drivers/ide/pci/via82cxxx.c | 2 +- drivers/ide/ppc/pmac.c | 2 +- drivers/ieee1394/ohci1394.c | 4 +- drivers/macintosh/adb.c | 3 +- drivers/macintosh/adbhid.c | 4 +- drivers/macintosh/mediabay.c | 4 +- drivers/media/video/planb.c | 2 +- drivers/net/tulip/de4x5.c | 2 +- drivers/scsi/mesh.c | 2 +- drivers/usb/core/hcd-pci.c | 4 +- drivers/video/aty/aty128fb.c | 7 +- drivers/video/aty/atyfb_base.c | 7 +- drivers/video/aty/radeon_pm.c | 4 +- drivers/video/cirrusfb.c | 4 +- drivers/video/matrox/matroxfb_base.c | 3 +- drivers/video/nvidia/nvidia.c | 5 +- drivers/video/radeonfb.c | 2 +- drivers/video/riva/fbdev.c | 9 +- fs/partitions/mac.c | 3 +- include/asm-powerpc/machdep.h | 26 +- include/asm-powerpc/pmac_feature.h | 2 +- include/asm-powerpc/processor.h | 37 +-- include/asm-powerpc/prom.h | 14 +- include/asm-powerpc/vdso_datapage.h | 3 + include/asm-ppc/machdep.h | 12 + sound/oss/dmasound/dmasound_awacs.c | 2 +- sound/ppc/pmac.c | 2 +- 63 files changed, 560 insertions(+), 543 deletions(-) (limited to 'fs') diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index ee551c6ea235..217e51768b87 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -719,6 +719,11 @@ address which can extend beyond that limit. - model : this is your board name/model - #address-cells : address representation for "root" devices - #size-cells: the size representation for "root" devices + - device_type : This property shouldn't be necessary. However, if + you decide to create a device_type for your root node, make sure it + is _not_ "chrp" unless your platform is a pSeries or PAPR compliant + one for 64-bit, or a CHRP-type machine for 32-bit as this will + matched by the kernel this way. 
Additionally, some recommended properties are: diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 882889b15926..54b48f330051 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -105,8 +105,6 @@ int main(void) DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); - DEFINE(PLATFORM_LPAR, PLATFORM_LPAR); - /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 6c3989f6247d..ada50aa5b600 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -160,7 +160,7 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file, case IOC_NVRAM_GET_OFFSET: { int part, offset; - if (_machine != PLATFORM_POWERMAC) + if (!machine_is(powermac)) return -EINVAL; if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) return -EFAULT; @@ -444,7 +444,7 @@ static int nvram_setup_partition(void) * in our nvram, as Apple defined partitions use pretty much * all of the space */ - if (_machine == PLATFORM_POWERMAC) + if (machine_is(powermac)) return -ENOSPC; /* see if we have an OS partition that meets our needs. diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 704c846b2b0f..b129d2e4b759 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -787,7 +787,7 @@ pci_busdev_to_OF_node(struct pci_bus *bus, int devfn) * fix has to be done by making the remapping per-host and always * filling the pci_to_OF map. --BenH */ - if (_machine == _MACH_Pmac && busnr >= 0xf0) + if (machine_is(powermac) && busnr >= 0xf0) busnr -= 0xf0; else #endif @@ -1728,7 +1728,7 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) * (bus 0 is HT root), we return the AGP one instead. 
*/ #ifdef CONFIG_PPC_PMAC - if (_machine == _MACH_Pmac && machine_is_compatible("MacRISC4")) + if (machine_is(powermac) && machine_is_compatible("MacRISC4")) if (bus == 0) bus = 0xf0; #endif /* CONFIG_PPC_PMAC */ diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c index 7ba42a405f41..3c2cf661f6d9 100644 --- a/arch/powerpc/kernel/proc_ppc64.c +++ b/arch/powerpc/kernel/proc_ppc64.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -51,7 +52,7 @@ static int __init proc_ppc64_create(void) if (!root) return 1; - if (!(platform_is_pseries() || _machine == PLATFORM_CELL)) + if (!machine_is(pseries) && !machine_is(cell)) return 0; if (!proc_mkdir("rtas", root)) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a2bc433f3610..4336390bcf34 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -383,14 +383,14 @@ static int __devinit finish_node_interrupts(struct device_node *np, /* Apple uses bits in there in a different way, let's * only keep the real sense bit on macs */ - if (_machine == PLATFORM_POWERMAC) + if (machine_is(powermac)) sense &= 0x1; np->intrs[intrcount].sense = map_mpic_senses[sense]; } #ifdef CONFIG_PPC64 /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ - if (_machine == PLATFORM_POWERMAC && ic && ic->parent) { + if (machine_is(powermac) && ic && ic->parent) { char *name = get_property(ic->parent, "name", NULL); if (name && !strcmp(name, "u3")) np->intrs[intrcount].line += 128; @@ -570,6 +570,18 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node, return rc; } +unsigned long __init of_get_flat_dt_root(void) +{ + unsigned long p = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + + while(*((u32 *)p) == OF_DT_NOP) + p += 4; + BUG_ON (*((u32 *)p) != OF_DT_BEGIN_NODE); + p += 4; + return _ALIGN(p + strlen((char *)p) + 1, 4); +} + /** * This function can be used within scan_flattened_dt callback to get * access to properties @@ -612,6 +624,25 @@ void* __init of_get_flat_dt_prop(unsigned long node, const char *name, } while(1); } +int __init of_flat_dt_is_compatible(unsigned long node, const char *compat) +{ + const char* cp; + unsigned long cplen, l; + + cp = of_get_flat_dt_prop(node, "compatible", &cplen); + if (cp == NULL) + return 0; + while (cplen > 0) { + if (strncasecmp(cp, compat, strlen(compat)) == 0) + return 1; + l = strlen(cp) + 1; + cp += l; + cplen -= l; + } + + return 0; +} + static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, unsigned long align) { @@ -686,7 +717,7 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, #ifdef DEBUG if ((strlen(p) + l + 1) != allocl) { DBG("%s: p: %d, l: %d, a: %d\n", - pathp, strlen(p), l, allocl); + pathp, (int)strlen(p), l, allocl); } #endif p += strlen(p); @@ -951,7 +982,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, static int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { - u32 *prop; unsigned long *lprop; unsigned long l; char *p; @@ -962,14 +992,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) return 0; - /* get platform type */ - prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); - if (prop == NULL) - return 0; -#ifdef CONFIG_PPC_MULTIPLATFORM - _machine = *prop; -#endif - #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ if (of_get_flat_dt_prop(node, 
"linux,iommu-off", NULL) != NULL) @@ -996,15 +1018,15 @@ static int __init early_init_dt_scan_chosen(unsigned long node, * set of RTAS infos now if available */ { - u64 *basep, *entryp; + u64 *basep, *entryp, *sizep; basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL); entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); - prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL); - if (basep && entryp && prop) { + sizep = of_get_flat_dt_prop(node, "linux,rtas-size", NULL); + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; - rtas.size = *prop; + rtas.size = *sizep; } } #endif /* CONFIG_PPC_RTAS */ @@ -1775,7 +1797,7 @@ static int of_finish_dynamic_node(struct device_node *node) /* We don't support that function on PowerMac, at least * not yet */ - if (_machine == PLATFORM_POWERMAC) + if (machine_is(powermac)) return -ENODEV; /* fix up new node's linux_phandle field */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 0d0887844501..d66c5e77fcff 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -180,6 +180,16 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +/* Platforms codes are now obsolete in the kernel. Now only used within this + * file and ultimately gone too. Feel free to change them if you need, they + * are not shared with anything outside of this file anymore + */ +#define PLATFORM_PSERIES 0x0100 +#define PLATFORM_PSERIES_LPAR 0x0101 +#define PLATFORM_LPAR 0x0001 +#define PLATFORM_POWERMAC 0x0400 +#define PLATFORM_GENERIC 0x0500 + static int __initdata of_platform; static char __initdata prom_cmd_line[COMMAND_LINE_SIZE]; @@ -1492,7 +1502,10 @@ static int __init prom_find_machine_type(void) int len, i = 0; #ifdef CONFIG_PPC64 phandle rtas; + int x; #endif + + /* Look for a PowerMac */ len = prom_getprop(_prom->root, "compatible", compat, sizeof(compat)-1); if (len > 0) { @@ -1505,28 +1518,36 @@ static int __init prom_find_machine_type(void) if (strstr(p, RELOC("Power Macintosh")) || strstr(p, RELOC("MacRISC"))) return PLATFORM_POWERMAC; -#ifdef CONFIG_PPC64 - if (strstr(p, RELOC("Momentum,Maple"))) - return PLATFORM_MAPLE; - if (strstr(p, RELOC("IBM,CPB"))) - return PLATFORM_CELL; -#endif i += sl + 1; } } #ifdef CONFIG_PPC64 + /* If not a mac, try to figure out if it's an IBM pSeries or any other + * PAPR compliant platform. We assume it is if : + * - /device_type is "chrp" (please, do NOT use that for future + * non-IBM designs ! + * - it has /rtas + */ + len = prom_getprop(_prom->root, "model", + compat, sizeof(compat)-1); + if (len <= 0) + return PLATFORM_GENERIC; + compat[len] = 0; + if (strcmp(compat, "chrp")) + return PLATFORM_GENERIC; + /* Default to pSeries. 
We need to know if we are running LPAR */ rtas = call_prom("finddevice", 1, 1, ADDR("/rtas")); - if (PHANDLE_VALID(rtas)) { - int x = prom_getproplen(rtas, "ibm,hypertas-functions"); - if (x != PROM_ERROR) { - prom_printf("Hypertas detected, assuming LPAR !\n"); - return PLATFORM_PSERIES_LPAR; - } + if (!PHANDLE_VALID(rtas)) + return PLATFORM_GENERIC; + x = prom_getproplen(rtas, "ibm,hypertas-functions"); + if (x != PROM_ERROR) { + prom_printf("Hypertas detected, assuming LPAR !\n"); + return PLATFORM_PSERIES_LPAR; } return PLATFORM_PSERIES; #else - return PLATFORM_CHRP; + return PLATFORM_GENERIC; #endif } @@ -2034,7 +2055,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, { struct prom_t *_prom; unsigned long hdr; - u32 getprop_rval; unsigned long offset = reloc_offset(); #ifdef CONFIG_PPC32 @@ -2070,9 +2090,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * between pSeries SMP and pSeries LPAR */ RELOC(of_platform) = prom_find_machine_type(); - getprop_rval = RELOC(of_platform); - prom_setprop(_prom->chosen, "/chosen", "linux,platform", - &getprop_rval, sizeof(getprop_rval)); /* Bail if this is a kdump kernel. */ if (PHYSICAL_START > 0) diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 1f03fb28cc0a..456286cf1d14 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -257,7 +257,7 @@ static int __init proc_rtas_init(void) { struct proc_dir_entry *entry; - if (_machine != PLATFORM_PSERIES && _machine != PLATFORM_PSERIES_LPAR) + if (!machine_is(pseries)) return 1; rtas_node = of_find_node_by_name(NULL, "rtas"); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 142d818a31a6..4b78ee0e5867 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -768,7 +769,7 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (_machine == PLATFORM_PSERIES_LPAR) { + if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b17630ad4ac7..3473cb9cb0ab 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -9,6 +9,9 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +#undef DEBUG + #include #include #include @@ -41,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -56,8 +60,6 @@ #include "setup.h" -#undef DEBUG - #ifdef DEBUG #include #define DBG(fmt...) udbg_printf(fmt) @@ -65,10 +67,12 @@ #define DBG(fmt...) 
#endif -#ifdef CONFIG_PPC_MULTIPLATFORM -int _machine = 0; -EXPORT_SYMBOL(_machine); -#endif +/* The main machine-dep calls structure + */ +struct machdep_calls ppc_md; +EXPORT_SYMBOL(ppc_md); +struct machdep_calls *machine_id; +EXPORT_SYMBOL(machine_id); unsigned long klimit = (unsigned long) _end; @@ -168,7 +172,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); #endif /* CONFIG_SMP && CONFIG_PPC32 */ seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); - + if (ppc_md.name) + seq_printf(m, "platform\t: %s\n", ppc_md.name); if (ppc_md.show_cpuinfo != NULL) ppc_md.show_cpuinfo(m); @@ -387,7 +392,7 @@ void __init smp_setup_cpu_maps(void) * On pSeries LPAR, we need to know how many cpus * could possibly be added to this partition. */ - if (_machine == PLATFORM_PSERIES_LPAR && + if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) && (dn = of_find_node_by_path("/rtas"))) { int num_addr_cell, num_size_cell, maxcpus; unsigned int *ireg; @@ -456,3 +461,34 @@ static int __init early_xmon(char *p) } early_param("xmon", early_xmon); #endif + +void probe_machine(void) +{ + extern struct machdep_calls __machine_desc_start; + extern struct machdep_calls __machine_desc_end; + + /* + * Iterate all ppc_md structures until we find the proper + * one for the current machine type + */ + DBG("Probing machine type ...\n"); + + for (machine_id = &__machine_desc_start; + machine_id < &__machine_desc_end; + machine_id++) { + DBG(" %s ...", machine_id->name); + memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls)); + if (ppc_md.probe()) { + DBG(" match !\n"); + break; + } + DBG("\n"); + } + /* What can we do if we didn't find ? */ + if (machine_id >= &__machine_desc_end) { + DBG("No suitable machine found !\n"); + for (;;); + } + + printk(KERN_INFO "Using %s machine description\n", ppc_md.name); +} diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a2c89435abec..ae9c33d70731 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -67,10 +67,6 @@ unsigned int DMA_MODE_WRITE; int have_of = 1; #ifdef CONFIG_PPC_MULTIPLATFORM -extern void prep_init(void); -extern void pmac_init(void); -extern void chrp_init(void); - dev_t boot_dev; #endif /* CONFIG_PPC_MULTIPLATFORM */ @@ -82,9 +78,6 @@ unsigned long SYSRQ_KEY = 0x54; unsigned long vgacon_remap_base; #endif -struct machdep_calls ppc_md; -EXPORT_SYMBOL(ppc_md); - /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. @@ -120,48 +113,6 @@ unsigned long __init early_init(unsigned long dt_ptr) return KERNELBASE + offset; } -#ifdef CONFIG_PPC_MULTIPLATFORM -/* - * The PPC_MULTIPLATFORM version of platform_init... - */ -void __init platform_init(void) -{ - /* if we didn't get any bootinfo telling us what we are... 
*/ - if (_machine == 0) { - /* prep boot loader tells us if we're prep or not */ - if ( *(unsigned long *)(KERNELBASE) == (0xdeadc0de) ) - _machine = _MACH_prep; - } - -#ifdef CONFIG_PPC_PREP - /* not much more to do here, if prep */ - if (_machine == _MACH_prep) { - prep_init(); - return; - } -#endif - -#ifdef CONFIG_ADB - if (strstr(cmd_line, "adb_sync")) { - extern int __adb_probe_sync; - __adb_probe_sync = 1; - } -#endif /* CONFIG_ADB */ - - switch (_machine) { -#ifdef CONFIG_PPC_PMAC - case _MACH_Pmac: - pmac_init(); - break; -#endif -#ifdef CONFIG_PPC_CHRP - case _MACH_chrp: - chrp_init(); - break; -#endif - } -} -#endif /* * Find out what kind of machine we're on and save any data we need @@ -187,8 +138,12 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys) strlcpy(cmd_line, CONFIG_CMDLINE, sizeof(cmd_line)); #endif /* CONFIG_CMDLINE */ - /* Base init based on machine type */ +#ifdef CONFIG_PPC_MULTIPLATFORM + probe_machine(); +#else + /* Base init based on machine type. Obsoloete, please kill ! */ platform_init(); +#endif #ifdef CONFIG_6xx if (cpu_has_feature(CPU_FTR_CAN_DOZE) || @@ -359,7 +314,4 @@ void __init setup_arch(char **cmdline_p) if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); paging_init(); - - /* this is for modules since _machine can be a define -- Cort */ - ppc_md.ppc_machine = _machine; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5b63a861ef43..6aea1fb74b69 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -95,11 +95,6 @@ int dcache_bsize; int icache_bsize; int ucache_bsize; -/* The main machine-dep calls structure - */ -struct machdep_calls ppc_md; -EXPORT_SYMBOL(ppc_md); - #ifdef CONFIG_MAGIC_SYSRQ unsigned long SYSRQ_KEY; #endif /* CONFIG_MAGIC_SYSRQ */ @@ -160,32 +155,6 @@ early_param("smt-enabled", early_smt_enabled); #define check_smt_enabled() #endif /* CONFIG_SMP */ -extern struct machdep_calls pSeries_md; -extern struct machdep_calls pmac_md; -extern struct machdep_calls maple_md; -extern struct machdep_calls cell_md; -extern struct machdep_calls iseries_md; - -/* Ultimately, stuff them in an elf section like initcalls... */ -static struct machdep_calls __initdata *machines[] = { -#ifdef CONFIG_PPC_PSERIES - &pSeries_md, -#endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_PPC_PMAC - &pmac_md, -#endif /* CONFIG_PPC_PMAC */ -#ifdef CONFIG_PPC_MAPLE - &maple_md, -#endif /* CONFIG_PPC_MAPLE */ -#ifdef CONFIG_PPC_CELL - &cell_md, -#endif -#ifdef CONFIG_PPC_ISERIES - &iseries_md, -#endif - NULL -}; - /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -207,12 +176,10 @@ static struct machdep_calls __initdata *machines[] = { void __init early_setup(unsigned long dt_ptr) { - static struct machdep_calls **mach; - /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - DBG(" -> early_setup()\n"); + DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr); /* * Do early initializations using the flattened device @@ -229,22 +196,8 @@ void __init early_setup(unsigned long dt_ptr) get_paca()->stab_real = __pa((u64)&initial_stab); get_paca()->stab_addr = (u64)&initial_stab; - /* - * Iterate all ppc_md structures until we find the proper - * one for the current machine type - */ - DBG("Probing machine type for platform %x...\n", _machine); - - for (mach = machines; *mach; mach++) { - if ((*mach)->probe(_machine)) - break; - } - /* What can we do if we didn't find ? 
*/ - if (*mach == NULL) { - DBG("No suitable machine found !\n"); - for (;;); - } - ppc_md = **mach; + /* Probe the machine type */ + probe_machine(); #ifdef CONFIG_CRASH_DUMP kdump_setup(); @@ -346,7 +299,7 @@ static void __init initialize_cache_info(void) const char *dc, *ic; /* Then read cache informations */ - if (_machine == PLATFORM_POWERMAC) { + if (machine_is(powermac)) { dc = "d-cache-block-size"; ic = "i-cache-block-size"; } else { @@ -490,7 +443,6 @@ void __init setup_system(void) printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller); - printk("platform = 0x%x\n", _machine); printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size()); printk("ppc64_caches.dcache_line_size = 0x%x\n", ppc64_caches.dline_size); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 98660aedeeb7..27600c9432bc 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -97,7 +97,6 @@ static DEFINE_SPINLOCK(die_lock); int die(const char *str, struct pt_regs *regs, long err) { static int die_counter, crash_dump_start = 0; - int nl = 0; if (debugger(regs)) return 1; @@ -106,7 +105,7 @@ int die(const char *str, struct pt_regs *regs, long err) spin_lock_irq(&die_lock); bust_spinlocks(1); #ifdef CONFIG_PMAC_BACKLIGHT - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { set_backlight_enable(1); set_backlight_level(BACKLIGHT_MAX); } @@ -114,46 +113,18 @@ int die(const char *str, struct pt_regs *regs, long err) printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); #ifdef CONFIG_PREEMPT printk("PREEMPT "); - nl = 1; #endif #ifdef CONFIG_SMP printk("SMP NR_CPUS=%d ", NR_CPUS); - nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC printk("DEBUG_PAGEALLOC "); - nl = 1; #endif #ifdef CONFIG_NUMA printk("NUMA "); - nl = 1; #endif -#ifdef CONFIG_PPC64 - switch (_machine) { - case PLATFORM_PSERIES: - printk("PSERIES "); - nl = 1; - break; - case PLATFORM_PSERIES_LPAR: - printk("PSERIES LPAR "); - nl = 1; - break; - case PLATFORM_ISERIES_LPAR: - printk("ISERIES LPAR "); - nl = 1; - break; - case PLATFORM_POWERMAC: - printk("POWERMAC "); - nl = 1; - break; - case PLATFORM_CELL: - printk("CELL "); - nl = 1; - break; - } -#endif - if (nl) - printk("\n"); + printk("%s\n", ppc_md.name ? "" : ppc_md.name); + print_modules(); show_regs(regs); bust_spinlocks(0); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ec8370368423..573afb68d69e 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -667,7 +668,13 @@ void __init vdso_init(void) vdso_data->version.major = SYSTEMCFG_MAJOR; vdso_data->version.minor = SYSTEMCFG_MINOR; vdso_data->processor = mfspr(SPRN_PVR); - vdso_data->platform = _machine; + /* + * Fake the old platform number for pSeries and iSeries and add + * in LPAR bit if necessary + */ + vdso_data->platform = machine_is(iseries) ? 
0x200 : 0x100; + if (firmware_has_feature(FW_FEATURE_LPAR)) + vdso_data->platform |= 1; vdso_data->physicalMemorySize = lmb_phys_mem_size(); vdso_data->dcache_size = ppc64_caches.dsize; vdso_data->dcache_line_size = ppc64_caches.dline_size; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7fa7b15fd8e6..fe79c2584cb0 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -1,9 +1,11 @@ #include #ifdef CONFIG_PPC64 #include +#define PROVIDE32(x) PROVIDE(__unused__##x) #else #define PAGE_SIZE 4096 #define KERNELBASE CONFIG_KERNEL_START +#define PROVIDE32(x) PROVIDE(x) #endif #include @@ -18,43 +20,42 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.exit.data) - } - - . = KERNELBASE; - - /* Read-only sections, merged into text segment: */ - .text : { - *(.text .text.*) - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) -#ifdef CONFIG_PPC32 - *(.got1) - __got2_start = .; - *(.got2) - __got2_end = .; -#else - . = ALIGN(PAGE_SIZE); - _etext = .; -#endif - } -#ifdef CONFIG_PPC32 - _etext = .; - PROVIDE (etext = .); + /* Sections to be discarded. */ + /DISCARD/ : { + *(.exitcall.exit) + *(.exit.data) + } - RODATA - .fini : { *(.fini) } =0 - .ctors : { *(.ctors) } - .dtors : { *(.dtors) } + . = KERNELBASE; - .fixup : { *(.fixup) } -#endif +/* + * Text, read only data and other permanent read-only sections + */ + + /* Text and gots */ + .text : { + *(.text .text.*) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.fixup) +#ifdef CONFIG_PPC32 + *(.got1) + __got2_start = .; + *(.got2) + __got2_end = .; +#endif /* CONFIG_PPC32 */ + + . = ALIGN(PAGE_SIZE); + _etext = .; + PROVIDE32 (etext = .); + } + + /* Read-only data */ + RODATA + + /* Exception & bug tables */ __ex_table : { __start___ex_table = .; *(__ex_table) @@ -67,192 +68,172 @@ SECTIONS __stop___bug_table = .; } -#ifdef CONFIG_PPC64 +/* + * Init sections discarded at runtime + */ + . = ALIGN(PAGE_SIZE); + __init_begin = .; + + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } + + /* .exit.text is discarded at runtime, not link time, + * to deal with references from __bug_table + */ + .exit.text : { *(.exit.text) } + + .init.data : { + *(.init.data); + __vtop_table_begin = .; + *(.vtop_fixup); + __vtop_table_end = .; + __ptov_table_begin = .; + *(.ptov_fixup); + __ptov_table_end = .; + } + + . = ALIGN(16); + .init.setup : { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + + .initcall.init : { + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + } + + .con_initcall.init : { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + SECURITY_INIT + + . = ALIGN(8); __ftr_fixup : { __start___ftr_fixup = .; *(__ftr_fixup) __stop___ftr_fixup = .; } - RODATA -#endif + . = ALIGN(PAGE_SIZE); + .init.ramfs : { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } #ifdef CONFIG_PPC32 - /* Read-write section, merged into data segment: */ - . = ALIGN(PAGE_SIZE); - _sdata = .; - .data : - { - *(.data) - *(.data1) - *(.sdata) - *(.sdata2) - *(.got.plt) *(.got) - *(.dynamic) - CONSTRUCTORS - } - - . = ALIGN(PAGE_SIZE); - __nosave_begin = .; - .data_nosave : { *(.data.nosave) } - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - - . 
= ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - _edata = .; - PROVIDE (edata = .); - - . = ALIGN(8192); - .data.init_task : { *(.data.init_task) } + . = ALIGN(32); +#else + . = ALIGN(128); #endif + .data.percpu : { + __per_cpu_start = .; + *(.data.percpu) + __per_cpu_end = .; + } - /* will be freed after init */ - . = ALIGN(PAGE_SIZE); - __init_begin = .; - .init.text : { - _sinittext = .; - *(.init.text) - _einittext = .; - } -#ifdef CONFIG_PPC32 - /* .exit.text is discarded at runtime, not link time, - to deal with references from __bug_table */ - .exit.text : { *(.exit.text) } -#endif - .init.data : { - *(.init.data); - __vtop_table_begin = .; - *(.vtop_fixup); - __vtop_table_end = .; - __ptov_table_begin = .; - *(.ptov_fixup); - __ptov_table_end = .; - } - - . = ALIGN(16); - .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - - .initcall.init : { - __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) - __initcall_end = .; - } - - .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - - SECURITY_INIT + . = ALIGN(8); + .machine.desc : { + __machine_desc_start = . ; + *(.machine.desc) + __machine_desc_end = . ; + } + + /* freed after init ends here */ + . = ALIGN(PAGE_SIZE); + __init_end = .; + +/* + * And now the various read/write data + */ + + . = ALIGN(PAGE_SIZE); + _sdata = .; #ifdef CONFIG_PPC32 - __start___ftr_fixup = .; - __ftr_fixup : { *(__ftr_fixup) } - __stop___ftr_fixup = .; + .data : + { + *(.data) + *(.sdata) + *(.got.plt) *(.got) + } #else - . = ALIGN(PAGE_SIZE); - .init.ramfs : { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) + } -#ifdef CONFIG_PPC32 - . = ALIGN(32); + .opd : { + *(.opd) + } + + .got : { + __toc_start = .; + *(.got) + *(.toc) + } #endif - .data.percpu : { - __per_cpu_start = .; - *(.data.percpu) - __per_cpu_end = .; - } - . = ALIGN(PAGE_SIZE); -#ifdef CONFIG_PPC64 - . = ALIGN(16384); - __init_end = .; - /* freed after init ends here */ - - /* Read/write sections */ - . = ALIGN(PAGE_SIZE); - . = ALIGN(16384); - _sdata = .; - /* The initial task and kernel stack */ - .data.init_task : { - *(.data.init_task) - } - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : { - *(.data.page_aligned) - } - - .data.cacheline_aligned : { - *(.data.cacheline_aligned) - } - - .data : { - *(.data .data.rel* .toc1) - *(.branch_lt) - } - - .opd : { - *(.opd) - } - - .got : { - __toc_start = .; - *(.got) - *(.toc) - . = ALIGN(PAGE_SIZE); - _edata = .; - } - - . = ALIGN(PAGE_SIZE); + . = ALIGN(PAGE_SIZE); + _edata = .; + PROVIDE32 (edata = .); + + /* The initial task and kernel stack */ +#ifdef CONFIG_PPC32 + . = ALIGN(8192); #else - __initramfs_start = .; - .init.ramfs : { - *(.init.ramfs) - } - __initramfs_end = .; + . = ALIGN(16384); +#endif + .data.init_task : { + *(.data.init_task) + } - . = ALIGN(4096); - __init_end = .; + . = ALIGN(PAGE_SIZE); + .data.page_aligned : { + *(.data.page_aligned) + } - . = ALIGN(4096); - _sextratext = .; - _eextratext = .; + .data.cacheline_aligned : { + *(.data.cacheline_aligned) + } - __bss_start = .; -#endif + . = ALIGN(PAGE_SIZE); + __data_nosave : { + __nosave_begin = .; + *(.data.nosave) + . 
= ALIGN(PAGE_SIZE); + __nosave_end = .; + } - .bss : { - __bss_start = .; - *(.sbss) *(.scommon) - *(.dynbss) - *(.bss) - *(COMMON) - __bss_stop = .; - } +/* + * And finally the bss + */ + + .bss : { + __bss_start = .; + *(.sbss) *(.scommon) + *(.dynbss) + *(.bss) + *(COMMON) + __bss_stop = .; + } -#ifdef CONFIG_PPC64 - . = ALIGN(PAGE_SIZE); -#endif - _end = . ; -#ifdef CONFIG_PPC32 - PROVIDE (end = .); -#endif + . = ALIGN(PAGE_SIZE); + _end = . ; + PROVIDE32 (end = .); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 89b35c181314..c006d9039633 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -167,7 +167,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, * normal insert callback here. */ #ifdef CONFIG_PPC_ISERIES - if (_machine == PLATFORM_ISERIES_LPAR) + if (machine_is(iseries)) ret = iSeries_hpte_insert(hpteg, va, paddr, tmp_mode, @@ -176,7 +176,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, else #endif #ifdef CONFIG_PPC_PSERIES - if (_machine & PLATFORM_LPAR) + if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) ret = pSeries_lpar_hpte_insert(hpteg, va, paddr, tmp_mode, @@ -295,8 +295,7 @@ static void __init htab_init_page_sizes(void) * Not in the device-tree, let's fallback on known size * list for 16M capable GP & GR */ - if ((_machine != PLATFORM_ISERIES_LPAR) && - cpu_has_feature(CPU_FTR_16M_PAGE)) + if (cpu_has_feature(CPU_FTR_16M_PAGE) && !machine_is(iseries)) memcpy(mmu_psize_defs, mmu_psize_defaults_gp, sizeof(mmu_psize_defaults_gp)); found: diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index fec8e65b36ea..dac5d0365fde 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -195,9 +195,13 @@ static void __init cell_init_early(void) } -static int __init cell_probe(int platform) +static int __init cell_probe(void) { - if (platform != PLATFORM_CELL) + /* XXX This is temporary, the Cell maintainer will come up with + * more appropriate detection logic + */ + unsigned long root = of_get_flat_dt_root(); + if (!of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) return 0; return 1; @@ -212,7 +216,8 @@ static int cell_check_legacy_ioport(unsigned int baseport) return -ENODEV; } -struct machdep_calls __initdata cell_md = { +define_machine(cell) { + .name = "Cell", .probe = cell_probe, .setup_arch = cell_setup_arch, .init_early = cell_init_early, diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 9c718bb2305e..23a201718704 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -292,6 +292,10 @@ void __init chrp_setup_arch(void) pci_create_OF_bus_map(); +#ifdef CONFIG_SMP + smp_ops = &chrp_smp_ops; +#endif /* CONFIG_SMP */ + /* * Print the banner, then scroll down so boot progress * can be printed. -- Cort @@ -506,8 +510,15 @@ chrp_init2(void) ppc_md.progress(" Have fun! 
", 0x7777); } -void __init chrp_init(void) +static int __init chrp_probe(void) { + char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(), + "device_type", NULL); + if (dtype == NULL) + return 0; + if (strcmp(dtype, "chrp")) + return 0; + ISA_DMA_THRESHOLD = ~0L; DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index 155aa690e4bb..6ce8a404ba6b 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -675,9 +675,10 @@ static void iseries_dedicated_idle(void) void __init iSeries_init_IRQ(void) { } #endif -static int __init iseries_probe(int platform) +static int __init iseries_probe(void) { - if (PLATFORM_ISERIES_LPAR != platform) + unsigned long root = of_get_flat_dt_root(); + if (!of_flat_dt_is_compatible(root, "IBM,iSeries")) return 0; powerpc_firmware_features |= FW_FEATURE_ISERIES; @@ -686,7 +687,8 @@ static int __init iseries_probe(int platform) return 1; } -struct machdep_calls __initdata iseries_md = { +define_machine(iseries) { + .name = "iSeries", .setup_arch = iSeries_setup_arch, .show_cpuinfo = iSeries_show_cpuinfo, .init_IRQ = iSeries_init_IRQ, @@ -930,7 +932,6 @@ void build_flat_dt(struct iseries_flat_dt *dt, unsigned long phys_mem_size) /* /chosen */ dt_start_node(dt, "chosen"); - dt_prop_u32(dt, "linux,platform", PLATFORM_ISERIES_LPAR); dt_prop_str(dt, "bootargs", cmd_line); if (cmd_mem_limit) dt_prop_u64(dt, "linux,memory-limit", cmd_mem_limit); diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 137d6063182b..24c0aef4ea39 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -259,9 +259,10 @@ static void __init maple_progress(char *s, unsigned short hex) /* * Called very early, MMU is off, device-tree isn't unflattened */ -static int __init maple_probe(int platform) +static int __init maple_probe(void) { - if (platform != PLATFORM_MAPLE) + unsigned long root = of_get_flat_dt_root(); + if (!of_flat_dt_is_compatible(root, "Momentum,Maple")) return 0; /* * On U3, the DART (iommu) must be allocated now since it @@ -274,7 +275,8 @@ static int __init maple_probe(int platform) return 1; } -struct machdep_calls __initdata maple_md = { +define_machine(maple_md) { + .name = "Maple", .probe = maple_probe, .setup_arch = maple_setup_arch, .init_early = maple_init_early, diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index a94571be65ca..eacbfd9beabc 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -161,9 +161,7 @@ static void __init bootx_dt_add_prop(char *name, void *data, int size, static void __init bootx_add_chosen_props(unsigned long base, unsigned long *mem_end) { - u32 val = _MACH_Pmac; - - bootx_dt_add_prop("linux,platform", &val, 4, mem_end); + u32 val; if (bootx_info->kernelParamsOffset) { char *args = (char *)((unsigned long)bootx_info) + diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index e49eddd5042d..a5063cd675c5 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -2951,7 +2951,7 @@ static void *pmac_early_vresume_data; void pmac_set_early_video_resume(void (*proc)(void *data), void *data) { - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return; preempt_disable(); pmac_early_vresume_proc = proc; diff --git 
a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 87eb6bb7f0e7..e14f9ac55cf4 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -1457,6 +1457,9 @@ int __init pmac_i2c_init(void) return 0; i2c_inited = 1; + if (!machine_is(powermac)) + return 0; + /* Probe keywest-i2c busses */ kw_i2c_probe(); diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 3aa3477b86f7..262f967b880a 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -597,7 +597,7 @@ int __init pmac_nvram_init(void) } #ifdef CONFIG_PPC32 - if (_machine == _MACH_chrp && nvram_naddrs == 1) { + if (machine_is(chrp) && nvram_naddrs == 1) { nvram_data = ioremap(r1.start, s1); nvram_mult = 1; ppc_md.nvram_read_val = direct_nvram_read_byte; diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index de3f30e6b333..f5d8d15d74fa 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -1201,7 +1201,7 @@ void __init pmac_pcibios_after_init(void) #ifdef CONFIG_PPC32 void pmac_pci_fixup_cardbus(struct pci_dev* dev) { - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return; /* * Fix the interrupt routing on the various cardbus bridges @@ -1244,8 +1244,9 @@ void pmac_pci_fixup_pciata(struct pci_dev* dev) * On PowerMacs, we try to switch any PCI ATA controller to * fully native mode */ - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return; + /* Some controllers don't have the class IDE */ if (dev->vendor == PCI_VENDOR_ID_PROMISE) switch(dev->device) { diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index 9b7150f10414..a3bd3e728fa3 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -336,6 +336,8 @@ int __init pmac_pfunc_base_install(void) return 0; pfbase_inited = 1; + if (!machine_is(powermac)) + return 0; DBG("Installing base platform functions...\n"); diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index c2696d00672a..4d15e396655c 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -350,6 +350,13 @@ static void __init pmac_setup_arch(void) smp_ops = &psurge_smp_ops; #endif #endif /* CONFIG_SMP */ + +#ifdef CONFIG_ADB + if (strstr(cmd_line, "adb_sync")) { + extern int __adb_probe_sync; + __adb_probe_sync = 1; + } +#endif /* CONFIG_ADB */ } char *bootpath; @@ -576,30 +583,6 @@ pmac_halt(void) pmac_power_off(); } -#ifdef CONFIG_PPC32 -void __init pmac_init(void) -{ - /* isa_io_base gets set in pmac_pci_init */ - isa_mem_base = PMAC_ISA_MEM_BASE; - pci_dram_offset = PMAC_PCI_DRAM_OFFSET; - ISA_DMA_THRESHOLD = ~0L; - DMA_MODE_READ = 1; - DMA_MODE_WRITE = 2; - - ppc_md = pmac_md; - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) -#ifdef CONFIG_BLK_DEV_IDE_PMAC - ppc_ide_md.ide_init_hwif = pmac_ide_init_hwif_ports; - ppc_ide_md.default_io_base = pmac_ide_get_base; -#endif /* CONFIG_BLK_DEV_IDE_PMAC */ -#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */ - - if (ppc_md.progress) ppc_md.progress("pmac_init(): exit", 0); - -} -#endif - /* * Early initialization. 
*/ @@ -646,6 +629,12 @@ static int __init pmac_declare_of_platform_devices(void) { struct device_node *np; + if (machine_is(chrp)) + return -1; + + if (!machine_is(powermac)) + return 0; + np = of_find_node_by_name(NULL, "valkyrie"); if (np) of_platform_device_create(np, "valkyrie", NULL); @@ -666,12 +655,15 @@ device_initcall(pmac_declare_of_platform_devices); /* * Called very early, MMU is off, device-tree isn't unflattened */ -static int __init pmac_probe(int platform) +static int __init pmac_probe(void) { -#ifdef CONFIG_PPC64 - if (platform != PLATFORM_POWERMAC) + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "Power Macintosh") && + !of_flat_dt_is_compatible(root, "MacRISC")) return 0; +#ifdef CONFIG_PPC64 /* * On U3, the DART (iommu) must be allocated now since it * has an impact on htab_initialize (due to the large page it @@ -681,6 +673,23 @@ static int __init pmac_probe(int platform) alloc_dart_table(); #endif +#ifdef CONFIG_PPC32 + /* isa_io_base gets set in pmac_pci_init */ + isa_mem_base = PMAC_ISA_MEM_BASE; + pci_dram_offset = PMAC_PCI_DRAM_OFFSET; + ISA_DMA_THRESHOLD = ~0L; + DMA_MODE_READ = 1; + DMA_MODE_WRITE = 2; + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +#ifdef CONFIG_BLK_DEV_IDE_PMAC + ppc_ide_md.ide_init_hwif = pmac_ide_init_hwif_ports; + ppc_ide_md.default_io_base = pmac_ide_get_base; +#endif /* CONFIG_BLK_DEV_IDE_PMAC */ +#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */ + +#endif /* CONFIG_PPC32 */ + #ifdef CONFIG_PMAC_SMU /* * SMU based G5s need some memory below 2Gb, at least the current @@ -709,10 +718,8 @@ static int pmac_pci_probe_mode(struct pci_bus *bus) } #endif -struct machdep_calls __initdata pmac_md = { -#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC64) - .cpu_die = generic_mach_cpu_die, -#endif +define_machine(powermac) { + .name = "PowerMac", .probe = pmac_probe, .setup_arch = pmac_setup_arch, .init_early = pmac_init_early, @@ -746,4 +753,7 @@ struct machdep_calls __initdata pmac_md = { .pcibios_after_init = pmac_pcibios_after_init, .phys_mem_access_prot = pci_phys_mem_access_prot, #endif +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC64) + .cpu_die = generic_mach_cpu_die, +#endif }; diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 5d9afa1fa02d..890758aa9667 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -336,10 +336,10 @@ static struct pmu_sleep_notifier time_sleep_notifier = { */ void __init pmac_calibrate_decr(void) { -#ifdef CONFIG_PM +#if defined(CONFIG_PM) && defined(CONFIG_ADB_PMU) /* XXX why here? 
*/ pmu_register_sleep_notifier(&time_sleep_notifier); -#endif /* CONFIG_PM */ +#endif generic_calibrate_decr(); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 2ab9dcdfb415..9b2b1cb117b3 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -1018,7 +1018,7 @@ static int __init eeh_init_proc(void) { struct proc_dir_entry *e; - if (platform_is_pseries()) { + if (machine_is(pseries)) { e = create_proc_entry("ppc64/eeh", 0, NULL); if (e) e->proc_fops = &proc_eeh_operations; diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 946ad59e3352..e97e67f5e079 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -120,7 +120,7 @@ static void fixup_winbond_82c105(struct pci_dev* dev) int i; unsigned int reg; - if (!platform_is_pseries()) + if (!machine_is(pseries)) return; printk("Using INTC for W82c105 IDE controller.\n"); diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index b3c2dcb1e4f0..6bfacc217085 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -28,6 +28,7 @@ #include #include #include +#include static struct pci_bus * find_bus_among_children(struct pci_bus *bus, diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 86cfa6ecdcf3..9a4b592e40bc 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -17,8 +17,9 @@ #include #include -#include +#include #include +#include @@ -508,7 +509,7 @@ static int proc_ppc64_create_ofdt(void) { struct proc_dir_entry *ent; - if (!platform_is_pseries()) + if (!machine_is(pseries)) return 0; ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL); diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index a6f628d4c9dc..fcc4d561a236 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -27,6 +27,7 @@ #include #include #include +#include #if 0 #define DEBUG(A...) printk(KERN_ERR A) @@ -481,7 +482,7 @@ static int __init rtas_init(void) { struct proc_dir_entry *entry; - if (!platform_is_pseries()) + if (!machine_is(pseries)) return 0; /* No RTAS */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 1b0c4c034a26..b2fbf8ba8fbb 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -372,24 +372,42 @@ static int pSeries_check_legacy_ioport(unsigned int baseport) /* * Called very early, MMU is off, device-tree isn't unflattened */ -extern struct machdep_calls pSeries_md; -static int __init pSeries_probe(int platform) +static int __init pSeries_probe_hypertas(unsigned long node, + const char *uname, int depth, + void *data) { - if (platform != PLATFORM_PSERIES && - platform != PLATFORM_PSERIES_LPAR) + if (depth != 1 || + (strcmp(uname, "rtas") != 0 && strcmp(uname, "rtas@0") != 0)) + return 0; + + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL) != NULL) + powerpc_firmware_features |= FW_FEATURE_LPAR; + + return 1; +} + +static int __init pSeries_probe(void) +{ + char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(), + "device_type", NULL); + if (dtype == NULL) + return 0; + if (strcmp(dtype, "chrp")) return 0; - /* if we have some ppc_md fixups for LPAR to do, do - * it here ... 
- */ + DBG("pSeries detected, looking for LPAR capability...\n"); - if (platform == PLATFORM_PSERIES_LPAR) - powerpc_firmware_features |= FW_FEATURE_LPAR; + /* Now try to figure out if we are running on LPAR */ + of_scan_flat_dt(pSeries_probe_hypertas, NULL); + + DBG("Machine is%s LPAR !\n", + (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); return 1; } + DECLARE_PER_CPU(unsigned long, smt_snooze_delay); static void pseries_dedicated_idle_sleep(void) @@ -501,7 +519,8 @@ static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) } #endif -struct machdep_calls __initdata pSeries_md = { +define_machine(pseries) { + .name = "pSeries", .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, .init_early = pSeries_init_early, diff --git a/arch/ppc/platforms/prep_setup.c b/arch/ppc/platforms/prep_setup.c index a0fc628ffb1e..fa46f8ce5c71 100644 --- a/arch/ppc/platforms/prep_setup.c +++ b/arch/ppc/platforms/prep_setup.c @@ -1067,15 +1067,13 @@ prep_map_io(void) static int __init prep_request_io(void) { - if (_machine == _MACH_prep) { #ifdef CONFIG_NVRAM - request_region(PREP_NVRAM_AS0, 0x8, "nvram"); + request_region(PREP_NVRAM_AS0, 0x8, "nvram"); #endif - request_region(0x00,0x20,"dma1"); - request_region(0x40,0x20,"timer"); - request_region(0x80,0x10,"dma page reg"); - request_region(0xc0,0x20,"dma2"); - } + request_region(0x00,0x20,"dma1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xc0,0x20,"dma2"); return 0; } diff --git a/drivers/char/generic_nvram.c b/drivers/char/generic_nvram.c index 1b5e01e6e129..43ff59816511 100644 --- a/drivers/char/generic_nvram.c +++ b/drivers/char/generic_nvram.c @@ -22,6 +22,9 @@ #include #include #include +#ifdef CONFIG_PPC_PMAC +#include +#endif #define NVRAM_SIZE 8192 @@ -92,7 +95,7 @@ static int nvram_ioctl(struct inode *inode, struct file *file, case IOC_NVRAM_GET_OFFSET: { int part, offset; - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return -EINVAL; if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) return -EFAULT; diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c index c85b87cb59d1..3e677c4f8c28 100644 --- a/drivers/ide/pci/via82cxxx.c +++ b/drivers/ide/pci/via82cxxx.c @@ -440,7 +440,7 @@ static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif) #if defined(CONFIG_PPC_CHRP) && defined(CONFIG_PPC32) - if(_machine == _MACH_chrp && _chrp_type == _CHRP_Pegasos) { + if(machine_is(chrp) && _chrp_type == _CHRP_Pegasos) { hwif->irq = hwif->channel ? 
15 : 14; } #endif diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index 5013b1285e22..78e30f803671 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1677,7 +1677,7 @@ MODULE_DEVICE_TABLE(pci, pmac_ide_pci_match); void __init pmac_ide_probe(void) { - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return; #ifdef CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c index b6b96fa04d62..314f35540034 100644 --- a/drivers/ieee1394/ohci1394.c +++ b/drivers/ieee1394/ohci1394.c @@ -3526,7 +3526,7 @@ static void ohci1394_pci_remove(struct pci_dev *pdev) static int ohci1394_pci_resume (struct pci_dev *pdev) { #ifdef CONFIG_PPC_PMAC - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { struct device_node *of_node; /* Re-enable 1394 */ @@ -3545,7 +3545,7 @@ static int ohci1394_pci_resume (struct pci_dev *pdev) static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state) { #ifdef CONFIG_PPC_PMAC - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { struct device_node *of_node; /* Disable 1394 */ diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index d2ead1776c16..781d93b0bbd5 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -42,6 +42,7 @@ #include #ifdef CONFIG_PPC #include +#include #endif @@ -294,7 +295,7 @@ int __init adb_init(void) int i; #ifdef CONFIG_PPC32 - if ( (_machine != _MACH_chrp) && (_machine != _MACH_Pmac) ) + if (!machine_is(chrp) && !machine_is(powermac)) return 0; #endif #ifdef CONFIG_MAC diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c index c0b46bceb5df..0a4c680d4ac0 100644 --- a/drivers/macintosh/adbhid.c +++ b/drivers/macintosh/adbhid.c @@ -1206,8 +1206,8 @@ init_ms_a3(int id) static int __init adbhid_init(void) { #ifndef CONFIG_MAC - if ( (_machine != _MACH_chrp) && (_machine != _MACH_Pmac) ) - return 0; + if (!machine_is(chrp) && !machine_is(powermac)) + return 0; #endif led_request.complete = 1; diff --git a/drivers/macintosh/mediabay.c b/drivers/macintosh/mediabay.c index 8dbf2852bae0..53c1c7909413 100644 --- a/drivers/macintosh/mediabay.c +++ b/drivers/macintosh/mediabay.c @@ -839,8 +839,8 @@ static int __init media_bay_init(void) media_bays[i].cd_index = -1; #endif } - if (_machine != _MACH_Pmac) - return -ENODEV; + if (!machine_is(powermac)) + return 0; macio_register_driver(&media_bay_driver); diff --git a/drivers/media/video/planb.c b/drivers/media/video/planb.c index 522e9ddeb089..d9e3cada52f4 100644 --- a/drivers/media/video/planb.c +++ b/drivers/media/video/planb.c @@ -2156,7 +2156,7 @@ static int find_planb(void) struct pci_dev *pdev; int rc; - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return 0; planb_devices = find_devices("planb"); diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index ee48bfd67349..46d087c5467b 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -4160,7 +4160,7 @@ get_hw_addr(struct net_device *dev) ** If the address starts with 00 a0, we have to bit-reverse ** each byte of the address. 
*/ - if ( (_machine & _MACH_Pmac) && + if ( machine_is(powermac) && (dev->dev_addr[0] == 0) && (dev->dev_addr[1] == 0xa0) ) { diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index d6d2125f9044..f852421002ef 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -1748,7 +1748,7 @@ static int mesh_host_reset(struct scsi_cmnd *cmd) static void set_mesh_power(struct mesh_state *ms, int state) { - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return; if (state) { pmac_call_feature(PMAC_FTR_MESH_ENABLE, macio_get_of_node(ms->mdev), 0, 1); diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index e0afb5ad29e5..0d2193b69235 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -296,7 +296,7 @@ done: #ifdef CONFIG_PPC_PMAC /* Disable ASIC clocks for USB */ - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { struct device_node *of_node; of_node = pci_device_to_OF_node (dev); @@ -331,7 +331,7 @@ int usb_hcd_pci_resume (struct pci_dev *dev) #ifdef CONFIG_PPC_PMAC /* Reenable ASIC clocks for USB */ - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { struct device_node *of_node; of_node = pci_device_to_OF_node (dev); diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c index 620c9a934e0e..f07be22e119d 100644 --- a/drivers/video/aty/aty128fb.c +++ b/drivers/video/aty/aty128fb.c @@ -67,6 +67,7 @@ #include #ifdef CONFIG_PPC_PMAC +#include #include #include #include @@ -1748,7 +1749,7 @@ static int __init aty128_init(struct pci_dev *pdev, const struct pci_device_id * var = default_var; #ifdef CONFIG_PPC_PMAC - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { /* Indicate sleep capability */ if (par->chip_gen == rage_M3) { pmac_call_feature(PMAC_FTR_DEVICE_CAN_WAKE, NULL, 0, 1); @@ -2011,7 +2012,7 @@ static int aty128fb_blank(int blank, struct fb_info *fb) return 0; #ifdef CONFIG_PMAC_BACKLIGHT - if ((_machine == _MACH_Pmac) && blank) + if (machine_is(powermac) && blank) set_backlight_enable(0); #endif /* CONFIG_PMAC_BACKLIGHT */ @@ -2029,7 +2030,7 @@ static int aty128fb_blank(int blank, struct fb_info *fb) aty128_set_lcd_enable(par, par->lcd_on && !blank); } #ifdef CONFIG_PMAC_BACKLIGHT - if ((_machine == _MACH_Pmac) && !blank) + if (machine_is(powermac) && !blank) set_backlight_enable(1); #endif /* CONFIG_PMAC_BACKLIGHT */ return 0; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 485be386a8ff..1b1f24e2bfbe 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -75,6 +75,7 @@ #include "ati_ids.h" #ifdef __powerpc__ +#include #include #include "../macmodes.h" #endif @@ -2516,7 +2517,7 @@ static int __init aty_init(struct fb_info *info, const char *name) memset(&var, 0, sizeof(var)); #ifdef CONFIG_PPC - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { /* * FIXME: The NVRAM stuff should be put in a Mac-specific file, as it * applies to all Mac video cards @@ -2671,7 +2672,7 @@ static int atyfb_blank(int blank, struct fb_info *info) return 0; #ifdef CONFIG_PMAC_BACKLIGHT - if ((_machine == _MACH_Pmac) && blank > FB_BLANK_NORMAL) + if (machine_is(powermac) && blank > FB_BLANK_NORMAL) set_backlight_enable(0); #elif defined(CONFIG_FB_ATY_GENERIC_LCD) if (par->lcd_table && blank > FB_BLANK_NORMAL && @@ -2703,7 +2704,7 @@ static int atyfb_blank(int blank, struct fb_info *info) aty_st_le32(CRTC_GEN_CNTL, gen_cntl, par); #ifdef CONFIG_PMAC_BACKLIGHT - if ((_machine == _MACH_Pmac) && blank <= FB_BLANK_NORMAL) + if 
(machine_is(powermac) && blank <= FB_BLANK_NORMAL) set_backlight_enable(1); #elif defined(CONFIG_FB_ATY_GENERIC_LCD) if (par->lcd_table && blank <= FB_BLANK_NORMAL && diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c index 5886a2f1323e..c7091761cef4 100644 --- a/drivers/video/aty/radeon_pm.c +++ b/drivers/video/aty/radeon_pm.c @@ -20,7 +20,7 @@ #include #ifdef CONFIG_PPC_PMAC -#include +#include #include #include #endif @@ -2745,7 +2745,7 @@ void radeonfb_pm_init(struct radeonfb_info *rinfo, int dynclk) rinfo->pm_mode |= radeon_pm_off; } #if defined(CONFIG_PPC_PMAC) - if (_machine == _MACH_Pmac && rinfo->of_node) { + if (machine_is(powermac) && rinfo->of_node) { if (rinfo->is_mobility && rinfo->pm_reg && rinfo->family <= CHIP_FAMILY_RV250) rinfo->pm_mode |= radeon_pm_d2; diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c index 66d6f2f0a219..1103010af54a 100644 --- a/drivers/video/cirrusfb.c +++ b/drivers/video/cirrusfb.c @@ -60,8 +60,8 @@ #include #endif #ifdef CONFIG_PPC_PREP -#include -#define isPReP (_machine == _MACH_prep) +#include +#define isPReP (machine_is(prep)) #else #define isPReP 0 #endif diff --git a/drivers/video/matrox/matroxfb_base.c b/drivers/video/matrox/matroxfb_base.c index 951c9974a1d3..23c1827b2d0b 100644 --- a/drivers/video/matrox/matroxfb_base.c +++ b/drivers/video/matrox/matroxfb_base.c @@ -115,6 +115,7 @@ #include #ifdef CONFIG_PPC_PMAC +#include unsigned char nvram_read_byte(int); static int default_vmode = VMODE_NVRAM; static int default_cmode = CMODE_NVRAM; @@ -1833,7 +1834,7 @@ static int initMatrox2(WPMINFO struct board* b){ /* FIXME: Where to move this?! */ #if defined(CONFIG_PPC_PMAC) #ifndef MODULE - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { struct fb_var_screeninfo var; if (default_vmode <= 0 || default_vmode > VMODE_MAX) default_vmode = VMODE_640_480_60; diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c index a7c4e5e8ead6..7258b3245316 100644 --- a/drivers/video/nvidia/nvidia.c +++ b/drivers/video/nvidia/nvidia.c @@ -29,6 +29,7 @@ #include #endif #ifdef CONFIG_PMAC_BACKLIGHT +#include #include #endif @@ -1353,7 +1354,7 @@ static int nvidiafb_blank(int blank, struct fb_info *info) NVWriteCrtc(par, 0x1a, vesa); #ifdef CONFIG_PMAC_BACKLIGHT - if (par->FlatPanel && _machine == _MACH_Pmac) { + if (par->FlatPanel && machine_is(powermac)) { set_backlight_enable(!blank); } #endif @@ -1688,7 +1689,7 @@ static int __devinit nvidiafb_probe(struct pci_dev *pd, info->fix.id, par->FbMapSize / (1024 * 1024), info->fix.smem_start); #ifdef CONFIG_PMAC_BACKLIGHT - if (par->FlatPanel && _machine == _MACH_Pmac) + if (par->FlatPanel && machine_is(powermac)) register_backlight_controller(&nvidia_backlight_controller, par, "mnca"); #endif diff --git a/drivers/video/radeonfb.c b/drivers/video/radeonfb.c index db9fb9074dbc..04820fab964c 100644 --- a/drivers/video/radeonfb.c +++ b/drivers/video/radeonfb.c @@ -1596,7 +1596,7 @@ static int radeonfb_blank (int blank, struct fb_info *info) return 0; #ifdef CONFIG_PMAC_BACKLIGHT - if (rinfo->dviDisp_type == MT_LCD && _machine == _MACH_Pmac) { + if (rinfo->dviDisp_type == MT_LCD && machine_is(powermac)) { set_backlight_enable(!blank); return 0; } diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c index 6c19ab6afb01..b7bd6bb2c77c 100644 --- a/drivers/video/riva/fbdev.c +++ b/drivers/video/riva/fbdev.c @@ -49,6 +49,7 @@ #include #endif #ifdef CONFIG_PMAC_BACKLIGHT +#include #include #endif @@ -1247,7 +1248,7 @@ static int rivafb_blank(int 
blank, struct fb_info *info) CRTCout(par, 0x1a, vesa); #ifdef CONFIG_PMAC_BACKLIGHT - if ( par->FlatPanel && _machine == _MACH_Pmac) { + if ( par->FlatPanel && machine_is(powermac)) { set_backlight_enable(!blank); } #endif @@ -2037,9 +2038,9 @@ static int __devinit rivafb_probe(struct pci_dev *pd, info->fix.smem_len / (1024 * 1024), info->fix.smem_start); #ifdef CONFIG_PMAC_BACKLIGHT - if (default_par->FlatPanel && _machine == _MACH_Pmac) - register_backlight_controller(&riva_backlight_controller, - default_par, "mnca"); + if (default_par->FlatPanel && machine_is(powermac)) + register_backlight_controller(&riva_backlight_controller, + default_par, "mnca"); #endif NVTRACE_LEAVE(); return 0; diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c index bb22cdd0cb14..813292f21210 100644 --- a/fs/partitions/mac.c +++ b/fs/partitions/mac.c @@ -12,6 +12,7 @@ #include "mac.h" #ifdef CONFIG_PPC_PMAC +#include extern void note_bootable_part(dev_t dev, int part, int goodness); #endif @@ -79,7 +80,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) * If this is the first bootable partition, tell the * setup code, in case it wants to make this the root. */ - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { int goodness = 0; mac_fix_string(part->processor, 16); diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h index 758e47fe8c1e..5ed847680754 100644 --- a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -47,6 +47,7 @@ struct smp_ops_t { #endif struct machdep_calls { + char *name; #ifdef CONFIG_PPC64 void (*hpte_invalidate)(unsigned long slot, unsigned long va, @@ -85,9 +86,9 @@ struct machdep_calls { void (*iommu_dev_setup)(struct pci_dev *dev); void (*iommu_bus_setup)(struct pci_bus *bus); void (*irq_bus_setup)(struct pci_bus *bus); -#endif +#endif /* CONFIG_PPC64 */ - int (*probe)(int platform); + int (*probe)(void); void (*setup_arch)(void); void (*init_early)(void); /* Optional, may be NULL. */ @@ -207,8 +208,6 @@ struct machdep_calls { /* Called at then very end of pcibios_init() */ void (*pcibios_after_init)(void); - /* this is for modules, since _machine can be a define -- Cort */ - int ppc_machine; #endif /* CONFIG_PPC32 */ /* Called to shutdown machine specific hardware not already controlled @@ -244,7 +243,26 @@ struct machdep_calls { extern void power4_idle(void); extern void ppc6xx_idle(void); +/* + * ppc_md contains a copy of the machine description structure for the + * current platform. machine_id contains the initial address where the + * description was found during boot. 
+ */ extern struct machdep_calls ppc_md; +extern struct machdep_calls *machine_id; + +#define __machine_desc __attribute__ ((__section__ (".machine.desc"))) + +#define define_machine(name) struct machdep_calls mach_##name __machine_desc = +#define machine_is(name) \ + ({ \ + extern struct machdep_calls mach_##name \ + __attribute__((weak)); \ + machine_id == &mach_##name; \ + }) + +extern void probe_machine(void); + extern char cmd_line[COMMAND_LINE_SIZE]; #ifdef CONFIG_PPC_PMAC diff --git a/include/asm-powerpc/pmac_feature.h b/include/asm-powerpc/pmac_feature.h index 3221628130c4..d3599cc9aa74 100644 --- a/include/asm-powerpc/pmac_feature.h +++ b/include/asm-powerpc/pmac_feature.h @@ -305,7 +305,7 @@ extern void pmac_feature_init(void); extern void pmac_set_early_video_resume(void (*proc)(void *data), void *data); extern void pmac_call_early_video_resume(void); -#define PMAC_FTR_DEF(x) ((_MACH_Pmac << 16) | (x)) +#define PMAC_FTR_DEF(x) ((0x6660000) | (x)) /* The AGP driver registers itself here */ extern void pmac_register_agp_pm(struct pci_dev *bridge, diff --git a/include/asm-powerpc/processor.h b/include/asm-powerpc/processor.h index 57643b5b782f..93f83efeb310 100644 --- a/include/asm-powerpc/processor.h +++ b/include/asm-powerpc/processor.h @@ -22,22 +22,6 @@ * -- BenH. */ -/* Platforms codes (to be obsoleted) */ -#define PLATFORM_PSERIES 0x0100 -#define PLATFORM_PSERIES_LPAR 0x0101 -#define PLATFORM_ISERIES_LPAR 0x0201 -#define PLATFORM_LPAR 0x0001 -#define PLATFORM_POWERMAC 0x0400 -#define PLATFORM_MAPLE 0x0500 -#define PLATFORM_PREP 0x0600 -#define PLATFORM_CHRP 0x0700 -#define PLATFORM_CELL 0x1000 - -/* Compat platform codes for 32 bits */ -#define _MACH_prep PLATFORM_PREP -#define _MACH_Pmac PLATFORM_POWERMAC -#define _MACH_chrp PLATFORM_CHRP - /* PREP sub-platform types see residual.h for these */ #define _PREP_Motorola 0x01 /* motorola prep */ #define _PREP_Firm 0x02 /* firmworks prep */ @@ -49,15 +33,14 @@ #define _CHRP_IBM 0x05 /* IBM chrp, the longtrail and longtrail 2 */ #define _CHRP_Pegasos 0x06 /* Genesi/bplan's Pegasos and Pegasos2 */ -#ifdef __KERNEL__ -#define platform_is_pseries() (_machine == PLATFORM_PSERIES || \ - _machine == PLATFORM_PSERIES_LPAR) +#if defined(__KERNEL__) && defined(CONFIG_PPC32) + +extern int _chrp_type; #ifdef CONFIG_PPC_PREP /* what kind of prep workstation we are */ extern int _prep_type; -extern int _chrp_type; /* * This is used to identify the board type from a given PReP board @@ -69,18 +52,12 @@ extern unsigned char ucBoardRevMaj, ucBoardRevMin; #endif /* CONFIG_PPC_PREP */ -#if defined(CONFIG_PPC_MULTIPLATFORM) -extern int _machine; - -#elif defined(CONFIG_PPC_ISERIES) -/* - * iSeries is soon to become MULTIPLATFORM hopefully ... - */ -#define _machine PLATFORM_ISERIES_LPAR -#else +#ifndef CONFIG_PPC_MULTIPLATFORM #define _machine 0 #endif /* CONFIG_PPC_MULTIPLATFORM */ -#endif /* __KERNEL__ */ + +#endif /* defined(__KERNEL__) && defined(CONFIG_PPC32) */ + /* * Default implementation of macro that returns current * instruction pointer ("program counter"). 
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h index 782e13a070a1..97ef1cd71a4d 100644 --- a/include/asm-powerpc/prom.h +++ b/include/asm-powerpc/prom.h @@ -149,12 +149,14 @@ extern struct device_node *of_node_get(struct device_node *node); extern void of_node_put(struct device_node *node); /* For scanning the flat device-tree at boot time */ -int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data); -void* __init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size); +extern int __init of_scan_flat_dt(int (*it)(unsigned long node, + const char *uname, int depth, + void *data), + void *data); +extern void* __init of_get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size); +extern int __init of_flat_dt_is_compatible(unsigned long node, const char *name); +extern unsigned long __init of_get_flat_dt_root(void); /* For updating the device tree at runtime */ extern void of_attach_node(struct device_node *); diff --git a/include/asm-powerpc/vdso_datapage.h b/include/asm-powerpc/vdso_datapage.h index 7aa92086c3fb..8a94f0eba5e9 100644 --- a/include/asm-powerpc/vdso_datapage.h +++ b/include/asm-powerpc/vdso_datapage.h @@ -55,6 +55,9 @@ struct vdso_data { __u32 minor; /* Minor number 0x14 */ } version; + /* Note about the platform flags: it now only contains the lpar + * bit. The actual platform number is dead and burried + */ __u32 platform; /* Platform flags 0x18 */ __u32 processor; /* Processor type 0x1C */ __u64 processorCount; /* # of physical processors 0x20 */ diff --git a/include/asm-ppc/machdep.h b/include/asm-ppc/machdep.h index 2723b423f675..e1a0a7b213d7 100644 --- a/include/asm-ppc/machdep.h +++ b/include/asm-ppc/machdep.h @@ -19,6 +19,18 @@ struct pci_dev; struct seq_file; struct file; +/* + * This is for compatibility with ARCH=powerpc. + */ +#define machine_is(x) __MACHINE_IS_##x +#define __MACHINE_IS_powermac 0 +#define __MACHINE_IS_chrp 0 +#ifdef CONFIG_PPC_PREP +#define __MACHINE_IS_prep 1 +#else +#define __MACHINE_IS_prep 0 +#endif + /* We export this macro for external modules like Alsa to know if * ppc_md.feature_call is implemented or not */ diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c index 6ba8d6f45fe8..a8636adef2e6 100644 --- a/sound/oss/dmasound/dmasound_awacs.c +++ b/sound/oss/dmasound/dmasound_awacs.c @@ -2814,7 +2814,7 @@ int __init dmasound_awacs_init(void) struct device_node *io = NULL, *info = NULL; int vol, res; - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return -ENODEV; awacs_subframe = 0; diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c index aa57170101fd..f0794ef9d1ac 100644 --- a/sound/ppc/pmac.c +++ b/sound/ppc/pmac.c @@ -869,7 +869,7 @@ static int __init snd_pmac_detect(struct snd_pmac *chip) u32 layout_id = 0; - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return -ENODEV; chip->subframe = 0; -- cgit v1.2.3 From a28af471b8946de052a0eb0c080d5457be93f168 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 28 Mar 2006 01:56:26 -0800 Subject: [PATCH] fs/fat/: proper prototypes for two functions Add proper prototypes for fat_cache_init() and fat_cache_destroy() in msdos_fs.h. 
Signed-off-by: Adrian Bunk Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 3 --- include/linux/msdos_fs.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 404bfc9f7385..c1ce284f8a94 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1435,9 +1435,6 @@ out_fail: EXPORT_SYMBOL_GPL(fat_fill_super); -int __init fat_cache_init(void); -void fat_cache_destroy(void); - static int __init init_fat_fs(void) { int err; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 779e6a5744c7..53cee1581650 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -420,6 +420,9 @@ extern int date_dos2unix(unsigned short time, unsigned short date); extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date); extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); +int fat_cache_init(void); +void fat_cache_destroy(void); + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From f5b95ff010d0a4e40f877277f8f0e62fcd83b5a8 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 28 Mar 2006 01:56:29 -0800 Subject: [PATCH] autofs4: proper prototype for autofs4_dentry_release() Add a proper prototype for autofs4_dentry_release() to autofs_i.h. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 3 +++ fs/autofs4/inode.c | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 617fd7b37447..c70204a61268 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -230,3 +230,6 @@ static inline int __simple_empty(struct dentry *dentry) out: return ret; } + +void autofs4_dentry_release(struct dentry *); + diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 4eddee4e76fc..fde78b110ddd 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -292,7 +292,6 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi) return ino; } -void autofs4_dentry_release(struct dentry *); static struct dentry_operations autofs4_sb_dentry_operations = { .d_release = autofs4_dentry_release, }; -- cgit v1.2.3 From 70674f95c0a2ea694d5c39f4e514f538a09be36f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 28 Mar 2006 01:56:33 -0800 Subject: [PATCH] Optimize select/poll by putting small data sets on the stack Optimize select and poll by a using stack space for small fd sets This brings back an old optimization from Linux 2.0. Using the stack is faster than kmalloc. On a Intel P4 system it speeds up a select of a single pty fd by about 13% (~4000 cycles -> ~3500) It also saves memory because a daemon hanging in select or poll will usually save one or two less pages. This can add up - e.g. if you have 10 daemons blocking in poll/select you save 40KB of memory. I did a patch for this long ago, but it was never applied. This version is a reimplementation of the old patch that tries to be less intrusive. I only did the minimal changes needed for the stack allocation. The cut off point before external memory is allocated is currently at 832bytes. The system calls always allocate this much memory on the stack. These 832 bytes are divided into 256 bytes frontend data (for the select bitmaps of the pollfds) and the rest of the space for the wait queues used by the low level drivers. 
There are some extreme cases where this won't work out for select and it falls back to allocating memory too early - especially with very sparse large select bitmaps - but the majority of processes who only have a small number of file descriptors should be ok. [TBD: 832/256 might not be the best split for select or poll] I suspect more optimizations might be possible, but they would be more complicated. One way would be to cache the select/poll context over multiple system calls because typically the input values should be similar. Problem is when to flush the file descriptors out though. Signed-off-by: Andi Kleen Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/select.c | 106 +++++++++++++++++++++++++++++++-------------------- include/linux/poll.h | 17 +++++++++ 2 files changed, 81 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 1815a57d2255..d8b4f0722b8d 100644 --- a/fs/select.c +++ b/fs/select.c @@ -29,12 +29,6 @@ #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) -struct poll_table_entry { - struct file * filp; - wait_queue_t wait; - wait_queue_head_t * wait_address; -}; - struct poll_table_page { struct poll_table_page * next; struct poll_table_entry * entry; @@ -64,13 +58,23 @@ void poll_initwait(struct poll_wqueues *pwq) init_poll_funcptr(&pwq->pt, __pollwait); pwq->error = 0; pwq->table = NULL; + pwq->inline_index = 0; } EXPORT_SYMBOL(poll_initwait); +static void free_poll_entry(struct poll_table_entry *entry) +{ + remove_wait_queue(entry->wait_address,&entry->wait); + fput(entry->filp); +} + void poll_freewait(struct poll_wqueues *pwq) { struct poll_table_page * p = pwq->table; + int i; + for (i = 0; i < pwq->inline_index; i++) + free_poll_entry(pwq->inline_entries + i); while (p) { struct poll_table_entry * entry; struct poll_table_page *old; @@ -78,8 +82,7 @@ void poll_freewait(struct poll_wqueues *pwq) entry = p->entry; do { entry--; - remove_wait_queue(entry->wait_address,&entry->wait); - fput(entry->filp); + free_poll_entry(entry); } while (entry > p->entries); old = p; p = p->next; @@ -89,12 +92,14 @@ void poll_freewait(struct poll_wqueues *pwq) EXPORT_SYMBOL(poll_freewait); -static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, - poll_table *_p) +static struct poll_table_entry *poll_get_entry(poll_table *_p) { struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt); struct poll_table_page *table = p->table; + if (p->inline_index < N_INLINE_POLL_ENTRIES) + return p->inline_entries + p->inline_index++; + if (!table || POLL_TABLE_FULL(table)) { struct poll_table_page *new_table; @@ -102,7 +107,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, if (!new_table) { p->error = -ENOMEM; __set_current_state(TASK_RUNNING); - return; + return NULL; } new_table->entry = new_table->entries; new_table->next = table; @@ -110,16 +115,21 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, table = new_table; } - /* Add a new entry */ - { - struct poll_table_entry * entry = table->entry; - table->entry = entry+1; - get_file(filp); - entry->filp = filp; - entry->wait_address = wait_address; - init_waitqueue_entry(&entry->wait, current); - add_wait_queue(wait_address,&entry->wait); - } + return table->entry++; +} + +/* Add a new entry */ +static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, + poll_table *p) +{ + struct poll_table_entry *entry = 
poll_get_entry(p); + if (!entry) + return; + get_file(filp); + entry->filp = filp; + entry->wait_address = wait_address; + init_waitqueue_entry(&entry->wait, current); + add_wait_queue(wait_address,&entry->wait); } #define FDS_IN(fds, n) (fds->in + n) @@ -284,16 +294,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) return retval; } -static void *select_bits_alloc(int size) -{ - return kmalloc(6 * size, GFP_KERNEL); -} - -static void select_bits_free(void *bits, int size) -{ - kfree(bits); -} - /* * We can actually return ERESTARTSYS instead of EINTR, but I'd * like to be certain this leads to no problems. So I return @@ -312,6 +312,8 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, char *bits; int ret, size, max_fdset; struct fdtable *fdt; + /* Allocate small arguments on the stack to save memory and be faster */ + char stack_fds[SELECT_STACK_ALLOC]; ret = -EINVAL; if (n < 0) @@ -332,7 +334,10 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, */ ret = -ENOMEM; size = FDS_BYTES(n); - bits = select_bits_alloc(size); + if (6*size < SELECT_STACK_ALLOC) + bits = stack_fds; + else + bits = kmalloc(6 * size, GFP_KERNEL); if (!bits) goto out_nofds; fds.in = (unsigned long *) bits; @@ -367,7 +372,8 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, ret = -EFAULT; out: - select_bits_free(bits, size); + if (bits != stack_fds) + kfree(bits); out_nofds: return ret; } @@ -619,6 +625,9 @@ static int do_poll(unsigned int nfds, struct poll_list *list, return count; } +#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ + sizeof(struct pollfd)) + int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) { struct poll_wqueues table; @@ -628,6 +637,9 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) struct poll_list *walk; struct fdtable *fdt; int max_fdset; + /* Allocate small arguments on the stack to save memory and be faster */ + char stack_pps[POLL_STACK_ALLOC]; + struct poll_list *stack_pp = NULL; /* Do a sanity check on nfds ... 
*/ rcu_read_lock(); @@ -645,14 +657,23 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) err = -ENOMEM; while(i!=0) { struct poll_list *pp; - pp = kmalloc(sizeof(struct poll_list)+ - sizeof(struct pollfd)* - (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i), - GFP_KERNEL); - if(pp==NULL) - goto out_fds; + int num, size; + if (stack_pp == NULL) + num = N_STACK_PPS; + else + num = POLLFD_PER_PAGE; + if (num > i) + num = i; + size = sizeof(struct poll_list) + sizeof(struct pollfd)*num; + if (!stack_pp) + stack_pp = pp = (struct poll_list *)stack_pps; + else { + pp = kmalloc(size, GFP_KERNEL); + if (!pp) + goto out_fds; + } pp->next=NULL; - pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i); + pp->len = num; if (head == NULL) head = pp; else @@ -660,7 +681,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) walk = pp; if (copy_from_user(pp->entries, ufds + nfds-i, - sizeof(struct pollfd)*pp->len)) { + sizeof(struct pollfd)*num)) { err = -EFAULT; goto out_fds; } @@ -689,7 +710,8 @@ out_fds: walk = head; while(walk!=NULL) { struct poll_list *pp = walk->next; - kfree(walk); + if (walk != stack_pp) + kfree(walk); walk = pp; } poll_freewait(&table); diff --git a/include/linux/poll.h b/include/linux/poll.h index 8e8f6098508a..51e1b56741fb 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -11,6 +11,15 @@ #include #include +/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating + additional memory. */ +#define MAX_STACK_ALLOC 832 +#define FRONTEND_STACK_ALLOC 256 +#define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC +#define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC +#define WQUEUES_STACK_ALLOC (MAX_STACK_ALLOC - FRONTEND_STACK_ALLOC) +#define N_INLINE_POLL_ENTRIES (WQUEUES_STACK_ALLOC / sizeof(struct poll_table_entry)) + struct poll_table_struct; /* @@ -33,6 +42,12 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) pt->qproc = qproc; } +struct poll_table_entry { + struct file * filp; + wait_queue_t wait; + wait_queue_head_t * wait_address; +}; + /* * Structures and helpers for sys_poll/sys_poll */ @@ -40,6 +55,8 @@ struct poll_wqueues { poll_table pt; struct poll_table_page * table; int error; + int inline_index; + struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES]; }; extern void poll_initwait(struct poll_wqueues *pwq); -- cgit v1.2.3 From e4a1f129f9e43a5e5d28fe6d1b214246a398cdce Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Mar 2006 01:56:34 -0800 Subject: [PATCH] use fget_light() in select/poll Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/select.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index d8b4f0722b8d..05cd199a1127 100644 --- a/fs/select.c +++ b/fs/select.c @@ -231,17 +231,18 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) } for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) { + int fput_needed; if (i >= n) break; if (!(bit & all_bits)) continue; - file = fget(i); + file = fget_light(i, &fput_needed); if (file) { f_op = file->f_op; mask = DEFAULT_POLLMASK; if (f_op && f_op->poll) mask = (*f_op->poll)(file, retval ? 
NULL : wait); - fput(file); + fput_light(file, fput_needed); if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; retval++; @@ -557,14 +558,15 @@ static void do_pollfd(unsigned int num, struct pollfd * fdpage, fdp = fdpage+i; fd = fdp->fd; if (fd >= 0) { - struct file * file = fget(fd); + int fput_needed; + struct file * file = fget_light(fd, &fput_needed); mask = POLLNVAL; if (file != NULL) { mask = DEFAULT_POLLMASK; if (file->f_op && file->f_op->poll) mask = file->f_op->poll(file, *pwait); mask &= fdp->events | POLLERR | POLLHUP; - fput(file); + fput_light(file, fput_needed); } if (mask) { *pwait = NULL; -- cgit v1.2.3 From 68c3431ae22912be580c68d3955ef46515582943 Mon Sep 17 00:00:00 2001 From: Vadim Lobanov Date: Tue, 28 Mar 2006 01:56:35 -0800 Subject: [PATCH] Fold select_bits_alloc/free into caller code. Remove an unnecessary level of indirection in allocating and freeing select bits, as per the select_bits_alloc() and select_bits_free() functions. Both select.c and compat.c are updated. Signed-off-by: Vadim Lobanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index ef5a0771592d..7f8e26ea427c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1639,15 +1639,6 @@ void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, * This is a virtual copy of sys_select from fs/select.c and probably * should be compared to it from time to time */ -static void *select_bits_alloc(int size) -{ - return kmalloc(6 * size, GFP_KERNEL); -} - -static void select_bits_free(void *bits, int size) -{ - kfree(bits); -} /* * We can actually return ERESTARTSYS instead of EINTR, but I'd @@ -1686,7 +1677,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, */ ret = -ENOMEM; size = FDS_BYTES(n); - bits = select_bits_alloc(size); + bits = kmalloc(6 * size, GFP_KERNEL); if (!bits) goto out_nofds; fds.in = (unsigned long *) bits; @@ -1720,7 +1711,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, compat_set_fd_set(n, exp, fds.res_ex); out: - select_bits_free(bits, size); + kfree(bits); out_nofds: return ret; } -- cgit v1.2.3 From 0a945022778f100115d0cb6234eb28fc1b15ccaf Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 28 Mar 2006 01:56:37 -0800 Subject: [PATCH] for_each_possible_cpu: fixes for generic part replaces for_each_cpu with for_each_possible_cpu(). 
Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ll_rw_blk.c | 2 +- fs/file.c | 2 +- fs/proc/proc_misc.c | 2 +- include/asm-generic/percpu.h | 2 +- include/linux/genhd.h | 4 ++-- include/linux/kernel_stat.h | 2 +- init/main.c | 2 +- kernel/rcutorture.c | 4 ++-- kernel/sched.c | 8 ++++---- mm/slab.c | 4 ++-- mm/swap.c | 2 +- 11 files changed, 17 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 82469db25100..5a19e2eb5711 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3514,7 +3514,7 @@ int __init blk_dev_init(void) iocontext_cachep = kmem_cache_create("blkdev_ioc", sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); - for_each_cpu(i) + for_each_possible_cpu(i) INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); diff --git a/fs/file.c b/fs/file.c index bbc743314730..55f4e7022563 100644 --- a/fs/file.c +++ b/fs/file.c @@ -373,6 +373,6 @@ static void __devinit fdtable_defer_list_init(int cpu) void __init files_defer_init(void) { int i; - for_each_cpu(i) + for_each_possible_cpu(i) fdtable_defer_list_init(i); } diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 1e9ea37d457e..1edce0c34bfd 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -534,7 +534,7 @@ static int show_stat(struct seq_file *p, void *v) if (wall_to_monotonic.tv_nsec) --jif; - for_each_cpu(i) { + for_each_possible_cpu(i) { int j; user = cputime64_add(user, kstat_cpu(i).cpustat.user); diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 78cf45547e31..c0caf433a7d7 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -19,7 +19,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define percpu_modcopy(pcpudst, src, size) \ do { \ unsigned int __i; \ - for_each_cpu(__i) \ + for_each_possible_cpu(__i) \ memcpy((pcpudst)+__per_cpu_offset[__i], \ (src), (size)); \ } while (0) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3c1b0294a742..10a27f29d692 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -152,14 +152,14 @@ struct disk_attribute { ({ \ typeof(gendiskp->dkstats->field) res = 0; \ int i; \ - for_each_cpu(i) \ + for_each_possible_cpu(i) \ res += per_cpu_ptr(gendiskp->dkstats, i)->field; \ res; \ }) static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { int i; - for_each_cpu(i) + for_each_possible_cpu(i) memset(per_cpu_ptr(gendiskp->dkstats, i), value, sizeof (struct disk_stats)); } diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index a484572c302e..b46249082cca 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -46,7 +46,7 @@ static inline int kstat_irqs(int irq) { int cpu, sum = 0; - for_each_cpu(cpu) + for_each_possible_cpu(cpu) sum += kstat_cpu(cpu).irqs[irq]; return sum; diff --git a/init/main.c b/init/main.c index 64466ea1984c..4a2f0898dda1 100644 --- a/init/main.c +++ b/init/main.c @@ -341,7 +341,7 @@ static void __init setup_per_cpu_areas(void) #endif ptr = alloc_bootmem(size * nr_possible_cpus); - for_each_cpu(i) { + for_each_possible_cpu(i) { __per_cpu_offset[i] = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); ptr += size; diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index b4b362b5baf5..8154e7589d12 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -301,7 +301,7 @@ rcu_torture_printk(char *page) long 
pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i]; batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i]; @@ -535,7 +535,7 @@ rcu_torture_init(void) atomic_set(&n_rcu_torture_error, 0); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) atomic_set(&rcu_torture_wcount[i], 0); - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { per_cpu(rcu_torture_count, cpu)[i] = 0; per_cpu(rcu_torture_batch, cpu)[i] = 0; diff --git a/kernel/sched.c b/kernel/sched.c index 7854ee516b92..a9ecac398bb9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1625,7 +1625,7 @@ unsigned long nr_uninterruptible(void) { unsigned long i, sum = 0; - for_each_cpu(i) + for_each_possible_cpu(i) sum += cpu_rq(i)->nr_uninterruptible; /* @@ -1642,7 +1642,7 @@ unsigned long long nr_context_switches(void) { unsigned long long i, sum = 0; - for_each_cpu(i) + for_each_possible_cpu(i) sum += cpu_rq(i)->nr_switches; return sum; @@ -1652,7 +1652,7 @@ unsigned long nr_iowait(void) { unsigned long i, sum = 0; - for_each_cpu(i) + for_each_possible_cpu(i) sum += atomic_read(&cpu_rq(i)->nr_iowait); return sum; @@ -6080,7 +6080,7 @@ void __init sched_init(void) runqueue_t *rq; int i, j, k; - for_each_cpu(i) { + for_each_possible_cpu(i) { prio_array_t *array; rq = cpu_rq(i); diff --git a/mm/slab.c b/mm/slab.c index 681837499d7d..4cbf8bb13557 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3311,7 +3311,7 @@ void *__alloc_percpu(size_t size) * and we have no way of figuring out how to fix the array * that we have allocated then.... */ - for_each_cpu(i) { + for_each_possible_cpu(i) { int node = cpu_to_node(i); if (node_online(node)) @@ -3398,7 +3398,7 @@ void free_percpu(const void *objp) /* * We allocate for all cpus so we cannot use for online cpu here. */ - for_each_cpu(i) + for_each_possible_cpu(i) kfree(p->ptrs[i]); kfree(p); } diff --git a/mm/swap.c b/mm/swap.c index 91b7e2026f69..88895c249bc9 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -512,7 +512,7 @@ long percpu_counter_sum(struct percpu_counter *fbc) spin_lock(&fbc->lock); ret = fbc->count; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { long *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } -- cgit v1.2.3 From 99ac48f54a91d02140c497edc31dc57d4bc5c85d Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 28 Mar 2006 01:56:41 -0800 Subject: [PATCH] mark f_ops const in the inode Mark the f_ops members of inodes as const, as well as fix the ripple-through this causes by places that copy this f_ops and then "do stuff" with it. 
Signed-off-by: Arjan van de Ven Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/ppc/kernel/ppc_htab.c | 2 +- drivers/char/drm/drm_fops.c | 2 +- drivers/char/drm/i810_dma.c | 2 +- drivers/char/drm/i830_dma.c | 2 +- drivers/char/mem.c | 2 +- drivers/char/misc.c | 2 +- drivers/input/input.c | 2 +- drivers/isdn/capi/kcapi_proc.c | 2 +- drivers/media/dvb/dvb-core/dvbdev.c | 2 +- drivers/media/video/videodev.c | 2 +- drivers/message/i2o/i2o_proc.c | 2 +- drivers/oprofile/oprofilefs.c | 6 +++--- drivers/telephony/phonedev.c | 2 +- drivers/usb/core/file.c | 6 +++--- drivers/usb/gadget/inode.c | 6 +++--- fs/char_dev.c | 4 ++-- fs/debugfs/inode.c | 2 +- fs/inode.c | 2 +- fs/nfsd/vfs.c | 2 +- fs/proc/generic.c | 2 +- fs/proc/internal.h | 2 +- fs/proc/proc_misc.c | 2 +- fs/select.c | 2 +- include/linux/cdev.h | 4 ++-- include/linux/debugfs.h | 2 +- include/linux/fs.h | 6 +++--- include/linux/input.h | 2 +- include/linux/miscdevice.h | 2 +- include/linux/oprofile.h | 4 ++-- include/linux/proc_fs.h | 4 ++-- include/linux/sound.h | 12 ++++++------ include/linux/sunrpc/stats.h | 4 ++-- include/linux/usb.h | 2 +- include/linux/videodev2.h | 2 +- include/sound/core.h | 6 +++--- net/sunrpc/rpc_pipe.c | 2 +- net/sunrpc/stats.c | 4 ++-- sound/core/init.c | 3 ++- sound/core/sound.c | 4 ++-- sound/core/sound_oss.c | 2 +- sound/sound_core.c | 22 +++++++++++----------- 42 files changed, 75 insertions(+), 74 deletions(-) (limited to 'fs') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index b3962c3a0348..5be40aa483fd 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -103,7 +103,7 @@ spufs_setattr(struct dentry *dentry, struct iattr *attr) static int spufs_new_file(struct super_block *sb, struct dentry *dentry, - struct file_operations *fops, int mode, + const struct file_operations *fops, int mode, struct spu_context *ctx) { static struct inode_operations spufs_file_iops = { diff --git a/arch/ppc/kernel/ppc_htab.c b/arch/ppc/kernel/ppc_htab.c index 2f5c7650274f..9b84bffdefce 100644 --- a/arch/ppc/kernel/ppc_htab.c +++ b/arch/ppc/kernel/ppc_htab.c @@ -52,7 +52,7 @@ static int ppc_htab_open(struct inode *inode, struct file *file) return single_open(file, ppc_htab_show, NULL); } -struct file_operations ppc_htab_operations = { +const struct file_operations ppc_htab_operations = { .open = ppc_htab_open, .read = seq_read, .llseek = seq_lseek, diff --git a/drivers/char/drm/drm_fops.c b/drivers/char/drm/drm_fops.c index 641f7633878c..b7f7951c4587 100644 --- a/drivers/char/drm/drm_fops.c +++ b/drivers/char/drm/drm_fops.c @@ -175,7 +175,7 @@ int drm_stub_open(struct inode *inode, struct file *filp) drm_device_t *dev = NULL; int minor = iminor(inode); int err = -ENODEV; - struct file_operations *old_fops; + const struct file_operations *old_fops; DRM_DEBUG("\n"); diff --git a/drivers/char/drm/i810_dma.c b/drivers/char/drm/i810_dma.c index ae0aa6d7e0bb..c658dde3633b 100644 --- a/drivers/char/drm/i810_dma.c +++ b/drivers/char/drm/i810_dma.c @@ -126,7 +126,7 @@ static int i810_map_buffer(drm_buf_t * buf, struct file *filp) drm_device_t *dev = priv->head->dev; drm_i810_buf_priv_t *buf_priv = buf->dev_private; drm_i810_private_t *dev_priv = dev->dev_private; - struct file_operations *old_fops; + const struct file_operations *old_fops; int retcode = 0; if (buf_priv->currently_mapped == I810_BUF_MAPPED) diff --git 
a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c index 163f2cbfe60d..b0f815d8cea8 100644 --- a/drivers/char/drm/i830_dma.c +++ b/drivers/char/drm/i830_dma.c @@ -128,7 +128,7 @@ static int i830_map_buffer(drm_buf_t * buf, struct file *filp) drm_device_t *dev = priv->head->dev; drm_i830_buf_priv_t *buf_priv = buf->dev_private; drm_i830_private_t *dev_priv = dev->dev_private; - struct file_operations *old_fops; + const struct file_operations *old_fops; unsigned long virtual; int retcode = 0; diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 5245ba1649ed..66719f9d294c 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -899,7 +899,7 @@ static const struct { unsigned int minor; char *name; umode_t mode; - struct file_operations *fops; + const struct file_operations *fops; } devlist[] = { /* list of minor devices */ {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops}, {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops}, diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 3e4c0414a01a..96eb2a709e21 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -129,7 +129,7 @@ static int misc_open(struct inode * inode, struct file * file) int minor = iminor(inode); struct miscdevice *c; int err = -ENODEV; - struct file_operations *old_fops, *new_fops = NULL; + const struct file_operations *old_fops, *new_fops = NULL; down(&misc_sem); diff --git a/drivers/input/input.c b/drivers/input/input.c index 4fe3da3c667a..f8af0945964e 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -923,7 +923,7 @@ void input_unregister_handler(struct input_handler *handler) static int input_open_file(struct inode *inode, struct file *file) { struct input_handler *handler = input_table[iminor(inode) >> 5]; - struct file_operations *old_fops, *new_fops = NULL; + const struct file_operations *old_fops, *new_fops = NULL; int err; /* No load-on-demand here? 
*/ diff --git a/drivers/isdn/capi/kcapi_proc.c b/drivers/isdn/capi/kcapi_proc.c index 2cc8b27e4c3b..ca9dc00a45c4 100644 --- a/drivers/isdn/capi/kcapi_proc.c +++ b/drivers/isdn/capi/kcapi_proc.c @@ -233,7 +233,7 @@ static struct file_operations proc_applstats_ops = { }; static void -create_seq_entry(char *name, mode_t mode, struct file_operations *f) +create_seq_entry(char *name, mode_t mode, const struct file_operations *f) { struct proc_dir_entry *entry; entry = create_proc_entry(name, mode, NULL); diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c index 54f8b95717b0..96fe0ecae250 100644 --- a/drivers/media/dvb/dvb-core/dvbdev.c +++ b/drivers/media/dvb/dvb-core/dvbdev.c @@ -86,7 +86,7 @@ static int dvb_device_open(struct inode *inode, struct file *file) if (dvbdev && dvbdev->fops) { int err = 0; - struct file_operations *old_fops; + const struct file_operations *old_fops; file->private_data = dvbdev; old_fops = file->f_op; diff --git a/drivers/media/video/videodev.c b/drivers/media/video/videodev.c index 75e3d41382f2..5f87dd5f1d0b 100644 --- a/drivers/media/video/videodev.c +++ b/drivers/media/video/videodev.c @@ -97,7 +97,7 @@ static int video_open(struct inode *inode, struct file *file) unsigned int minor = iminor(inode); int err = 0; struct video_device *vfl; - struct file_operations *old_fops; + const struct file_operations *old_fops; if(minor>=VIDEO_NUM_DEVICES) return -ENODEV; diff --git a/drivers/message/i2o/i2o_proc.c b/drivers/message/i2o/i2o_proc.c index 2a0c42b8cda5..3d2e76eea93e 100644 --- a/drivers/message/i2o/i2o_proc.c +++ b/drivers/message/i2o/i2o_proc.c @@ -56,7 +56,7 @@ typedef struct _i2o_proc_entry_t { char *name; /* entry name */ mode_t mode; /* mode */ - struct file_operations *fops; /* open function */ + const struct file_operations *fops; /* open function */ } i2o_proc_entry; /* global I2O /proc/i2o entry */ diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index d6bae699749a..b62da9b0cbf0 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -130,7 +130,7 @@ static struct file_operations ulong_ro_fops = { static struct dentry * __oprofilefs_create_file(struct super_block * sb, - struct dentry * root, char const * name, struct file_operations * fops, + struct dentry * root, char const * name, const struct file_operations * fops, int perm) { struct dentry * dentry; @@ -203,7 +203,7 @@ int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root, int oprofilefs_create_file(struct super_block * sb, struct dentry * root, - char const * name, struct file_operations * fops) + char const * name, const struct file_operations * fops) { if (!__oprofilefs_create_file(sb, root, name, fops, 0644)) return -EFAULT; @@ -212,7 +212,7 @@ int oprofilefs_create_file(struct super_block * sb, struct dentry * root, int oprofilefs_create_file_perm(struct super_block * sb, struct dentry * root, - char const * name, struct file_operations * fops, int perm) + char const * name, const struct file_operations * fops, int perm) { if (!__oprofilefs_create_file(sb, root, name, fops, perm)) return -EFAULT; diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c index 7a6db1c5c8c5..e166fffea86b 100644 --- a/drivers/telephony/phonedev.c +++ b/drivers/telephony/phonedev.c @@ -49,7 +49,7 @@ static int phone_open(struct inode *inode, struct file *file) unsigned int minor = iminor(inode); int err = 0; struct phone_device *p; - struct file_operations *old_fops, *new_fops = NULL; + 
const struct file_operations *old_fops, *new_fops = NULL; if (minor >= PHONE_NUM_DEVICES) return -ENODEV; diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c index 37b13368c814..b263a54a13c0 100644 --- a/drivers/usb/core/file.c +++ b/drivers/usb/core/file.c @@ -24,15 +24,15 @@ #include "usb.h" #define MAX_USB_MINORS 256 -static struct file_operations *usb_minors[MAX_USB_MINORS]; +static const struct file_operations *usb_minors[MAX_USB_MINORS]; static DEFINE_SPINLOCK(minor_lock); static int usb_open(struct inode * inode, struct file * file) { int minor = iminor(inode); - struct file_operations *c; + const struct file_operations *c; int err = -ENODEV; - struct file_operations *old_fops, *new_fops = NULL; + const struct file_operations *old_fops, *new_fops = NULL; spin_lock (&minor_lock); c = usb_minors[minor]; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index b44cfda76b61..3f618ce6998d 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -1581,7 +1581,7 @@ restart: static struct inode * gadgetfs_create_file (struct super_block *sb, char const *name, - void *data, struct file_operations *fops, + void *data, const struct file_operations *fops, struct dentry **dentry_p); static int activate_ep_files (struct dev_data *dev) @@ -1955,7 +1955,7 @@ module_param (default_perm, uint, 0644); static struct inode * gadgetfs_make_inode (struct super_block *sb, - void *data, struct file_operations *fops, + void *data, const struct file_operations *fops, int mode) { struct inode *inode = new_inode (sb); @@ -1979,7 +1979,7 @@ gadgetfs_make_inode (struct super_block *sb, */ static struct inode * gadgetfs_create_file (struct super_block *sb, char const *name, - void *data, struct file_operations *fops, + void *data, const struct file_operations *fops, struct dentry **dentry_p) { struct dentry *dentry; diff --git a/fs/char_dev.c b/fs/char_dev.c index 8c6eb04d31e2..b53dffa46bac 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -250,7 +250,7 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, } int register_chrdev(unsigned int major, const char *name, - struct file_operations *fops) + const struct file_operations *fops) { struct char_device_struct *cd; struct cdev *cdev; @@ -473,7 +473,7 @@ struct cdev *cdev_alloc(void) return p; } -void cdev_init(struct cdev *cdev, struct file_operations *fops) +void cdev_init(struct cdev *cdev, const struct file_operations *fops) { memset(cdev, 0, sizeof *cdev); INIT_LIST_HEAD(&cdev->list); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index d4f1a2cddd47..85d166cdcae4 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -191,7 +191,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode, */ struct dentry *debugfs_create_file(const char *name, mode_t mode, struct dentry *parent, void *data, - struct file_operations *fops) + const struct file_operations *fops) { struct dentry *dentry = NULL; int error; diff --git a/fs/inode.c b/fs/inode.c index 1fddf2803af8..32b7c3375021 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -104,7 +104,7 @@ static struct inode *alloc_inode(struct super_block *sb) { static struct address_space_operations empty_aops; static struct inode_operations empty_iops; - static struct file_operations empty_fops; + static const struct file_operations empty_fops; struct inode *inode; if (sb->s_op->alloc_inode) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 5320e5afaddb..31018333dc38 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -706,7 +706,7 @@ 
nfsd_close(struct file *filp) * after it. */ static inline int nfsd_dosync(struct file *filp, struct dentry *dp, - struct file_operations *fop) + const struct file_operations *fop) { struct inode *inode = dp->d_inode; int (*fsync) (struct file *, struct dentry *, int); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 47b7a20d45eb..4ba03009cf72 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -560,7 +560,7 @@ static void proc_kill_inodes(struct proc_dir_entry *de) struct file * filp = list_entry(p, struct file, f_u.fu_list); struct dentry * dentry = filp->f_dentry; struct inode * inode; - struct file_operations *fops; + const struct file_operations *fops; if (dentry->d_op != &proc_dentry_operations) continue; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 95a1cf32b838..0502f17b860d 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -30,7 +30,7 @@ do { \ #endif -extern void create_seq_entry(char *name, mode_t mode, struct file_operations *f); +extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f); extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **); extern int proc_tid_stat(struct task_struct *, char *); extern int proc_tgid_stat(struct task_struct *, char *); diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 1edce0c34bfd..ef5a3323f4b5 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -731,7 +731,7 @@ static struct file_operations proc_sysrq_trigger_operations = { struct proc_dir_entry *proc_root_kcore; -void create_seq_entry(char *name, mode_t mode, struct file_operations *f) +void create_seq_entry(char *name, mode_t mode, const struct file_operations *f) { struct proc_dir_entry *entry; entry = create_proc_entry(name, mode, NULL); diff --git a/fs/select.c b/fs/select.c index 05cd199a1127..b3a3a1326af6 100644 --- a/fs/select.c +++ b/fs/select.c @@ -220,7 +220,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) for (i = 0; i < n; ++rinp, ++routp, ++rexp) { unsigned long in, out, ex, all_bits, bit = 1, mask, j; unsigned long res_in = 0, res_out = 0, res_ex = 0; - struct file_operations *f_op = NULL; + const struct file_operations *f_op = NULL; struct file *file = NULL; in = *inp++; out = *outp++; ex = *exp++; diff --git a/include/linux/cdev.h b/include/linux/cdev.h index 8da37e29cb87..2216638962d2 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -5,13 +5,13 @@ struct cdev { struct kobject kobj; struct module *owner; - struct file_operations *ops; + const struct file_operations *ops; struct list_head list; dev_t dev; unsigned int count; }; -void cdev_init(struct cdev *, struct file_operations *); +void cdev_init(struct cdev *, const struct file_operations *); struct cdev *cdev_alloc(void); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4b0428e335be..176e2d371577 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -29,7 +29,7 @@ struct debugfs_blob_wrapper { #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_create_file(const char *name, mode_t mode, struct dentry *parent, void *data, - struct file_operations *fops); + const struct file_operations *fops); struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); diff --git a/include/linux/fs.h b/include/linux/fs.h index 680d913350e7..ef355bc73714 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -496,7 +496,7 @@ struct inode { struct mutex i_mutex; struct rw_semaphore i_alloc_sem; struct inode_operations *i_op; - struct file_operations 
*i_fop; /* former ->i_op->default_file_ops */ + const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ struct super_block *i_sb; struct file_lock *i_flock; struct address_space *i_mapping; @@ -636,7 +636,7 @@ struct file { } f_u; struct dentry *f_dentry; struct vfsmount *f_vfsmnt; - struct file_operations *f_op; + const struct file_operations *f_op; atomic_t f_count; unsigned int f_flags; mode_t f_mode; @@ -1414,7 +1414,7 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *); extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int register_chrdev(unsigned int, const char *, - struct file_operations *); + const struct file_operations *); extern int unregister_chrdev(unsigned int, const char *); extern void unregister_chrdev_region(dev_t, unsigned); extern int chrdev_open(struct inode *, struct file *); diff --git a/include/linux/input.h b/include/linux/input.h index 6d4cc3c110d6..1d4e341b72e6 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -957,7 +957,7 @@ struct input_handler { struct input_handle* (*connect)(struct input_handler *handler, struct input_dev *dev, struct input_device_id *id); void (*disconnect)(struct input_handle *handle); - struct file_operations *fops; + const struct file_operations *fops; int minor; char *name; diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 14ceebfc1efa..5b584dafb5a6 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -36,7 +36,7 @@ struct class_device; struct miscdevice { int minor; const char *name; - struct file_operations *fops; + const struct file_operations *fops; struct list_head list; struct device *dev; struct class_device *class; diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index b5b3197dfd4f..0d514b252454 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -84,10 +84,10 @@ void oprofile_add_trace(unsigned long eip); * the specified file operations. */ int oprofilefs_create_file(struct super_block * sb, struct dentry * root, - char const * name, struct file_operations * fops); + char const * name, const struct file_operations * fops); int oprofilefs_create_file_perm(struct super_block * sb, struct dentry * root, - char const * name, struct file_operations * fops, int perm); + char const * name, const struct file_operations * fops, int perm); /** Create a file for read/write access to an unsigned long. 
*/ int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root, diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index cb224cf653b1..6d03d025fcd5 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -58,7 +58,7 @@ struct proc_dir_entry { gid_t gid; loff_t size; struct inode_operations * proc_iops; - struct file_operations * proc_fops; + const struct file_operations * proc_fops; get_info_t *get_info; struct module *owner; struct proc_dir_entry *next, *parent, *subdir; @@ -189,7 +189,7 @@ static inline struct proc_dir_entry *proc_net_create(const char *name, } static inline struct proc_dir_entry *proc_net_fops_create(const char *name, - mode_t mode, struct file_operations *fops) + mode_t mode, const struct file_operations *fops) { struct proc_dir_entry *res = create_proc_entry(name, mode, proc_net); if (res) diff --git a/include/linux/sound.h b/include/linux/sound.h index 72b9af4c3fd4..f63d8342ffa3 100644 --- a/include/linux/sound.h +++ b/include/linux/sound.h @@ -30,12 +30,12 @@ */ struct device; -extern int register_sound_special(struct file_operations *fops, int unit); -extern int register_sound_special_device(struct file_operations *fops, int unit, struct device *dev); -extern int register_sound_mixer(struct file_operations *fops, int dev); -extern int register_sound_midi(struct file_operations *fops, int dev); -extern int register_sound_dsp(struct file_operations *fops, int dev); -extern int register_sound_synth(struct file_operations *fops, int dev); +extern int register_sound_special(const struct file_operations *fops, int unit); +extern int register_sound_special_device(const struct file_operations *fops, int unit, struct device *dev); +extern int register_sound_mixer(const struct file_operations *fops, int dev); +extern int register_sound_midi(const struct file_operations *fops, int dev); +extern int register_sound_dsp(const struct file_operations *fops, int dev); +extern int register_sound_synth(const struct file_operations *fops, int dev); extern void unregister_sound_special(int unit); extern void unregister_sound_mixer(int unit); diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index 0d6ed3c8bdc4..d93c24b47f3f 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -50,7 +50,7 @@ struct proc_dir_entry * rpc_proc_register(struct rpc_stat *); void rpc_proc_unregister(const char *); void rpc_proc_zero(struct rpc_program *); struct proc_dir_entry * svc_proc_register(struct svc_stat *, - struct file_operations *); + const struct file_operations *); void svc_proc_unregister(const char *); void svc_seq_show(struct seq_file *, @@ -65,7 +65,7 @@ static inline void rpc_proc_unregister(const char *p) {} static inline void rpc_proc_zero(struct rpc_program *p) {} static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s, - struct file_operations *f) { return NULL; } + const struct file_operations *f) { return NULL; } static inline void svc_proc_unregister(const char *p) {} static inline void svc_seq_show(struct seq_file *seq, diff --git a/include/linux/usb.h b/include/linux/usb.h index 130d125fda12..e34e5e3dce52 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -615,7 +615,7 @@ extern struct bus_type usb_bus_type; */ struct usb_class_driver { char *name; - struct file_operations *fops; + const struct file_operations *fops; int minor_base; }; diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 2275bfec5b68..af2d6155d3fe 100644 --- 
a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -75,7 +75,7 @@ struct video_device int minor; /* device ops + callbacks */ - struct file_operations *fops; + const struct file_operations *fops; void (*release)(struct video_device *vfd); diff --git a/include/sound/core.h b/include/sound/core.h index 144bdc2f217f..7f32c12b4a0a 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -186,7 +186,7 @@ struct snd_minor { int type; /* SNDRV_DEVICE_TYPE_XXX */ int card; /* card number */ int device; /* device number */ - struct file_operations *f_ops; /* file operations */ + const struct file_operations *f_ops; /* file operations */ void *private_data; /* private data for f_ops->open */ char name[0]; /* device name (keep at the end of structure) */ @@ -200,14 +200,14 @@ extern int snd_ecards_limit; void snd_request_card(int card); int snd_register_device(int type, struct snd_card *card, int dev, - struct file_operations *f_ops, void *private_data, + const struct file_operations *f_ops, void *private_data, const char *name); int snd_unregister_device(int type, struct snd_card *card, int dev); void *snd_lookup_minor_data(unsigned int minor, int type); #ifdef CONFIG_SND_OSSEMUL int snd_register_oss_device(int type, struct snd_card *card, int dev, - struct file_operations *f_ops, void *private_data, + const struct file_operations *f_ops, void *private_data, const char *name); int snd_unregister_oss_device(int type, struct snd_card *card, int dev); void *snd_lookup_oss_minor_data(unsigned int minor, int type); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index aa4158be9900..cc673dd8433f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -395,7 +395,7 @@ enum { */ struct rpc_filelist { char *name; - struct file_operations *i_fop; + const struct file_operations *i_fop; int mode; }; diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 790941e8af4d..dea529666d69 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -225,7 +225,7 @@ EXPORT_SYMBOL(rpc_print_iostats); * Register/unregister RPC proc files */ static inline struct proc_dir_entry * -do_register(const char *name, void *data, struct file_operations *fops) +do_register(const char *name, void *data, const struct file_operations *fops) { struct proc_dir_entry *ent; @@ -253,7 +253,7 @@ rpc_proc_unregister(const char *name) } struct proc_dir_entry * -svc_proc_register(struct svc_stat *statp, struct file_operations *fops) +svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) { return do_register(statp->program->pg_name, statp, fops); } diff --git a/sound/core/init.c b/sound/core/init.c index ad68761abba1..5bb8a8b23d51 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -223,7 +223,8 @@ int snd_card_disconnect(struct snd_card *card) struct snd_monitor_file *mfile; struct file *file; struct snd_shutdown_f_ops *s_f_ops; - struct file_operations *f_ops, *old_f_ops; + struct file_operations *f_ops; + const struct file_operations *old_f_ops; int err; spin_lock(&card->files_lock); diff --git a/sound/core/sound.c b/sound/core/sound.c index 4d28e5212611..108e430b5036 100644 --- a/sound/core/sound.c +++ b/sound/core/sound.c @@ -137,7 +137,7 @@ static int snd_open(struct inode *inode, struct file *file) { unsigned int minor = iminor(inode); struct snd_minor *mptr = NULL; - struct file_operations *old_fops; + const struct file_operations *old_fops; int err = 0; if (minor >= ARRAY_SIZE(snd_minors)) @@ -240,7 +240,7 @@ static int snd_kernel_minor(int type, struct snd_card 
*card, int dev) * Retrurns zero if successful, or a negative error code on failure. */ int snd_register_device(int type, struct snd_card *card, int dev, - struct file_operations *f_ops, void *private_data, + const struct file_operations *f_ops, void *private_data, const char *name) { int minor; diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c index 4023d3b406de..9055c6de9587 100644 --- a/sound/core/sound_oss.c +++ b/sound/core/sound_oss.c @@ -95,7 +95,7 @@ static int snd_oss_kernel_minor(int type, struct snd_card *card, int dev) } int snd_register_oss_device(int type, struct snd_card *card, int dev, - struct file_operations *f_ops, void *private_data, + const struct file_operations *f_ops, void *private_data, const char *name) { int minor = snd_oss_kernel_minor(type, card, dev); diff --git a/sound/sound_core.c b/sound/sound_core.c index 394b53e20cb8..6f849720aef3 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -53,7 +53,7 @@ struct sound_unit { int unit_minor; - struct file_operations *unit_fops; + const struct file_operations *unit_fops; struct sound_unit *next; char name[32]; }; @@ -73,7 +73,7 @@ EXPORT_SYMBOL(sound_class); * join into it. Called with the lock asserted */ -static int __sound_insert_unit(struct sound_unit * s, struct sound_unit **list, struct file_operations *fops, int index, int low, int top) +static int __sound_insert_unit(struct sound_unit * s, struct sound_unit **list, const struct file_operations *fops, int index, int low, int top) { int n=low; @@ -153,7 +153,7 @@ static DEFINE_SPINLOCK(sound_loader_lock); * list. Acquires locks as needed */ -static int sound_insert_unit(struct sound_unit **list, struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode, struct device *dev) +static int sound_insert_unit(struct sound_unit **list, const struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode, struct device *dev) { struct sound_unit *s = kmalloc(sizeof(*s), GFP_KERNEL); int r; @@ -237,7 +237,7 @@ static struct sound_unit *chains[SOUND_STEP]; * a negative error code is returned. */ -int register_sound_special_device(struct file_operations *fops, int unit, +int register_sound_special_device(const struct file_operations *fops, int unit, struct device *dev) { const int chain = unit % SOUND_STEP; @@ -301,7 +301,7 @@ int register_sound_special_device(struct file_operations *fops, int unit, EXPORT_SYMBOL(register_sound_special_device); -int register_sound_special(struct file_operations *fops, int unit) +int register_sound_special(const struct file_operations *fops, int unit) { return register_sound_special_device(fops, unit, NULL); } @@ -318,7 +318,7 @@ EXPORT_SYMBOL(register_sound_special); * number is returned, on failure a negative error code is returned. */ -int register_sound_mixer(struct file_operations *fops, int dev) +int register_sound_mixer(const struct file_operations *fops, int dev) { return sound_insert_unit(&chains[0], fops, dev, 0, 128, "mixer", S_IRUSR | S_IWUSR, NULL); @@ -336,7 +336,7 @@ EXPORT_SYMBOL(register_sound_mixer); * number is returned, on failure a negative error code is returned. 
*/ -int register_sound_midi(struct file_operations *fops, int dev) +int register_sound_midi(const struct file_operations *fops, int dev) { return sound_insert_unit(&chains[2], fops, dev, 2, 130, "midi", S_IRUSR | S_IWUSR, NULL); @@ -362,7 +362,7 @@ EXPORT_SYMBOL(register_sound_midi); * and will always allocate them as a matching pair - eg dsp3/audio3 */ -int register_sound_dsp(struct file_operations *fops, int dev) +int register_sound_dsp(const struct file_operations *fops, int dev) { return sound_insert_unit(&chains[3], fops, dev, 3, 131, "dsp", S_IWUSR | S_IRUSR, NULL); @@ -381,7 +381,7 @@ EXPORT_SYMBOL(register_sound_dsp); */ -int register_sound_synth(struct file_operations *fops, int dev) +int register_sound_synth(const struct file_operations *fops, int dev) { return sound_insert_unit(&chains[9], fops, dev, 9, 137, "synth", S_IRUSR | S_IWUSR, NULL); @@ -501,7 +501,7 @@ int soundcore_open(struct inode *inode, struct file *file) int chain; int unit = iminor(inode); struct sound_unit *s; - struct file_operations *new_fops = NULL; + const struct file_operations *new_fops = NULL; chain=unit&0x0F; if(chain==4 || chain==5) /* dsp/audio/dsp16 */ @@ -540,7 +540,7 @@ int soundcore_open(struct inode *inode, struct file *file) * switching ->f_op in the first place. */ int err = 0; - struct file_operations *old_fops = file->f_op; + const struct file_operations *old_fops = file->f_op; file->f_op = new_fops; spin_unlock(&sound_loader_lock); if(file->f_op->open) -- cgit v1.2.3 From 4b6f5d20b04dcbc3d888555522b90ba6d36c4106 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 28 Mar 2006 01:56:42 -0800 Subject: [PATCH] Make most file operations structs in fs/ const This is a conversion to make the various file_operations structs in fs/ const. Basically a regexp job, with a few manual fixups The goal is both to increase correctness (harder to accidentally write to shared datastructures) and reducing the false sharing of cachelines with things that get dirty in .data (while .rodata is nicely read only and thus cache clean) Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/ibmasm/ibmasmfs.c | 2 +- fs/9p/v9fs_vfs.h | 4 ++-- fs/9p/vfs_dir.c | 2 +- fs/9p/vfs_file.c | 2 +- fs/adfs/adfs.h | 4 ++-- fs/adfs/dir.c | 2 +- fs/adfs/file.c | 2 +- fs/affs/affs.h | 6 +++--- fs/affs/dir.c | 2 +- fs/affs/file.c | 2 +- fs/afs/dir.c | 2 +- fs/afs/internal.h | 4 ++-- fs/afs/mntpt.c | 2 +- fs/afs/proc.c | 10 +++++----- fs/autofs/autofs_i.h | 2 +- fs/autofs/root.c | 2 +- fs/autofs4/autofs_i.h | 4 ++-- fs/autofs4/root.c | 4 ++-- fs/bad_inode.c | 2 +- fs/befs/linuxvfs.c | 2 +- fs/bfs/bfs.h | 4 ++-- fs/bfs/dir.c | 2 +- fs/bfs/file.c | 2 +- fs/binfmt_misc.c | 6 +++--- fs/block_dev.c | 2 +- fs/char_dev.c | 2 +- fs/cifs/cifsfs.c | 10 +++++----- fs/cifs/cifsfs.h | 10 +++++----- fs/coda/dir.c | 2 +- fs/coda/file.c | 2 +- fs/coda/pioctl.c | 2 +- fs/coda/psdev.c | 2 +- fs/configfs/configfs_internal.h | 6 +++--- fs/configfs/dir.c | 2 +- fs/configfs/file.c | 2 +- fs/cramfs/inode.c | 4 ++-- fs/debugfs/file.c | 4 ++-- fs/devfs/base.c | 12 ++++++------ fs/efs/dir.c | 2 +- fs/eventpoll.c | 2 +- fs/ext2/dir.c | 2 +- fs/ext2/ext2.h | 6 +++--- fs/ext2/file.c | 4 ++-- fs/ext3/dir.c | 2 +- fs/ext3/file.c | 2 +- fs/fat/dir.c | 2 +- fs/fat/file.c | 2 +- fs/fifo.c | 2 +- fs/freevxfs/vxfs_extern.h | 2 +- fs/freevxfs/vxfs_lookup.c | 2 +- fs/fuse/dev.c | 2 +- fs/fuse/dir.c | 2 +- fs/fuse/file.c | 6 +++--- fs/fuse/fuse_i.h | 2 +- fs/hfs/dir.c | 2 +- fs/hfs/hfs_fs.h | 2 +- fs/hfs/inode.c | 4 
++-- fs/hfsplus/dir.c | 2 +- fs/hfsplus/inode.c | 2 +- fs/hostfs/hostfs_kern.c | 4 ++-- fs/hpfs/dir.c | 2 +- fs/hpfs/file.c | 2 +- fs/hpfs/hpfs_fn.h | 4 ++-- fs/hppfs/hppfs_kern.c | 4 ++-- fs/hugetlbfs/inode.c | 4 ++-- fs/inotify.c | 2 +- fs/isofs/dir.c | 2 +- fs/isofs/isofs.h | 2 +- fs/jffs/inode-v23.c | 8 ++++---- fs/jffs2/dir.c | 2 +- fs/jffs2/file.c | 2 +- fs/jffs2/os-linux.h | 4 ++-- fs/jfs/file.c | 2 +- fs/jfs/jfs_inode.h | 4 ++-- fs/jfs/namei.c | 2 +- fs/libfs.c | 2 +- fs/minix/dir.c | 2 +- fs/minix/file.c | 2 +- fs/minix/minix.h | 4 ++-- fs/ncpfs/dir.c | 2 +- fs/ncpfs/file.c | 2 +- fs/nfs/dir.c | 2 +- fs/nfs/file.c | 2 +- fs/nfsd/nfsctl.c | 4 ++-- fs/nfsd/stats.c | 2 +- fs/ntfs/dir.c | 2 +- fs/ntfs/file.c | 4 ++-- fs/ntfs/ntfs.h | 6 +++--- fs/ocfs2/dlmglue.c | 2 +- fs/ocfs2/file.c | 4 ++-- fs/ocfs2/file.h | 4 ++-- fs/openpromfs/inode.c | 6 +++--- fs/pipe.c | 6 +++--- fs/proc/kcore.c | 2 +- fs/proc/kmsg.c | 2 +- fs/proc/vmcore.c | 2 +- fs/qnx4/dir.c | 2 +- fs/qnx4/file.c | 2 +- fs/ramfs/file-mmu.c | 2 +- fs/ramfs/file-nommu.c | 2 +- fs/ramfs/internal.h | 2 +- fs/read_write.c | 2 +- fs/reiserfs/dir.c | 2 +- fs/reiserfs/file.c | 2 +- fs/reiserfs/procfs.c | 2 +- fs/romfs/inode.c | 2 +- fs/smbfs/dir.c | 2 +- fs/smbfs/file.c | 2 +- fs/smbfs/proto.h | 4 ++-- fs/sysfs/bin.c | 2 +- fs/sysfs/dir.c | 2 +- fs/sysfs/file.c | 2 +- fs/sysfs/sysfs.h | 6 +++--- fs/sysv/dir.c | 2 +- fs/sysv/file.c | 2 +- fs/sysv/sysv.h | 4 ++-- fs/udf/dir.c | 2 +- fs/udf/file.c | 2 +- fs/udf/udfdecl.h | 4 ++-- fs/ufs/dir.c | 2 +- fs/ufs/file.c | 2 +- fs/xfs/linux-2.6/xfs_file.c | 6 +++--- fs/xfs/linux-2.6/xfs_iops.h | 6 +++--- include/linux/coda_linux.h | 6 +++--- include/linux/crash_dump.h | 2 +- include/linux/efs_fs.h | 2 +- include/linux/ext3_fs.h | 4 ++-- include/linux/fs.h | 20 ++++++++++---------- include/linux/hugetlb.h | 2 +- include/linux/msdos_fs.h | 4 ++-- include/linux/ncp_fs.h | 4 ++-- include/linux/nfs_fs.h | 4 ++-- include/linux/proc_fs.h | 6 +++--- include/linux/qnx4_fs.h | 4 ++-- include/linux/ramfs.h | 2 +- include/linux/reiserfs_fs.h | 4 ++-- include/linux/ufs_fs.h | 4 ++-- net/nonet.c | 2 +- net/socket.c | 2 +- 139 files changed, 225 insertions(+), 225 deletions(-) (limited to 'fs') diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 5c550fcac2c4..26a230b6ff80 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -101,7 +101,7 @@ static struct super_operations ibmasmfs_s_ops = { .drop_inode = generic_delete_inode, }; -static struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations; +static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations; static struct file_system_type ibmasmfs_type = { .owner = THIS_MODULE, diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 43c9f7de0314..f867b8d3e973 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -39,8 +39,8 @@ extern struct file_system_type v9fs_fs_type; extern struct address_space_operations v9fs_addr_operations; -extern struct file_operations v9fs_file_operations; -extern struct file_operations v9fs_dir_operations; +extern const struct file_operations v9fs_file_operations; +extern const struct file_operations v9fs_dir_operations; extern struct dentry_operations v9fs_dentry_operations; struct inode *v9fs_get_inode(struct super_block *sb, int mode); diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 766f11f1215c..e32d5971039b 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -204,7 +204,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) 
return 0; } -struct file_operations v9fs_dir_operations = { +const struct file_operations v9fs_dir_operations = { .read = generic_read_dir, .readdir = v9fs_dir_readdir, .open = v9fs_file_open, diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 59e744163407..083dcfcd158e 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -266,7 +266,7 @@ v9fs_file_write(struct file *filp, const char __user * data, return total; } -struct file_operations v9fs_file_operations = { +const struct file_operations v9fs_file_operations = { .llseek = generic_file_llseek, .read = v9fs_file_read, .write = v9fs_file_write, diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index f6cd01352cc8..29217ff36d44 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -85,7 +85,7 @@ void __adfs_error(struct super_block *sb, const char *function, /* dir_*.c */ extern struct inode_operations adfs_dir_inode_operations; -extern struct file_operations adfs_dir_operations; +extern const struct file_operations adfs_dir_operations; extern struct dentry_operations adfs_dentry_operations; extern struct adfs_dir_ops adfs_f_dir_ops; extern struct adfs_dir_ops adfs_fplus_dir_ops; @@ -94,7 +94,7 @@ extern int adfs_dir_update(struct super_block *sb, struct object_info *obj); /* file.c */ extern struct inode_operations adfs_file_inode_operations; -extern struct file_operations adfs_file_operations; +extern const struct file_operations adfs_file_operations; static inline __u32 signed_asl(__u32 val, signed int shift) { diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 0b4c3a028076..7b075fc397da 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -196,7 +196,7 @@ out: return ret; } -struct file_operations adfs_dir_operations = { +const struct file_operations adfs_dir_operations = { .read = generic_read_dir, .readdir = adfs_readdir, .fsync = file_fsync, diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 6af10885f9d6..1014b9f2117b 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -25,7 +25,7 @@ #include "adfs.h" -struct file_operations adfs_file_operations = { +const struct file_operations adfs_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, .mmap = generic_file_mmap, diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 0c6799f2137a..a43a876742b8 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -192,9 +192,9 @@ extern void affs_dir_truncate(struct inode *); extern struct inode_operations affs_file_inode_operations; extern struct inode_operations affs_dir_inode_operations; extern struct inode_operations affs_symlink_inode_operations; -extern struct file_operations affs_file_operations; -extern struct file_operations affs_file_operations_ofs; -extern struct file_operations affs_dir_operations; +extern const struct file_operations affs_file_operations; +extern const struct file_operations affs_file_operations_ofs; +extern const struct file_operations affs_dir_operations; extern struct address_space_operations affs_symlink_aops; extern struct address_space_operations affs_aops; extern struct address_space_operations affs_aops_ofs; diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 548efd0ee98c..5d9649fa1814 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -17,7 +17,7 @@ static int affs_readdir(struct file *, void *, filldir_t); -struct file_operations affs_dir_operations = { +const struct file_operations affs_dir_operations = { .read = generic_read_dir, .readdir = affs_readdir, .fsync = file_fsync, diff --git a/fs/affs/file.c b/fs/affs/file.c index f72fb776ecdf..7076262af39b 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c 
@@ -25,7 +25,7 @@ static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext); static int affs_file_open(struct inode *inode, struct file *filp); static int affs_file_release(struct inode *inode, struct file *filp); -struct file_operations affs_file_operations = { +const struct file_operations affs_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, .write = generic_file_write, diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5c61c24dab2a..a6dff6a4f204 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -32,7 +32,7 @@ static int afs_d_delete(struct dentry *dentry); static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos, ino_t ino, unsigned dtype); -struct file_operations afs_dir_file_operations = { +const struct file_operations afs_dir_file_operations = { .open = afs_dir_open, .readdir = afs_dir_readdir, }; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index ab8f87c66319..72febdf9a35a 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -64,7 +64,7 @@ extern struct cachefs_index_def afs_cache_cell_index_def; * dir.c */ extern struct inode_operations afs_dir_inode_operations; -extern struct file_operations afs_dir_file_operations; +extern const struct file_operations afs_dir_file_operations; /* * file.c @@ -105,7 +105,7 @@ extern struct cachefs_netfs afs_cache_netfs; * mntpt.c */ extern struct inode_operations afs_mntpt_inode_operations; -extern struct file_operations afs_mntpt_file_operations; +extern const struct file_operations afs_mntpt_file_operations; extern struct afs_timer afs_mntpt_expiry_timer; extern struct afs_timer_ops afs_mntpt_expiry_timer_ops; extern unsigned long afs_mntpt_expiry_timeout; diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 31ee06590de5..4e6eeb59b83c 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -32,7 +32,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir, static int afs_mntpt_open(struct inode *inode, struct file *file); static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd); -struct file_operations afs_mntpt_file_operations = { +const struct file_operations afs_mntpt_file_operations = { .open = afs_mntpt_open, }; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 9c81b8f7eef0..101d21b6c037 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -37,7 +37,7 @@ static struct seq_operations afs_proc_cells_ops = { .show = afs_proc_cells_show, }; -static struct file_operations afs_proc_cells_fops = { +static const struct file_operations afs_proc_cells_fops = { .open = afs_proc_cells_open, .read = seq_read, .write = afs_proc_cells_write, @@ -53,7 +53,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file, const char __user *buf, size_t size, loff_t *_pos); -static struct file_operations afs_proc_rootcell_fops = { +static const struct file_operations afs_proc_rootcell_fops = { .open = afs_proc_rootcell_open, .read = afs_proc_rootcell_read, .write = afs_proc_rootcell_write, @@ -77,7 +77,7 @@ static struct seq_operations afs_proc_cell_volumes_ops = { .show = afs_proc_cell_volumes_show, }; -static struct file_operations afs_proc_cell_volumes_fops = { +static const struct file_operations afs_proc_cell_volumes_fops = { .open = afs_proc_cell_volumes_open, .read = seq_read, .llseek = seq_lseek, @@ -101,7 +101,7 @@ static struct seq_operations afs_proc_cell_vlservers_ops = { .show = afs_proc_cell_vlservers_show, }; -static struct file_operations afs_proc_cell_vlservers_fops = { +static const struct file_operations afs_proc_cell_vlservers_fops = { 
.open = afs_proc_cell_vlservers_open, .read = seq_read, .llseek = seq_lseek, @@ -124,7 +124,7 @@ static struct seq_operations afs_proc_cell_servers_ops = { .show = afs_proc_cell_servers_show, }; -static struct file_operations afs_proc_cell_servers_fops = { +static const struct file_operations afs_proc_cell_servers_fops = { .open = afs_proc_cell_servers_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 990c28da5aec..a62327f1bdff 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -146,7 +146,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info extern struct inode_operations autofs_root_inode_operations; extern struct inode_operations autofs_symlink_inode_operations; -extern struct file_operations autofs_root_operations; +extern const struct file_operations autofs_root_operations; /* Initializing function */ diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 870e2cf33016..9cac08d6a873 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -26,7 +26,7 @@ static int autofs_root_rmdir(struct inode *,struct dentry *); static int autofs_root_mkdir(struct inode *,struct dentry *,int); static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); -struct file_operations autofs_root_operations = { +const struct file_operations autofs_root_operations = { .read = generic_read_dir, .readdir = autofs_root_readdir, .ioctl = autofs_root_ioctl, diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index c70204a61268..57c4903614e5 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -176,8 +176,8 @@ extern struct inode_operations autofs4_dir_inode_operations; extern struct inode_operations autofs4_root_inode_operations; extern struct inode_operations autofs4_indirect_root_inode_operations; extern struct inode_operations autofs4_direct_root_inode_operations; -extern struct file_operations autofs4_dir_operations; -extern struct file_operations autofs4_root_operations; +extern const struct file_operations autofs4_dir_operations; +extern const struct file_operations autofs4_root_operations; /* Initializing function */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index c8fe43a475e2..84e030c8ddd0 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -32,7 +32,7 @@ static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t fil static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); static void *autofs4_follow_link(struct dentry *, struct nameidata *); -struct file_operations autofs4_root_operations = { +const struct file_operations autofs4_root_operations = { .open = dcache_dir_open, .release = dcache_dir_close, .read = generic_read_dir, @@ -40,7 +40,7 @@ struct file_operations autofs4_root_operations = { .ioctl = autofs4_root_ioctl, }; -struct file_operations autofs4_dir_operations = { +const struct file_operations autofs4_dir_operations = { .open = autofs4_dir_open, .release = autofs4_dir_close, .read = generic_read_dir, diff --git a/fs/bad_inode.c b/fs/bad_inode.c index e172180a1d8c..80599ae33966 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -22,7 +22,7 @@ static int return_EIO(void) #define EIO_ERROR ((void *) (return_EIO)) -static struct file_operations bad_file_ops = +static const struct file_operations bad_file_ops = { .llseek = EIO_ERROR, .aio_read = EIO_ERROR, diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 044a59587829..68ebd10f345d 100644 --- a/fs/befs/linuxvfs.c +++ 
b/fs/befs/linuxvfs.c @@ -64,7 +64,7 @@ static const struct super_operations befs_sops = { /* slab cache for befs_inode_info objects */ static kmem_cache_t *befs_inode_cachep; -static struct file_operations befs_dir_operations = { +static const struct file_operations befs_dir_operations = { .read = generic_read_dir, .readdir = befs_readdir, }; diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 1fbc53f14aba..9d791004b21c 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -49,11 +49,11 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode) /* file.c */ extern struct inode_operations bfs_file_inops; -extern struct file_operations bfs_file_operations; +extern const struct file_operations bfs_file_operations; extern struct address_space_operations bfs_aops; /* dir.c */ extern struct inode_operations bfs_dir_inops; -extern struct file_operations bfs_dir_operations; +extern const struct file_operations bfs_dir_operations; #endif /* _FS_BFS_BFS_H */ diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 5af928fa0449..26fad9621738 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -70,7 +70,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) return 0; } -struct file_operations bfs_dir_operations = { +const struct file_operations bfs_dir_operations = { .read = generic_read_dir, .readdir = bfs_readdir, .fsync = file_fsync, diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 807723b65daf..d83cd74a2e4e 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -17,7 +17,7 @@ #define dprintf(x...) #endif -struct file_operations bfs_file_operations = { +const struct file_operations bfs_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, .write = generic_file_write, diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 6a7b730c206b..d73d75591a39 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -600,7 +600,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, return count; } -static struct file_operations bm_entry_operations = { +static const struct file_operations bm_entry_operations = { .read = bm_entry_read, .write = bm_entry_write, }; @@ -668,7 +668,7 @@ out: return count; } -static struct file_operations bm_register_operations = { +static const struct file_operations bm_register_operations = { .write = bm_register_write, }; @@ -715,7 +715,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer, return count; } -static struct file_operations bm_status_operations = { +static const struct file_operations bm_status_operations = { .read = bm_status_read, .write = bm_status_write, }; diff --git a/fs/block_dev.c b/fs/block_dev.c index 17c76182f389..af88c43043d5 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1087,7 +1087,7 @@ struct address_space_operations def_blk_aops = { .direct_IO = blkdev_direct_IO, }; -struct file_operations def_blk_fops = { +const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_close, .llseek = block_llseek, diff --git a/fs/char_dev.c b/fs/char_dev.c index b53dffa46bac..4e1b849f912f 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -406,7 +406,7 @@ static void cdev_purge(struct cdev *cdev) * is contain the open that then fills in the correct operations * depending on the special file... 
*/ -struct file_operations def_chr_fops = { +const struct file_operations def_chr_fops = { .open = chrdev_open, }; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6b99b51d6694..4bbc544857bc 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -583,7 +583,7 @@ struct inode_operations cifs_symlink_inode_ops = { #endif }; -struct file_operations cifs_file_ops = { +const struct file_operations cifs_file_ops = { .read = do_sync_read, .write = do_sync_write, .readv = generic_file_readv, @@ -607,7 +607,7 @@ struct file_operations cifs_file_ops = { #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; -struct file_operations cifs_file_direct_ops = { +const struct file_operations cifs_file_direct_ops = { /* no mmap, no aio, no readv - BB reevaluate whether they can be done with directio, no cache */ .read = cifs_user_read, @@ -626,7 +626,7 @@ struct file_operations cifs_file_direct_ops = { .dir_notify = cifs_dir_notify, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; -struct file_operations cifs_file_nobrl_ops = { +const struct file_operations cifs_file_nobrl_ops = { .read = do_sync_read, .write = do_sync_write, .readv = generic_file_readv, @@ -649,7 +649,7 @@ struct file_operations cifs_file_nobrl_ops = { #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; -struct file_operations cifs_file_direct_nobrl_ops = { +const struct file_operations cifs_file_direct_nobrl_ops = { /* no mmap, no aio, no readv - BB reevaluate whether they can be done with directio, no cache */ .read = cifs_user_read, @@ -668,7 +668,7 @@ struct file_operations cifs_file_direct_nobrl_ops = { #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; -struct file_operations cifs_dir_ops = { +const struct file_operations cifs_dir_ops = { .readdir = cifs_readdir, .release = cifs_closedir, .read = generic_read_dir, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 821a8eb22559..74f405ae4da3 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -61,10 +61,10 @@ extern struct inode_operations cifs_file_inode_ops; extern struct inode_operations cifs_symlink_inode_ops; /* Functions related to files and directories */ -extern struct file_operations cifs_file_ops; -extern struct file_operations cifs_file_direct_ops; /* if directio mount */ -extern struct file_operations cifs_file_nobrl_ops; -extern struct file_operations cifs_file_direct_nobrl_ops; /* if directio mount */ +extern const struct file_operations cifs_file_ops; +extern const struct file_operations cifs_file_direct_ops; /* if directio mount */ +extern const struct file_operations cifs_file_nobrl_ops; +extern const struct file_operations cifs_file_direct_nobrl_ops; /* if directio mount */ extern int cifs_open(struct inode *inode, struct file *file); extern int cifs_close(struct inode *inode, struct file *file); extern int cifs_closedir(struct inode *inode, struct file *file); @@ -76,7 +76,7 @@ extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, struct dentry *, int); extern int cifs_flush(struct file *); extern int cifs_file_mmap(struct file * , struct vm_area_struct *); -extern struct file_operations cifs_dir_ops; +extern const struct file_operations cifs_dir_ops; extern int cifs_dir_open(struct inode *inode, struct file *file); extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); extern int cifs_dir_notify(struct file *, unsigned long arg); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 54f76de8a686..71f2ea632e53 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -82,7 +82,7 @@ struct inode_operations coda_dir_inode_operations = 
.setattr = coda_setattr, }; -struct file_operations coda_dir_operations = { +const struct file_operations coda_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = coda_readdir, diff --git a/fs/coda/file.c b/fs/coda/file.c index 146a991d6eb5..7c2642431fa5 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -288,7 +288,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) return err; } -struct file_operations coda_file_operations = { +const struct file_operations coda_file_operations = { .llseek = generic_file_llseek, .read = coda_file_read, .write = coda_file_write, diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 127714936c66..214822be87bd 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -36,7 +36,7 @@ struct inode_operations coda_ioctl_inode_operations = .setattr = coda_setattr, }; -struct file_operations coda_ioctl_operations = { +const struct file_operations coda_ioctl_operations = { .owner = THIS_MODULE, .ioctl = coda_pioctl, }; diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 98c74fe2e139..6c6771db36da 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -342,7 +342,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file) } -static struct file_operations coda_psdev_fops = { +static const struct file_operations coda_psdev_fops = { .owner = THIS_MODULE, .read = coda_psdev_read, .write = coda_psdev_write, diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index f70e46951b37..3f4ff7a242b9 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -72,9 +72,9 @@ extern void configfs_release_fs(void); extern struct rw_semaphore configfs_rename_sem; extern struct super_block * configfs_sb; -extern struct file_operations configfs_dir_operations; -extern struct file_operations configfs_file_operations; -extern struct file_operations bin_fops; +extern const struct file_operations configfs_dir_operations; +extern const struct file_operations configfs_file_operations; +extern const struct file_operations bin_fops; extern struct inode_operations configfs_dir_inode_operations; extern struct inode_operations configfs_symlink_inode_operations; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index ca60e3abef45..8ed9b06a9828 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1027,7 +1027,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) return offset; } -struct file_operations configfs_dir_operations = { +const struct file_operations configfs_dir_operations = { .open = configfs_dir_open, .release = configfs_dir_close, .llseek = configfs_dir_lseek, diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 3921920d8716..f499803743e0 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -322,7 +322,7 @@ static int configfs_release(struct inode * inode, struct file * filp) return 0; } -struct file_operations configfs_file_operations = { +const struct file_operations configfs_file_operations = { .read = configfs_read_file, .write = configfs_write_file, .llseek = generic_file_llseek, diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index acc1b2c10a86..9efcc3a164e8 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -29,7 +29,7 @@ static struct super_operations cramfs_ops; static struct inode_operations cramfs_dir_inode_operations; -static struct file_operations cramfs_directory_operations; +static const struct file_operations cramfs_directory_operations; static struct 
address_space_operations cramfs_aops; static DEFINE_MUTEX(read_mutex); @@ -512,7 +512,7 @@ static struct address_space_operations cramfs_aops = { /* * A directory can only readdir */ -static struct file_operations cramfs_directory_operations = { +static const struct file_operations cramfs_directory_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = cramfs_readdir, diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 40c4fc973fad..66a505422e5c 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -39,7 +39,7 @@ static int default_open(struct inode *inode, struct file *file) return 0; } -struct file_operations debugfs_file_operations = { +const struct file_operations debugfs_file_operations = { .read = default_read_file, .write = default_write_file, .open = default_open, @@ -213,7 +213,7 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf, return count; } -static struct file_operations fops_bool = { +static const struct file_operations fops_bool = { .read = read_file_bool, .write = write_file_bool, .open = default_open, diff --git a/fs/devfs/base.c b/fs/devfs/base.c index b621521e09d4..52f5059c4f31 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -856,14 +856,14 @@ static int devfsd_close(struct inode *inode, struct file *file); #ifdef CONFIG_DEVFS_DEBUG static ssize_t stat_read(struct file *file, char __user *buf, size_t len, loff_t * ppos); -static struct file_operations stat_fops = { +static const struct file_operations stat_fops = { .open = nonseekable_open, .read = stat_read, }; #endif /* Devfs daemon file operations */ -static struct file_operations devfsd_fops = { +static const struct file_operations devfsd_fops = { .open = nonseekable_open, .read = devfsd_read, .ioctl = devfsd_ioctl, @@ -1842,8 +1842,8 @@ static int try_modload(struct devfs_entry *parent, struct fs_info *fs_info, static struct inode_operations devfs_iops; static struct inode_operations devfs_dir_iops; -static struct file_operations devfs_fops; -static struct file_operations devfs_dir_fops; +static const struct file_operations devfs_fops; +static const struct file_operations devfs_dir_fops; static struct inode_operations devfs_symlink_iops; static int devfs_notify_change(struct dentry *dentry, struct iattr *iattr) @@ -2061,11 +2061,11 @@ static int devfs_open(struct inode *inode, struct file *file) return err; } /* End Function devfs_open */ -static struct file_operations devfs_fops = { +static const struct file_operations devfs_fops = { .open = devfs_open, }; -static struct file_operations devfs_dir_fops = { +static const struct file_operations devfs_dir_fops = { .read = generic_read_dir, .readdir = devfs_readdir, }; diff --git a/fs/efs/dir.c b/fs/efs/dir.c index 777c614ff360..17f5b2d3c16a 100644 --- a/fs/efs/dir.c +++ b/fs/efs/dir.c @@ -10,7 +10,7 @@ static int efs_readdir(struct file *, void *, filldir_t); -struct file_operations efs_dir_operations = { +const struct file_operations efs_dir_operations = { .read = generic_read_dir, .readdir = efs_readdir, }; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index e067a06c6464..242fe1a66ce5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -290,7 +290,7 @@ static kmem_cache_t *pwq_cache __read_mostly; static struct vfsmount *eventpoll_mnt __read_mostly; /* File callbacks that implement the eventpoll file behaviour */ -static struct file_operations eventpoll_fops = { +static const struct file_operations eventpoll_fops = { .release = ep_eventpoll_close, .poll = ep_eventpoll_poll }; diff --git 
a/fs/ext2/dir.c b/fs/ext2/dir.c index 0165388c425c..d672aa9f4061 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -658,7 +658,7 @@ not_empty: return 0; } -struct file_operations ext2_dir_operations = { +const struct file_operations ext2_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = ext2_readdir, diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 11035ac7986f..9f74a62be555 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -154,12 +154,12 @@ extern void ext2_write_super (struct super_block *); */ /* dir.c */ -extern struct file_operations ext2_dir_operations; +extern const struct file_operations ext2_dir_operations; /* file.c */ extern struct inode_operations ext2_file_inode_operations; -extern struct file_operations ext2_file_operations; -extern struct file_operations ext2_xip_file_operations; +extern const struct file_operations ext2_file_operations; +extern const struct file_operations ext2_xip_file_operations; /* inode.c */ extern struct address_space_operations ext2_aops; diff --git a/fs/ext2/file.c b/fs/ext2/file.c index a484412fc782..509cceca04db 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -39,7 +39,7 @@ static int ext2_release_file (struct inode * inode, struct file * filp) * We have mostly NULL's here: the current defaults are ok for * the ext2 filesystem. */ -struct file_operations ext2_file_operations = { +const struct file_operations ext2_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, .write = generic_file_write, @@ -56,7 +56,7 @@ struct file_operations ext2_file_operations = { }; #ifdef CONFIG_EXT2_FS_XIP -struct file_operations ext2_xip_file_operations = { +const struct file_operations ext2_xip_file_operations = { .llseek = generic_file_llseek, .read = xip_file_read, .write = xip_file_write, diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 38bd3f6ec147..f37528ed222e 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -39,7 +39,7 @@ static int ext3_dx_readdir(struct file * filp, static int ext3_release_dir (struct inode * inode, struct file * filp); -struct file_operations ext3_dir_operations = { +const struct file_operations ext3_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = ext3_readdir, /* we take BKL. 
needed?*/ diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 59098ea56711..783a796220bb 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -105,7 +105,7 @@ force_commit: return ret; } -struct file_operations ext3_file_operations = { +const struct file_operations ext3_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4095bc149eb1..698b85bb1dd4 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -741,7 +741,7 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp, return ret; } -struct file_operations fat_dir_operations = { +const struct file_operations fat_dir_operations = { .read = generic_read_dir, .readdir = fat_readdir, .ioctl = fat_dir_ioctl, diff --git a/fs/fat/file.c b/fs/fat/file.c index 88aa1ae13f9f..1ee25232e6af 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -112,7 +112,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, } } -struct file_operations fat_file_operations = { +const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, diff --git a/fs/fifo.c b/fs/fifo.c index d13fcd3ec803..889f722ee36d 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -145,6 +145,6 @@ err_nocleanup: * is contain the open that then fills in the correct operations * depending on the access mode of the file... */ -struct file_operations def_fifo_fops = { +const struct file_operations def_fifo_fops = { .open = fifo_open, /* will set read or write pipe_fops */ }; diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index 927acf70c591..1cf1fe8466a2 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h @@ -63,7 +63,7 @@ extern void vxfs_clear_inode(struct inode *); /* vxfs_lookup.c */ extern struct inode_operations vxfs_dir_inode_ops; -extern struct file_operations vxfs_dir_operations; +extern const struct file_operations vxfs_dir_operations; /* vxfs_olt.c */ extern int vxfs_read_olt(struct super_block *, u_long); diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 554eb455722c..29cce456c7ce 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -56,7 +56,7 @@ struct inode_operations vxfs_dir_inode_ops = { .lookup = vxfs_lookup, }; -struct file_operations vxfs_dir_operations = { +const struct file_operations vxfs_dir_operations = { .readdir = vxfs_readdir, }; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 0c9a2ee54c91..23d1f52eb1b8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -922,7 +922,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file) return 0; } -struct file_operations fuse_dev_operations = { +const struct file_operations fuse_dev_operations = { .owner = THIS_MODULE, .llseek = no_llseek, .read = fuse_dev_read, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c72a8a97935c..256355b80256 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1170,7 +1170,7 @@ static struct inode_operations fuse_dir_inode_operations = { .removexattr = fuse_removexattr, }; -static struct file_operations fuse_dir_operations = { +static const struct file_operations fuse_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = fuse_readdir, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6f05379b0a0d..975f2697e866 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -12,7 +12,7 @@ #include #include -static struct file_operations fuse_direct_io_file_operations; +static const struct file_operations 
fuse_direct_io_file_operations; static int fuse_send_open(struct inode *inode, struct file *file, int isdir, struct fuse_open_out *outargp) @@ -611,7 +611,7 @@ static int fuse_set_page_dirty(struct page *page) return 0; } -static struct file_operations fuse_file_operations = { +static const struct file_operations fuse_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, .write = generic_file_write, @@ -623,7 +623,7 @@ static struct file_operations fuse_file_operations = { .sendfile = generic_file_sendfile, }; -static struct file_operations fuse_direct_io_file_operations = { +static const struct file_operations fuse_direct_io_file_operations = { .llseek = generic_file_llseek, .read = fuse_direct_read, .write = fuse_direct_write, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4a83adfec968..a16a04fcf41e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -346,7 +346,7 @@ static inline u64 get_node_id(struct inode *inode) } /** Device operations */ -extern struct file_operations fuse_dev_operations; +extern const struct file_operations fuse_dev_operations; /** * This is the single global spinlock which protects FUSE's structures diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 534e5a7480ef..7cd8cc03aea7 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -313,7 +313,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, return res; } -struct file_operations hfs_dir_operations = { +const struct file_operations hfs_dir_operations = { .read = generic_read_dir, .readdir = hfs_readdir, .llseek = generic_file_llseek, diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 18ce47ab1b71..3ed8663a8db1 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -169,7 +169,7 @@ extern int hfs_cat_move(u32, struct inode *, struct qstr *, extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qstr *); /* dir.c */ -extern struct file_operations hfs_dir_operations; +extern const struct file_operations hfs_dir_operations; extern struct inode_operations hfs_dir_inode_operations; /* extent.c */ diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 2c564701724f..2d4ced22201b 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -17,7 +17,7 @@ #include "hfs_fs.h" #include "btree.h" -static struct file_operations hfs_file_operations; +static const struct file_operations hfs_file_operations; static struct inode_operations hfs_file_inode_operations; /*================ Variable-like macros ================*/ @@ -601,7 +601,7 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) } -static struct file_operations hfs_file_operations = { +static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, .write = generic_file_write, diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 01a6fe3a395c..1f9ece0de326 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -483,7 +483,7 @@ struct inode_operations hfsplus_dir_inode_operations = { .rename = hfsplus_rename, }; -struct file_operations hfsplus_dir_operations = { +const struct file_operations hfsplus_dir_operations = { .read = generic_read_dir, .readdir = hfsplus_readdir, .ioctl = hfsplus_ioctl, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 9fbe4d2aeece..acf66dba3e01 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -280,7 +280,7 @@ static struct inode_operations hfsplus_file_inode_operations = { .listxattr = hfsplus_listxattr, }; -static struct file_operations hfsplus_file_operations = { +static const struct 
file_operations hfsplus_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, .write = generic_file_write, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index b3ad0bd0312f..bf0f8e16e433 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -384,7 +384,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) return fsync_file(HOSTFS_I(dentry->d_inode)->fd, datasync); } -static struct file_operations hostfs_file_fops = { +static const struct file_operations hostfs_file_fops = { .llseek = generic_file_llseek, .read = generic_file_read, .sendfile = generic_file_sendfile, @@ -399,7 +399,7 @@ static struct file_operations hostfs_file_fops = { .fsync = hostfs_fsync, }; -static struct file_operations hostfs_dir_fops = { +static const struct file_operations hostfs_dir_fops = { .llseek = generic_file_llseek, .readdir = hostfs_readdir, .read = generic_read_dir, diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 5591f9623aa2..ecc9180645ae 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -310,7 +310,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name return ERR_PTR(-ENOENT); } -struct file_operations hpfs_dir_ops = +const struct file_operations hpfs_dir_ops = { .llseek = hpfs_dir_lseek, .read = generic_read_dir, diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 7c995ac4081b..d3b9fffe45a1 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -119,7 +119,7 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf, return retval; } -struct file_operations hpfs_file_ops = +const struct file_operations hpfs_file_ops = { .llseek = generic_file_llseek, .read = generic_file_read, diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 4c6473ab3b34..29b7a3e55173 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -240,7 +240,7 @@ void hpfs_set_dentry_operations(struct dentry *); /* dir.c */ struct dentry *hpfs_lookup(struct inode *, struct dentry *, struct nameidata *); -extern struct file_operations hpfs_dir_ops; +extern const struct file_operations hpfs_dir_ops; /* dnode.c */ @@ -266,7 +266,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int); /* file.c */ int hpfs_file_fsync(struct file *, struct dentry *, int); -extern struct file_operations hpfs_file_ops; +extern const struct file_operations hpfs_file_ops; extern struct inode_operations hpfs_file_iops; extern struct address_space_operations hpfs_aops; diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index a44dc5897399..2ba20cdb5baa 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -558,7 +558,7 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where) return(default_llseek(file, off, where)); } -static struct file_operations hppfs_file_fops = { +static const struct file_operations hppfs_file_fops = { .owner = NULL, .llseek = hppfs_llseek, .read = hppfs_read, @@ -609,7 +609,7 @@ static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) return(0); } -static struct file_operations hppfs_dir_fops = { +static const struct file_operations hppfs_dir_fops = { .owner = NULL, .readdir = hppfs_readdir, .open = hppfs_dir_open, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 25fa8bba8cb5..3a5b4e923455 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -35,7 +35,7 @@ static struct super_operations hugetlbfs_ops; static struct address_space_operations hugetlbfs_aops; -struct file_operations hugetlbfs_file_operations; +const 
struct file_operations hugetlbfs_file_operations; static struct inode_operations hugetlbfs_dir_inode_operations; static struct inode_operations hugetlbfs_inode_operations; @@ -566,7 +566,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) inode_init_once(&ei->vfs_inode); } -struct file_operations hugetlbfs_file_operations = { +const struct file_operations hugetlbfs_file_operations = { .mmap = hugetlbfs_file_mmap, .fsync = simple_sync_file, .get_unmapped_area = hugetlb_get_unmapped_area, diff --git a/fs/inotify.c b/fs/inotify.c index f48a3dae0712..367c487c014b 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -920,7 +920,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, return ret; } -static struct file_operations inotify_fops = { +static const struct file_operations inotify_fops = { .poll = inotify_poll, .read = inotify_read, .release = inotify_release, diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 7901ac9f97ab..5440ea292c69 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -16,7 +16,7 @@ static int isofs_readdir(struct file *, void *, filldir_t); -struct file_operations isofs_dir_operations = +const struct file_operations isofs_dir_operations = { .read = generic_read_dir, .readdir = isofs_readdir, diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 439a19b1bf3e..b87ba066f5e7 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -175,6 +175,6 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de, } extern struct inode_operations isofs_dir_inode_operations; -extern struct file_operations isofs_dir_operations; +extern const struct file_operations isofs_dir_operations; extern struct address_space_operations isofs_symlink_aops; extern struct export_operations isofs_export_ops; diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 5a4519e834da..020cc097c539 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -55,9 +55,9 @@ static int jffs_remove(struct inode *dir, struct dentry *dentry, int type); static struct super_operations jffs_ops; -static struct file_operations jffs_file_operations; +static const struct file_operations jffs_file_operations; static struct inode_operations jffs_file_inode_operations; -static struct file_operations jffs_dir_operations; +static const struct file_operations jffs_dir_operations; static struct inode_operations jffs_dir_inode_operations; static struct address_space_operations jffs_address_operations; @@ -1629,7 +1629,7 @@ static int jffs_fsync(struct file *f, struct dentry *d, int datasync) } -static struct file_operations jffs_file_operations = +static const struct file_operations jffs_file_operations = { .open = generic_file_open, .llseek = generic_file_llseek, @@ -1649,7 +1649,7 @@ static struct inode_operations jffs_file_inode_operations = }; -static struct file_operations jffs_dir_operations = +static const struct file_operations jffs_dir_operations = { .readdir = jffs_readdir, }; diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index a7bf9cb2567f..8bc7a5018e40 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -37,7 +37,7 @@ static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t); static int jffs2_rename (struct inode *, struct dentry *, struct inode *, struct dentry *); -struct file_operations jffs2_dir_operations = +const struct file_operations jffs2_dir_operations = { .read = generic_read_dir, .readdir = jffs2_readdir, diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 935f273dc57b..9f4171213e58 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -38,7 
+38,7 @@ int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync) return 0; } -struct file_operations jffs2_file_operations = +const struct file_operations jffs2_file_operations = { .llseek = generic_file_llseek, .open = generic_file_open, diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 59e7a393200c..d307cf548625 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -159,11 +159,11 @@ void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c); void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c); /* dir.c */ -extern struct file_operations jffs2_dir_operations; +extern const struct file_operations jffs2_dir_operations; extern struct inode_operations jffs2_dir_inode_operations; /* file.c */ -extern struct file_operations jffs2_file_operations; +extern const struct file_operations jffs2_file_operations; extern struct inode_operations jffs2_file_inode_operations; extern struct address_space_operations jffs2_file_address_operations; int jffs2_fsync(struct file *, struct dentry *, int); diff --git a/fs/jfs/file.c b/fs/jfs/file.c index e1ac6e497e2b..1c9745be5ada 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -100,7 +100,7 @@ struct inode_operations jfs_file_inode_operations = { #endif }; -struct file_operations jfs_file_operations = { +const struct file_operations jfs_file_operations = { .open = jfs_open, .llseek = generic_file_llseek, .write = generic_file_write, diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 095d471b9f9a..c30072674464 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -35,9 +35,9 @@ extern void jfs_set_inode_flags(struct inode *); extern struct address_space_operations jfs_aops; extern struct inode_operations jfs_dir_inode_operations; -extern struct file_operations jfs_dir_operations; +extern const struct file_operations jfs_dir_operations; extern struct inode_operations jfs_file_inode_operations; -extern struct file_operations jfs_file_operations; +extern const struct file_operations jfs_file_operations; extern struct inode_operations jfs_symlink_inode_operations; extern struct dentry_operations jfs_ci_dentry_operations; #endif /* _H_JFS_INODE */ diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 309cee575f7d..09ea03f62277 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1519,7 +1519,7 @@ struct inode_operations jfs_dir_inode_operations = { #endif }; -struct file_operations jfs_dir_operations = { +const struct file_operations jfs_dir_operations = { .read = generic_read_dir, .readdir = jfs_readdir, .fsync = jfs_fsync, diff --git a/fs/libfs.c b/fs/libfs.c index 4fdeaceb892c..7145ba7a48d0 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -179,7 +179,7 @@ ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t return -EISDIR; } -struct file_operations simple_dir_operations = { +const struct file_operations simple_dir_operations = { .open = dcache_dir_open, .release = dcache_dir_close, .llseek = dcache_dir_lseek, diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 732502aabc05..69224d1fe043 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -14,7 +14,7 @@ typedef struct minix_dir_entry minix_dirent; static int minix_readdir(struct file *, void *, filldir_t); -struct file_operations minix_dir_operations = { +const struct file_operations minix_dir_operations = { .read = generic_read_dir, .readdir = minix_readdir, .fsync = minix_sync_file, diff --git a/fs/minix/file.c b/fs/minix/file.c index f1d77acb3f01..420b32882a10 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -15,7 +15,7 @@ */ 
int minix_sync_file(struct file *, struct dentry *, int); -struct file_operations minix_file_operations = { +const struct file_operations minix_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, .write = generic_file_write, diff --git a/fs/minix/minix.h b/fs/minix/minix.h index e42a8bb89001..c55b77cdcc8e 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -81,8 +81,8 @@ extern int minix_sync_file(struct file *, struct dentry *, int); extern struct inode_operations minix_file_inode_operations; extern struct inode_operations minix_dir_inode_operations; -extern struct file_operations minix_file_operations; -extern struct file_operations minix_dir_operations; +extern const struct file_operations minix_file_operations; +extern const struct file_operations minix_dir_operations; extern struct dentry_operations minix_dentry_operations; static inline struct minix_sb_info *minix_sb(struct super_block *sb) diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index cfd76f431dc0..f0860c602d8b 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -49,7 +49,7 @@ extern int ncp_symlink(struct inode *, struct dentry *, const char *); #define ncp_symlink NULL #endif -struct file_operations ncp_dir_operations = +const struct file_operations ncp_dir_operations = { .read = generic_read_dir, .readdir = ncp_readdir, diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index ebdad8f6398f..e6b7c67cf057 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -283,7 +283,7 @@ static int ncp_release(struct inode *inode, struct file *file) { return 0; } -struct file_operations ncp_file_operations = +const struct file_operations ncp_file_operations = { .llseek = remote_llseek, .read = ncp_file_read, diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 06c48b385c94..a23f34894167 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -54,7 +54,7 @@ static int nfs_rename(struct inode *, struct dentry *, static int nfs_fsync_dir(struct file *, struct dentry *, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); -struct file_operations nfs_dir_operations = { +const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, .read = generic_read_dir, .readdir = nfs_readdir, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index dee49a0cb995..f1df2c8d9259 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -49,7 +49,7 @@ static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); -struct file_operations nfs_file_operations = { +const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = do_sync_read, .write = do_sync_write, diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c8960aff0968..3ef017b3b5bd 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -134,7 +134,7 @@ static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size return simple_transaction_read(file, buf, size, pos); } -static struct file_operations transaction_ops = { +static const struct file_operations transaction_ops = { .write = nfsctl_transaction_write, .read = nfsctl_transaction_read, .release = simple_transaction_release, @@ -146,7 +146,7 @@ static int exports_open(struct inode *inode, struct file *file) return seq_open(file, &nfs_exports_op); } -static struct file_operations exports_operations = { +static const struct file_operations exports_operations = { .open = exports_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c 
index 1cf955bcc526..57265d563804 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -80,7 +80,7 @@ static int nfsd_proc_open(struct inode *inode, struct file *file) return single_open(file, nfsd_proc_show, NULL); } -static struct file_operations nfsd_proc_fops = { +static const struct file_operations nfsd_proc_fops = { .owner = THIS_MODULE, .open = nfsd_proc_open, .read = seq_read, diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 9d9ed3fe371d..d1e2c6f9f05e 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1553,7 +1553,7 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, #endif /* NTFS_RW */ -struct file_operations ntfs_dir_ops = { +const struct file_operations ntfs_dir_ops = { .llseek = generic_file_llseek, /* Seek inside directory. */ .read = generic_read_dir, /* Return -EISDIR. */ .readdir = ntfs_readdir, /* Read directory contents. */ diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index f5d057e4acc2..c63a83e8da98 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2294,7 +2294,7 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, #endif /* NTFS_RW */ -struct file_operations ntfs_file_ops = { +const struct file_operations ntfs_file_ops = { .llseek = generic_file_llseek, /* Seek inside file. */ .read = generic_file_read, /* Read from file. */ .aio_read = generic_file_aio_read, /* Async read from file. */ @@ -2337,6 +2337,6 @@ struct inode_operations ntfs_file_inode_ops = { #endif /* NTFS_RW */ }; -struct file_operations ntfs_empty_file_ops = {}; +const struct file_operations ntfs_empty_file_ops = {}; struct inode_operations ntfs_empty_inode_ops = {}; diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index 166142960b53..bf7b3d7c0930 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h @@ -60,13 +60,13 @@ extern struct kmem_cache *ntfs_index_ctx_cache; extern struct address_space_operations ntfs_aops; extern struct address_space_operations ntfs_mst_aops; -extern struct file_operations ntfs_file_ops; +extern const struct file_operations ntfs_file_ops; extern struct inode_operations ntfs_file_inode_ops; -extern struct file_operations ntfs_dir_ops; +extern const struct file_operations ntfs_dir_ops; extern struct inode_operations ntfs_dir_inode_ops; -extern struct file_operations ntfs_empty_file_ops; +extern const struct file_operations ntfs_empty_file_ops; extern struct inode_operations ntfs_empty_inode_ops; extern struct export_operations ntfs_export_ops; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 84f153aca692..64cd52860c87 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2017,7 +2017,7 @@ out: return ret; } -static struct file_operations ocfs2_dlm_debug_fops = { +static const struct file_operations ocfs2_dlm_debug_fops = { .open = ocfs2_dlm_debug_open, .release = ocfs2_dlm_debug_release, .read = seq_read, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4b4cbadd5838..34e903a6a46b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1176,7 +1176,7 @@ struct inode_operations ocfs2_special_file_iops = { .getattr = ocfs2_getattr, }; -struct file_operations ocfs2_fops = { +const struct file_operations ocfs2_fops = { .read = do_sync_read, .write = do_sync_write, .sendfile = generic_file_sendfile, @@ -1188,7 +1188,7 @@ struct file_operations ocfs2_fops = { .aio_write = ocfs2_file_aio_write, }; -struct file_operations ocfs2_dops = { +const struct file_operations ocfs2_dops = { .read = generic_read_dir, .readdir = ocfs2_readdir, .fsync = ocfs2_sync_file, diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index a5ea33b24060..740c9e7ca599 
100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -26,8 +26,8 @@ #ifndef OCFS2_FILE_H #define OCFS2_FILE_H -extern struct file_operations ocfs2_fops; -extern struct file_operations ocfs2_dops; +extern const struct file_operations ocfs2_fops; +extern const struct file_operations ocfs2_dops; extern struct inode_operations ocfs2_file_iops; extern struct inode_operations ocfs2_special_file_iops; struct ocfs2_alloc_context; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index aeb0106890e4..0f14276a2e51 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -581,17 +581,17 @@ int property_release (struct inode *inode, struct file *filp) return 0; } -static struct file_operations openpromfs_prop_ops = { +static const struct file_operations openpromfs_prop_ops = { .read = property_read, .write = property_write, .release = property_release, }; -static struct file_operations openpromfs_nodenum_ops = { +static const struct file_operations openpromfs_nodenum_ops = { .read = nodenum_read, }; -static struct file_operations openprom_operations = { +static const struct file_operations openprom_operations = { .read = generic_read_dir, .readdir = openpromfs_readdir, }; diff --git a/fs/pipe.c b/fs/pipe.c index 4384c9290943..e2f4f1d9ffc2 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -568,7 +568,7 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) * The file_operations structs are not static because they * are also used in linux/fs/fifo.c to do operations on FIFOs. */ -struct file_operations read_fifo_fops = { +const struct file_operations read_fifo_fops = { .llseek = no_llseek, .read = pipe_read, .readv = pipe_readv, @@ -580,7 +580,7 @@ struct file_operations read_fifo_fops = { .fasync = pipe_read_fasync, }; -struct file_operations write_fifo_fops = { +const struct file_operations write_fifo_fops = { .llseek = no_llseek, .read = bad_pipe_r, .write = pipe_write, @@ -592,7 +592,7 @@ struct file_operations write_fifo_fops = { .fasync = pipe_write_fasync, }; -struct file_operations rdwr_fifo_fops = { +const struct file_operations rdwr_fifo_fops = { .llseek = no_llseek, .read = pipe_read, .readv = pipe_readv, diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index adc2cd95169a..17f6e8fa1397 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -31,7 +31,7 @@ static int open_kcore(struct inode * inode, struct file * filp) static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *); -struct file_operations proc_kcore_operations = { +const struct file_operations proc_kcore_operations = { .read = read_kcore, .open = open_kcore, }; diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index 10d37bf25206..ff3b90b56e9d 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -47,7 +47,7 @@ static unsigned int kmsg_poll(struct file *file, poll_table *wait) } -struct file_operations proc_kmsg_operations = { +const struct file_operations proc_kmsg_operations = { .read = kmsg_read, .poll = kmsg_poll, .open = kmsg_open, diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 4063fb32f78c..7efa73d44c9a 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -172,7 +172,7 @@ static int open_vmcore(struct inode *inode, struct file *filp) return 0; } -struct file_operations proc_vmcore_operations = { +const struct file_operations proc_vmcore_operations = { .read = read_vmcore, .open = open_vmcore, }; diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 7a8f5595c26f..9031948fefd0 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -81,7 +81,7 @@ out: return 0; } -struct file_operations qnx4_dir_operations = 
+const struct file_operations qnx4_dir_operations = { .read = generic_read_dir, .readdir = qnx4_readdir, diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index c33963fded9e..62af4b1348bd 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c @@ -19,7 +19,7 @@ * We have mostly NULL's here: the current defaults are ok for * the qnx4 filesystem. */ -struct file_operations qnx4_file_operations = +const struct file_operations qnx4_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 6ada2095b9ac..00a933eb820c 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -32,7 +32,7 @@ struct address_space_operations ramfs_aops = { .commit_write = simple_commit_write }; -struct file_operations ramfs_file_operations = { +const struct file_operations ramfs_file_operations = { .read = generic_file_read, .write = generic_file_write, .mmap = generic_file_mmap, diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index b1ca234068f6..f443a84b98a5 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -33,7 +33,7 @@ struct address_space_operations ramfs_aops = { .commit_write = simple_commit_write }; -struct file_operations ramfs_file_operations = { +const struct file_operations ramfs_file_operations = { .mmap = ramfs_nommu_mmap, .get_unmapped_area = ramfs_nommu_get_unmapped_area, .read = generic_file_read, diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h index 272c8a7120b0..313237631b49 100644 --- a/fs/ramfs/internal.h +++ b/fs/ramfs/internal.h @@ -11,5 +11,5 @@ extern struct address_space_operations ramfs_aops; -extern struct file_operations ramfs_file_operations; +extern const struct file_operations ramfs_file_operations; extern struct inode_operations ramfs_file_inode_operations; diff --git a/fs/read_write.c b/fs/read_write.c index 34b1bf259efd..6256ca81a718 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -19,7 +19,7 @@ #include #include -struct file_operations generic_ro_fops = { +const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, .read = generic_file_read, .mmap = generic_file_readonly_mmap, diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index d71ac6579289..973c819f8033 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -18,7 +18,7 @@ static int reiserfs_readdir(struct file *, void *, filldir_t); static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync); -struct file_operations reiserfs_dir_operations = { +const struct file_operations reiserfs_dir_operations = { .read = generic_read_dir, .readdir = reiserfs_readdir, .fsync = reiserfs_dir_fsync, diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index d0c1e865963e..010094d14da6 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1566,7 +1566,7 @@ static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf, return generic_file_aio_write(iocb, buf, count, pos); } -struct file_operations reiserfs_file_operations = { +const struct file_operations reiserfs_file_operations = { .read = generic_file_read, .write = reiserfs_file_write, .ioctl = reiserfs_ioctl, diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index ef6caed9336b..731688e1cfe3 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -470,7 +470,7 @@ static int r_open(struct inode *inode, struct file *file) return ret; } -static struct file_operations r_file_operations = { +static const struct file_operations r_file_operations = { .open = r_open, .read = seq_read, .llseek = 
seq_lseek, diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index c2fc424d7d5c..9b9eda7b335c 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -463,7 +463,7 @@ static struct address_space_operations romfs_aops = { .readpage = romfs_readpage }; -static struct file_operations romfs_dir_operations = { +static const struct file_operations romfs_dir_operations = { .read = generic_read_dir, .readdir = romfs_readdir, }; diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 0424d06b147e..34c7a11d91f0 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -34,7 +34,7 @@ static int smb_rename(struct inode *, struct dentry *, static int smb_make_node(struct inode *,struct dentry *,int,dev_t); static int smb_link(struct dentry *, struct inode *, struct dentry *); -struct file_operations smb_dir_operations = +const struct file_operations smb_dir_operations = { .read = generic_read_dir, .readdir = smb_readdir, diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 7042e62726a4..c56bd99a9701 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -401,7 +401,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) return error; } -struct file_operations smb_file_operations = +const struct file_operations smb_file_operations = { .llseek = remote_llseek, .read = smb_file_read, diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h index e866ec8660d0..47664597e6b1 100644 --- a/fs/smbfs/proto.h +++ b/fs/smbfs/proto.h @@ -35,7 +35,7 @@ extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry); extern void smb_install_null_ops(struct smb_ops *ops); /* dir.c */ -extern struct file_operations smb_dir_operations; +extern const struct file_operations smb_dir_operations; extern struct inode_operations smb_dir_inode_operations; extern struct inode_operations smb_dir_inode_operations_unix; extern void smb_new_dentry(struct dentry *dentry); @@ -64,7 +64,7 @@ extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat extern int smb_notify_change(struct dentry *dentry, struct iattr *attr); /* file.c */ extern struct address_space_operations smb_file_aops; -extern struct file_operations smb_file_operations; +extern const struct file_operations smb_file_operations; extern struct inode_operations smb_file_inode_operations; /* ioctl.c */ extern int smb_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 78899eeab974..c16a93c353c0 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -163,7 +163,7 @@ static int release(struct inode * inode, struct file * file) return 0; } -struct file_operations bin_fops = { +const struct file_operations bin_fops = { .read = read, .write = write, .mmap = mmap, diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 9ee956864445..f26880a4785e 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -503,7 +503,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) return offset; } -struct file_operations sysfs_dir_operations = { +const struct file_operations sysfs_dir_operations = { .open = sysfs_dir_open, .release = sysfs_dir_close, .llseek = sysfs_dir_lseek, diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 5e83e7246788..830f76fa098c 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -348,7 +348,7 @@ static int sysfs_release(struct inode * inode, struct file * filp) return 0; } -struct file_operations sysfs_file_operations = { +const 
struct file_operations sysfs_file_operations = { .read = sysfs_read_file, .write = sysfs_write_file, .llseek = generic_file_llseek, diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index cf11d5b789d9..32958a7c50e9 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -21,9 +21,9 @@ extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); extern struct rw_semaphore sysfs_rename_sem; extern struct super_block * sysfs_sb; -extern struct file_operations sysfs_dir_operations; -extern struct file_operations sysfs_file_operations; -extern struct file_operations bin_fops; +extern const struct file_operations sysfs_dir_operations; +extern const struct file_operations sysfs_file_operations; +extern const struct file_operations bin_fops; extern struct inode_operations sysfs_dir_inode_operations; extern struct inode_operations sysfs_symlink_inode_operations; diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index cce8b05cba5a..8c66e9270dd6 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -20,7 +20,7 @@ static int sysv_readdir(struct file *, void *, filldir_t); -struct file_operations sysv_dir_operations = { +const struct file_operations sysv_dir_operations = { .read = generic_read_dir, .readdir = sysv_readdir, .fsync = sysv_sync_file, diff --git a/fs/sysv/file.c b/fs/sysv/file.c index da69abc06240..a59e303135fa 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -19,7 +19,7 @@ * We have mostly NULLs here: the current defaults are OK for * the coh filesystem. */ -struct file_operations sysv_file_operations = { +const struct file_operations sysv_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, .write = generic_file_write, diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index b7f9b4a42aab..393a480e4deb 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -159,8 +159,8 @@ extern ino_t sysv_inode_by_name(struct dentry *); extern struct inode_operations sysv_file_inode_operations; extern struct inode_operations sysv_dir_inode_operations; extern struct inode_operations sysv_fast_symlink_inode_operations; -extern struct file_operations sysv_file_operations; -extern struct file_operations sysv_dir_operations; +extern const struct file_operations sysv_file_operations; +extern const struct file_operations sysv_dir_operations; extern struct address_space_operations sysv_aops; extern struct super_operations sysv_sops; extern struct dentry_operations sysv_dentry_operations; diff --git a/fs/udf/dir.c b/fs/udf/dir.c index f5222527fe39..8c28efa3b8ff 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -42,7 +42,7 @@ static int do_udf_readdir(struct inode *, struct file *, filldir_t, void *); /* readdir and lookup functions */ -struct file_operations udf_dir_operations = { +const struct file_operations udf_dir_operations = { .read = generic_read_dir, .readdir = udf_readdir, .ioctl = udf_ioctl, diff --git a/fs/udf/file.c b/fs/udf/file.c index a6f2acc1f15c..e34b00e303f1 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -248,7 +248,7 @@ static int udf_release_file(struct inode * inode, struct file * filp) return 0; } -struct file_operations udf_file_operations = { +const struct file_operations udf_file_operations = { .read = generic_file_read, .ioctl = udf_ioctl, .open = generic_file_open, diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 1d5800e0cbe7..023e19ba5a2e 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -44,9 +44,9 @@ struct buffer_head; struct super_block; extern struct inode_operations udf_dir_inode_operations; -extern struct file_operations udf_dir_operations; +extern 
const struct file_operations udf_dir_operations; extern struct inode_operations udf_file_inode_operations; -extern struct file_operations udf_file_operations; +extern const struct file_operations udf_file_operations; extern struct address_space_operations udf_aops; extern struct address_space_operations udf_adinicb_aops; extern struct address_space_operations udf_symlink_aops; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 7c10c68902ae..1a561202d3f4 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -620,7 +620,7 @@ int ufs_empty_dir (struct inode * inode) return 1; } -struct file_operations ufs_dir_operations = { +const struct file_operations ufs_dir_operations = { .read = generic_read_dir, .readdir = ufs_readdir, .fsync = file_fsync, diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 62ad481810ef..312fd3f86313 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -31,7 +31,7 @@ * the ufs filesystem. */ -struct file_operations ufs_file_operations = { +const struct file_operations ufs_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, .write = generic_file_write, diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 185567a6a561..85997b1205f5 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -528,7 +528,7 @@ open_exec_out: } #endif /* HAVE_FOP_OPEN_EXEC */ -struct file_operations xfs_file_operations = { +const struct file_operations xfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, @@ -550,7 +550,7 @@ struct file_operations xfs_file_operations = { #endif }; -struct file_operations xfs_invis_file_operations = { +const struct file_operations xfs_invis_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, @@ -570,7 +570,7 @@ struct file_operations xfs_invis_file_operations = { }; -struct file_operations xfs_dir_file_operations = { +const struct file_operations xfs_dir_file_operations = { .read = generic_read_dir, .readdir = xfs_file_readdir, .unlocked_ioctl = xfs_file_ioctl, diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index a8417d7af5f9..ad6173da5678 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h @@ -22,9 +22,9 @@ extern struct inode_operations xfs_inode_operations; extern struct inode_operations xfs_dir_inode_operations; extern struct inode_operations xfs_symlink_inode_operations; -extern struct file_operations xfs_file_operations; -extern struct file_operations xfs_dir_file_operations; -extern struct file_operations xfs_invis_file_operations; +extern const struct file_operations xfs_file_operations; +extern const struct file_operations xfs_dir_file_operations; +extern const struct file_operations xfs_invis_file_operations; extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, int, unsigned int, void __user *); diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index cc621ec409d8..b3ecf8f71d97 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -30,9 +30,9 @@ extern struct inode_operations coda_ioctl_inode_operations; extern struct address_space_operations coda_file_aops; extern struct address_space_operations coda_symlink_aops; -extern struct file_operations coda_dir_operations; -extern struct file_operations coda_file_operations; -extern struct file_operations coda_ioctl_operations; +extern const struct file_operations coda_dir_operations; +extern const struct file_operations coda_file_operations; +extern const struct 
file_operations coda_ioctl_operations; /* operations shared over more than one file */ int coda_open(struct inode *i, struct file *f); diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 534d750d922d..32503657f14f 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -11,7 +11,7 @@ extern unsigned long long elfcorehdr_addr; extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); -extern struct file_operations proc_vmcore_operations; +extern const struct file_operations proc_vmcore_operations; extern struct proc_dir_entry *proc_vmcore; #endif /* CONFIG_CRASH_DUMP */ diff --git a/include/linux/efs_fs.h b/include/linux/efs_fs.h index 28f368c526fb..fbfa6b52e2fb 100644 --- a/include/linux/efs_fs.h +++ b/include/linux/efs_fs.h @@ -37,7 +37,7 @@ static inline struct efs_sb_info *SUPER_INFO(struct super_block *sb) struct statfs; extern struct inode_operations efs_dir_inode_operations; -extern struct file_operations efs_dir_operations; +extern const struct file_operations efs_dir_operations; extern struct address_space_operations efs_symlink_aops; extern void efs_read_inode(struct inode *); diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 8bb4f842cded..3ade6a4e3bdd 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -833,11 +833,11 @@ do { \ */ /* dir.c */ -extern struct file_operations ext3_dir_operations; +extern const struct file_operations ext3_dir_operations; /* file.c */ extern struct inode_operations ext3_file_inode_operations; -extern struct file_operations ext3_file_operations; +extern const struct file_operations ext3_file_operations; /* namei.c */ extern struct inode_operations ext3_dir_inode_operations; diff --git a/include/linux/fs.h b/include/linux/fs.h index ef355bc73714..408fe89498f4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1390,11 +1390,11 @@ extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern struct block_device *open_by_devnum(dev_t, unsigned); -extern struct file_operations def_blk_fops; +extern const struct file_operations def_blk_fops; extern struct address_space_operations def_blk_aops; -extern struct file_operations def_chr_fops; -extern struct file_operations bad_sock_fops; -extern struct file_operations def_fifo_fops; +extern const struct file_operations def_chr_fops; +extern const struct file_operations bad_sock_fops; +extern const struct file_operations def_fifo_fops; extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); @@ -1444,9 +1444,9 @@ extern void init_special_inode(struct inode *, umode_t, dev_t); extern void make_bad_inode(struct inode *); extern int is_bad_inode(struct inode *); -extern struct file_operations read_fifo_fops; -extern struct file_operations write_fifo_fops; -extern struct file_operations rdwr_fifo_fops; +extern const struct file_operations read_fifo_fops; +extern const struct file_operations write_fifo_fops; +extern const struct file_operations rdwr_fifo_fops; extern int fs_may_remount_ro(struct super_block *); @@ -1688,7 +1688,7 @@ static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, nr_segs, get_block, end_io, DIO_OWN_LOCKING); } -extern struct file_operations generic_ro_fops; +extern const struct file_operations 
generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) @@ -1744,9 +1744,9 @@ extern int simple_commit_write(struct file *file, struct page *page, extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); -extern struct file_operations simple_dir_operations; +extern const struct file_operations simple_dir_operations; extern struct inode_operations simple_dir_inode_operations; -struct tree_descr { char *name; struct file_operations *ops; int mode; }; +struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, int, struct tree_descr *); extern int simple_pin_fs(char *name, struct vfsmount **mount, int *count); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d6f1019625af..4c5e610fe442 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -154,7 +154,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) return sb->s_fs_info; } -extern struct file_operations hugetlbfs_file_operations; +extern const struct file_operations hugetlbfs_file_operations; extern struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_zero_setup(size_t); int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info, diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 53cee1581650..d9035c73e5d1 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -334,7 +334,7 @@ extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, unsigned long *mapped_blocks); /* fat/dir.c */ -extern struct file_operations fat_dir_operations; +extern const struct file_operations fat_dir_operations; extern int fat_search_long(struct inode *inode, const unsigned char *name, int name_len, struct fat_slot_info *sinfo); extern int fat_dir_empty(struct inode *dir); @@ -397,7 +397,7 @@ extern int fat_count_free_clusters(struct super_block *sb); /* fat/file.c */ extern int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); -extern struct file_operations fat_file_operations; +extern const struct file_operations fat_file_operations; extern struct inode_operations fat_file_inode_operations; extern int fat_notify_change(struct dentry * dentry, struct iattr * attr); extern void fat_truncate(struct inode *inode); diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index e01342568530..96dc237b8f03 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -209,7 +209,7 @@ void ncp_update_inode2(struct inode *, struct ncp_entry_info *); /* linux/fs/ncpfs/dir.c */ extern struct inode_operations ncp_dir_inode_operations; -extern struct file_operations ncp_dir_operations; +extern const struct file_operations ncp_dir_operations; int ncp_conn_logged_in(struct super_block *); int ncp_date_dos2unix(__le16 time, __le16 date); void ncp_date_unix2dos(int unix_date, __le16 * time, __le16 * date); @@ -230,7 +230,7 @@ void ncp_unlock_server(struct ncp_server *server); /* linux/fs/ncpfs/file.c */ extern struct inode_operations ncp_file_inode_operations; -extern struct file_operations ncp_file_operations; +extern const struct file_operations ncp_file_operations; int ncp_make_open(struct inode *, int); /* linux/fs/ncpfs/mmap.c */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 
cbebd7d1b9e8..c71227dd4389 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -324,7 +324,7 @@ extern struct inode_operations nfs_file_inode_operations; #ifdef CONFIG_NFS_V3 extern struct inode_operations nfs3_file_inode_operations; #endif /* CONFIG_NFS_V3 */ -extern struct file_operations nfs_file_operations; +extern const struct file_operations nfs_file_operations; extern struct address_space_operations nfs_file_aops; static inline struct rpc_cred *nfs_file_cred(struct file *file) @@ -371,7 +371,7 @@ extern struct inode_operations nfs_dir_inode_operations; #ifdef CONFIG_NFS_V3 extern struct inode_operations nfs3_dir_inode_operations; #endif /* CONFIG_NFS_V3 */ -extern struct file_operations nfs_dir_operations; +extern const struct file_operations nfs_dir_operations; extern struct dentry_operations nfs_dentry_operations; extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 6d03d025fcd5..135871df9911 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -128,9 +128,9 @@ extern int proc_match(int, const char *,struct proc_dir_entry *); extern int proc_readdir(struct file *, void *, filldir_t); extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); -extern struct file_operations proc_kcore_operations; -extern struct file_operations proc_kmsg_operations; -extern struct file_operations ppc_htab_operations; +extern const struct file_operations proc_kcore_operations; +extern const struct file_operations proc_kmsg_operations; +extern const struct file_operations ppc_htab_operations; /* * proc_tty.c diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index fc610bb0f733..27f49c85d5d6 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -118,8 +118,8 @@ extern struct buffer_head *qnx4_bread(struct inode *, int, int); extern struct inode_operations qnx4_file_inode_operations; extern struct inode_operations qnx4_dir_inode_operations; -extern struct file_operations qnx4_file_operations; -extern struct file_operations qnx4_dir_operations; +extern const struct file_operations qnx4_file_operations; +extern const struct file_operations qnx4_dir_operations; extern int qnx4_is_free(struct super_block *sb, long block); extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 953b6df5d037..78ecfa28b1c2 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -15,7 +15,7 @@ extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file, extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); #endif -extern struct file_operations ramfs_file_operations; +extern const struct file_operations ramfs_file_operations; extern struct vm_operations_struct generic_file_vm_ops; #endif diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 912f1b7cb18f..5676c4210e2c 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1960,7 +1960,7 @@ int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset, extern struct inode_operations reiserfs_dir_inode_operations; extern struct inode_operations reiserfs_symlink_inode_operations; extern struct inode_operations reiserfs_special_inode_operations; -extern struct file_operations reiserfs_dir_operations; 
+extern const struct file_operations reiserfs_dir_operations; /* tail_conversion.c */ int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, @@ -1972,7 +1972,7 @@ void reiserfs_unmap_buffer(struct buffer_head *); /* file.c */ extern struct inode_operations reiserfs_file_inode_operations; -extern struct file_operations reiserfs_file_operations; +extern const struct file_operations reiserfs_file_operations; extern struct address_space_operations reiserfs_address_space_operations; /* fix_nodes.c */ diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h index b0ffe4356e5a..843aeaaa79d4 100644 --- a/include/linux/ufs_fs.h +++ b/include/linux/ufs_fs.h @@ -895,7 +895,7 @@ extern void ufs_set_link(struct inode *, struct ufs_dir_entry *, struct buffer_h /* file.c */ extern struct inode_operations ufs_file_inode_operations; -extern struct file_operations ufs_file_operations; +extern const struct file_operations ufs_file_operations; extern struct address_space_operations ufs_aops; @@ -915,7 +915,7 @@ extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); /* namei.c */ -extern struct file_operations ufs_dir_operations; +extern const struct file_operations ufs_dir_operations; /* super.c */ extern void ufs_warning (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); diff --git a/net/nonet.c b/net/nonet.c index 1230f0ae832e..92e76640c7cd 100644 --- a/net/nonet.c +++ b/net/nonet.c @@ -19,7 +19,7 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare) return -ENXIO; } -struct file_operations bad_sock_fops = { +const struct file_operations bad_sock_fops = { .owner = THIS_MODULE, .open = sock_no_open, }; diff --git a/net/socket.c b/net/socket.c index 5211ba270375..fcd77eac0ccf 100644 --- a/net/socket.c +++ b/net/socket.c @@ -539,7 +539,7 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare) return -ENXIO; } -struct file_operations bad_sock_fops = { +const struct file_operations bad_sock_fops = { .owner = THIS_MODULE, .open = sock_no_open, }; -- cgit v1.2.3 From 7f927fcc2fd1575d01efb4b76665975007945690 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Mar 2006 01:56:53 -0800 Subject: [PATCH] Typo fixes Fix a lot of typos. Eyeballed by jmc@ in OpenBSD. 
Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/m68k/README.buddha | 2 +- Documentation/networking/ifenslave.c | 2 +- arch/arm/lib/copy_template.S | 2 +- drivers/char/mxser.h | 2 +- drivers/char/synclink.c | 2 +- drivers/edac/edac_mc.c | 2 +- drivers/net/irda/nsc-ircc.c | 2 +- drivers/net/sis900.c | 2 +- drivers/net/tulip/de4x5.c | 2 +- drivers/net/tulip/pnic2.c | 2 +- drivers/net/typhoon.c | 4 ++-- drivers/net/wireless/orinoco.c | 2 +- drivers/net/wireless/prism54/isl_ioctl.c | 2 +- drivers/scsi/3w-9xxx.c | 2 +- drivers/serial/8250.c | 2 +- drivers/serial/serial_txx9.c | 2 +- drivers/serial/sunsu.c | 2 +- drivers/usb/host/ohci-s3c2410.c | 2 +- drivers/usb/net/zaurus.c | 2 +- fs/mbcache.c | 2 +- include/asm-parisc/pdc.h | 2 +- include/asm-sh/addrspace.h | 2 +- include/asm-sparc64/floppy.h | 2 +- include/linux/fb.h | 2 +- mm/mempolicy.c | 2 +- net/irda/af_irda.c | 6 +++--- sound/pci/rme32.c | 8 ++++---- sound/pci/rme96.c | 8 ++++---- sound/pci/rme9652/hdspm.c | 2 +- sound/usb/usx2y/usx2yhwdeppcm.c | 2 +- 30 files changed, 39 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/Documentation/m68k/README.buddha b/Documentation/m68k/README.buddha index bf802ffc98ad..ef484a719bb9 100644 --- a/Documentation/m68k/README.buddha +++ b/Documentation/m68k/README.buddha @@ -29,7 +29,7 @@ address is written to $4a, then the whole Byte is written to $48, while it doesn't matter how often you're writing to $4a as long as $48 is not touched. After $48 has been written, the whole card disappears from $e8 and is mapped to the new -address just written. Make shure $4a is written before $48, +address just written. Make sure $4a is written before $48, otherwise your chance is only 1:16 to find the board :-). The local memory-map is even active when mapped to $e8: diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c index 545447ac503a..a12059886755 100644 --- a/Documentation/networking/ifenslave.c +++ b/Documentation/networking/ifenslave.c @@ -87,7 +87,7 @@ * would fail and generate an error message in the system log. * - For opt_c: slave should not be set to the master's setting * while it is running. It was already set during enslave. To - * simplify things, it is now handeled separately. + * simplify things, it is now handled separately. * * - 2003/12/01 - Shmulik Hen * - Code cleanup and style changes diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index 838e435e4922..cab355c0c1f7 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -236,7 +236,7 @@ /* - * Abort preanble and completion macros. + * Abort preamble and completion macros. * If a fixup handler is required then those macros must surround it. * It is assumed that the fixup code will handle the private part of * the exit macro. diff --git a/drivers/char/mxser.h b/drivers/char/mxser.h index e7fd0b08e0b7..7e188a4d602a 100644 --- a/drivers/char/mxser.h +++ b/drivers/char/mxser.h @@ -118,7 +118,7 @@ // enable CTS interrupt #define MOXA_MUST_IER_ECTSI 0x80 -// eanble RTS interrupt +// enable RTS interrupt #define MOXA_MUST_IER_ERTSI 0x40 // enable Xon/Xoff interrupt #define MOXA_MUST_IER_XINT 0x20 diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index abb03e52fe12..fee2aca3f6a5 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -5996,7 +5996,7 @@ static void usc_set_async_mode( struct mgsl_struct *info ) * <15..8> ? 
RxFIFO IRQ Request Level * * Note: For async mode the receive FIFO level must be set - * to 0 to aviod the situation where the FIFO contains fewer bytes + * to 0 to avoid the situation where the FIFO contains fewer bytes * than the trigger level and no more data is expected. * * <7> 0 Exited Hunt IA (Interrupt Arm) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 905f58ba8e16..ea06e3a4dc35 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -2044,7 +2044,7 @@ static int __init edac_mc_init(void) */ clear_pci_parity_errors(); - /* Create the MC sysfs entires */ + /* Create the MC sysfs entries */ if (edac_sysfs_memctrl_setup()) { edac_printk(KERN_ERR, EDAC_MC, "Error initializing sysfs code\n"); diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c index 9aa074b44dd3..cc7ff8f00e42 100644 --- a/drivers/net/irda/nsc-ircc.c +++ b/drivers/net/irda/nsc-ircc.c @@ -812,7 +812,7 @@ static int nsc_ircc_init_39x(nsc_chip_t *chip, chipio_t *info) int cfg_base = info->cfg_base; int enabled; - /* User is shure about his config... accept it. */ + /* User is sure about his config... accept it. */ IRDA_DEBUG(2, "%s(): nsc_ircc_init_39x (user settings): " "io=0x%04x, irq=%d, dma=%d\n", __FUNCTION__, info->fir_base, info->irq, info->dma); diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index 8429ceb01389..b82191d2bee1 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -2283,7 +2283,7 @@ static void set_rx_mode(struct net_device *net_dev) int i, table_entries; u32 rx_mode; - /* 635 Hash Table entires = 256(2^16) */ + /* 635 Hash Table entries = 256(2^16) */ if((sis_priv->chipset_rev >= SIS635A_900_REV) || (sis_priv->chipset_rev == SIS900B_900_REV)) table_entries = 16; diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index ee48bfd67349..d1a86a080a65 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -513,7 +513,7 @@ struct mii_phy { u_char *rst; /* Start of reset sequence in SROM */ u_int mc; /* Media Capabilities */ u_int ana; /* NWay Advertisement */ - u_int fdx; /* Full DupleX capabilites for each media */ + u_int fdx; /* Full DupleX capabilities for each media */ u_int ttm; /* Transmit Threshold Mode for each media */ u_int mci; /* 21142 MII Connector Interrupt info */ }; diff --git a/drivers/net/tulip/pnic2.c b/drivers/net/tulip/pnic2.c index 55f4a9a631bc..ab985023fcca 100644 --- a/drivers/net/tulip/pnic2.c +++ b/drivers/net/tulip/pnic2.c @@ -199,7 +199,7 @@ void pnic2_lnk_change(struct net_device *dev, int csr5) /* negotiation ended successfully */ /* get the link partners reply and mask out all but - * bits 24-21 which show the partners capabilites + * bits 24-21 which show the partners capabilities * and match those to what we advertised * * then begin to interpret the results of the negotiation. 
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index cde35dd87906..c1ce87a5f8d3 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -208,7 +208,7 @@ static const struct typhoon_card_info typhoon_card_info[] __devinitdata = { }; /* Notes on the new subsystem numbering scheme: - * bits 0-1 indicate crypto capabilites: (0) variable, (1) DES, or (2) 3DES + * bits 0-1 indicate crypto capabilities: (0) variable, (1) DES, or (2) 3DES * bit 4 indicates if this card has secured firmware (we don't support it) * bit 8 indicates if this is a (0) copper or (1) fiber card * bits 12-16 indicate card type: (0) client and (1) server @@ -788,7 +788,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) /* we have two rings to choose from, but we only use txLo for now * If we start using the Hi ring as well, we'll need to update * typhoon_stop_runtime(), typhoon_interrupt(), typhoon_num_free_tx(), - * and TXHI_ENTIRES to match, as well as update the TSO code below + * and TXHI_ENTRIES to match, as well as update the TSO code below * to get the right DMA address */ txRing = &tp->txLoRing; diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c index 6fd0bf736830..8dfdfbd5966c 100644 --- a/drivers/net/wireless/orinoco.c +++ b/drivers/net/wireless/orinoco.c @@ -3858,7 +3858,7 @@ static int orinoco_ioctl_setscan(struct net_device *dev, unsigned long flags; /* Note : you may have realised that, as this is a SET operation, - * this is priviledged and therefore a normal user can't + * this is privileged and therefore a normal user can't * perform scanning. * This is not an error, while the device perform scanning, * traffic doesn't flow, so it's a perfect DoS... diff --git a/drivers/net/wireless/prism54/isl_ioctl.c b/drivers/net/wireless/prism54/isl_ioctl.c index e5bb9f5ae429..989599ad33ef 100644 --- a/drivers/net/wireless/prism54/isl_ioctl.c +++ b/drivers/net/wireless/prism54/isl_ioctl.c @@ -747,7 +747,7 @@ prism54_get_essid(struct net_device *ndev, struct iw_request_info *info, if (essid->length) { dwrq->flags = 1; /* set ESSID to ON for Wireless Extensions */ - /* if it is to big, trunk it */ + /* if it is too big, trunk it */ dwrq->length = min((u8)IW_ESSID_MAX_SIZE, essid->length); } else { dwrq->flags = 0; diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 0ab26d01877b..0d2b447c50ed 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1026,7 +1026,7 @@ static void twa_free_request_id(TW_Device_Extension *tw_dev, int request_id) tw_dev->free_tail = (tw_dev->free_tail + 1) % TW_Q_LENGTH; } /* End twa_free_request_id() */ -/* This function will get parameter table entires from the firmware */ +/* This function will get parameter table entries from the firmware */ static void *twa_get_param(TW_Device_Extension *tw_dev, int request_id, int table_id, int parameter_id, int parameter_size_bytes) { TW_Command_Full *full_command_packet; diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 5996d3cd0ed8..674b15c78f68 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -1528,7 +1528,7 @@ static int serial8250_startup(struct uart_port *port) /* * Clear the FIFO buffers and disable them. 
- * (they will be reeanbled in set_termios()) + * (they will be reenabled in set_termios()) */ serial8250_clear_fifos(up); diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c index b848b7d94412..3bdee64d1a99 100644 --- a/drivers/serial/serial_txx9.c +++ b/drivers/serial/serial_txx9.c @@ -483,7 +483,7 @@ static int serial_txx9_startup(struct uart_port *port) /* * Clear the FIFO buffers and disable them. - * (they will be reeanbled in set_termios()) + * (they will be reenabled in set_termios()) */ sio_set(up, TXX9_SIFCR, TXX9_SIFCR_TFRST | TXX9_SIFCR_RFRST | TXX9_SIFCR_FRSTE); diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c index 9fe2283d91e5..1c4396c2962d 100644 --- a/drivers/serial/sunsu.c +++ b/drivers/serial/sunsu.c @@ -641,7 +641,7 @@ static int sunsu_startup(struct uart_port *port) /* * Clear the FIFO buffers and disable them. - * (they will be reeanbled in set_termios()) + * (they will be reenabled in set_termios()) */ if (uart_config[up->port.type].flags & UART_CLEAR_FIFO) { serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); diff --git a/drivers/usb/host/ohci-s3c2410.c b/drivers/usb/host/ohci-s3c2410.c index 372527a83593..682bf2215660 100644 --- a/drivers/usb/host/ohci-s3c2410.c +++ b/drivers/usb/host/ohci-s3c2410.c @@ -158,7 +158,7 @@ static int ohci_s3c2410_hub_control ( "s3c2410_hub_control(%p,0x%04x,0x%04x,0x%04x,%p,%04x)\n", hcd, typeReq, wValue, wIndex, buf, wLength); - /* if we are only an humble host without any special capabilites + /* if we are only an humble host without any special capabilities * process the request straight away and exit */ if (info == NULL) { diff --git a/drivers/usb/net/zaurus.c b/drivers/usb/net/zaurus.c index 9c5ab251370c..f7ac9d6b9856 100644 --- a/drivers/usb/net/zaurus.c +++ b/drivers/usb/net/zaurus.c @@ -217,7 +217,7 @@ static int blan_mdlm_bind(struct usbnet *dev, struct usb_interface *intf) * with devices that use it and those that don't. */ if ((detail->bDetailData[1] & ~0x02) != 0x01) { - /* bmDataCapabilites == 0 would be fine too, + /* bmDataCapabilities == 0 would be fine too, * but framing is minidriver-coupled for now. */ bad_detail: diff --git a/fs/mbcache.c b/fs/mbcache.c index 73e754fea2d8..e4fde1ab22cd 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -311,7 +311,7 @@ fail: /* * mb_cache_shrink() * - * Removes all cache entires of a device from the cache. All cache entries + * Removes all cache entries of a device from the cache. All cache entries * currently in use cannot be freed, and thus remain in the cache. All others * are freed. 
* diff --git a/include/asm-parisc/pdc.h b/include/asm-parisc/pdc.h index 8e23e4c674f6..0a3face6c480 100644 --- a/include/asm-parisc/pdc.h +++ b/include/asm-parisc/pdc.h @@ -333,7 +333,7 @@ struct pdc_model { /* for PDC_MODEL */ unsigned long curr_key; }; -/* Values for PDC_MODEL_CAPABILITES non-equivalent virtual aliasing support */ +/* Values for PDC_MODEL_CAPABILITIES non-equivalent virtual aliasing support */ #define PDC_MODEL_IOPDIR_FDC (1 << 2) /* see sba_iommu.c */ #define PDC_MODEL_NVA_MASK (3 << 4) diff --git a/include/asm-sh/addrspace.h b/include/asm-sh/addrspace.h index dbb05d1a26d1..720afc11c2ca 100644 --- a/include/asm-sh/addrspace.h +++ b/include/asm-sh/addrspace.h @@ -13,7 +13,7 @@ #include -/* Memory segments (32bit Priviledged mode addresses) */ +/* Memory segments (32bit Privileged mode addresses) */ #define P0SEG 0x00000000 #define P1SEG 0x80000000 #define P2SEG 0xa0000000 diff --git a/include/asm-sparc64/floppy.h b/include/asm-sparc64/floppy.h index 49d49a285943..6a95d5d0c576 100644 --- a/include/asm-sparc64/floppy.h +++ b/include/asm-sparc64/floppy.h @@ -738,7 +738,7 @@ static unsigned long __init sun_floppy_init(void) if (!sun_floppy_types[0] && sun_floppy_types[1]) { /* * Set the drive exchange bit in FCR on NS87303, - * make shure other bits are sane before doing so. + * make sure other bits are sane before doing so. */ ns87303_modify(config, FER, FER_EDM, 0); ns87303_modify(config, ASC, ASC_DRV2_SEL, 0); diff --git a/include/linux/fb.h b/include/linux/fb.h index 2cb19e6503aa..d03fadfcafe3 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -734,7 +734,7 @@ struct fb_tile_ops { /* A driver may set this flag to indicate that it does want a set_par to be * called every time when fbcon_switch is executed. The advantage is that with - * this flag set you can really be shure that set_par is always called before + * this flag set you can really be sure that set_par is always called before * any of the functions dependant on the correct hardware state or altering * that state, even if you are using some broken X releases. The disadvantage * is that it introduces unwanted delays to every console switch if set_par diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4f71cfd29c6f..dec8249e972d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -912,7 +912,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, /* * Check if this process has the right to modify the specified * process. The right exists if the process has administrative - * capabilities, superuser priviledges or the same + * capabilities, superuser privileges or the same * userid as the target process. */ if ((current->euid != task->suid) && (current->euid != task->uid) && diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 759445648667..627b11342233 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1302,7 +1302,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; - /* Check that we don't send out to big frames */ + /* Check that we don't send out too big frames */ if (len > self->max_data_size) { IRDA_DEBUG(2, "%s(), Chopping frame from %zd to %d bytes!\n", __FUNCTION__, len, self->max_data_size); @@ -1546,7 +1546,7 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_ASSERT(self != NULL, return -1;); /* - * Check that we don't send out to big frames. This is an unreliable + * Check that we don't send out too big frames. 
This is an unreliable * service, so we have no fragmentation and no coalescence */ if (len > self->max_data_size) { @@ -1642,7 +1642,7 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, } /* - * Check that we don't send out to big frames. This is an unreliable + * Check that we don't send out too big frames. This is an unreliable * service, so we have no fragmentation and no coalescence */ if (len > self->max_data_size) { diff --git a/sound/pci/rme32.c b/sound/pci/rme32.c index 0cbef5fe6c63..ab78544bf042 100644 --- a/sound/pci/rme32.c +++ b/sound/pci/rme32.c @@ -313,7 +313,7 @@ static int snd_rme32_capture_copy(struct snd_pcm_substream *substream, int chann } /* - * SPDIF I/O capabilites (half-duplex mode) + * SPDIF I/O capabilities (half-duplex mode) */ static struct snd_pcm_hardware snd_rme32_spdif_info = { .info = (SNDRV_PCM_INFO_MMAP_IOMEM | @@ -339,7 +339,7 @@ static struct snd_pcm_hardware snd_rme32_spdif_info = { }; /* - * ADAT I/O capabilites (half-duplex mode) + * ADAT I/O capabilities (half-duplex mode) */ static struct snd_pcm_hardware snd_rme32_adat_info = { @@ -364,7 +364,7 @@ static struct snd_pcm_hardware snd_rme32_adat_info = }; /* - * SPDIF I/O capabilites (full-duplex mode) + * SPDIF I/O capabilities (full-duplex mode) */ static struct snd_pcm_hardware snd_rme32_spdif_fd_info = { .info = (SNDRV_PCM_INFO_MMAP | @@ -390,7 +390,7 @@ static struct snd_pcm_hardware snd_rme32_spdif_fd_info = { }; /* - * ADAT I/O capabilites (full-duplex mode) + * ADAT I/O capabilities (full-duplex mode) */ static struct snd_pcm_hardware snd_rme32_adat_fd_info = { diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c index 0e694b011dcc..6c2a9f4a7659 100644 --- a/sound/pci/rme96.c +++ b/sound/pci/rme96.c @@ -359,7 +359,7 @@ snd_rme96_capture_copy(struct snd_pcm_substream *substream, } /* - * Digital output capabilites (S/PDIF) + * Digital output capabilities (S/PDIF) */ static struct snd_pcm_hardware snd_rme96_playback_spdif_info = { @@ -388,7 +388,7 @@ static struct snd_pcm_hardware snd_rme96_playback_spdif_info = }; /* - * Digital input capabilites (S/PDIF) + * Digital input capabilities (S/PDIF) */ static struct snd_pcm_hardware snd_rme96_capture_spdif_info = { @@ -417,7 +417,7 @@ static struct snd_pcm_hardware snd_rme96_capture_spdif_info = }; /* - * Digital output capabilites (ADAT) + * Digital output capabilities (ADAT) */ static struct snd_pcm_hardware snd_rme96_playback_adat_info = { @@ -442,7 +442,7 @@ static struct snd_pcm_hardware snd_rme96_playback_adat_info = }; /* - * Digital input capabilites (ADAT) + * Digital input capabilities (ADAT) */ static struct snd_pcm_hardware snd_rme96_capture_adat_info = { diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 980b9cd689dd..b5538efd146b 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -2256,7 +2256,7 @@ static int snd_hdspm_create_controls(struct snd_card *card, struct hdspm * hdspm } /* Channel playback mixer as default control - Note: the whole matrix would be 128*HDSPM_MIXER_CHANNELS Faders, thats to big for any alsamixer + Note: the whole matrix would be 128*HDSPM_MIXER_CHANNELS Faders, thats too big for any alsamixer they are accesible via special IOCTL on hwdep and the mixer 2dimensional mixer control */ diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c index 315855082fe1..fe67a92e2a1a 100644 --- a/sound/usb/usx2y/usx2yhwdeppcm.c +++ b/sound/usb/usx2y/usx2yhwdeppcm.c @@ -404,7 +404,7 @@ static void usX2Y_usbpcm_subs_startup(struct 
snd_usX2Y_substream *subs) struct usX2Ydev * usX2Y = subs->usX2Y; usX2Y->prepare_subs = subs; subs->urb[0]->start_frame = -1; - smp_wmb(); // Make shure above modifications are seen by i_usX2Y_subs_startup() + smp_wmb(); // Make sure above modifications are seen by i_usX2Y_subs_startup() usX2Y_urbs_set_complete(usX2Y, i_usX2Y_usbpcm_subs_startup); } -- cgit v1.2.3 From c41564b5af328ea4600b26119f6c9c8e1eb5c28b Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Wed, 29 Mar 2006 08:55:14 +1000 Subject: [XFS] We really suck at spulling. Thanks to Chris Pascoe for fixing all these typos. SGI-PV: 904196 SGI-Modid: xfs-linux-melb:xfs-kern:25539a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/mrlock.h | 2 +- fs/xfs/linux-2.6/xfs_aops.c | 14 +++++++------- fs/xfs/linux-2.6/xfs_export.h | 2 +- fs/xfs/linux-2.6/xfs_lrw.c | 2 +- fs/xfs/linux-2.6/xfs_vfs.h | 2 +- fs/xfs/quota/xfs_dquot_item.c | 2 +- fs/xfs/quota/xfs_qm.c | 14 +++++++------- fs/xfs/quota/xfs_qm_syscalls.c | 2 +- fs/xfs/quota/xfs_trans_dquot.c | 2 +- fs/xfs/xfs_acl.c | 2 +- fs/xfs/xfs_ag.h | 2 +- fs/xfs/xfs_alloc.c | 6 +++--- fs/xfs/xfs_alloc.h | 2 +- fs/xfs/xfs_attr.c | 6 +++--- fs/xfs/xfs_attr_leaf.c | 4 ++-- fs/xfs/xfs_behavior.c | 4 ++-- fs/xfs/xfs_behavior.h | 4 ++-- fs/xfs/xfs_buf_item.c | 4 ++-- fs/xfs/xfs_cap.h | 2 +- fs/xfs/xfs_da_btree.c | 2 +- fs/xfs/xfs_dir2_block.c | 2 +- fs/xfs/xfs_dir2_leaf.c | 2 +- fs/xfs/xfs_dir2_node.c | 2 +- fs/xfs/xfs_dir_leaf.c | 2 +- fs/xfs/xfs_fsops.c | 2 +- fs/xfs/xfs_ialloc.c | 2 +- fs/xfs/xfs_iget.c | 2 +- fs/xfs/xfs_inode.c | 14 +++++++------- fs/xfs/xfs_inode_item.c | 2 +- fs/xfs/xfs_itable.c | 4 ++-- fs/xfs/xfs_itable.h | 2 +- fs/xfs/xfs_log.c | 22 +++++++++++----------- fs/xfs/xfs_log.h | 2 +- fs/xfs/xfs_log_recover.c | 4 ++-- fs/xfs/xfs_mount.c | 4 ++-- fs/xfs/xfs_mount.h | 2 +- fs/xfs/xfs_quota.h | 4 ++-- fs/xfs/xfs_trans.c | 6 +++--- fs/xfs/xfs_trans.h | 2 +- fs/xfs/xfs_trans_inode.c | 2 +- fs/xfs/xfs_vfsops.c | 10 +++++----- fs/xfs/xfs_vnodeops.c | 12 ++++++------ 42 files changed, 93 insertions(+), 93 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h index 16b44c3c2362..1b262b790d9c 100644 --- a/fs/xfs/linux-2.6/mrlock.h +++ b/fs/xfs/linux-2.6/mrlock.h @@ -79,7 +79,7 @@ static inline void mrdemote(mrlock_t *mrp) * Debug-only routine, without some platform-specific asm code, we can * now only answer requests regarding whether we hold the lock for write * (reader state is outside our visibility, we only track writer state). - * Note: means !ismrlocked would give false positivies, so don't do that. + * Note: means !ismrlocked would give false positives, so don't do that. */ static inline int ismrlocked(mrlock_t *mrp, int type) { diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c02f7c5b7462..62b4553fb604 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -372,7 +372,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) * assumes that all buffers on the page are started at the same time. * * The fix is two passes across the ioend list - one to start writeback on the - * bufferheads, and then the second one submit them for I/O. + * buffer_heads, and then submit them for I/O on the second pass. */ STATIC void xfs_submit_ioend( @@ -699,7 +699,7 @@ xfs_convert_page( /* * page_dirty is initially a count of buffers on the page before - * EOF and is decrememted as we move each into a cleanable state. + * EOF and is decremented as we move each into a cleanable state. 
* * Derivation: * @@ -842,7 +842,7 @@ xfs_cluster_write( * page if possible. * The bh->b_state's cannot know if any of the blocks or which block for * that matter are dirty due to mmap writes, and therefore bh uptodate is - * only vaild if the page itself isn't completely uptodate. Some layers + * only valid if the page itself isn't completely uptodate. Some layers * may clear the page dirty flag prior to calling write page, under the * assumption the entire page will be written out; by not writing out the * whole page the page can be reused before all valid dirty data is @@ -892,7 +892,7 @@ xfs_page_state_convert( /* * page_dirty is initially a count of buffers on the page before - * EOF and is decrememted as we move each into a cleanable state. + * EOF and is decremented as we move each into a cleanable state. * * Derivation: * @@ -1339,9 +1339,9 @@ xfs_end_io_direct( /* * Non-NULL private data means we need to issue a transaction to * convert a range from unwritten to written extents. This needs - * to happen from process contect but aio+dio I/O completion + * to happen from process context but aio+dio I/O completion * happens from irq context so we need to defer it to a workqueue. - * This is not nessecary for synchronous direct I/O, but we do + * This is not necessary for synchronous direct I/O, but we do * it anyway to keep the code uniform and simpler. * * The core direct I/O code might be changed to always call the @@ -1358,7 +1358,7 @@ xfs_end_io_direct( } /* - * blockdev_direct_IO can return an error even afer the I/O + * blockdev_direct_IO can return an error even after the I/O * completion handler was called. Thus we need to protect * against double-freeing. */ diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h index e5b0559700a4..e794ca4efc76 100644 --- a/fs/xfs/linux-2.6/xfs_export.h +++ b/fs/xfs/linux-2.6/xfs_export.h @@ -54,7 +54,7 @@ * Note, the NFS filehandle also includes an fsid portion which * may have an inode number in it. That number is hardcoded to * 32bits and there is no way for XFS to intercept it. In - * practice this means when exporting an XFS filesytem with 64bit + * practice this means when exporting an XFS filesystem with 64bit * inodes you should either export the mountpoint (rather than * a subdirectory) or use the "fsid" export option. */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 0169360475c4..84ddf1893894 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -681,7 +681,7 @@ start: eventsent = 1; /* - * The iolock was dropped and reaquired in XFS_SEND_DATA + * The iolock was dropped and reacquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 8fed356db055..841200c03092 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -92,7 +92,7 @@ typedef enum { #define SYNC_FSDATA 0x0020 /* flush fs data (e.g. 
superblocks) */ #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ -#define SYNC_QUIESCE 0x0100 /* quiesce fileystem for a snapshot */ +#define SYNC_QUIESCE 0x0100 /* quiesce filesystem for a snapshot */ typedef int (*vfs_mount_t)(bhv_desc_t *, struct xfs_mount_args *, struct cred *); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index e4e5f05b841b..546f48af882a 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -221,7 +221,7 @@ xfs_qm_dqunpin_wait( * as possible. * * We must not be holding the AIL_LOCK at this point. Calling incore() to - * search the buffercache can be a time consuming thing, and AIL_LOCK is a + * search the buffer cache can be a time consuming thing, and AIL_LOCK is a * spinlock. */ STATIC void diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 1fb757ef3f41..73c1e5e80c07 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -289,7 +289,7 @@ xfs_qm_rele_quotafs_ref( /* * This is called at mount time from xfs_mountfs to initialize the quotainfo - * structure and start the global quotamanager (xfs_Gqm) if it hasn't done + * structure and start the global quota manager (xfs_Gqm) if it hasn't done * so already. Note that the superblock has not been read in yet. */ void @@ -807,7 +807,7 @@ xfs_qm_dqattach_one( * Given a udquot and gdquot, attach a ptr to the group dquot in the * udquot as a hint for future lookups. The idea sounds simple, but the * execution isn't, because the udquot might have a group dquot attached - * already and getting rid of that gets us into lock ordering contraints. + * already and getting rid of that gets us into lock ordering constraints. * The process is complicated more by the fact that the dquots may or may not * be locked on entry. */ @@ -1094,10 +1094,10 @@ xfs_qm_sync( } /* * If we can't grab the flush lock then if the caller - * really wanted us to give this our best shot, + * really wanted us to give this our best shot, so * see if we can give a push to the buffer before we wait * on the flush lock. At this point, we know that - * eventhough the dquot is being flushed, + * even though the dquot is being flushed, * it has (new) dirty data. */ xfs_qm_dqflock_pushbuf_wait(dqp); @@ -1491,7 +1491,7 @@ xfs_qm_reset_dqcounts( /* * Do a sanity check, and if needed, repair the dqblk. Don't * output any warnings because it's perfectly possible to - * find unitialized dquot blks. See comment in xfs_qm_dqcheck. + * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. */ (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR, "xfs_quotacheck"); @@ -1580,7 +1580,7 @@ xfs_qm_dqiterate( error = 0; /* - * This looks racey, but we can't keep an inode lock across a + * This looks racy, but we can't keep an inode lock across a * trans_reserve. But, this gets called during quotacheck, and that * happens only at mount time which is single threaded. */ @@ -1824,7 +1824,7 @@ xfs_qm_dqusage_adjust( * we have to start from the beginning anyway. * Once we're done, we'll log all the dquot bufs. * - * The *QUOTA_ON checks below may look pretty racey, but quotachecks + * The *QUOTA_ON checks below may look pretty racy, but quotachecks * and quotaoffs don't race. (Quotachecks happen at mount time only). 
*/ if (XFS_IS_UQUOTA_ON(mp)) { diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 676884394aae..c55db463bbf2 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -912,7 +912,7 @@ xfs_qm_export_dquot( /* * Internally, we don't reset all the timers when quota enforcement - * gets turned off. No need to confuse the userlevel code, + * gets turned off. No need to confuse the user level code, * so return zeroes in that case. */ if (! XFS_IS_QUOTA_ENFORCED(mp)) { diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 3290975d31f7..d8e131ec0aa8 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -804,7 +804,7 @@ xfs_trans_reserve_quota_bydquots( } /* - * Didnt change anything critical, so, no need to log + * Didn't change anything critical, so, no need to log */ return (0); } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 4ff0f4e41c61..2539af34eb63 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -395,7 +395,7 @@ xfs_acl_allow_set( * The access control process to determine the access permission: * if uid == file owner id, use the file owner bits. * if gid == file owner group id, use the file group bits. - * scan ACL for a maching user or group, and use matched entry + * scan ACL for a matching user or group, and use matched entry * permission. Use total permissions of all matching group entries, * until all acl entries are exhausted. The final permission produced * by matching acl entry or entries needs to be & with group permission. diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index a96e2ffce0cc..dc2361dd740a 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -179,7 +179,7 @@ typedef struct xfs_perag { char pagf_init; /* this agf's entry is initialized */ char pagi_init; /* this agi's entry is initialized */ - char pagf_metadata; /* the agf is prefered to be metadata */ + char pagf_metadata; /* the agf is preferred to be metadata */ char pagi_inodeok; /* The agi is ok for inodes */ __uint8_t pagf_levels[XFS_BTNUM_AGF]; /* # of levels in bno & cnt btree */ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index f4328e1e2a74..64ee07db0d5e 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -511,7 +511,7 @@ STATIC void xfs_alloc_trace_busy( char *name, /* function tag string */ char *str, /* additional string */ - xfs_mount_t *mp, /* file system mount poing */ + xfs_mount_t *mp, /* file system mount point */ xfs_agnumber_t agno, /* allocation group number */ xfs_agblock_t agbno, /* a.g. relative block number */ xfs_extlen_t len, /* length of extent */ @@ -1843,7 +1843,7 @@ xfs_alloc_fix_freelist( } else agbp = NULL; - /* If this is a metadata prefered pag and we are user data + /* If this is a metadata preferred pag and we are user data * then try somewhere else if we are not being asked to * try harder at this point */ @@ -2458,7 +2458,7 @@ error0: /* * AG Busy list management * The busy list contains block ranges that have been freed but whose - * transacations have not yet hit disk. If any block listed in a busy + * transactions have not yet hit disk. If any block listed in a busy * list is reused, the transaction that freed it must be forced to disk * before continuing to use the block. 
* diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 3546dea27b7d..2d1f8928b267 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -68,7 +68,7 @@ typedef struct xfs_alloc_arg { xfs_alloctype_t otype; /* original allocation type */ char wasdel; /* set if allocation was prev delayed */ char wasfromfl; /* set if allocation is from freelist */ - char isfl; /* set if is freelist blocks - !actg */ + char isfl; /* set if is freelist blocks - !acctg */ char userdata; /* set if this is user data */ } xfs_alloc_arg_t; diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 093fac476bda..b6e1e02bbb28 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -294,7 +294,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, xfs_trans_ihold(args.trans, dp); /* - * If the attribute list is non-existant or a shortform list, + * If the attribute list is non-existent or a shortform list, * upgrade it to a single-leaf-block attribute list. */ if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || @@ -1584,7 +1584,7 @@ out: * Fill in the disk block numbers in the state structure for the buffers * that are attached to the state structure. * This is done so that we can quickly reattach ourselves to those buffers - * after some set of transaction commit's has released these buffers. + * after some set of transaction commits have released these buffers. */ STATIC int xfs_attr_fillstate(xfs_da_state_t *state) @@ -1631,7 +1631,7 @@ xfs_attr_fillstate(xfs_da_state_t *state) /* * Reattach the buffers to the state structure based on the disk block * numbers stored in the state structure. - * This is done after some set of transaction commit's has released those + * This is done after some set of transaction commits have released those * buffers from our grip. */ STATIC int diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 717682747bd2..9462be86aa14 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -524,7 +524,7 @@ xfs_attr_shortform_compare(const void *a, const void *b) /* * Copy out entries of shortform attribute lists for attr_list(). - * Shortform atrtribute lists are not stored in hashval sorted order. + * Shortform attribute lists are not stored in hashval sorted order. * If the output buffer is not large enough to hold them all, then we * we have to calculate each entries' hashvalue and sort them before * we can begin returning them to the user. @@ -1541,7 +1541,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) /* * Check for the degenerate case of the block being empty. * If the block is empty, we'll simply delete it, no need to - * coalesce it with a sibling block. We choose (aribtrarily) + * coalesce it with a sibling block. We choose (arbitrarily) * to merge with the forward block unless it is NULL. */ if (count == 0) { diff --git a/fs/xfs/xfs_behavior.c b/fs/xfs/xfs_behavior.c index 9880adae3938..f4fe3715a803 100644 --- a/fs/xfs/xfs_behavior.c +++ b/fs/xfs/xfs_behavior.c @@ -31,7 +31,7 @@ * The behavior chain is ordered based on the 'position' number which * lives in the first field of the ops vector (higher numbers first). * - * Attemps to insert duplicate ops result in an EINVAL return code. + * Attempts to insert duplicate ops result in an EINVAL return code. * Otherwise, return 0 to indicate success. */ int @@ -84,7 +84,7 @@ bhv_insert(bhv_head_t *bhp, bhv_desc_t *bdp) /* * Remove a behavior descriptor from a position in a behavior chain; - * the postition is guaranteed not to be the first position. 
+ * the position is guaranteed not to be the first position. * Should only be called by the bhv_remove() macro. */ void diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h index 2cd89bb5ab10..1d8ff103201c 100644 --- a/fs/xfs/xfs_behavior.h +++ b/fs/xfs/xfs_behavior.h @@ -39,7 +39,7 @@ * behaviors is synchronized with operations-in-progress (oip's) so that * the oip's always see a consistent view of the chain. * - * The term "interpostion" is used to refer to the act of inserting + * The term "interposition" is used to refer to the act of inserting * a behavior such that it interposes on (i.e., is inserted in front * of) a particular other behavior. A key example of this is when a * system implementing distributed single system image wishes to @@ -51,7 +51,7 @@ * * Behavior synchronization is logic which is necessary under certain * circumstances that there is no conflict between ongoing operations - * traversing the behavior chain and those dunamically modifying the + * traversing the behavior chain and those dynamically modifying the * behavior chain. Because behavior synchronization adds extra overhead * to virtual operation invocation, we want to restrict, as much as * we can, the requirement for this extra code, to those situations diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 07e2324152b1..5fed15682dda 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -98,12 +98,12 @@ xfs_buf_item_flush_log_debug( } /* - * This function is called to verify that our caller's have logged + * This function is called to verify that our callers have logged * all the bytes that they changed. * * It does this by comparing the original copy of the buffer stored in * the buf log item's bli_orig array to the current copy of the buffer - * and ensuring that all bytes which miscompare are set in the bli_logged + * and ensuring that all bytes which mismatch are set in the bli_logged * array of the buf log item. */ STATIC void diff --git a/fs/xfs/xfs_cap.h b/fs/xfs/xfs_cap.h index 433ec537f9bd..d0035c6e9514 100644 --- a/fs/xfs/xfs_cap.h +++ b/fs/xfs/xfs_cap.h @@ -38,7 +38,7 @@ typedef struct xfs_cap_set { /* * For Linux, we take the bitfields directly from capability.h * and no longer attempt to keep this attribute ondisk compatible - * with IRIX. Since this attribute is only set on exectuables, + * with IRIX. Since this attribute is only set on executables, * it just doesn't make much sense to try. We do use a different * named attribute though, to avoid confusion. */ diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 4bae3a76c678..8988b9051175 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -840,7 +840,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) /* * Check for the degenerate case of the block being empty. * If the block is empty, we'll simply delete it, no need to - * coalesce it with a sibling block. We choose (aribtrarily) + * coalesce it with a sibling block. We choose (arbitrarily) * to merge with the forward block unless it is NULL. */ if (count == 0) { diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index bd5cee6aa51a..972ded595476 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -533,7 +533,7 @@ xfs_dir2_block_getdents( /* * Reached the end of the block. - * Set the offset to a nonexistent block 1 and return. + * Set the offset to a non-existent block 1 and return. 
*/ *eofp = 1; diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 08648b18265c..0f5e2f2ce6ec 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -515,7 +515,7 @@ xfs_dir2_leaf_addname( ASSERT(be32_to_cpu(leaf->ents[highstale].address) == XFS_DIR2_NULL_DATAPTR); /* - * Copy entries down to copver the stale entry + * Copy entries down to cover the stale entry * and make room for the new entry. */ if (highstale - index > 0) diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index af556f16a0c7..ac511ab9c52d 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -830,7 +830,7 @@ xfs_dir2_leafn_rebalance( state->inleaf = 1; blk2->index = 0; cmn_err(CE_ALERT, - "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting orignal leaf: " + "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: " "blk1->index %d\n", blk1->index); } diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c index ee88751c3be6..6d711869262f 100644 --- a/fs/xfs/xfs_dir_leaf.c +++ b/fs/xfs/xfs_dir_leaf.c @@ -1341,7 +1341,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action) /* * Check for the degenerate case of the block being empty. * If the block is empty, we'll simply delete it, no need to - * coalesce it with a sibling block. We choose (aribtrarily) + * coalesce it with a sibling block. We choose (arbitrarily) * to merge with the forward block unless it is NULL. */ if (count == 0) { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 56caa88713ab..dfa3527b20a7 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -477,7 +477,7 @@ xfs_fs_counts( * * xfs_reserve_blocks is called to set m_resblks * in the in-core mount table. The number of unused reserved blocks - * is kept in m_resbls_avail. + * is kept in m_resblks_avail. * * Reserve the requested number of blocks if available. Otherwise return * as many as possible to satisfy the request. The actual number diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 0024892841a3..20c39a6e6a0c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1023,7 +1023,7 @@ xfs_difree( rec.ir_freecount++; /* - * When an inode cluster is free, it becomes elgible for removal + * When an inode cluster is free, it becomes eligible for removal */ if ((mp->m_flags & XFS_MOUNT_IDELETE) && (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 3ce35a6f700b..bb33113eef9f 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -509,7 +509,7 @@ retry: } else { /* * If the inode is not fully constructed due to - * filehandle mistmatches wait for the inode to go + * filehandle mismatches wait for the inode to go * away and try again. * * iget_locked will call __wait_on_freeing_inode diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 88a517fad07b..48146bdc6bdd 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -160,7 +160,7 @@ xfs_inotobp( xfs_dinode_t *dip; /* - * Call the space managment code to find the location of the + * Call the space management code to find the location of the * inode on disk. */ imap.im_blkno = 0; @@ -837,7 +837,7 @@ xfs_dic2xflags( /* * Given a mount structure and an inode number, return a pointer - * to a newly allocated in-core inode coresponding to the given + * to a newly allocated in-core inode corresponding to the given * inode number. 
* * Initialize the inode's attributes and extent pointers if it @@ -2723,7 +2723,7 @@ xfs_ipin( /* * Decrement the pin count of the given inode, and wake up * anyone in xfs_iwait_unpin() if the count goes to 0. The - * inode must have been previoulsy pinned with a call to xfs_ipin(). + * inode must have been previously pinned with a call to xfs_ipin(). */ void xfs_iunpin( @@ -3690,7 +3690,7 @@ void xfs_iext_add( xfs_ifork_t *ifp, /* inode fork pointer */ xfs_extnum_t idx, /* index to begin adding exts */ - int ext_diff) /* nubmer of extents to add */ + int ext_diff) /* number of extents to add */ { int byte_diff; /* new bytes being added */ int new_size; /* size of extents after adding */ @@ -4038,7 +4038,7 @@ xfs_iext_remove_indirect( xfs_extnum_t ext_diff; /* extents to remove in current list */ xfs_extnum_t nex1; /* number of extents before idx */ xfs_extnum_t nex2; /* extents after idx + count */ - int nlists; /* entries in indirecton array */ + int nlists; /* entries in indirection array */ int page_idx = idx; /* index in target extent list */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); @@ -4291,9 +4291,9 @@ xfs_iext_bno_to_ext( xfs_filblks_t blockcount = 0; /* number of blocks in extent */ xfs_bmbt_rec_t *ep = NULL; /* pointer to target extent */ xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ - int high; /* upper boundry in search */ + int high; /* upper boundary in search */ xfs_extnum_t idx = 0; /* index of target extent */ - int low; /* lower boundry in search */ + int low; /* lower boundary in search */ xfs_extnum_t nextents; /* number of file extents */ xfs_fileoff_t startoff = 0; /* start offset of extent */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 36aa1fcb90a5..7497a481b2f5 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -580,7 +580,7 @@ xfs_inode_item_unpin_remove( * been or is in the process of being flushed, then (ideally) we'd like to * see if the inode's buffer is still incore, and if so give it a nudge. * We delay doing so until the pushbuf routine, though, to avoid holding - * the AIL lock across a call to the blackhole which is the buffercache. + * the AIL lock across a call to the blackhole which is the buffer cache. * Also we don't want to sleep in any device strategy routines, which can happen * if we do the subsequent bawrite in here. 
*/ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 32247b6bfee7..94068d014f27 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -272,7 +272,7 @@ xfs_bulkstat( size_t statstruct_size, /* sizeof struct filling */ char __user *ubuffer, /* buffer with inode stats */ int flags, /* defined in xfs_itable.h */ - int *done) /* 1 if there're more stats to get */ + int *done) /* 1 if there are more stats to get */ { xfs_agblock_t agbno=0;/* allocation group block number */ xfs_buf_t *agbp; /* agi header buffer */ @@ -676,7 +676,7 @@ xfs_bulkstat_single( xfs_mount_t *mp, /* mount point for filesystem */ xfs_ino_t *lastinop, /* inode to return */ char __user *buffer, /* buffer with inode stats */ - int *done) /* 1 if there're more stats to get */ + int *done) /* 1 if there are more stats to get */ { int count; /* count value for bulkstat call */ int error; /* return value */ diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 047d834ed210..11eb4e1b18c4 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -60,7 +60,7 @@ xfs_bulkstat( size_t statstruct_size,/* sizeof struct that we're filling */ char __user *ubuffer,/* buffer with inode stats */ int flags, /* flag to control access method */ - int *done); /* 1 if there're more stats to get */ + int *done); /* 1 if there are more stats to get */ int xfs_bulkstat_single( diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 9176995160ed..32e841d2f26d 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -59,7 +59,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, int num_bblks); STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); -STATIC void xlog_unalloc_log(xlog_t *log); +STATIC void xlog_dealloc_log(xlog_t *log); STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], int nentries, xfs_log_ticket_t tic, xfs_lsn_t *start_lsn, @@ -304,7 +304,7 @@ xfs_log_done(xfs_mount_t *mp, if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 || (flags & XFS_LOG_REL_PERM_RESERV)) { /* - * Release ticket if not permanent reservation or a specifc + * Release ticket if not permanent reservation or a specific * request has been made to release a permanent reservation. */ xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); @@ -511,7 +511,7 @@ xfs_log_mount(xfs_mount_t *mp, vfsp->vfs_flag |= VFS_RDONLY; if (error) { cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); - xlog_unalloc_log(mp->m_log); + xlog_dealloc_log(mp->m_log); return error; } } @@ -667,7 +667,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) * * Go through the motions of sync'ing and releasing * the iclog, even though no I/O will actually happen, - * we need to wait for other log I/O's that may already + * we need to wait for other log I/Os that may already * be in progress. 
Do this as a separate section of * code so we'll know if we ever get stuck here that * we're in this odd situation of trying to unmount @@ -704,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) void xfs_log_unmount_dealloc(xfs_mount_t *mp) { - xlog_unalloc_log(mp->m_log); + xlog_dealloc_log(mp->m_log); } /* @@ -1492,7 +1492,7 @@ xlog_sync(xlog_t *log, ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); - /* account for internal log which does't start at block #0 */ + /* account for internal log which doesn't start at block #0 */ XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); XFS_BUF_WRITE(bp); if ((error = XFS_bwrite(bp))) { @@ -1506,10 +1506,10 @@ xlog_sync(xlog_t *log, /* - * Unallocate a log structure + * Deallocate a log structure */ void -xlog_unalloc_log(xlog_t *log) +xlog_dealloc_log(xlog_t *log) { xlog_in_core_t *iclog, *next_iclog; xlog_ticket_t *tic, *next_tic; @@ -1539,7 +1539,7 @@ xlog_unalloc_log(xlog_t *log) if ((log->l_ticket_cnt != log->l_ticket_tcnt) && !XLOG_FORCED_SHUTDOWN(log)) { xfs_fs_cmn_err(CE_WARN, log->l_mp, - "xlog_unalloc_log: (cnt: %d, total: %d)", + "xlog_dealloc_log: (cnt: %d, total: %d)", log->l_ticket_cnt, log->l_ticket_tcnt); /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */ @@ -1562,7 +1562,7 @@ xlog_unalloc_log(xlog_t *log) #endif log->l_mp->m_log = NULL; kmem_free(log, sizeof(xlog_t)); -} /* xlog_unalloc_log */ +} /* xlog_dealloc_log */ /* * Update counters atomically now that memcpy is done. @@ -2829,7 +2829,7 @@ xlog_state_release_iclog(xlog_t *log, /* * We let the log lock go, so it's possible that we hit a log I/O - * error or someother SHUTDOWN condition that marks the iclog + * error or some other SHUTDOWN condition that marks the iclog * as XLOG_STATE_IOERROR before the bwrite. However, we know that * this iclog has consistent data, so we ignore IOERROR * flags after this point. diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 4b2ac88dbb83..eacb3d4987f2 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -27,7 +27,7 @@ #ifdef __KERNEL__ /* - * By comparing each compnent, we don't have to worry about extra + * By comparing each component, we don't have to worry about extra * endian issues in treating two 32 bit numbers as one 64 bit number */ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index add13f507ed2..1f0016b0b4ec 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -583,7 +583,7 @@ xlog_find_head( * x | x ... | x - 1 | x * Another case that fits this picture would be * x | x + 1 | x ... | x - * In this case the head really is somwhere at the end of the + * In this case the head really is somewhere at the end of the * log, as one of the latest writes at the beginning was * incomplete. * One more case is @@ -2799,7 +2799,7 @@ xlog_recover_do_trans( * we don't need to worry about the block number being * truncated in > 1 TB buffers because in user-land, * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so - * the blkno's will get through the user-mode buffer + * the blknos will get through the user-mode buffer * cache properly. The only bad case is o32 kernels * where xfs_daddr_t is 32-bits but mount will warn us * off a > 1 TB filesystem before we get here. 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 20e8abc16d18..72e7e78bfff8 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -393,7 +393,7 @@ xfs_initialize_perag( break; } - /* This ag is prefered for inodes */ + /* This ag is preferred for inodes */ pag = &mp->m_perag[index]; pag->pagi_inodeok = 1; if (index < max_metadata) @@ -1728,7 +1728,7 @@ xfs_mount_log_sbunit( * We cannot use the hotcpu_register() function because it does * not allow notifier instances. We need a notifier per filesystem * as we need to be able to identify the filesystem to balance - * the counters out. This is acheived by having a notifier block + * the counters out. This is achieved by having a notifier block * embedded in the xfs_mount_t and doing pointer magic to get the * mount pointer from the notifier block address. */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index ebd73960e9db..66cbee79864e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -379,7 +379,7 @@ typedef struct xfs_mount { #endif int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ - int m_sinoalign; /* stripe unit inode alignmnt */ + int m_sinoalign; /* stripe unit inode alignment */ int m_attr_magicpct;/* 37% of the blocksize */ int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 82a08baf437b..4f6a034de7f7 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -31,7 +31,7 @@ typedef __uint32_t xfs_dqid_t; /* - * Eventhough users may not have quota limits occupying all 64-bits, + * Even though users may not have quota limits occupying all 64-bits, * they may need 64-bit accounting. Hence, 64-bit quota-counters, * and quota-limits. This is a waste in the common case, but hey ... */ @@ -246,7 +246,7 @@ typedef struct xfs_qoff_logformat { #ifdef __KERNEL__ /* * This check is done typically without holding the inode lock; - * that may seem racey, but it is harmless in the context that it is used. + * that may seem racy, but it is harmless in the context that it is used. * The inode cannot go inactive as long a reference is kept, and * therefore if dquot(s) were attached, they'll stay consistent. * If, for example, the ownership of the inode changes while diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 2918956553a5..8d056cef5d1f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -490,7 +490,7 @@ xfs_trans_mod_sb( case XFS_TRANS_SB_RES_FREXTENTS: /* * The allocation has already been applied to the - * in-core superblocks's counter. This should only + * in-core superblock's counter. This should only * be applied to the on-disk superblock. */ ASSERT(delta < 0); @@ -611,7 +611,7 @@ xfs_trans_apply_sb_deltas( if (whole) /* - * Log the whole thing, the fields are discontiguous. + * Log the whole thing, the fields are noncontiguous. */ xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_sb_t) - 1); else @@ -669,7 +669,7 @@ xfs_trans_unreserve_and_mod_sb( /* * Apply any superblock modifications to the in-core version. * The t_res_fdblocks_delta and t_res_frextents_delta fields are - * explicity NOT applied to the in-core superblock. + * explicitly NOT applied to the in-core superblock. * The idea is that that has already been done. 
*/ if (tp->t_flags & XFS_TRANS_SB_DIRTY) { diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index e48befa4e337..100d9a4b38ee 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -354,7 +354,7 @@ typedef struct xfs_trans { xfs_lsn_t t_commit_lsn; /* log seq num of end of * transaction. */ struct xfs_mount *t_mountp; /* ptr to fs mount struct */ - struct xfs_dquot_acct *t_dqinfo; /* accting info for dquots */ + struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ xfs_trans_callback_t t_callback; /* transaction callback */ void *t_callarg; /* callback arg */ unsigned int t_flags; /* misc flags */ diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index e341409172d2..7c5894d59f81 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -272,7 +272,7 @@ xfs_trans_log_inode( * This is to coordinate with the xfs_iflush() and xfs_iflush_done() * routines in the eventual clearing of the ilf_fields bits. * See the big comment in xfs_iflush() for an explanation of - * this coorination mechanism. + * this coordination mechanism. */ flags |= ip->i_itemp->ili_last_fields; ip->i_itemp->ili_format.ilf_fields |= flags; diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index d4ec4dfaf19c..504d2a80747a 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -880,10 +880,10 @@ xfs_statvfs( * determine if they should be flushed sync, async, or * delwri. * SYNC_CLOSE - This flag is passed when the system is being - * unmounted. We should sync and invalidate everthing. + * unmounted. We should sync and invalidate everything. * SYNC_FSDATA - This indicates that the caller would like to make * sure the superblock is safe on disk. We can ensure - * this by simply makeing sure the log gets flushed + * this by simply making sure the log gets flushed * if SYNC_BDFLUSH is set, and by actually writing it * out otherwise. * @@ -908,7 +908,7 @@ xfs_sync( * * This routine supports all of the flags defined for the generic VFS_SYNC * interface as explained above under xfs_sync. In the interests of not - * changing interfaces within the 6.5 family, additional internallly- + * changing interfaces within the 6.5 family, additional internally- * required functions are specified within a separate xflags parameter, * only available by calling this routine. * @@ -1090,7 +1090,7 @@ xfs_sync_inodes( * If this is just vfs_sync() or pflushd() calling * then we can skip inodes for which it looks like * there is nothing to do. Since we don't have the - * inode locked this is racey, but these are periodic + * inode locked this is racy, but these are periodic * calls so it doesn't matter. For the others we want * to know for sure, so we at least try to lock them. */ @@ -1429,7 +1429,7 @@ xfs_sync_inodes( * * This routine supports all of the flags defined for the generic VFS_SYNC * interface as explained above under xfs_sync. In the interests of not - * changing interfaces within the 6.5 family, additional internallly- + * changing interfaces within the 6.5 family, additional internally- * required functions are specified within a separate xflags parameter, * only available by calling this routine. * diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0f0a64e81db9..de49601919c1 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -848,7 +848,7 @@ xfs_setattr( * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. 
* This is slightly sub-optimal in that truncates require - * two sync transactions instead of one for wsync filesytems. + * two sync transactions instead of one for wsync filesystems. * One for the truncate and one for the timestamps since we * don't want to change the timestamps unless we're sure the * truncate worked. Truncates are less than 1% of the laddis @@ -1170,7 +1170,7 @@ xfs_fsync( /* * If this inode is on the RT dev we need to flush that - * cache aswell. + * cache as well. */ if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); @@ -1380,7 +1380,7 @@ xfs_inactive_symlink_rmt( */ ntp = xfs_trans_dup(tp); /* - * Commit the transaction containing extent freeing and EFD's. + * Commit the transaction containing extent freeing and EFDs. * If we get an error on the commit here or on the reserve below, * we need to unlock the inode since the new transaction doesn't * have the inode attached. @@ -2023,7 +2023,7 @@ xfs_create( XFS_QM_DQRELE(mp, gdqp); /* - * Propogate the fact that the vnode changed after the + * Propagate the fact that the vnode changed after the * xfs_inode locks have been released. */ VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3); @@ -2370,7 +2370,7 @@ xfs_remove( * for a log reservation. Since we'll have to wait for the * inactive code to complete before returning from xfs_iget, * we need to make sure that we don't have log space reserved - * when we call xfs_iget. Instead we get an unlocked referece + * when we call xfs_iget. Instead we get an unlocked reference * to the inode before getting our log reservation. */ error = xfs_get_dir_entry(dentry, &ip); @@ -3020,7 +3020,7 @@ xfs_rmdir( * for a log reservation. Since we'll have to wait for the * inactive code to complete before returning from xfs_iget, * we need to make sure that we don't have log space reserved - * when we call xfs_iget. Instead we get an unlocked referece + * when we call xfs_iget. Instead we get an unlocked reference * to the inode before getting our log reservation. */ error = xfs_get_dir_entry(dentry, &cdp); -- cgit v1.2.3 From e0edd5962bd83d319aaa50b39580dc30299a7fe3 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Wed, 29 Mar 2006 08:55:47 +1000 Subject: [XFS] Fix compiler warning and small code inconsistencies in compat ioctl32 land. 
SGI-PV: 904196 SGI-Modid: xfs-linux-melb:xfs-kern:25590a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_ioctl32.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index b6321abd9a81..251bfe451a3f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -72,7 +72,7 @@ xfs_ioctl32_flock( copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) return -EFAULT; - + return (unsigned long)p; } @@ -107,11 +107,15 @@ xfs_ioctl32_bulkstat( #endif STATIC long -xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) +xfs_compat_ioctl( + int mode, + struct file *file, + unsigned cmd, + unsigned long arg) { + struct inode *inode = file->f_dentry->d_inode; + vnode_t *vp = vn_from_inode(inode); int error; - struct inode *inode = f->f_dentry->d_inode; - vnode_t *vp = vn_to_inode(inode); switch (cmd) { case XFS_IOC_DIOINFO: @@ -189,7 +193,7 @@ xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) return -ENOIOCTLCMD; } - VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error); + VOP_IOCTL(vp, inode, file, mode, cmd, (void __user *)arg, error); VMODIFY(vp); return error; @@ -197,18 +201,18 @@ xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) long xfs_file_compat_ioctl( - struct file *f, + struct file *file, unsigned cmd, unsigned long arg) { - return xfs_compat_ioctl(0, f, cmd, arg); + return xfs_compat_ioctl(0, file, cmd, arg); } long xfs_file_compat_invis_ioctl( - struct file *f, + struct file *file, unsigned cmd, unsigned long arg) { - return xfs_compat_ioctl(IO_INVIS, f, cmd, arg); + return xfs_compat_ioctl(IO_INVIS, file, cmd, arg); } -- cgit v1.2.3 From 3c674e74238cb2484169e3f84f687c66887086b6 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Wed, 29 Mar 2006 09:26:15 +1000 Subject: Fixes a regression from the recent "remove ->get_blocks() support" change. inode->i_blkbits should be used when making a get_block_t request of a filesystem instead of dio->blkbits, as that does not indicate the filesystem block size all the time (depends on request alignment - see start of __blockdev_direct_IO). Signed-off-by: Nathan Scott Acked-by: Badari Pulavarty --- fs/direct-io.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index 9d1d2aa73e42..910a8ed74b5d 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -524,8 +524,6 @@ static int get_more_blocks(struct dio *dio) */ ret = dio->page_errors; if (ret == 0) { - map_bh->b_state = 0; - map_bh->b_size = 0; BUG_ON(dio->block_in_file >= dio->final_block_in_request); fs_startblk = dio->block_in_file >> dio->blkfactor; dio_count = dio->final_block_in_request - dio->block_in_file; @@ -534,6 +532,9 @@ static int get_more_blocks(struct dio *dio) if (dio_count & blkmask) fs_count++; + map_bh->b_state = 0; + map_bh->b_size = fs_count << dio->inode->i_blkbits; + create = dio->rw == WRITE; if (dio->lock_type == DIO_LOCKING) { if (dio->block_in_file < (i_size_read(dio->inode) >> @@ -542,13 +543,13 @@ static int get_more_blocks(struct dio *dio) } else if (dio->lock_type == DIO_NO_LOCKING) { create = 0; } + /* * For writes inside i_size we forbid block creations: only * overwrites are permitted. We fall back to buffered writes * at a higher level for inside-i_size block-instantiating * writes. 
*/ - map_bh->b_size = fs_count << dio->blkbits; ret = (*dio->get_block)(dio->inode, fs_startblk, map_bh, create); } -- cgit v1.2.3 From 3ccb8b5f650e80b7cc7ef76289348472e026b6ac Mon Sep 17 00:00:00 2001 From: Glen Overby Date: Wed, 29 Mar 2006 09:52:28 +1000 Subject: [XFS] A change to inode chunk allocation to try allocating the new chunk contiguous with the most recently allocated chunk. On a striped filesystem, this will fill a stripe unit with inodes before allocating new inodes in another stripe unit. SGI-PV: 951416 SGI-Modid: xfs-linux-melb:xfs-kern:208488a Signed-off-by: Glen Overby Signed-off-by: Nathan Scott --- fs/xfs/xfs_ialloc.c | 108 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 20c39a6e6a0c..4eeb856183b1 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -136,7 +136,7 @@ xfs_ialloc_ag_alloc( int ninodes; /* num inodes per buf */ xfs_agino_t thisino; /* current inode number, for loop */ int version; /* inode version number to use */ - int isaligned; /* inode allocation at stripe unit */ + int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ args.tp = tp; @@ -152,47 +152,75 @@ xfs_ialloc_ag_alloc( return XFS_ERROR(ENOSPC); args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); /* - * Set the alignment for the allocation. - * If stripe alignment is turned on then align at stripe unit - * boundary. - * If the cluster size is smaller than a filesystem block - * then we're doing I/O for inodes in filesystem block size pieces, - * so don't need alignment anyway. - */ - isaligned = 0; - if (args.mp->m_sinoalign) { - ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); - args.alignment = args.mp->m_dalign; - isaligned = 1; - } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && - args.mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp))) - args.alignment = args.mp->m_sb.sb_inoalignmt; - else - args.alignment = 1; + * First try to allocate inodes contiguous with the last-allocated + * chunk of inodes. If the filesystem is striped, this will fill + * an entire stripe unit with inodes. + */ agi = XFS_BUF_TO_AGI(agbp); - /* - * Need to figure out where to allocate the inode blocks. - * Ideally they should be spaced out through the a.g. - * For now, just allocate blocks up front. - */ - args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, be32_to_cpu(agi->agi_seqno), - args.agbno); - /* - * Allocate a fixed-size extent of inodes. - */ - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.mod = args.total = args.wasdel = args.isfl = args.userdata = - args.minalignslop = 0; - args.prod = 1; - /* - * Allow space for the inode btree to split. - */ - args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; - if ((error = xfs_alloc_vextent(&args))) - return error; + newino = be32_to_cpu(agi->agi_newino); + if(likely(newino != NULLAGINO)) { + args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + + XFS_IALLOC_BLOCKS(args.mp); + args.fsbno = XFS_AGB_TO_FSB(args.mp, + be32_to_cpu(agi->agi_seqno), args.agbno); + args.type = XFS_ALLOCTYPE_THIS_BNO; + args.mod = args.total = args.wasdel = args.isfl = + args.userdata = args.minalignslop = 0; + args.prod = 1; + args.alignment = 1; + /* + * Allow space for the inode btree to split. 
+ */ + args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + } else + args.fsbno = NULLFSBLOCK; + if (unlikely(args.fsbno == NULLFSBLOCK)) { + /* + * Set the alignment for the allocation. + * If stripe alignment is turned on then align at stripe unit + * boundary. + * If the cluster size is smaller than a filesystem block + * then we're doing I/O for inodes in filesystem block size + * pieces, so don't need alignment anyway. + */ + isaligned = 0; + if (args.mp->m_sinoalign) { + ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); + args.alignment = args.mp->m_dalign; + isaligned = 1; + } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && + args.mp->m_sb.sb_inoalignmt >= + XFS_B_TO_FSBT(args.mp, + XFS_INODE_CLUSTER_SIZE(args.mp))) + args.alignment = args.mp->m_sb.sb_inoalignmt; + else + args.alignment = 1; + /* + * Need to figure out where to allocate the inode blocks. + * Ideally they should be spaced out through the a.g. + * For now, just allocate blocks up front. + */ + args.agbno = be32_to_cpu(agi->agi_root); + args.fsbno = XFS_AGB_TO_FSB(args.mp, + be32_to_cpu(agi->agi_seqno), args.agbno); + /* + * Allocate a fixed-size extent of inodes. + */ + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.mod = args.total = args.wasdel = args.isfl = + args.userdata = args.minalignslop = 0; + args.prod = 1; + /* + * Allow space for the inode btree to split. + */ + args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + /* * If stripe alignment is turned on, then try again with cluster * alignment. -- cgit v1.2.3 From 0b7e56a450a4800c5f48f3a345a5a7de2f38041c Mon Sep 17 00:00:00 2001 From: Mandy Kirkconnell Date: Wed, 29 Mar 2006 09:53:03 +1000 Subject: [XFS] Remove unused/obsoleted function: xfs_bmap_do_search_extents() SGI-PV: 951415 SGI-Modid: xfs-linux-melb:xfs-kern:208490a Signed-off-by: Mandy Kirkconnell Signed-off-by: Nathan Scott --- fs/xfs/xfs_bmap.c | 107 ------------------------------------------------------ fs/xfs/xfs_bmap.h | 8 ---- 2 files changed, 115 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 2d702e4a74a3..d384e489705f 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3467,113 +3467,6 @@ done: return error; } -xfs_bmbt_rec_t * /* pointer to found extent entry */ -xfs_bmap_do_search_extents( - xfs_bmbt_rec_t *base, /* base of extent list */ - xfs_extnum_t lastx, /* last extent index used */ - xfs_extnum_t nextents, /* number of file extents */ - xfs_fileoff_t bno, /* block number searched for */ - int *eofp, /* out: end of file found */ - xfs_extnum_t *lastxp, /* out: last extent index */ - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ -{ - xfs_bmbt_rec_t *ep; /* extent list entry pointer */ - xfs_bmbt_irec_t got; /* extent list entry, decoded */ - int high; /* high index of binary search */ - int low; /* low index of binary search */ - - /* - * Initialize the extent entry structure to catch access to - * uninitialized br_startblock field. 
- */ - got.br_startoff = 0xffa5a5a5a5a5a5a5LL; - got.br_blockcount = 0xa55a5a5a5a5a5a5aLL; - got.br_state = XFS_EXT_INVALID; - -#if XFS_BIG_BLKNOS - got.br_startblock = 0xffffa5a5a5a5a5a5LL; -#else - got.br_startblock = 0xffffa5a5; -#endif - - if (lastx != NULLEXTNUM && lastx < nextents) - ep = base + lastx; - else - ep = NULL; - prevp->br_startoff = NULLFILEOFF; - if (ep && bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep)) && - bno < got.br_startoff + - (got.br_blockcount = xfs_bmbt_get_blockcount(ep))) - *eofp = 0; - else if (ep && lastx < nextents - 1 && - bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep + 1)) && - bno < got.br_startoff + - (got.br_blockcount = xfs_bmbt_get_blockcount(ep + 1))) { - lastx++; - ep++; - *eofp = 0; - } else if (nextents == 0) - *eofp = 1; - else if (bno == 0 && - (got.br_startoff = xfs_bmbt_get_startoff(base)) == 0) { - ep = base; - lastx = 0; - got.br_blockcount = xfs_bmbt_get_blockcount(ep); - *eofp = 0; - } else { - low = 0; - high = nextents - 1; - /* binary search the extents array */ - while (low <= high) { - XFS_STATS_INC(xs_cmp_exlist); - lastx = (low + high) >> 1; - ep = base + lastx; - got.br_startoff = xfs_bmbt_get_startoff(ep); - got.br_blockcount = xfs_bmbt_get_blockcount(ep); - if (bno < got.br_startoff) - high = lastx - 1; - else if (bno >= got.br_startoff + got.br_blockcount) - low = lastx + 1; - else { - got.br_startblock = xfs_bmbt_get_startblock(ep); - got.br_state = xfs_bmbt_get_state(ep); - *eofp = 0; - *lastxp = lastx; - *gotp = got; - return ep; - } - } - if (bno >= got.br_startoff + got.br_blockcount) { - lastx++; - if (lastx == nextents) { - *eofp = 1; - got.br_startblock = xfs_bmbt_get_startblock(ep); - got.br_state = xfs_bmbt_get_state(ep); - *prevp = got; - ep = NULL; - } else { - *eofp = 0; - xfs_bmbt_get_all(ep, prevp); - ep++; - got.br_startoff = xfs_bmbt_get_startoff(ep); - got.br_blockcount = xfs_bmbt_get_blockcount(ep); - } - } else { - *eofp = 0; - if (ep > base) - xfs_bmbt_get_all(ep - 1, prevp); - } - } - if (ep) { - got.br_startblock = xfs_bmbt_get_startblock(ep); - got.br_state = xfs_bmbt_get_state(ep); - } - *lastxp = lastx; - *gotp = got; - return ep; -} - /* * Search the extent records for the entry containing block bno. * If bno lies in a hole, point to the next entry. If bno lies diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 011ccaa9a1c0..f83399c89ce3 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -362,14 +362,6 @@ xfs_bmbt_rec_t * xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *, xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); -/* - * Search an extent list for the extent which includes block - * bno. - */ -xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *, - xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *, - xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); - #endif /* __KERNEL__ */ #endif /* __XFS_BMAP_H__ */ -- cgit v1.2.3 From c25366680bab32efcbb5eda5f3c202099ba27b81 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Wed, 29 Mar 2006 10:44:40 +1000 Subject: [XFS] Cleanup in XFS after recent get_block_t interface tweaks. 
Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 53 ++++++++++++++++++++------------------------- fs/xfs/linux-2.6/xfs_aops.h | 2 +- fs/xfs/linux-2.6/xfs_iops.c | 2 +- 3 files changed, 26 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 62b4553fb604..6cbbd165c60d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1223,10 +1223,9 @@ free_buffers: } STATIC int -__xfs_get_block( +__xfs_get_blocks( struct inode *inode, sector_t iblock, - unsigned long blocks, struct buffer_head *bh_result, int create, int direct, @@ -1236,22 +1235,17 @@ __xfs_get_block( xfs_iomap_t iomap; xfs_off_t offset; ssize_t size; - int retpbbm = 1; + int niomap = 1; int error; offset = (xfs_off_t)iblock << inode->i_blkbits; - if (blocks) - size = (ssize_t) min_t(xfs_off_t, LONG_MAX, - (xfs_off_t)blocks << inode->i_blkbits); - else - size = 1 << inode->i_blkbits; - + ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); + size = bh_result->b_size; VOP_BMAP(vp, offset, size, - create ? flags : BMAPI_READ, &iomap, &retpbbm, error); + create ? flags : BMAPI_READ, &iomap, &niomap, error); if (error) return -error; - - if (retpbbm == 0) + if (niomap == 0) return 0; if (iomap.iomap_bn != IOMAP_DADDR_NULL) { @@ -1271,12 +1265,16 @@ __xfs_get_block( } } - /* If this is a realtime file, data might be on a new device */ + /* + * If this is a realtime file, data may be on a different device. + * to that pointed to from the buffer_head b_bdev currently. + */ bh_result->b_bdev = iomap.iomap_target->bt_bdev; - /* If we previously allocated a block out beyond eof and - * we are now coming back to use it then we will need to - * flag it as new even if it has a disk address. + /* + * If we previously allocated a block out beyond eof and we are + * now coming back to use it then we will need to flag it as new + * even if it has a disk address. 
*/ if (create && ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || @@ -1292,26 +1290,24 @@ __xfs_get_block( } } - if (blocks) { + if (direct || size > (1 << inode->i_blkbits)) { ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0); offset = min_t(xfs_off_t, - iomap.iomap_bsize - iomap.iomap_delta, - (xfs_off_t)blocks << inode->i_blkbits); - bh_result->b_size = (u32) min_t(xfs_off_t, UINT_MAX, offset); + iomap.iomap_bsize - iomap.iomap_delta, size); + bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset); } return 0; } int -xfs_get_block( +xfs_get_blocks( struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - return __xfs_get_block(inode, iblock, - bh_result->b_size >> inode->i_blkbits, + return __xfs_get_blocks(inode, iblock, bh_result, create, 0, BMAPI_WRITE); } @@ -1322,8 +1318,7 @@ xfs_get_blocks_direct( struct buffer_head *bh_result, int create) { - return __xfs_get_block(inode, iblock, - bh_result->b_size >> inode->i_blkbits, + return __xfs_get_blocks(inode, iblock, bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT); } @@ -1405,7 +1400,7 @@ xfs_vm_prepare_write( unsigned int from, unsigned int to) { - return block_prepare_write(page, from, to, xfs_get_block); + return block_prepare_write(page, from, to, xfs_get_blocks); } STATIC sector_t @@ -1422,7 +1417,7 @@ xfs_vm_bmap( VOP_RWLOCK(vp, VRWLOCK_READ); VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); VOP_RWUNLOCK(vp, VRWLOCK_READ); - return generic_block_bmap(mapping, block, xfs_get_block); + return generic_block_bmap(mapping, block, xfs_get_blocks); } STATIC int @@ -1430,7 +1425,7 @@ xfs_vm_readpage( struct file *unused, struct page *page) { - return mpage_readpage(page, xfs_get_block); + return mpage_readpage(page, xfs_get_blocks); } STATIC int @@ -1440,7 +1435,7 @@ xfs_vm_readpages( struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, xfs_get_block); + return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); } STATIC void diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 795699f121d2..60716543c68b 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -41,6 +41,6 @@ typedef struct xfs_ioend { } xfs_ioend_t; extern struct address_space_operations xfs_address_space_operations; -extern int xfs_get_block(struct inode *, sector_t, struct buffer_head *, int); +extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index af487437bd7e..149237304fb6 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -708,7 +708,7 @@ STATIC void xfs_vn_truncate( struct inode *inode) { - block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_block); + block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_blocks); } STATIC int -- cgit v1.2.3 From fef23e7fbb11a0a78cd61935f7056bc2b237995a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 28 Mar 2006 16:10:58 -0800 Subject: [PATCH] exec: allow init to exec from any thread. After looking at the problem of init calling exec some more I figured out an easy way to make the code work. The actual symptom without out this patch is that all threads will die except pid == 1, and the thread calling exec. The thread calling exec will wait forever for pid == 1 to die. Since pid == 1 does not install a handler for SIGKILL it will never die. 
This modifies the tests for init from current->pid == 1 to the equivalent current == child_reaper. And then it causes exec in the ugly case to modify child_reaper. The only weird symptom is that you wind up with an init process that doesn't have the oldest start time on the box. Signed-off-by: Eric W. Biederman Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 13 ++++++++++++- kernel/exit.c | 2 +- kernel/signal.c | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index c7397c46ad6d..d0ecea0781f7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -660,12 +660,23 @@ static int de_thread(struct task_struct *tsk) struct dentry *proc_dentry1, *proc_dentry2; unsigned long ptrace; + leader = current->group_leader; + /* + * If our leader is the child_reaper become + * the child_reaper and resend SIGKILL signal. + */ + if (unlikely(leader == child_reaper)) { + write_lock(&tasklist_lock); + child_reaper = current; + zap_other_threads(current); + write_unlock(&tasklist_lock); + } + /* * Wait for the thread group leader to be a zombie. * It should already be zombie at this point, most * of the time. */ - leader = current->group_leader; while (leader->exit_state != EXIT_ZOMBIE) yield(); diff --git a/kernel/exit.c b/kernel/exit.c index a8c7efc7a681..223a8802b665 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -807,7 +807,7 @@ fastcall NORET_TYPE void do_exit(long code) panic("Aiee, killing interrupt handler!"); if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); - if (unlikely(tsk->pid == 1)) + if (unlikely(tsk == child_reaper)) panic("Attempted to kill init!"); if (unlikely(current->ptrace & PT_TRACE_EXIT)) { diff --git a/kernel/signal.c b/kernel/signal.c index 75f7341b0c39..dc8f91bf9f89 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1990,7 +1990,7 @@ relock: continue; /* Init gets no signals it doesn't want. */ - if (current->pid == 1) + if (current == child_reaper) continue; if (sig_kernel_stop(signr)) { -- cgit v1.2.3 From 1434261c07bcebd5ef8b8a18f919fdee533b84e0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:10:59 -0800 Subject: [PATCH] simplify exec from init's subthread I think it is enough to take tasklist_lock for reading while changing child_reaper: Reparenting needs write_lock(tasklist_lock) Only one thread in a thread group can do exec() sighand->siglock garantees that get_signal_to_deliver() will not see a stale value of child_reaper. This means that we can change child_reaper earlier, without calling zap_other_threads() twice. "child_reaper = current" is a NOOP when init does exec from main thread, we don't care. Signed-off-by: Oleg Nesterov Acked-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index d0ecea0781f7..dd194923c52c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -616,6 +616,15 @@ static int de_thread(struct task_struct *tsk) kmem_cache_free(sighand_cachep, newsighand); return -EAGAIN; } + + /* + * child_reaper ignores SIGKILL, change it now. + * Reparenting needs write_lock on tasklist_lock, + * so it is safe to do it under read_lock. 
+ */ + if (unlikely(current->group_leader == child_reaper)) + child_reaper = current; + zap_other_threads(current); read_unlock(&tasklist_lock); @@ -660,23 +669,12 @@ static int de_thread(struct task_struct *tsk) struct dentry *proc_dentry1, *proc_dentry2; unsigned long ptrace; - leader = current->group_leader; - /* - * If our leader is the child_reaper become - * the child_reaper and resend SIGKILL signal. - */ - if (unlikely(leader == child_reaper)) { - write_lock(&tasklist_lock); - child_reaper = current; - zap_other_threads(current); - write_unlock(&tasklist_lock); - } - /* * Wait for the thread group leader to be a zombie. * It should already be zombie at this point, most * of the time. */ + leader = current->group_leader; while (leader->exit_state != EXIT_ZOMBIE) yield(); -- cgit v1.2.3 From d73d65293e3e2de7e916a89c8da30be0948afab7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 28 Mar 2006 16:11:03 -0800 Subject: [PATCH] pidhash: kill switch_exec_pids switch_exec_pids is only called from de_thread by way of exec, and it is only called when we are exec'ing from a non thread group leader. Currently switch_exec_pids gives the leader the pid of the thread and unhashes and rehashes all of the process groups. The leader is already in the EXIT_DEAD state so no one cares about it's pids. The only concern for the leader is that __unhash_process called from release_task will function correctly. If we don't touch the leader at all we know that __unhash_process will work fine so there is no need to touch the leader. For the task becomming the thread group leader, we just need to give it the pid of the old thread group leader, add it to the task list, and attach it to the session and the process group of the thread group. Currently de_thread is also adding the task to the task list which is just silly. Currently the only leader of __detach_pid besides detach_pid is switch_exec_pids because of the ugly extra work that was being performed. So this patch removes switch_exec_pids because it is doing too much, it is creating an unnecessary special case in pid.c, duing work duplicated in de_thread, and generally obscuring what it is going on. The necessary work is added to de_thread, and it seems to be a little clearer there what is going on. Signed-off-by: Eric W. Biederman Cc: Oleg Nesterov Cc: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 14 +++++++++++--- include/linux/pid.h | 1 - kernel/pid.c | 30 ------------------------------ 3 files changed, 11 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index dd194923c52c..db0769447d33 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -708,7 +708,17 @@ static int de_thread(struct task_struct *tsk) remove_parent(current); remove_parent(leader); - switch_exec_pids(leader, current); + + /* Become a process group leader with the old leader's pid. + * Note: The old leader also uses thispid until release_task + * is called. Odd but simple and correct. 
+ */ + detach_pid(current, PIDTYPE_PID); + current->pid = leader->pid; + attach_pid(current, PIDTYPE_PID, current->pid); + attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); + attach_pid(current, PIDTYPE_SID, current->signal->session); + list_add_tail(¤t->tasks, &init_task.tasks); current->parent = current->real_parent = leader->real_parent; leader->parent = leader->real_parent = child_reaper; @@ -722,8 +732,6 @@ static int de_thread(struct task_struct *tsk) __ptrace_link(current, parent); } - list_del(¤t->tasks); - list_add_tail(¤t->tasks, &init_task.tasks); current->exit_signal = SIGCHLD; BUG_ON(leader->exit_state != EXIT_ZOMBIE); diff --git a/include/linux/pid.h b/include/linux/pid.h index 5b2fcb19d2da..099e70ecf7c7 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -38,7 +38,6 @@ extern struct pid *FASTCALL(find_pid(enum pid_type, int)); extern int alloc_pidmap(void); extern void FASTCALL(free_pidmap(int)); -extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread); #define do_each_task_pid(who, type, task) \ if ((task = find_task_by_pid_type(type, who))) { \ diff --git a/kernel/pid.c b/kernel/pid.c index 1acc07246991..7781d9999058 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -217,36 +217,6 @@ task_t *find_task_by_pid_type(int type, int nr) EXPORT_SYMBOL(find_task_by_pid_type); -/* - * This function switches the PIDs if a non-leader thread calls - * sys_execve() - this must be done without releasing the PID. - * (which a detach_pid() would eventually do.) - */ -void switch_exec_pids(task_t *leader, task_t *thread) -{ - __detach_pid(leader, PIDTYPE_PID); - __detach_pid(leader, PIDTYPE_TGID); - __detach_pid(leader, PIDTYPE_PGID); - __detach_pid(leader, PIDTYPE_SID); - - __detach_pid(thread, PIDTYPE_PID); - __detach_pid(thread, PIDTYPE_TGID); - - leader->pid = leader->tgid = thread->pid; - thread->pid = thread->tgid; - - attach_pid(thread, PIDTYPE_PID, thread->pid); - attach_pid(thread, PIDTYPE_TGID, thread->tgid); - attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp); - attach_pid(thread, PIDTYPE_SID, thread->signal->session); - list_add_tail(&thread->tasks, &init_task.tasks); - - attach_pid(leader, PIDTYPE_PID, leader->pid); - attach_pid(leader, PIDTYPE_TGID, leader->tgid); - attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp); - attach_pid(leader, PIDTYPE_SID, leader->signal->session); -} - /* * The pid hash table is scaled according to the amount of memory in the * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or -- cgit v1.2.3 From 8fafabd86f1b75ed3cc6a6ffbe6c3e53e3d8457d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:05 -0800 Subject: [PATCH] remove add_parent()'s parent argument add_parent(p, parent) is always called with parent == p->parent, and it makes no sense to do it differently. This patch removes this argument. No changes in affected .o files. Signed-off-by: Oleg Nesterov Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/irixsig.c | 4 ++-- fs/exec.c | 4 ++-- include/linux/sched.h | 4 ++-- kernel/exit.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c index 08273a2a501d..8150f071f80a 100644 --- a/arch/mips/kernel/irixsig.c +++ b/arch/mips/kernel/irixsig.c @@ -603,7 +603,7 @@ repeat: /* move to end of parent's list to avoid starvation */ write_lock_irq(&tasklist_lock); remove_parent(p); - add_parent(p, p->parent); + add_parent(p); write_unlock_irq(&tasklist_lock); retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; if (retval) @@ -643,7 +643,7 @@ repeat: write_lock_irq(&tasklist_lock); remove_parent(p); p->parent = p->real_parent; - add_parent(p, p->parent); + add_parent(p); do_notify_parent(p, SIGCHLD); write_unlock_irq(&tasklist_lock); } else diff --git a/fs/exec.c b/fs/exec.c index db0769447d33..9046ad2b0614 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -725,8 +725,8 @@ static int de_thread(struct task_struct *tsk) current->group_leader = current; leader->group_leader = leader; - add_parent(current, current->parent); - add_parent(leader, leader->parent); + add_parent(current); + add_parent(leader); if (ptrace) { current->ptrace = ptrace; __ptrace_link(current, parent); diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f5ab98bbb6b..b4b14c32b28a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1184,7 +1184,7 @@ extern void wait_task_inactive(task_t * p); #endif #define remove_parent(p) list_del_init(&(p)->sibling) -#define add_parent(p, parent) list_add_tail(&(p)->sibling,&(parent)->children) +#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) #define REMOVE_LINKS(p) do { \ if (thread_group_leader(p)) \ @@ -1195,7 +1195,7 @@ extern void wait_task_inactive(task_t * p); #define SET_LINKS(p) do { \ if (thread_group_leader(p)) \ list_add_tail(&(p)->tasks,&init_task.tasks); \ - add_parent(p, (p)->parent); \ + add_parent(p); \ } while (0) #define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) diff --git a/kernel/exit.c b/kernel/exit.c index e04a59405e99..df26c33037d2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1281,7 +1281,7 @@ bail_ref: /* move to end of parent's list to avoid starvation */ remove_parent(p); - add_parent(p, p->parent); + add_parent(p); write_unlock_irq(&tasklist_lock); -- cgit v1.2.3 From aa1757f90bea3f598b6e5d04d922a6a60200f1da Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:12 -0800 Subject: [PATCH] convert sighand_cache to use SLAB_DESTROY_BY_RCU This patch borrows a clever Hugh's 'struct anon_vma' trick. Without tasklist_lock held we can't trust task->sighand until we locked it and re-checked that it is still the same. But this means we don't need to defer 'kmem_cache_free(sighand)'. We can return the memory to slab immediately, all we need is to be sure that sighand->siglock can't dissapear inside rcu protected section. To do so we need to initialize ->siglock inside ctor function, SLAB_DESTROY_BY_RCU does the rest. 
Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 3 +-- include/linux/sched.h | 8 -------- kernel/fork.c | 21 +++++++++++---------- kernel/signal.c | 2 +- 4 files changed, 13 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 9046ad2b0614..950ebd43cdc3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -768,7 +768,6 @@ no_thread_group: /* * Move our state over to newsighand and switch it in. */ - spin_lock_init(&newsighand->siglock); atomic_set(&newsighand->count, 1); memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); @@ -785,7 +784,7 @@ no_thread_group: write_unlock_irq(&tasklist_lock); if (atomic_dec_and_test(&oldsighand->count)) - sighand_free(oldsighand); + kmem_cache_free(sighand_cachep, oldsighand); } BUG_ON(!thread_group_leader(current)); diff --git a/include/linux/sched.h b/include/linux/sched.h index ddc0df7f8bf5..bbcfc873bd98 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -355,16 +355,8 @@ struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; - struct rcu_head rcu; }; -extern void sighand_free_cb(struct rcu_head *rhp); - -static inline void sighand_free(struct sighand_struct *sp) -{ - call_rcu(&sp->rcu, sighand_free_cb); -} - /* * NOTE! "signal_struct" does not have it's own * locking, because a shared signal_struct always diff --git a/kernel/fork.c b/kernel/fork.c index 0c32e28cdc5f..33ffb5bf0dbc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -786,14 +786,6 @@ int unshare_files(void) EXPORT_SYMBOL(unshare_files); -void sighand_free_cb(struct rcu_head *rhp) -{ - struct sighand_struct *sp; - - sp = container_of(rhp, struct sighand_struct, rcu); - kmem_cache_free(sighand_cachep, sp); -} - static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) { struct sighand_struct *sig; @@ -806,7 +798,6 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t rcu_assign_pointer(tsk->sighand, sig); if (!sig) return -ENOMEM; - spin_lock_init(&sig->siglock); atomic_set(&sig->count, 1); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); return 0; @@ -1356,11 +1347,21 @@ long do_fork(unsigned long clone_flags, #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif +static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags) +{ + struct sighand_struct *sighand = data; + + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + spin_lock_init(&sighand->siglock); +} + void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU, + sighand_ctor, NULL); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index dc8f91bf9f89..b0b1ca9daa33 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -330,7 +330,7 @@ void __exit_sighand(struct task_struct *tsk) /* Ok, we're done with the signal handlers */ tsk->sighand = NULL; if (atomic_dec_and_test(&sighand->count)) - sighand_free(sighand); + kmem_cache_free(sighand_cachep, sighand); } void exit_sighand(struct task_struct *tsk) -- cgit v1.2.3 From 5274f052e7b3dbd81935772eb551dfd0325dfa9d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 30 Mar 2006 15:15:30 +0200 Subject: [PATCH] Introduce sys_splice() system 
call This adds support for the sys_splice system call. Using a pipe as a transport, it can connect to files or sockets (latter as output only). From the splice.c comments: "splice": joining two ropes together by interweaving their strands. This is the "extended pipe" functionality, where a pipe is used as an arbitrary in-memory buffer. Think of a pipe as a small kernel buffer that you can use to transfer data from one end to the other. The traditional unix read/write is extended with a "splice()" operation that transfers data buffers to or from a pipe buffer. Named by Larry McVoy, original implementation from Linus, extended by Jens to support splicing to files and fixing the initial implementation bugs. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- arch/i386/kernel/syscall_table.S | 1 + arch/ia64/kernel/entry.S | 1 + fs/Makefile | 2 +- fs/ext2/file.c | 2 + fs/ext3/file.c | 2 + fs/pipe.c | 33 ++- fs/reiserfs/file.c | 2 + fs/splice.c | 612 +++++++++++++++++++++++++++++++++++++++ include/asm-i386/unistd.h | 3 +- include/asm-ia64/unistd.h | 3 +- include/asm-powerpc/unistd.h | 3 +- include/asm-x86_64/unistd.h | 4 +- include/linux/fs.h | 4 + include/linux/syscalls.h | 2 + net/socket.c | 6 +- 15 files changed, 669 insertions(+), 11 deletions(-) create mode 100644 fs/splice.c (limited to 'fs') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 326595f3fa4d..ce3ef4fa0551 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -312,3 +312,4 @@ ENTRY(sys_call_table) .long sys_unshare /* 310 */ .long sys_set_robust_list .long sys_get_robust_list + .long sys_splice diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0e3eda99e549..750e8e7fbdc3 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1605,5 +1605,6 @@ sys_call_table: data8 sys_ni_syscall // reserved for pselect data8 sys_ni_syscall // 1295 reserved for ppoll data8 sys_unshare + data8 sys_splice .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/fs/Makefile b/fs/Makefile index 080b3867be4d..f3a4f7077175 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ - ioprio.o pnode.o drop_caches.o + ioprio.o pnode.o drop_caches.o splice.o obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_EPOLL) += eventpoll.o diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 509cceca04db..23e2c7ccec1d 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -53,6 +53,8 @@ const struct file_operations ext2_file_operations = { .readv = generic_file_readv, .writev = generic_file_writev, .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, }; #ifdef CONFIG_EXT2_FS_XIP diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 783a796220bb..1efefb630ea9 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -119,6 +119,8 @@ const struct file_operations ext3_file_operations = { .release = ext3_release_file, .fsync = ext3_sync_file, .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, }; struct inode_operations ext3_file_inode_operations = { diff --git a/fs/pipe.c b/fs/pipe.c index e2f4f1d9ffc2..2414bf270db6 100644 --- a/fs/pipe.c 
+++ b/fs/pipe.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -94,11 +95,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff { struct page *page = buf->page; - if (info->tmp_page) { - __free_page(page); + /* + * If nobody else uses this page, and we don't already have a + * temporary page, let's keep track of it as a one-deep + * allocation cache + */ + if (page_count(page) == 1 && !info->tmp_page) { + info->tmp_page = page; return; } - info->tmp_page = page; + + /* + * Otherwise just release our reference to it + */ + page_cache_release(page); } static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf) @@ -152,6 +162,11 @@ pipe_readv(struct file *filp, const struct iovec *_iov, chars = total_len; addr = ops->map(filp, info, buf); + if (IS_ERR(addr)) { + if (!ret) + ret = PTR_ERR(addr); + break; + } error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); ops->unmap(info, buf); if (unlikely(error)) { @@ -254,8 +269,16 @@ pipe_writev(struct file *filp, const struct iovec *_iov, struct pipe_buf_operations *ops = buf->ops; int offset = buf->offset + buf->len; if (ops->can_merge && offset + chars <= PAGE_SIZE) { - void *addr = ops->map(filp, info, buf); - int error = pipe_iov_copy_from_user(offset + addr, iov, chars); + void *addr; + int error; + + addr = ops->map(filp, info, buf); + if (IS_ERR(addr)) { + error = PTR_ERR(addr); + goto out; + } + error = pipe_iov_copy_from_user(offset + addr, iov, + chars); ops->unmap(info, buf); ret = error; do_wakeup = 1; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 010094d14da6..cf6e1cf40351 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1576,6 +1576,8 @@ const struct file_operations reiserfs_file_operations = { .sendfile = generic_file_sendfile, .aio_read = generic_file_aio_read, .aio_write = reiserfs_aio_write, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, }; struct inode_operations reiserfs_file_inode_operations = { diff --git a/fs/splice.c b/fs/splice.c new file mode 100644 index 000000000000..efa47c1c4e13 --- /dev/null +++ b/fs/splice.c @@ -0,0 +1,612 @@ +/* + * "splice": joining two ropes together by interweaving their strands. + * + * This is the "extended pipe" functionality, where a pipe is used as + * an arbitrary in-memory buffer. Think of a pipe as a small kernel + * buffer that you can use to transfer data from one end to the other. + * + * The traditional unix read/write is extended with a "splice()" operation + * that transfers data buffers to or from a pipe buffer. + * + * Named by Larry McVoy, original implementation from Linus, extended by + * Jens to support splicing to files and fixing the initial implementation + * bugs. 
+ * + * Copyright (C) 2005 Jens Axboe + * Copyright (C) 2005 Linus Torvalds + * + */ +#include +#include +#include +#include +#include + +/* + * Passed to the actors + */ +struct splice_desc { + unsigned int len, total_len; /* current and remaining length */ + unsigned int flags; /* splice flags */ + struct file *file; /* file to read/write */ + loff_t pos; /* file position */ +}; + +static void page_cache_pipe_buf_release(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + page_cache_release(buf->page); + buf->page = NULL; +} + +static void *page_cache_pipe_buf_map(struct file *file, + struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + struct page *page = buf->page; + + lock_page(page); + + if (!PageUptodate(page)) { + unlock_page(page); + return ERR_PTR(-EIO); + } + + if (!page->mapping) { + unlock_page(page); + return ERR_PTR(-ENODATA); + } + + return kmap(buf->page); +} + +static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + unlock_page(buf->page); + kunmap(buf->page); +} + +static struct pipe_buf_operations page_cache_pipe_buf_ops = { + .can_merge = 0, + .map = page_cache_pipe_buf_map, + .unmap = page_cache_pipe_buf_unmap, + .release = page_cache_pipe_buf_release, +}; + +static ssize_t move_to_pipe(struct inode *inode, struct page **pages, + int nr_pages, unsigned long offset, + unsigned long len) +{ + struct pipe_inode_info *info; + int ret, do_wakeup, i; + + ret = 0; + do_wakeup = 0; + i = 0; + + mutex_lock(PIPE_MUTEX(*inode)); + + info = inode->i_pipe; + for (;;) { + int bufs; + + if (!PIPE_READERS(*inode)) { + send_sig(SIGPIPE, current, 0); + if (!ret) + ret = -EPIPE; + break; + } + + bufs = info->nrbufs; + if (bufs < PIPE_BUFFERS) { + int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS - 1); + struct pipe_buffer *buf = info->bufs + newbuf; + struct page *page = pages[i++]; + unsigned long this_len; + + this_len = PAGE_CACHE_SIZE - offset; + if (this_len > len) + this_len = len; + + buf->page = page; + buf->offset = offset; + buf->len = this_len; + buf->ops = &page_cache_pipe_buf_ops; + info->nrbufs = ++bufs; + do_wakeup = 1; + + ret += this_len; + len -= this_len; + offset = 0; + if (!--nr_pages) + break; + if (!len) + break; + if (bufs < PIPE_BUFFERS) + continue; + + break; + } + + if (signal_pending(current)) { + if (!ret) + ret = -ERESTARTSYS; + break; + } + + if (do_wakeup) { + wake_up_interruptible_sync(PIPE_WAIT(*inode)); + kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, + POLL_IN); + do_wakeup = 0; + } + + PIPE_WAITING_WRITERS(*inode)++; + pipe_wait(inode); + PIPE_WAITING_WRITERS(*inode)--; + } + + mutex_unlock(PIPE_MUTEX(*inode)); + + if (do_wakeup) { + wake_up_interruptible(PIPE_WAIT(*inode)); + kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); + } + + while (i < nr_pages) + page_cache_release(pages[i++]); + + return ret; +} + +static int __generic_file_splice_read(struct file *in, struct inode *pipe, + size_t len) +{ + struct address_space *mapping = in->f_mapping; + unsigned int offset, nr_pages; + struct page *pages[PIPE_BUFFERS], *shadow[PIPE_BUFFERS]; + struct page *page; + pgoff_t index, pidx; + int i, j; + + index = in->f_pos >> PAGE_CACHE_SHIFT; + offset = in->f_pos & ~PAGE_CACHE_MASK; + nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + if (nr_pages > PIPE_BUFFERS) + nr_pages = PIPE_BUFFERS; + + /* + * initiate read-ahead on this page range + */ + do_page_cache_readahead(mapping, in, index, nr_pages); + + /* + * Get as many pages from the page cache as possible.. 
+ * Start IO on the page cache entries we create (we + * can assume that any pre-existing ones we find have + * already had IO started on them). + */ + i = find_get_pages(mapping, index, nr_pages, pages); + + /* + * common case - we found all pages and they are contiguous, + * kick them off + */ + if (i && (pages[i - 1]->index == index + i - 1)) + goto splice_them; + + /* + * fill shadow[] with pages at the right locations, so we only + * have to fill holes + */ + memset(shadow, 0, i * sizeof(struct page *)); + for (j = 0, pidx = index; j < i; pidx++, j++) + shadow[pages[j]->index - pidx] = pages[j]; + + /* + * now fill in the holes + */ + for (i = 0, pidx = index; i < nr_pages; pidx++, i++) { + int error; + + if (shadow[i]) + continue; + + /* + * no page there, look one up / create it + */ + page = find_or_create_page(mapping, pidx, + mapping_gfp_mask(mapping)); + if (!page) + break; + + if (PageUptodate(page)) + unlock_page(page); + else { + error = mapping->a_ops->readpage(in, page); + + if (unlikely(error)) { + page_cache_release(page); + break; + } + } + shadow[i] = page; + } + + if (!i) { + for (i = 0; i < nr_pages; i++) { + if (shadow[i]) + page_cache_release(shadow[i]); + } + return 0; + } + + memcpy(pages, shadow, i * sizeof(struct page *)); + + /* + * Now we splice them into the pipe.. + */ +splice_them: + return move_to_pipe(pipe, pages, i, offset, len); +} + +ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, + size_t len, unsigned int flags) +{ + ssize_t spliced; + int ret; + + ret = 0; + spliced = 0; + while (len) { + ret = __generic_file_splice_read(in, pipe, len); + + if (ret <= 0) + break; + + in->f_pos += ret; + len -= ret; + spliced += ret; + } + + if (spliced) + return spliced; + + return ret; +} + +/* + * Send 'len' bytes to socket from 'file' at position 'pos' using sendpage(). + */ +static int pipe_to_sendpage(struct pipe_inode_info *info, + struct pipe_buffer *buf, struct splice_desc *sd) +{ + struct file *file = sd->file; + loff_t pos = sd->pos; + unsigned int offset; + ssize_t ret; + void *ptr; + + /* + * sub-optimal, but we are limited by the pipe ->map. we don't + * need a kmap'ed buffer here, we just want to make sure we + * have the page pinned if the pipe page originates from the + * page cache + */ + ptr = buf->ops->map(file, info, buf); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + offset = pos & ~PAGE_CACHE_MASK; + + ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos, + sd->len < sd->total_len); + + buf->ops->unmap(info, buf); + if (ret == sd->len) + return 0; + + return -EIO; +} + +/* + * This is a little more tricky than the file -> pipe splicing. There are + * basically three cases: + * + * - Destination page already exists in the address space and there + * are users of it. For that case we have no other option that + * copying the data. Tough luck. + * - Destination page already exists in the address space, but there + * are no users of it. Make sure it's uptodate, then drop it. Fall + * through to last case. + * - Destination page does not exist, we can add the pipe page to + * the page cache and avoid the copy. + * + * For now we just do the slower thing and always copy pages over, it's + * easier than migrating pages from the pipe to the target file. For the + * case of doing file | file splicing, the migrate approach had some LRU + * nastiness... 
+ */ +static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, + struct splice_desc *sd) +{ + struct file *file = sd->file; + struct address_space *mapping = file->f_mapping; + unsigned int offset; + struct page *page; + char *src, *dst; + pgoff_t index; + int ret; + + /* + * after this, page will be locked and unmapped + */ + src = buf->ops->map(file, info, buf); + if (IS_ERR(src)) + return PTR_ERR(src); + + index = sd->pos >> PAGE_CACHE_SHIFT; + offset = sd->pos & ~PAGE_CACHE_MASK; + +find_page: + ret = -ENOMEM; + page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); + if (!page) + goto out; + + /* + * If the page is uptodate, it is also locked. If it isn't + * uptodate, we can mark it uptodate if we are filling the + * full page. Otherwise we need to read it in first... + */ + if (!PageUptodate(page)) { + if (sd->len < PAGE_CACHE_SIZE) { + ret = mapping->a_ops->readpage(file, page); + if (unlikely(ret)) + goto out; + + lock_page(page); + + if (!PageUptodate(page)) { + /* + * page got invalidated, repeat + */ + if (!page->mapping) { + unlock_page(page); + page_cache_release(page); + goto find_page; + } + ret = -EIO; + goto out; + } + } else { + WARN_ON(!PageLocked(page)); + SetPageUptodate(page); + } + } + + ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); + if (ret) + goto out; + + dst = kmap_atomic(page, KM_USER0); + memcpy(dst + offset, src + buf->offset, sd->len); + flush_dcache_page(page); + kunmap_atomic(dst, KM_USER0); + + ret = mapping->a_ops->commit_write(file, page, 0, sd->len); + if (ret < 0) + goto out; + + set_page_dirty(page); + ret = write_one_page(page, 0); +out: + if (ret < 0) + unlock_page(page); + page_cache_release(page); + buf->ops->unmap(info, buf); + return ret; +} + +typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, + struct splice_desc *); + +static ssize_t move_from_pipe(struct inode *inode, struct file *out, + size_t len, unsigned int flags, + splice_actor *actor) +{ + struct pipe_inode_info *info; + int ret, do_wakeup, err; + struct splice_desc sd; + + ret = 0; + do_wakeup = 0; + + sd.total_len = len; + sd.flags = flags; + sd.file = out; + sd.pos = out->f_pos; + + mutex_lock(PIPE_MUTEX(*inode)); + + info = inode->i_pipe; + for (;;) { + int bufs = info->nrbufs; + + if (bufs) { + int curbuf = info->curbuf; + struct pipe_buffer *buf = info->bufs + curbuf; + struct pipe_buf_operations *ops = buf->ops; + + sd.len = buf->len; + if (sd.len > sd.total_len) + sd.len = sd.total_len; + + err = actor(info, buf, &sd); + if (err) { + if (!ret && err != -ENODATA) + ret = err; + + break; + } + + ret += sd.len; + buf->offset += sd.len; + buf->len -= sd.len; + if (!buf->len) { + buf->ops = NULL; + ops->release(info, buf); + curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1); + info->curbuf = curbuf; + info->nrbufs = --bufs; + do_wakeup = 1; + } + + sd.pos += sd.len; + sd.total_len -= sd.len; + if (!sd.total_len) + break; + } + + if (bufs) + continue; + if (!PIPE_WRITERS(*inode)) + break; + if (!PIPE_WAITING_WRITERS(*inode)) { + if (ret) + break; + } + + if (signal_pending(current)) { + if (!ret) + ret = -ERESTARTSYS; + break; + } + + if (do_wakeup) { + wake_up_interruptible_sync(PIPE_WAIT(*inode)); + kill_fasync(PIPE_FASYNC_WRITERS(*inode),SIGIO,POLL_OUT); + do_wakeup = 0; + } + + pipe_wait(inode); + } + + mutex_unlock(PIPE_MUTEX(*inode)); + + if (do_wakeup) { + wake_up_interruptible(PIPE_WAIT(*inode)); + kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); + } + + 
mutex_lock(&out->f_mapping->host->i_mutex); + out->f_pos = sd.pos; + mutex_unlock(&out->f_mapping->host->i_mutex); + return ret; + +} + +ssize_t generic_file_splice_write(struct inode *inode, struct file *out, + size_t len, unsigned int flags) +{ + return move_from_pipe(inode, out, len, flags, pipe_to_file); +} + +ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, + size_t len, unsigned int flags) +{ + return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); +} + +static long do_splice_from(struct inode *pipe, struct file *out, size_t len, + unsigned int flags) +{ + loff_t pos; + int ret; + + if (!out->f_op || !out->f_op->splice_write) + return -EINVAL; + + if (!(out->f_mode & FMODE_WRITE)) + return -EBADF; + + pos = out->f_pos; + ret = rw_verify_area(WRITE, out, &pos, len); + if (unlikely(ret < 0)) + return ret; + + return out->f_op->splice_write(pipe, out, len, flags); +} + +static long do_splice_to(struct file *in, struct inode *pipe, size_t len, + unsigned int flags) +{ + loff_t pos, isize, left; + int ret; + + if (!in->f_op || !in->f_op->splice_read) + return -EINVAL; + + if (!(in->f_mode & FMODE_READ)) + return -EBADF; + + pos = in->f_pos; + ret = rw_verify_area(READ, in, &pos, len); + if (unlikely(ret < 0)) + return ret; + + isize = i_size_read(in->f_mapping->host); + if (unlikely(in->f_pos >= isize)) + return 0; + + left = isize - in->f_pos; + if (left < len) + len = left; + + return in->f_op->splice_read(in, pipe, len, flags); +} + +static long do_splice(struct file *in, struct file *out, size_t len, + unsigned int flags) +{ + struct inode *pipe; + + pipe = in->f_dentry->d_inode; + if (pipe->i_pipe) + return do_splice_from(pipe, out, len, flags); + + pipe = out->f_dentry->d_inode; + if (pipe->i_pipe) + return do_splice_to(in, pipe, len, flags); + + return -EINVAL; +} + +asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags) +{ + long error; + struct file *in, *out; + int fput_in, fput_out; + + if (unlikely(!len)) + return 0; + + error = -EBADF; + in = fget_light(fdin, &fput_in); + if (in) { + if (in->f_mode & FMODE_READ) { + out = fget_light(fdout, &fput_out); + if (out) { + if (out->f_mode & FMODE_WRITE) + error = do_splice(in, out, len, flags); + fput_light(out, fput_out); + } + } + + fput_light(in, fput_in); + } + + return error; +} diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 014e3562895b..789e9bdd0a40 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -318,8 +318,9 @@ #define __NR_unshare 310 #define __NR_set_robust_list 311 #define __NR_get_robust_list 312 +#define __NR_sys_splice 313 -#define NR_syscalls 313 +#define NR_syscalls 314 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index 019956c613e4..36070c1014d8 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -285,12 +285,13 @@ #define __NR_faccessat 1293 /* 1294, 1295 reserved for pselect/ppoll */ #define __NR_unshare 1296 +#define __NR_splice 1297 #ifdef __KERNEL__ #include -#define NR_syscalls 273 /* length of syscall table */ +#define NR_syscalls 274 /* length of syscall table */ #define __ARCH_WANT_SYS_RT_SIGACTION diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index 1e990747dce7..536ba0873052 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -301,8 +301,9 @@ #define __NR_pselect6 280 #define __NR_ppoll 281 #define __NR_unshare 282 +#define __NR_splice 
283 -#define __NR_syscalls 283 +#define __NR_syscalls 284 #ifdef __KERNEL__ #define __NR__exit __NR_exit diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index fcc516353087..f21ff2c1e960 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -609,8 +609,10 @@ __SYSCALL(__NR_unshare, sys_unshare) __SYSCALL(__NR_set_robust_list, sys_set_robust_list) #define __NR_get_robust_list 274 __SYSCALL(__NR_get_robust_list, sys_get_robust_list) +#define __NR_splice 275 +__SYSCALL(__NR_splice, sys_splice) -#define __NR_syscall_max __NR_get_robust_list +#define __NR_syscall_max __NR_splice #ifndef __NO_STUBS diff --git a/include/linux/fs.h b/include/linux/fs.h index 408fe89498f4..20fa5f6d7269 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1032,6 +1032,8 @@ struct file_operations { int (*check_flags)(int); int (*dir_notify)(struct file *filp, unsigned long arg); int (*flock) (struct file *, int, struct file_lock *); + ssize_t (*splice_write)(struct inode *, struct file *, size_t, unsigned int); + ssize_t (*splice_read)(struct file *, struct inode *, size_t, unsigned int); }; struct inode_operations { @@ -1609,6 +1611,8 @@ extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, loff_t *, read_descriptor_t *, read_actor_t); +extern ssize_t generic_file_splice_read(struct file *, struct inode *, size_t, unsigned int); +extern ssize_t generic_file_splice_write(struct inode *, struct file *, size_t, unsigned int); extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e487e3b60f60..e78ffc7d5b56 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -569,5 +569,7 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int mode); asmlinkage long sys_unshare(unsigned long unshare_flags); +asmlinkage long sys_splice(int fdin, int fdout, size_t len, + unsigned int flags); #endif diff --git a/net/socket.c b/net/socket.c index fcd77eac0ccf..b13042f68c02 100644 --- a/net/socket.c +++ b/net/socket.c @@ -119,6 +119,9 @@ static ssize_t sock_writev(struct file *file, const struct iovec *vector, static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); +extern ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, + size_t len, unsigned int flags); + /* * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear @@ -141,7 +144,8 @@ static struct file_operations socket_file_ops = { .fasync = sock_fasync, .readv = sock_readv, .writev = sock_writev, - .sendpage = sock_sendpage + .sendpage = sock_sendpage, + .splice_write = generic_splice_sendpage, }; /* -- cgit v1.2.3 From 5abc97aa25b2c41413b3a520faee83f2282d9f18 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 30 Mar 2006 15:16:46 +0200 Subject: [PATCH] splice: add support for SPLICE_F_MOVE flag This enables the caller to migrate pages from one address space page cache to another. In buzz word marketing, you can do zero-copy file copies! 
Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- fs/pipe.c | 8 +++ fs/splice.c | 121 ++++++++++++++++++++++++++++++++-------------- include/linux/pipe_fs_i.h | 8 +++ 3 files changed, 100 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 2414bf270db6..109a102c150d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -121,11 +121,19 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer kunmap(buf->page); } +static int anon_pipe_buf_steal(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + buf->stolen = 1; + return 0; +} + static struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, .map = anon_pipe_buf_map, .unmap = anon_pipe_buf_unmap, .release = anon_pipe_buf_release, + .steal = anon_pipe_buf_steal, }; static ssize_t diff --git a/fs/splice.c b/fs/splice.c index efa47c1c4e13..4a026f95884f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * Passed to the actors @@ -32,11 +33,37 @@ struct splice_desc { loff_t pos; /* file position */ }; +static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + struct page *page = buf->page; + + WARN_ON(!PageLocked(page)); + WARN_ON(!PageUptodate(page)); + + if (!remove_mapping(page_mapping(page), page)) + return 1; + + if (PageLRU(page)) { + struct zone *zone = page_zone(page); + + spin_lock_irq(&zone->lru_lock); + BUG_ON(!PageLRU(page)); + __ClearPageLRU(page); + del_page_from_lru(zone, page); + spin_unlock_irq(&zone->lru_lock); + } + + buf->stolen = 1; + return 0; +} + static void page_cache_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) { page_cache_release(buf->page); buf->page = NULL; + buf->stolen = 0; } static void *page_cache_pipe_buf_map(struct file *file, @@ -63,7 +90,8 @@ static void *page_cache_pipe_buf_map(struct file *file, static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) { - unlock_page(buf->page); + if (!buf->stolen) + unlock_page(buf->page); kunmap(buf->page); } @@ -72,6 +100,7 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { .map = page_cache_pipe_buf_map, .unmap = page_cache_pipe_buf_unmap, .release = page_cache_pipe_buf_release, + .steal = page_cache_pipe_buf_steal, }; static ssize_t move_to_pipe(struct inode *inode, struct page **pages, @@ -336,8 +365,8 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, struct address_space *mapping = file->f_mapping; unsigned int offset; struct page *page; - char *src, *dst; pgoff_t index; + char *src; int ret; /* @@ -350,40 +379,54 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; -find_page: - ret = -ENOMEM; - page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); - if (!page) - goto out; - /* - * If the page is uptodate, it is also locked. If it isn't - * uptodate, we can mark it uptodate if we are filling the - * full page. Otherwise we need to read it in first... 
+ * reuse buf page, if SPLICE_F_MOVE is set */ - if (!PageUptodate(page)) { - if (sd->len < PAGE_CACHE_SIZE) { - ret = mapping->a_ops->readpage(file, page); - if (unlikely(ret)) - goto out; - - lock_page(page); - - if (!PageUptodate(page)) { - /* - * page got invalidated, repeat - */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto find_page; + if (sd->flags & SPLICE_F_MOVE) { + if (buf->ops->steal(info, buf)) + goto find_page; + + page = buf->page; + if (add_to_page_cache_lru(page, mapping, index, + mapping_gfp_mask(mapping))) + goto find_page; + } else { +find_page: + ret = -ENOMEM; + page = find_or_create_page(mapping, index, + mapping_gfp_mask(mapping)); + if (!page) + goto out; + + /* + * If the page is uptodate, it is also locked. If it isn't + * uptodate, we can mark it uptodate if we are filling the + * full page. Otherwise we need to read it in first... + */ + if (!PageUptodate(page)) { + if (sd->len < PAGE_CACHE_SIZE) { + ret = mapping->a_ops->readpage(file, page); + if (unlikely(ret)) + goto out; + + lock_page(page); + + if (!PageUptodate(page)) { + /* + * page got invalidated, repeat + */ + if (!page->mapping) { + unlock_page(page); + page_cache_release(page); + goto find_page; + } + ret = -EIO; + goto out; } - ret = -EIO; - goto out; + } else { + WARN_ON(!PageLocked(page)); + SetPageUptodate(page); } - } else { - WARN_ON(!PageLocked(page)); - SetPageUptodate(page); } } @@ -391,10 +434,13 @@ find_page: if (ret) goto out; - dst = kmap_atomic(page, KM_USER0); - memcpy(dst + offset, src + buf->offset, sd->len); - flush_dcache_page(page); - kunmap_atomic(dst, KM_USER0); + if (!buf->stolen) { + char *dst = kmap_atomic(page, KM_USER0); + + memcpy(dst + offset, src + buf->offset, sd->len); + flush_dcache_page(page); + kunmap_atomic(dst, KM_USER0); + } ret = mapping->a_ops->commit_write(file, page, 0, sd->len); if (ret < 0) @@ -405,7 +451,8 @@ find_page: out: if (ret < 0) unlock_page(page); - page_cache_release(page); + if (!buf->stolen) + page_cache_release(page); buf->ops->unmap(info, buf); return ret; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index b12e59c75752..75c7f55023ab 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -9,6 +9,7 @@ struct pipe_buffer { struct page *page; unsigned int offset, len; struct pipe_buf_operations *ops; + unsigned int stolen; }; struct pipe_buf_operations { @@ -16,6 +17,7 @@ struct pipe_buf_operations { void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *); void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); + int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); }; struct pipe_inode_info { @@ -53,4 +55,10 @@ void pipe_wait(struct inode * inode); struct inode* pipe_new(struct inode* inode); void free_pipe_info(struct inode* inode); +/* + * splice is tied to pipes as a transport (at least for now), so we'll just + * add the splice flags here. + */ +#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ + #endif -- cgit v1.2.3