From fd9d63678d42ffd4312815ac720a12920642eb36 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 4 Oct 2005 13:44:48 +0100
Subject: NTFS: Change ntfs_map_runlist_nolock() to also take an optional
 attribute search context.  This allows calling it with the mft record
 mapped.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |   5 ++
 fs/ntfs/Makefile  |   2 +-
 fs/ntfs/attrib.c  | 232 ++++++++++++++++++++++++++++++++++++++++++++++--------
 fs/ntfs/attrib.h  |   3 +-
 4 files changed, 209 insertions(+), 33 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index de58579a1d0e..85f797a2edad 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -22,6 +22,11 @@ ToDo/Notes:
 	- Enable the code for setting the NT4 compatibility flag when we start
 	  making NTFS 1.2 specific modifications.
 
+2.1.25-WIP
+
+	- Change ntfs_map_runlist_nolock() to also take an optional attribute
+	  search context.  This allows calling it with the mft record mapped.
+
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
 	- Support journals ($LogFile) which have been modified by chkdsk.  This
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 894b2b876d35..a3ce2c0e7dd9 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
 	     index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
 	     unistr.o upcase.o
 
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.25-WIP\"
 
 ifeq ($(CONFIG_NTFS_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 3f9a4ff42ee5..b194197b72f7 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -36,9 +36,27 @@
  * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
  * @ni:		ntfs inode for which to map (part of) a runlist
  * @vcn:	map runlist part containing this vcn
+ * @ctx:	active attribute search context if present or NULL if not
  *
  * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
  *
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record.  This is needed when ntfs_map_runlist_nolock() encounters unmapped
+ * runlist fragments and allows their mapping.  If you do not have the mft
+ * record mapped, you can specify @ctx as NULL and ntfs_map_runlist_nolock()
+ * will perform the necessary mapping and unmapping.
+ *
+ * Note, ntfs_map_runlist_nolock() saves the state of @ctx on entry and
+ * restores it before returning.  Thus, @ctx will be left pointing to the same
+ * attribute on return as on entry.  However, the actual pointers in @ctx may
+ * point to different memory locations on return, so you must remember to reset
+ * any cached pointers from the @ctx, i.e. after the call to
+ * ntfs_map_runlist_nolock(), you will probably want to do:
+ *	m = ctx->mrec;
+ *	a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
+ *
  * Return 0 on success and -errno on error.  There is one special error code
  * which is not an error as such.  This is -ENOENT.  It means that @vcn is out
  * of bounds of the runlist.
@@ -46,19 +64,32 @@
  * Note the runlist can be NULL after this function returns if @vcn is zero and
  * the attribute has zero allocated size, i.e. there simply is no runlist.
  *
- * Locking: - The runlist must be locked for writing.
- *	    - This function modifies the runlist.
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ *	    returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
+ *	    is no longer valid, i.e. you need to either call
+ *	    ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ *	    In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ *	    why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ *	      and is locked on return.  Note the runlist will be modified.
+ *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ *	      entry and it will be left unmapped on return.
+ *	    - If @ctx is not NULL, the base mft record must be mapped on entry
+ *	      and it will be left mapped on return.
  */
-int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
+int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx)
 {
 	VCN end_vcn;
+	unsigned long flags;
 	ntfs_inode *base_ni;
 	MFT_RECORD *m;
 	ATTR_RECORD *a;
-	ntfs_attr_search_ctx *ctx;
 	runlist_element *rl;
-	unsigned long flags;
+	struct page *put_this_page = NULL;
 	int err = 0;
+	BOOL ctx_is_temporary, ctx_needs_reset;
+	ntfs_attr_search_ctx old_ctx;
 
 	ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
 			(unsigned long long)vcn);
@@ -66,20 +97,77 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
 		base_ni = ni;
 	else
 		base_ni = ni->ext.base_ntfs_ino;
-	m = map_mft_record(base_ni);
-	if (IS_ERR(m))
-		return PTR_ERR(m);
-	ctx = ntfs_attr_get_search_ctx(base_ni, m);
-	if (unlikely(!ctx)) {
-		err = -ENOMEM;
-		goto err_out;
+	if (!ctx) {
+		ctx_is_temporary = ctx_needs_reset = TRUE;
+		m = map_mft_record(base_ni);
+		if (IS_ERR(m))
+			return PTR_ERR(m);
+		ctx = ntfs_attr_get_search_ctx(base_ni, m);
+		if (unlikely(!ctx)) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+	} else {
+		VCN allocated_size_vcn;
+
+		BUG_ON(IS_ERR(ctx->mrec));
+		a = ctx->attr;
+		BUG_ON(!a->non_resident);
+		ctx_is_temporary = FALSE;
+		end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
+		read_lock_irqsave(&ni->size_lock, flags);
+		allocated_size_vcn = ni->allocated_size >>
+				ni->vol->cluster_size_bits;
+		read_unlock_irqrestore(&ni->size_lock, flags);
+		if (!a->data.non_resident.lowest_vcn && end_vcn <= 0)
+			end_vcn = allocated_size_vcn - 1;
+		/*
+		 * If we already have the attribute extent containing @vcn in
+		 * @ctx, no need to look it up again.  We slightly cheat in
+		 * that if vcn exceeds the allocated size, we will refuse to
+		 * map the runlist below, so there is definitely no need to get
+		 * the right attribute extent.
+		 */
+		if (vcn >= allocated_size_vcn || (a->type == ni->type &&
+				a->name_length == ni->name_len &&
+				!memcmp((u8*)a + le16_to_cpu(a->name_offset),
+				ni->name, ni->name_len) &&
+				sle64_to_cpu(a->data.non_resident.lowest_vcn)
+				<= vcn && end_vcn >= vcn))
+			ctx_needs_reset = FALSE;
+		else {
+			/* Save the old search context. */
+			old_ctx = *ctx;
+			/*
+			 * If the currently mapped (extent) inode is not the
+			 * base inode we will unmap it when we reinitialize the
+			 * search context which means we need to get a
+			 * reference to the page containing the mapped mft
+			 * record so we do not accidentally drop changes to the
+			 * mft record when it has not been marked dirty yet.
+			 */
+			if (old_ctx.base_ntfs_ino && old_ctx.ntfs_ino !=
+					old_ctx.base_ntfs_ino) {
+				put_this_page = old_ctx.ntfs_ino->page;
+				page_cache_get(put_this_page);
+			}
+			/*
+			 * Reinitialize the search context so we can lookup the
+			 * needed attribute extent.
+			 */
+			ntfs_attr_reinit_search_ctx(ctx);
+			ctx_needs_reset = TRUE;
+		}
 	}
-	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
-			CASE_SENSITIVE, vcn, NULL, 0, ctx);
-	if (unlikely(err)) {
-		if (err == -ENOENT)
-			err = -EIO;
-		goto err_out;
+	if (ctx_needs_reset) {
+		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+				CASE_SENSITIVE, vcn, NULL, 0, ctx);
+		if (unlikely(err)) {
+			if (err == -ENOENT)
+				err = -EIO;
+			goto err_out;
+		}
+		BUG_ON(!ctx->attr->non_resident);
 	}
 	a = ctx->attr;
 	/*
@@ -89,11 +177,9 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
 	 * ntfs_mapping_pairs_decompress() fails.
 	 */
 	end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
-	if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) {
-		read_lock_irqsave(&ni->size_lock, flags);
-		end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits;
-		read_unlock_irqrestore(&ni->size_lock, flags);
-	}
+	if (!a->data.non_resident.lowest_vcn && end_vcn == 1)
+		end_vcn = sle64_to_cpu(a->data.non_resident.allocated_size) >>
+				ni->vol->cluster_size_bits;
 	if (unlikely(vcn >= end_vcn)) {
 		err = -ENOENT;
 		goto err_out;
@@ -104,9 +190,93 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
 	else
 		ni->runlist.rl = rl;
 err_out:
-	if (likely(ctx))
-		ntfs_attr_put_search_ctx(ctx);
-	unmap_mft_record(base_ni);
+	if (ctx_is_temporary) {
+		if (likely(ctx))
+			ntfs_attr_put_search_ctx(ctx);
+		unmap_mft_record(base_ni);
+	} else if (ctx_needs_reset) {
+		/*
+		 * If there is no attribute list, restoring the search context
+		 * is accomplished simply by copying the saved context back over
+		 * the caller supplied context.  If there is an attribute list,
+		 * things are more complicated as we need to deal with mapping
+		 * of mft records and resulting potential changes in pointers.
+		 */
+		if (NInoAttrList(base_ni)) {
+			/*
+			 * If the currently mapped (extent) inode is not the
+			 * one we had before, we need to unmap it and map the
+			 * old one.
+			 */
+			if (ctx->ntfs_ino != old_ctx.ntfs_ino) {
+				/*
+				 * If the currently mapped inode is not the
+				 * base inode, unmap it.
+				 */
+				if (ctx->base_ntfs_ino && ctx->ntfs_ino !=
+						ctx->base_ntfs_ino) {
+					unmap_extent_mft_record(ctx->ntfs_ino);
+					ctx->mrec = ctx->base_mrec;
+					BUG_ON(!ctx->mrec);
+				}
+				/*
+				 * If the old mapped inode is not the base
+				 * inode, map it.
+				 */
+				if (old_ctx.base_ntfs_ino &&
+						old_ctx.ntfs_ino !=
+						old_ctx.base_ntfs_ino) {
+retry_map:
+					ctx->mrec = map_mft_record(
+							old_ctx.ntfs_ino);
+					/*
+					 * Something bad has happened.  If out
+					 * of memory retry till it succeeds.
+					 * Any other errors are fatal and we
+					 * return the error code in ctx->mrec.
+					 * Let the caller deal with it...  We
+					 * just need to fudge things so the
+					 * caller can reinit and/or put the
+					 * search context safely.
+					 */
+					if (IS_ERR(ctx->mrec)) {
+						if (PTR_ERR(ctx->mrec) ==
+								-ENOMEM) {
+							schedule();
+							goto retry_map;
+						} else
+							old_ctx.ntfs_ino =
+								old_ctx.
+								base_ntfs_ino;
+					}
+				}
+			}
+			/* Update the changed pointers in the saved context. */
+			if (ctx->mrec != old_ctx.mrec) {
+				if (!IS_ERR(ctx->mrec))
+					old_ctx.attr = (ATTR_RECORD*)(
+							(u8*)ctx->mrec +
+							((u8*)old_ctx.attr -
+							(u8*)old_ctx.mrec));
+				old_ctx.mrec = ctx->mrec;
+			}
+		}
+		/* Restore the search context to the saved one. */
+		*ctx = old_ctx;
+		/*
+		 * We drop the reference on the page we took earlier.  In the
+		 * case that IS_ERR(ctx->mrec) is true this means we might lose
+		 * some changes to the mft record that had been made between
+		 * the last time it was marked dirty/written out and now.  This
+		 * at this stage is not a problem as the mapping error is fatal
+		 * enough that the mft record cannot be written out anyway and
+		 * the caller is very likely to shutdown the whole inode
+		 * immediately and mark the volume dirty for chkdsk to pick up
+		 * the pieces anyway.
+		 */
+		if (put_this_page)
+			page_cache_release(put_this_page);
+	}
 	return err;
 }
 
@@ -122,8 +292,8 @@ err_out:
  * of bounds of the runlist.
  *
  * Locking: - The runlist must be unlocked on entry and is unlocked on return.
- *	    - This function takes the runlist lock for writing and modifies the
- *	      runlist.
+ *	    - This function takes the runlist lock for writing and may modify
+ *	      the runlist.
  */
 int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 {
@@ -133,7 +303,7 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 	/* Make sure someone else didn't do the work while we were sleeping. */
 	if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
 			LCN_RL_NOT_MAPPED))
-		err = ntfs_map_runlist_nolock(ni, vcn);
+		err = ntfs_map_runlist_nolock(ni, vcn, NULL);
 	up_write(&ni->runlist.lock);
 	return err;
 }
@@ -212,7 +382,7 @@ retry_remap:
 				goto retry_remap;
 			}
 		}
-		err = ntfs_map_runlist_nolock(ni, vcn);
+		err = ntfs_map_runlist_nolock(ni, vcn, NULL);
 		if (!write_locked) {
 			up_write(&ni->runlist.lock);
 			down_read(&ni->runlist.lock);
@@ -325,7 +495,7 @@ retry_remap:
 				goto retry_remap;
 			}
 		}
-		err = ntfs_map_runlist_nolock(ni, vcn);
+		err = ntfs_map_runlist_nolock(ni, vcn, NULL);
 		if (!write_locked) {
 			up_write(&ni->runlist.lock);
 			down_read(&ni->runlist.lock);
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index 0618ed6fd7b3..eeca8e500971 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -60,7 +60,8 @@ typedef struct {
 	ATTR_RECORD *base_attr;
 } ntfs_attr_search_ctx;
 
-extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn);
+extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn,
+		ntfs_attr_search_ctx *ctx);
 extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
 
 extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
-- 
cgit v1.2.3


From 69b41e3c0223bd38cf23e3d8f1385963089fbf22 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 4 Oct 2005 14:01:14 +0100
Subject: NTFS: Change ntfs_attr_find_vcn_nolock() to also take an optional
 attribute search context as argument.  This allows calling it with the
 mft record mapped.  Update all callers.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  |  6 ++--
 fs/ntfs/attrib.c   | 84 +++++++++++++++++++++++++++++++-----------------------
 fs/ntfs/attrib.h   |  2 +-
 fs/ntfs/lcnalloc.c |  4 +--
 fs/ntfs/mft.c      |  7 +++--
 5 files changed, 59 insertions(+), 44 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 85f797a2edad..0a361ddb3b45 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -24,8 +24,10 @@ ToDo/Notes:
 
 2.1.25-WIP
 
-	- Change ntfs_map_runlist_nolock() to also take an optional attribute
-	  search context.  This allows calling it with the mft record mapped.
+	- Change ntfs_map_runlist_nolock() and ntfs_attr_find_vcn_nolock() to
+	  also take an optional attribute search context as argument.  This
+	  allows calling these functions with the mft record mapped.  Update
+	  all callers.
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index b194197b72f7..2aafc87e9601 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -406,9 +406,9 @@ retry_remap:
 
 /**
  * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
- * @ni:			ntfs inode describing the runlist to search
- * @vcn:		vcn to find
- * @write_locked:	true if the runlist is locked for writing
+ * @ni:		ntfs inode describing the runlist to search
+ * @vcn:	vcn to find
+ * @ctx:	active attribute search context if present or NULL if not
  *
  * Find the virtual cluster number @vcn in the runlist described by the ntfs
  * inode @ni and return the address of the runlist element containing the @vcn.
@@ -416,9 +416,22 @@ retry_remap:
  * If the @vcn is not mapped yet, the attempt is made to map the attribute
  * extent containing the @vcn and the vcn to lcn conversion is retried.
  *
- * If @write_locked is true the caller has locked the runlist for writing and
- * if false for reading.
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record.  This is needed when ntfs_attr_find_vcn_nolock() encounters unmapped
+ * runlist fragments and allows their mapping.  If you do not have the mft
+ * record mapped, you can specify @ctx as NULL and ntfs_attr_find_vcn_nolock()
+ * will perform the necessary mapping and unmapping.
  *
+ * Note, ntfs_attr_find_vcn_nolock() saves the state of @ctx on entry and
+ * restores it before returning.  Thus, @ctx will be left pointing to the same
+ * attribute on return as on entry.  However, the actual pointers in @ctx may
+ * point to different memory locations on return, so you must remember to reset
+ * any cached pointers from the @ctx, i.e. after the call to
+ * ntfs_attr_find_vcn_nolock(), you will probably want to do:
+ *	m = ctx->mrec;
+ *	a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
  * Note you need to distinguish between the lcn of the returned runlist element
  * being >= 0 and LCN_HOLE.  In the later case you have to return zeroes on
  * read and allocate clusters on write.
@@ -433,22 +446,31 @@ retry_remap:
  *	-ENOMEM - Not enough memory to map runlist.
  *	-EIO	- Critical error (runlist/file is corrupt, i/o error, etc).
  *
- * Locking: - The runlist must be locked on entry and is left locked on return.
- *	    - If @write_locked is FALSE, i.e. the runlist is locked for reading,
- *	      the lock may be dropped inside the function so you cannot rely on
- *	      the runlist still being the same when this function returns.
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ *	    returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
+ *	    is no longer valid, i.e. you need to either call
+ *	    ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ *	    In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ *	    why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ *	      and is locked on return.  Note the runlist may be modified when
+ *	      needed runlist fragments need to be mapped.
+ *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ *	      entry and it will be left unmapped on return.
+ *	    - If @ctx is not NULL, the base mft record must be mapped on entry
+ *	      and it will be left mapped on return.
  */
 runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
-		const BOOL write_locked)
+		ntfs_attr_search_ctx *ctx)
 {
 	unsigned long flags;
 	runlist_element *rl;
 	int err = 0;
 	BOOL is_retry = FALSE;
 
-	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
-			ni->mft_no, (unsigned long long)vcn,
-			write_locked ? "write" : "read");
+	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.",
+			ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out");
 	BUG_ON(!ni);
 	BUG_ON(!NInoNonResident(ni));
 	BUG_ON(vcn < 0);
@@ -482,33 +504,22 @@ retry_remap:
 	}
 	if (!err && !is_retry) {
 		/*
-		 * The @vcn is in an unmapped region, map the runlist and
-		 * retry.
+		 * A NULL @ctx is fine (a temporary one is set up for us), but
+		 * a supplied @ctx that went invalid cannot map the region.
 		 */
-		if (!write_locked) {
-			up_read(&ni->runlist.lock);
-			down_write(&ni->runlist.lock);
-			if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
-					LCN_RL_NOT_MAPPED)) {
-				up_write(&ni->runlist.lock);
-				down_read(&ni->runlist.lock);
+		if (ctx && IS_ERR(ctx->mrec))
+			err = PTR_ERR(ctx->mrec);
+		else {
+			/*
+			 * The @vcn is in an unmapped region, map the runlist
+			 * and retry.
+			 */
+			err = ntfs_map_runlist_nolock(ni, vcn, ctx);
+			if (likely(!err)) {
+				is_retry = TRUE;
 				goto retry_remap;
 			}
 		}
-		err = ntfs_map_runlist_nolock(ni, vcn, NULL);
-		if (!write_locked) {
-			up_write(&ni->runlist.lock);
-			down_read(&ni->runlist.lock);
-		}
-		if (likely(!err)) {
-			is_retry = TRUE;
-			goto retry_remap;
-		}
-		/*
-		 * -EINVAL coming from a failed mapping attempt is equivalent
-		 * to i/o error for us as it should not happen in our code
-		 * paths.
-		 */
 		if (err == -EINVAL)
 			err = -EIO;
 	} else if (!err)
@@ -1181,6 +1192,7 @@ int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
 	ntfs_inode *base_ni;
 
 	ntfs_debug("Entering.");
+	BUG_ON(IS_ERR(ctx->mrec));
 	if (ctx->base_ntfs_ino)
 		base_ni = ctx->base_ntfs_ino;
 	else
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index eeca8e500971..62f76258d9c3 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -68,7 +68,7 @@ extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
 		const BOOL write_locked);
 
 extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni,
-		const VCN vcn, const BOOL write_locked);
+		const VCN vcn, ntfs_attr_search_ctx *ctx);
 
 int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
 		const u32 name_len, const IGNORE_CASE_BOOL ic,
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 5af3bf0b7eee..8e60c47fafac 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -839,7 +839,7 @@ s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
 
 	total_freed = real_freed = 0;
 
-	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, TRUE);
+	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, NULL);
 	if (IS_ERR(rl)) {
 		if (!is_rollback)
 			ntfs_error(vol->sb, "Failed to find first runlist "
@@ -893,7 +893,7 @@ s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
 
 			/* Attempt to map runlist. */
 			vcn = rl->vcn;
-			rl = ntfs_attr_find_vcn_nolock(ni, vcn, TRUE);
+			rl = ntfs_attr_find_vcn_nolock(ni, vcn, NULL);
 			if (IS_ERR(rl)) {
 				err = PTR_ERR(rl);
 				if (!is_rollback)
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index b011369b5956..15df34f62038 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -49,7 +49,8 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
 	ntfs_volume *vol = ni->vol;
 	struct inode *mft_vi = vol->mft_ino;
 	struct page *page;
-	unsigned long index, ofs, end_index;
+	unsigned long index, end_index;
+	unsigned ofs;
 
 	BUG_ON(ni->page);
 	/*
@@ -1308,7 +1309,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 	ll = mftbmp_ni->allocated_size;
 	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
 	rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
-			(ll - 1) >> vol->cluster_size_bits, TRUE);
+			(ll - 1) >> vol->cluster_size_bits, NULL);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
 		up_write(&mftbmp_ni->runlist.lock);
 		ntfs_error(vol->sb, "Failed to determine last allocated "
@@ -1738,7 +1739,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 	ll = mft_ni->allocated_size;
 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
 	rl = ntfs_attr_find_vcn_nolock(mft_ni,
-			(ll - 1) >> vol->cluster_size_bits, TRUE);
+			(ll - 1) >> vol->cluster_size_bits, NULL);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
 		up_write(&mft_ni->runlist.lock);
 		ntfs_error(vol->sb, "Failed to determine last allocated "
-- 
cgit v1.2.3


From 511bea5ea2b2b330e67c9e58ffb5027caebf9052 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 4 Oct 2005 14:24:21 +0100
Subject: NTFS: - Change {__,}ntfs_cluster_free() to also take an optional
 attribute search context as argument.  This allows calling it with
 the mft record mapped.  Update all callers. - Fix potential
 deadlock in ntfs_mft_data_extend_allocation_nolock() error handling by
 passing in the active search context when calling ntfs_cluster_free().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  | 11 +++++++----
 fs/ntfs/lcnalloc.c | 41 +++++++++++++++++++++++++++++++++++------
 fs/ntfs/lcnalloc.h | 40 +++++++++++++++++++++++++++++++++++-----
 fs/ntfs/mft.c      | 13 +++++++++----
 4 files changed, 86 insertions(+), 19 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 0a361ddb3b45..6e4f44eed6fa 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -24,10 +24,13 @@ ToDo/Notes:
 
 2.1.25-WIP
 
-	- Change ntfs_map_runlist_nolock() and ntfs_attr_find_vcn_nolock() to
-	  also take an optional attribute search context as argument.  This
-	  allows calling these functions with the mft record mapped.  Update
-	  all callers.
+	- Change ntfs_map_runlist_nolock(), ntfs_attr_find_vcn_nolock() and
+	  {__,}ntfs_cluster_free() to also take an optional attribute search
+	  context as argument.  This allows calling these functions with the
+	  mft record mapped.  Update all callers.
+	- Fix potential deadlock in ntfs_mft_data_extend_allocation_nolock()
+	  error handling by passing in the active search context when calling
+	  ntfs_cluster_free().
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 8e60c47fafac..75313f4307e3 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -782,6 +782,7 @@ out:
  * @ni:		ntfs inode whose runlist describes the clusters to free
  * @start_vcn:	vcn in the runlist of @ni at which to start freeing clusters
  * @count:	number of clusters to free or -1 for all clusters
+ * @ctx:	active attribute search context if present or NULL if not
  * @is_rollback:	true if this is a rollback operation
  *
  * Free @count clusters starting at the cluster @start_vcn in the runlist
@@ -791,15 +792,39 @@ out:
  * deallocated.  Thus, to completely free all clusters in a runlist, use
  * @start_vcn = 0 and @count = -1.
  *
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record.  This is needed when __ntfs_cluster_free() encounters unmapped
+ * runlist fragments and allows their mapping.  If you do not have the mft
+ * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will
+ * perform the necessary mapping and unmapping.
+ *
+ * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it
+ * before returning.  Thus, @ctx will be left pointing to the same attribute on
+ * return as on entry.  However, the actual pointers in @ctx may point to
+ * different memory locations on return, so you must remember to reset any
+ * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(),
+ * you will probably want to do:
+ *	m = ctx->mrec;
+ *	a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
+ *
  * @is_rollback should always be FALSE, it is for internal use to rollback
  * errors.  You probably want to use ntfs_cluster_free() instead.
  *
- * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller
- * has to deal with it later.
+ * Note, __ntfs_cluster_free() does not modify the runlist, so you have to
+ * remove from the runlist or mark sparse the freed runs later.
  *
  * Return the number of deallocated clusters (not counting sparse ones) on
  * success and -errno on error.
  *
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ *	    returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
+ *	    is no longer valid, i.e. you need to either call
+ *	    ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ *	    In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ *	    why the mapping of the old inode failed.
+ *
  * Locking: - The runlist described by @ni must be locked for writing on entry
  *	      and is locked on return.  Note the runlist may be modified when
  *	      needed runlist fragments need to be mapped.
@@ -807,9 +832,13 @@ out:
  *	      on return.
  *	    - This function takes the volume lcn bitmap lock for writing and
  *	      modifies the bitmap contents.
+ *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ *	      entry and it will be left unmapped on return.
+ *	    - If @ctx is not NULL, the base mft record must be mapped on entry
+ *	      and it will be left mapped on return.
  */
 s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
-		const BOOL is_rollback)
+		ntfs_attr_search_ctx *ctx, const BOOL is_rollback)
 {
 	s64 delta, to_free, total_freed, real_freed;
 	ntfs_volume *vol;
@@ -839,7 +868,7 @@ s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
 
 	total_freed = real_freed = 0;
 
-	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, NULL);
+	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
 	if (IS_ERR(rl)) {
 		if (!is_rollback)
 			ntfs_error(vol->sb, "Failed to find first runlist "
@@ -893,7 +922,7 @@ s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
 
 			/* Attempt to map runlist. */
 			vcn = rl->vcn;
-			rl = ntfs_attr_find_vcn_nolock(ni, vcn, NULL);
+			rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx);
 			if (IS_ERR(rl)) {
 				err = PTR_ERR(rl);
 				if (!is_rollback)
@@ -961,7 +990,7 @@ err_out:
 	 * If rollback fails, set the volume errors flag, emit an error
 	 * message, and return the error code.
 	 */
-	delta = __ntfs_cluster_free(ni, start_vcn, total_freed, TRUE);
+	delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, TRUE);
 	if (delta < 0) {
 		ntfs_error(vol->sb, "Failed to rollback (error %i).  Leaving "
 				"inconsistent metadata!  Unmount and run "
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h
index a6a8827882e7..aa0518509cd3 100644
--- a/fs/ntfs/lcnalloc.h
+++ b/fs/ntfs/lcnalloc.h
@@ -27,6 +27,7 @@
 
 #include <linux/fs.h>
 
+#include "attrib.h"
 #include "types.h"
 #include "inode.h"
 #include "runlist.h"
@@ -44,13 +45,14 @@ extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol,
 		const NTFS_CLUSTER_ALLOCATION_ZONES zone);
 
 extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
-		s64 count, const BOOL is_rollback);
+		s64 count, ntfs_attr_search_ctx *ctx, const BOOL is_rollback);
 
 /**
  * ntfs_cluster_free - free clusters on an ntfs volume
  * @ni:		ntfs inode whose runlist describes the clusters to free
  * @start_vcn:	vcn in the runlist of @ni at which to start freeing clusters
  * @count:	number of clusters to free or -1 for all clusters
+ * @ctx:	active attribute search context if present or NULL if not
  *
  * Free @count clusters starting at the cluster @start_vcn in the runlist
  * described by the ntfs inode @ni.
@@ -59,12 +61,36 @@ extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
  * deallocated.  Thus, to completely free all clusters in a runlist, use
  * @start_vcn = 0 and @count = -1.
  *
- * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller
- * has to deal with it later.
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record.  This is needed when ntfs_cluster_free() encounters unmapped runlist
+ * fragments and allows their mapping.  If you do not have the mft record
+ * mapped, you can specify @ctx as NULL and ntfs_cluster_free() will perform
+ * the necessary mapping and unmapping.
+ *
+ * Note, ntfs_cluster_free() saves the state of @ctx on entry and restores it
+ * before returning.  Thus, @ctx will be left pointing to the same attribute on
+ * return as on entry.  However, the actual pointers in @ctx may point to
+ * different memory locations on return, so you must remember to reset any
+ * cached pointers from the @ctx, i.e. after the call to ntfs_cluster_free(),
+ * you will probably want to do:
+ *	m = ctx->mrec;
+ *	a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
+ *
+ * Note, ntfs_cluster_free() does not modify the runlist, so you have to remove
+ * from the runlist or mark sparse the freed runs later.
  *
  * Return the number of deallocated clusters (not counting sparse ones) on
  * success and -errno on error.
  *
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ *	    returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
+ *	    is no longer valid, i.e. you need to either call
+ *	    ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ *	    In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ *	    why the mapping of the old inode failed.
+ *
  * Locking: - The runlist described by @ni must be locked for writing on entry
  *	      and is locked on return.  Note the runlist may be modified when
  *	      needed runlist fragments need to be mapped.
@@ -72,11 +98,15 @@ extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
  *	      on return.
  *	    - This function takes the volume lcn bitmap lock for writing and
  *	      modifies the bitmap contents.
+ *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ *	      entry and it will be left unmapped on return.
+ *	    - If @ctx is not NULL, the base mft record must be mapped on entry
+ *	      and it will be left mapped on return.
  */
 static inline s64 ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
-		s64 count)
+		s64 count, ntfs_attr_search_ctx *ctx)
 {
-	return __ntfs_cluster_free(ni, start_vcn, count, FALSE);
+	return __ntfs_cluster_free(ni, start_vcn, count, ctx, FALSE);
 }
 
 extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 15df34f62038..5577fc6e190f 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1952,20 +1952,21 @@ restore_undo_alloc:
 		NVolSetErrors(vol);
 		return ret;
 	}
-	a = ctx->attr;
-	a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1);
+	ctx->attr->data.non_resident.highest_vcn =
+			cpu_to_sle64(old_last_vcn - 1);
 undo_alloc:
-	if (ntfs_cluster_free(mft_ni, old_last_vcn, -1) < 0) {
+	if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) {
 		ntfs_error(vol->sb, "Failed to free clusters from mft data "
 				"attribute.%s", es);
 		NVolSetErrors(vol);
 	}
+	a = ctx->attr;
 	if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) {
 		ntfs_error(vol->sb, "Failed to truncate mft data attribute "
 				"runlist.%s", es);
 		NVolSetErrors(vol);
 	}
-	if (mp_rebuilt) {
+	if (mp_rebuilt && !IS_ERR(ctx->mrec)) {
 		if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
 				a->data.non_resident.mapping_pairs_offset),
 				old_alen - le16_to_cpu(
@@ -1982,6 +1983,10 @@ undo_alloc:
 		}
 		flush_dcache_mft_record_page(ctx->ntfs_ino);
 		mark_mft_record_dirty(ctx->ntfs_ino);
+	} else if (IS_ERR(ctx->mrec)) {
+		ntfs_error(vol->sb, "Failed to restore attribute search "
+				"context.%s", es);
+		NVolSetErrors(vol);
 	}
 	if (ctx)
 		ntfs_attr_put_search_ctx(ctx);
-- 
cgit v1.2.3


From fc0fa7dc7d243afabdb3fb6a11d59a944a9c91f8 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 4 Oct 2005 14:36:56 +0100
Subject: NTFS: - Change ntfs_cluster_alloc() to take an extra boolean
 parameter         specifying whether the clusters are being allocated to
 extend an         attribute or to fill a hole.       - Change
 ntfs_attr_make_non_resident() to call ntfs_cluster_alloc()         with
 @is_extension set to TRUE and remove the runlist terminator         fixup
 code as this is now done by ntfs_cluster_alloc().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  |  6 ++++++
 fs/ntfs/attrib.c   | 10 +---------
 fs/ntfs/lcnalloc.c | 15 ++++++++++++---
 fs/ntfs/lcnalloc.h |  3 ++-
 fs/ntfs/mft.c      |  6 ++++--
 5 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 6e4f44eed6fa..aad2a3f2d1f8 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -31,6 +31,12 @@ ToDo/Notes:
 	- Fix potential deadlock in ntfs_mft_data_extend_allocation_nolock()
 	  error handling by passing in the active search context when calling
 	  ntfs_cluster_free().
+	- Change ntfs_cluster_alloc() to take an extra boolean parameter
+	  specifying whether the clusters are being allocated to extend an
+	  attribute or to fill a hole.
+	- Change ntfs_attr_make_non_resident() to call ntfs_cluster_alloc()
+	  with @is_extension set to TRUE and remove the runlist terminator
+	  fixup code as this is now done by ntfs_cluster_alloc().
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 2aafc87e9601..33e689f82a55 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1566,8 +1566,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 	new_size = (i_size_read(vi) + vol->cluster_size - 1) &
 			~(vol->cluster_size - 1);
 	if (new_size > 0) {
-		runlist_element *rl2;
-
 		/*
 		 * Will need the page later and since the page lock nests
 		 * outside all ntfs locks, we need to get the page now.
@@ -1578,7 +1576,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 			return -ENOMEM;
 		/* Start by allocating clusters to hold the attribute value. */
 		rl = ntfs_cluster_alloc(vol, 0, new_size >>
-				vol->cluster_size_bits, -1, DATA_ZONE);
+				vol->cluster_size_bits, -1, DATA_ZONE, TRUE);
 		if (IS_ERR(rl)) {
 			err = PTR_ERR(rl);
 			ntfs_debug("Failed to allocate cluster%s, error code "
@@ -1587,12 +1585,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 					err);
 			goto page_err_out;
 		}
-		/* Change the runlist terminator to LCN_ENOENT. */
-		rl2 = rl;
-		while (rl2->length)
-			rl2++;
-		BUG_ON(rl2->lcn != LCN_RL_NOT_MAPPED);
-		rl2->lcn = LCN_ENOENT;
 	} else {
 		rl = NULL;
 		page = NULL;
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 75313f4307e3..29cabf93d2d2 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -76,6 +76,7 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
  * @count:	number of clusters to allocate
  * @start_lcn:	starting lcn at which to allocate the clusters (or -1 if none)
  * @zone:	zone from which to allocate the clusters
+ * @is_extension:	if TRUE, this is an attribute extension
  *
  * Allocate @count clusters preferably starting at cluster @start_lcn or at the
  * current allocator position if @start_lcn is -1, on the mounted ntfs volume
@@ -86,6 +87,13 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
  * @start_vcn specifies the vcn of the first allocated cluster.  This makes
  * merging the resulting runlist with the old runlist easier.
  *
+ * If @is_extension is TRUE, the caller is allocating clusters to extend an
+ * attribute and if it is FALSE, the caller is allocating clusters to fill a
+ * hole in an attribute.  Practically the difference is that if @is_extension
+ * is TRUE the returned runlist will be terminated with LCN_ENOENT and if
+ * @is_extension is FALSE the runlist will be terminated with
+ * LCN_RL_NOT_MAPPED.
+ *
  * You need to check the return value with IS_ERR().  If this is false, the
  * function was successful and the return value is a runlist describing the
  * allocated cluster(s).  If IS_ERR() is true, the function failed and
@@ -137,7 +145,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
  */
 runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 		const s64 count, const LCN start_lcn,
-		const NTFS_CLUSTER_ALLOCATION_ZONES zone)
+		const NTFS_CLUSTER_ALLOCATION_ZONES zone,
+		const BOOL is_extension)
 {
 	LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
 	LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
@@ -310,7 +319,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 				continue;
 			}
 			bit = 1 << (lcn & 7);
-			ntfs_debug("bit %i.", bit);
+			ntfs_debug("bit 0x%x.", bit);
 			/* If the bit is already set, go onto the next one. */
 			if (*byte & bit) {
 				lcn++;
@@ -729,7 +738,7 @@ out:
 	/* Add runlist terminator element. */
 	if (likely(rl)) {
 		rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
-		rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
+		rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
 		rl[rlpos].length = 0;
 	}
 	if (likely(page && !IS_ERR(page))) {
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h
index aa0518509cd3..72cbca7003b2 100644
--- a/fs/ntfs/lcnalloc.h
+++ b/fs/ntfs/lcnalloc.h
@@ -42,7 +42,8 @@ typedef enum {
 
 extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol,
 		const VCN start_vcn, const s64 count, const LCN start_lcn,
-		const NTFS_CLUSTER_ALLOCATION_ZONES zone);
+		const NTFS_CLUSTER_ALLOCATION_ZONES zone,
+		const BOOL is_extension);
 
 extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
 		s64 count, ntfs_attr_search_ctx *ctx, const BOOL is_rollback);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 5577fc6e190f..0c65cbb8c5cf 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1355,7 +1355,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 		up_write(&vol->lcnbmp_lock);
 		ntfs_unmap_page(page);
 		/* Allocate a cluster from the DATA_ZONE. */
-		rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE);
+		rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE,
+				TRUE);
 		if (IS_ERR(rl2)) {
 			up_write(&mftbmp_ni->runlist.lock);
 			ntfs_error(vol->sb, "Failed to allocate a cluster for "
@@ -1780,7 +1781,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 			nr > min_nr ? "default" : "minimal", (long long)nr);
 	old_last_vcn = rl[1].vcn;
 	do {
-		rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE);
+		rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE,
+				TRUE);
 		if (likely(!IS_ERR(rl2)))
 			break;
 		if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) {
-- 
cgit v1.2.3


From 8925d4f0d3479b9c5ed7e49acc648beccca95f21 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 4 Oct 2005 14:48:20 +0100
Subject: NTFS: Change ntfs_attr_make_non_resident to take the attribute value
 size       as an extra parameter.  This is needed since we need to know the
 size       before we can map the mft record and our callers always know it. 
 The       reason we cannot simply read the size from the vfs inode i_size is 
      that this is not necessarily uptodate.  This happens when      
 ntfs_attr_make_non_resident() is called in the ->truncate call path.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  6 ++++++
 fs/ntfs/attrib.c  | 13 ++++++++++---
 fs/ntfs/attrib.h  |  2 +-
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index aad2a3f2d1f8..60ba3c5cb2ea 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -37,6 +37,12 @@ ToDo/Notes:
 	- Change ntfs_attr_make_non_resident() to call ntfs_cluster_alloc()
 	  with @is_extension set to TRUE and remove the runlist terminator
 	  fixup code as this is now done by ntfs_cluster_alloc().
+	- Change ntfs_attr_make_non_resident to take the attribute value size
+	  as an extra parameter.  This is needed since we need to know the size
+	  before we can map the mft record and our callers always know it.  The
+	  reason we cannot simply read the size from the vfs inode i_size is
+	  that this is not necessarily uptodate.  This happens when
+	  ntfs_attr_make_non_resident() is called in the ->truncate call path.
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 33e689f82a55..380f70a5f2e1 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1501,10 +1501,17 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
 /**
  * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
  * @ni:		ntfs inode describing the attribute to convert
+ * @data_size:	size of the resident data to copy to the non-resident attribute
  *
  * Convert the resident ntfs attribute described by the ntfs inode @ni to a
  * non-resident one.
  *
+ * @data_size must be equal to the attribute value size.  This is needed since
+ * we need to know the size before we can map the mft record and our callers
+ * always know it.  The reason we cannot simply read the size from the vfs
+ * inode i_size is that this is not necessarily uptodate.  This happens when
+ * ntfs_attr_make_non_resident() is called in the ->truncate call path(s).
+ *
  * Return 0 on success and -errno on error.  The following error return codes
  * are defined:
  *	-EPERM	- The attribute is not allowed to be non-resident.
@@ -1525,7 +1532,7 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
  *
  * Locking: - The caller must hold i_sem on the inode.
  */
-int ntfs_attr_make_non_resident(ntfs_inode *ni)
+int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
 {
 	s64 new_size;
 	struct inode *vi = VFS_I(ni);
@@ -1563,7 +1570,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 	 * The size needs to be aligned to a cluster boundary for allocation
 	 * purposes.
 	 */
-	new_size = (i_size_read(vi) + vol->cluster_size - 1) &
+	new_size = (data_size + vol->cluster_size - 1) &
 			~(vol->cluster_size - 1);
 	if (new_size > 0) {
 		/*
@@ -1647,7 +1654,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 	 * attribute value.
 	 */
 	attr_size = le32_to_cpu(a->data.resident.value_length);
-	BUG_ON(attr_size != i_size_read(vi));
+	BUG_ON(attr_size != data_size);
 	if (page && !PageUptodate(page)) {
 		kaddr = kmap_atomic(page, KM_USER0);
 		memcpy(kaddr, (u8*)a +
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index 62f76258d9c3..a959af9cef12 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -103,7 +103,7 @@ extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
 extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
 		const u32 new_size);
 
-extern int ntfs_attr_make_non_resident(ntfs_inode *ni);
+extern int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size);
 
 extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt,
 		const u8 val);
-- 
cgit v1.2.3


From 2a6fc4e1b0f7d2ec3711d5b1782fb30f78cca765 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 4 Oct 2005 14:57:15 +0100
Subject: NTFS: Fix ntfs_attr_make_non_resident() to update the vfs inode
 i_blocks       which is zero for a resident attribute but should no longer be
 zero       once the attribute is non-resident as it then has real clusters   
    allocated.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 4 ++++
 fs/ntfs/attrib.c  | 4 +++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 60ba3c5cb2ea..045beda82942 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -43,6 +43,10 @@ ToDo/Notes:
 	  reason we cannot simply read the size from the vfs inode i_size is
 	  that this is not necessarily uptodate.  This happens when
 	  ntfs_attr_make_non_resident() is called in the ->truncate call path.
+	- Fix ntfs_attr_make_non_resident() to update the vfs inode i_blocks
+	  which is zero for a resident attribute but should no longer be zero
+	  once the attribute is non-resident as it then has real clusters
+	  allocated.
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 380f70a5f2e1..8821e2d088b7 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1719,7 +1719,9 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
 				ffs(ni->itype.compressed.block_size) - 1;
 		ni->itype.compressed.block_clusters = 1U <<
 				a->data.non_resident.compression_unit;
-	}
+		vi->i_blocks = ni->itype.compressed.size >> 9;
+	} else
+		vi->i_blocks = ni->allocated_size >> 9;
 	write_unlock_irqrestore(&ni->size_lock, flags);
 	/*
 	 * This needs to be last since the address space operations ->readpage
-- 
cgit v1.2.3


From 2d86829b846d1447a6ab5af4060fc9f301521317 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 4 Oct 2005 15:18:56 +0100
Subject: NTFS: Add fs/ntfs/attrib.[hc]::ntfs_attr_extend_allocation(), a
 function to       extend the allocation of an attribute.  Optionally, the
 data size,       but not the initialized size can be extended, too.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |   3 +
 fs/ntfs/attrib.c  | 634 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ntfs/attrib.h  |   3 +
 3 files changed, 640 insertions(+)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 045beda82942..6c5bdfbb7bbf 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -47,6 +47,9 @@ ToDo/Notes:
 	  which is zero for a resident attribute but should no longer be zero
 	  once the attribute is non-resident as it then has real clusters
 	  allocated.
+	- Add fs/ntfs/attrib.[hc]::ntfs_attr_extend_allocation(), a function to
+	  extend the allocation of an attribute.  Optionally, the data size,
+	  but not the initialized size can be extended, too.
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 8821e2d088b7..bc25e88ad468 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1834,6 +1834,640 @@ page_err_out:
 	return err;
 }
 
+/**
+ * ntfs_attr_extend_allocation - extend the allocated space of an attribute
+ * @ni:			ntfs inode of the attribute whose allocation to extend
+ * @new_alloc_size:	new size in bytes to which to extend the allocation to
+ * @new_data_size:	new size in bytes to which to extend the data to
+ * @data_start:		beginning of region which is required to be non-sparse
+ *
+ * Extend the allocated space of an attribute described by the ntfs inode @ni
+ * to @new_alloc_size bytes.  If @data_start is -1, the whole extension may be
+ * implemented as a hole in the file (as long as both the volume and the ntfs
+ * inode @ni have sparse support enabled).  If @data_start is >= 0, then the
+ * region between the old allocated size and @data_start - 1 may be made sparse
+ * but the regions between @data_start and @new_alloc_size must be backed by
+ * actual clusters.
+ *
+ * If @new_data_size is -1, it is ignored.  If it is >= 0, then the data size
+ * of the attribute is extended to @new_data_size.  Note that the i_size of the
+ * vfs inode is not updated.  Only the data size in the base attribute record
+ * is updated.  The caller has to update i_size separately if this is required.
+ * WARNING: It is a BUG() for @new_data_size to be smaller than the old data
+ * size as well as for @new_data_size to be greater than @new_alloc_size.
+ *
+ * For resident attributes this involves resizing the attribute record and if
+ * necessary moving it and/or other attributes into extent mft records and/or
+ * converting the attribute to a non-resident attribute which in turn involves
+ * extending the allocation of a non-resident attribute as described below.
+ *
+ * For non-resident attributes this involves allocating clusters in the data
+ * zone on the volume (except for regions that are being made sparse) and
+ * extending the run list to describe the allocated clusters as well as
+ * updating the mapping pairs array of the attribute.  This in turn involves
+ * resizing the attribute record and if necessary moving it and/or other
+ * attributes into extent mft records and/or splitting the attribute record
+ * into multiple extent attribute records.
+ *
+ * Also, the attribute list attribute is updated if present and in some of the
+ * above cases (the ones where extent mft records/attributes come into play),
+ * an attribute list attribute is created if not already present.
+ *
+ * Return the new allocated size on success and -errno on error.  In the case
+ * that an error is encountered but a partial extension at least up to
+ * @data_start (if present) is possible, the allocation is partially extended
+ * and this is returned.  This means the caller must check the returned size to
+ * determine if the extension was partial.  If @data_start is -1 then partial
+ * allocations are not performed.
+ *
+ * WARNING: Do not call ntfs_attr_extend_allocation() for $MFT/$DATA.
+ *
+ * Locking: This function takes the runlist lock of @ni for writing as well as
+ * locking the mft record of the base ntfs inode.  These locks are maintained
+ * throughout execution of the function.  These locks are required so that the
+ * attribute can be resized safely and so that it can for example be converted
+ * from resident to non-resident safely.
+ *
+ * TODO: At present attribute list attribute handling is not implemented.
+ *
+ * TODO: At present it is not safe to call this function for anything other
+ * than the $DATA attribute(s) of an uncompressed and unencrypted file.
+ */
+s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
+		const s64 new_data_size, const s64 data_start)
+{
+	VCN vcn;
+	s64 ll, allocated_size, start = data_start;
+	struct inode *vi = VFS_I(ni);
+	ntfs_volume *vol = ni->vol;
+	ntfs_inode *base_ni;
+	MFT_RECORD *m;
+	ATTR_RECORD *a;
+	ntfs_attr_search_ctx *ctx;
+	runlist_element *rl, *rl2;
+	unsigned long flags;
+	int err, mp_size;
+	u32 attr_len = 0; /* Silence stupid gcc warning. */
+	BOOL mp_rebuilt;
+
+#ifdef NTFS_DEBUG
+	read_lock_irqsave(&ni->size_lock, flags);
+	allocated_size = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
+			"old_allocated_size 0x%llx, "
+			"new_allocated_size 0x%llx, new_data_size 0x%llx, "
+			"data_start 0x%llx.", vi->i_ino,
+			(unsigned)le32_to_cpu(ni->type),
+			(unsigned long long)allocated_size,
+			(unsigned long long)new_alloc_size,
+			(unsigned long long)new_data_size,
+			(unsigned long long)start);
+#endif
+retry_extend:
+	/*
+	 * For non-resident attributes, @start and @new_alloc_size need to be
+	 * aligned to cluster boundaries for allocation purposes.
+	 */
+	if (NInoNonResident(ni)) {
+		if (start > 0)
+			start &= ~(s64)vol->cluster_size_mask;
+		new_alloc_size = (new_alloc_size + vol->cluster_size - 1) &
+				~(s64)vol->cluster_size_mask;
+	}
+	BUG_ON(new_data_size >= 0 && new_data_size > new_alloc_size);
+	/* Check if new size is allowed in $AttrDef. */
+	err = ntfs_attr_size_bounds_check(vol, ni->type, new_alloc_size);
+	if (unlikely(err)) {
+		/* Only emit errors when the write will fail completely. */
+		read_lock_irqsave(&ni->size_lock, flags);
+		allocated_size = ni->allocated_size;
+		read_unlock_irqrestore(&ni->size_lock, flags);
+		if (start < 0 || start >= allocated_size) {
+			if (err == -ERANGE) {
+				ntfs_error(vol->sb, "Cannot extend allocation "
+						"of inode 0x%lx, attribute "
+						"type 0x%x, because the new "
+						"allocation would exceed the "
+						"maximum allowed size for "
+						"this attribute type.",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type));
+			} else {
+				ntfs_error(vol->sb, "Cannot extend allocation "
+						"of inode 0x%lx, attribute "
+						"type 0x%x, because this "
+						"attribute type is not "
+						"defined on the NTFS volume.  "
+						"Possible corruption!  You "
+						"should run chkdsk!",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type));
+			}
+		}
+		/* Translate error code to be POSIX conformant for write(2). */
+		if (err == -ERANGE)
+			err = -EFBIG;
+		else
+			err = -EIO;
+		return err;
+	}
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	/*
+	 * We will be modifying both the runlist (if non-resident) and the mft
+	 * record so lock them both down.
+	 */
+	down_write(&ni->runlist.lock);
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		m = NULL;
+		ctx = NULL;
+		goto err_out;
+	}
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	read_lock_irqsave(&ni->size_lock, flags);
+	allocated_size = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	/*
+	 * If non-resident, seek to the last extent.  If resident, there is
+	 * only one extent, so seek to that.
+	 */
+	vcn = NInoNonResident(ni) ? allocated_size >> vol->cluster_size_bits :
+			0;
+	/*
+	 * Abort if someone did the work whilst we waited for the locks.  If we
+	 * just converted the attribute from resident to non-resident it is
+	 * likely that exactly this has happened already.  We cannot quite
+	 * abort if we need to update the data size.
+	 */
+	if (unlikely(new_alloc_size <= allocated_size)) {
+		ntfs_debug("Allocated size already exceeds requested size.");
+		new_alloc_size = allocated_size;
+		if (new_data_size < 0)
+			goto done;
+		/*
+		 * We want the first attribute extent so that we can update the
+		 * data size.
+		 */
+		vcn = 0;
+	}
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, vcn, NULL, 0, ctx);
+	if (unlikely(err)) {
+		if (err == -ENOENT)
+			err = -EIO;
+		goto err_out;
+	}
+	m = ctx->mrec;
+	a = ctx->attr;
+	/* Use goto to reduce indentation. */
+	if (a->non_resident)
+		goto do_non_resident_extend;
+	BUG_ON(NInoNonResident(ni));
+	/* The total length of the attribute value. */
+	attr_len = le32_to_cpu(a->data.resident.value_length);
+	/*
+	 * Extend the attribute record to be able to store the new attribute
+	 * size.  ntfs_attr_record_resize() will not do anything if the size is
+	 * not changing.
+	 */
+	if (new_alloc_size < vol->mft_record_size &&
+			!ntfs_attr_record_resize(m, a,
+			le16_to_cpu(a->data.resident.value_offset) +
+			new_alloc_size)) {
+		/* The resize succeeded! */
+		write_lock_irqsave(&ni->size_lock, flags);
+		ni->allocated_size = le32_to_cpu(a->length) -
+				le16_to_cpu(a->data.resident.value_offset);
+		write_unlock_irqrestore(&ni->size_lock, flags);
+		if (new_data_size >= 0) {
+			BUG_ON(new_data_size < attr_len);
+			a->data.resident.value_length =
+					cpu_to_le32((u32)new_data_size);
+		}
+		goto flush_done;
+	}
+	/*
+	 * We have to drop all the locks so we can call
+	 * ntfs_attr_make_non_resident().  This could be optimised by try-
+	 * locking the first page cache page and only if that fails dropping
+	 * the locks, locking the page, and redoing all the locking and
+	 * lookups.  While this would be a huge optimisation, it is not worth
+	 * it as this is definitely a slow code path.
+	 */
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+	/*
+	 * Not enough space in the mft record, try to make the attribute
+	 * non-resident and if successful restart the extension process.
+	 */
+	err = ntfs_attr_make_non_resident(ni, attr_len);
+	if (likely(!err))
+		goto retry_extend;
+	/*
+	 * Could not make non-resident.  If this is due to this not being
+	 * permitted for this attribute type or there not being enough space,
+	 * try to make other attributes non-resident.  Otherwise fail.
+	 */
+	if (unlikely(err != -EPERM && err != -ENOSPC)) {
+		/* Only emit errors when the write will fail completely. */
+		read_lock_irqsave(&ni->size_lock, flags);
+		allocated_size = ni->allocated_size;
+		read_unlock_irqrestore(&ni->size_lock, flags);
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Cannot extend allocation of "
+					"inode 0x%lx, attribute type 0x%x, "
+					"because the conversion from resident "
+					"to non-resident attribute failed "
+					"with error code %i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+		if (err != -ENOMEM)
+			err = -EIO;
+		goto conv_err_out;
+	}
+	/* TODO: Not implemented from here, abort. */
+	read_lock_irqsave(&ni->size_lock, flags);
+	allocated_size = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (start < 0 || start >= allocated_size) {
+		if (err == -ENOSPC)
+			ntfs_error(vol->sb, "Not enough space in the mft "
+					"record/on disk for the non-resident "
+					"attribute value.  This case is not "
+					"implemented yet.");
+		else /* if (err == -EPERM) */
+			ntfs_error(vol->sb, "This attribute type may not be "
+					"non-resident.  This case is not "
+					"implemented yet.");
+	}
+	err = -EOPNOTSUPP;
+	goto conv_err_out;
+#if 0
+	// TODO: Attempt to make other attributes non-resident.
+	if (!err)
+		goto do_resident_extend;
+	/*
+	 * Both the attribute list attribute and the standard information
+	 * attribute must remain in the base inode.  Thus, if this is one of
+	 * these attributes, we have to try to move other attributes out into
+	 * extent mft records instead.
+	 */
+	if (ni->type == AT_ATTRIBUTE_LIST ||
+			ni->type == AT_STANDARD_INFORMATION) {
+		// TODO: Attempt to move other attributes into extent mft
+		// records.
+		err = -EOPNOTSUPP;
+		if (!err)
+			goto do_resident_extend;
+		goto err_out;
+	}
+	// TODO: Attempt to move this attribute to an extent mft record, but
+	// only if it is not already the only attribute in an mft record in
+	// which case there would be nothing to gain.
+	err = -EOPNOTSUPP;
+	if (!err)
+		goto do_resident_extend;
+	/* There is nothing we can do to make enough space. )-: */
+	goto err_out;
+#endif
+do_non_resident_extend:
+	BUG_ON(!NInoNonResident(ni));
+	if (new_alloc_size == allocated_size) {
+		BUG_ON(vcn);
+		goto alloc_done;
+	}
+	/*
+	 * If the data starts after the end of the old allocation, this is a
+	 * $DATA attribute and sparse attributes are enabled on the volume and
+	 * for this inode, then create a sparse region between the old
+	 * allocated size and the start of the data.  Otherwise simply proceed
+	 * with filling the whole space between the old allocated size and the
+	 * new allocated size with clusters.
+	 */
+	if ((start >= 0 && start <= allocated_size) || ni->type != AT_DATA ||
+			!NVolSparseEnabled(vol) || NInoSparseDisabled(ni))
+		goto skip_sparse;
+	// TODO: This is not implemented yet.  We just fill in with real
+	// clusters for now...
+	ntfs_debug("Inserting holes is not-implemented yet.  Falling back to "
+			"allocating real clusters instead.");
+skip_sparse:
+	rl = ni->runlist.rl;
+	if (likely(rl)) {
+		/* Seek to the end of the runlist. */
+		while (rl->length)
+			rl++;
+	}
+	/* If this attribute extent is not mapped, map it now. */
+	if (unlikely(!rl || rl->lcn == LCN_RL_NOT_MAPPED ||
+			(rl->lcn == LCN_ENOENT && rl > ni->runlist.rl &&
+			(rl-1)->lcn == LCN_RL_NOT_MAPPED))) {
+		if (!rl && !allocated_size)
+			goto first_alloc;
+		rl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
+		if (IS_ERR(rl)) {
+			err = PTR_ERR(rl);
+			if (start < 0 || start >= allocated_size)
+				ntfs_error(vol->sb, "Cannot extend allocation "
+						"of inode 0x%lx, attribute "
+						"type 0x%x, because the "
+						"mapping of a runlist "
+						"fragment failed with error "
+						"code %i.", vi->i_ino,
+						(unsigned)le32_to_cpu(ni->type),
+						err);
+			if (err != -ENOMEM)
+				err = -EIO;
+			goto err_out;
+		}
+		ni->runlist.rl = rl;
+		/* Seek to the end of the runlist. */
+		while (rl->length)
+			rl++;
+	}
+	/*
+	 * We now know the runlist of the last extent is mapped and @rl is at
+	 * the end of the runlist.  We want to begin allocating clusters
+	 * starting at the last allocated cluster to reduce fragmentation.  If
+	 * there are no valid LCNs in the attribute we let the cluster
+	 * allocator choose the starting cluster.
+	 */
+	/* If the last LCN is a hole or similar seek back to last real LCN. */
+	while (rl->lcn < 0 && rl > ni->runlist.rl)
+		rl--;
+first_alloc:
+	// FIXME: Need to implement partial allocations so at least part of the
+	// write can be performed when start >= 0.  (Needed for POSIX write(2)
+	// conformance.)
+	rl2 = ntfs_cluster_alloc(vol, allocated_size >> vol->cluster_size_bits,
+			(new_alloc_size - allocated_size) >>
+			vol->cluster_size_bits, (rl && (rl->lcn >= 0)) ?
+			rl->lcn + rl->length : -1, DATA_ZONE, TRUE);
+	if (IS_ERR(rl2)) {
+		err = PTR_ERR(rl2);
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Cannot extend allocation of "
+					"inode 0x%lx, attribute type 0x%x, "
+					"because the allocation of clusters "
+					"failed with error code %i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+		if (err != -ENOMEM && err != -ENOSPC)
+			err = -EIO;
+		goto err_out;
+	}
+	rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
+	if (IS_ERR(rl)) {
+		err = PTR_ERR(rl);
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Cannot extend allocation of "
+					"inode 0x%lx, attribute type 0x%x, "
+					"because the runlist merge failed "
+					"with error code %i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+		if (err != -ENOMEM)
+			err = -EIO;
+		if (ntfs_cluster_free_from_rl(vol, rl2)) {
+			ntfs_error(vol->sb, "Failed to release allocated "
+					"cluster(s) in error code path.  Run "
+					"chkdsk to recover the lost "
+					"cluster(s).");
+			NVolSetErrors(vol);
+		}
+		ntfs_free(rl2);
+		goto err_out;
+	}
+	ni->runlist.rl = rl;
+	ntfs_debug("Allocated 0x%llx clusters.", (long long)(new_alloc_size -
+			allocated_size) >> vol->cluster_size_bits);
+	/* Find the runlist element with which the attribute extent starts. */
+	ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
+	rl2 = ntfs_rl_find_vcn_nolock(rl, ll);
+	BUG_ON(!rl2);
+	BUG_ON(!rl2->length);
+	BUG_ON(rl2->lcn < LCN_HOLE);
+	mp_rebuilt = FALSE;
+	/* Get the size for the new mapping pairs array for this extent. */
+	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
+	if (unlikely(mp_size <= 0)) {
+		err = mp_size;
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Cannot extend allocation of "
+					"inode 0x%lx, attribute type 0x%x, "
+					"because determining the size for the "
+					"mapping pairs failed with error code "
+					"%i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+		err = -EIO;
+		goto undo_alloc;
+	}
+	/* Extend the attribute record to fit the bigger mapping pairs array. */
+	attr_len = le32_to_cpu(a->length);
+	err = ntfs_attr_record_resize(m, a, mp_size +
+			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
+	if (unlikely(err)) {
+		BUG_ON(err != -ENOSPC);
+		// TODO: Deal with this by moving this extent to a new mft
+		// record or by starting a new extent in a new mft record,
+		// possibly by extending this extent partially and filling it
+		// and creating a new extent for the remainder, or by making
+		// other attributes non-resident and/or by moving other
+		// attributes out of this mft record.
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Not enough space in the mft "
+					"record for the extended attribute "
+					"record.  This case is not "
+					"implemented yet.");
+		err = -EOPNOTSUPP;
+		goto undo_alloc;
+	}
+	mp_rebuilt = TRUE;
+	/* Generate the mapping pairs array directly into the attr record. */
+	err = ntfs_mapping_pairs_build(vol, (u8*)a +
+			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
+			mp_size, rl2, ll, -1, NULL);
+	if (unlikely(err)) {
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Cannot extend allocation of "
+					"inode 0x%lx, attribute type 0x%x, "
+					"because building the mapping pairs "
+					"failed with error code %i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+		err = -EIO;
+		goto undo_alloc;
+	}
+	/* Update the highest_vcn. */
+	a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
+			vol->cluster_size_bits) - 1);
+	/*
+	 * We now have extended the allocated size of the attribute.  Reflect
+	 * this in the ntfs_inode structure and the attribute record.
+	 */
+	if (a->data.non_resident.lowest_vcn) {
+		/*
+		 * We are not in the first attribute extent, switch to it, but
+		 * first ensure the changes will make it to disk later.
+		 */
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		mark_mft_record_dirty(ctx->ntfs_ino);
+		ntfs_attr_reinit_search_ctx(ctx);
+		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+				CASE_SENSITIVE, 0, NULL, 0, ctx);
+		if (unlikely(err))
+			goto restore_undo_alloc;
+		/* @m is not used any more so no need to set it. */
+		a = ctx->attr;
+	}
+	write_lock_irqsave(&ni->size_lock, flags);
+	ni->allocated_size = new_alloc_size;
+	a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
+	/*
+	 * FIXME: This would fail if @ni is a directory, $MFT, or an index,
+	 * since those can have sparse/compressed set.  For example can be
+	 * set compressed even though it is not compressed itself and in that
+	 * case the bit means that files are to be created compressed in the
+	 * directory...  At present this is ok as this code is only called for
+	 * regular files, and only for their $DATA attribute(s).
+	 * FIXME: The calculation is wrong if we created a hole above.  For now
+	 * it does not matter as we never create holes.
+	 */
+	if (NInoSparse(ni) || NInoCompressed(ni)) {
+		ni->itype.compressed.size += new_alloc_size - allocated_size;
+		a->data.non_resident.compressed_size =
+				cpu_to_sle64(ni->itype.compressed.size);
+		vi->i_blocks = ni->itype.compressed.size >> 9;
+	} else
+		vi->i_blocks = new_alloc_size >> 9;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+alloc_done:
+	if (new_data_size >= 0) {
+		BUG_ON(new_data_size <
+				sle64_to_cpu(a->data.non_resident.data_size));
+		a->data.non_resident.data_size = cpu_to_sle64(new_data_size);
+	}
+flush_done:
+	/* Ensure the changes make it to disk. */
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+done:
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+	ntfs_debug("Done, new_allocated_size 0x%llx.",
+			(unsigned long long)new_alloc_size);
+	return new_alloc_size;
+restore_undo_alloc:
+	if (start < 0 || start >= allocated_size)
+		ntfs_error(vol->sb, "Cannot complete extension of allocation "
+				"of inode 0x%lx, attribute type 0x%x, because "
+				"lookup of first attribute extent failed with "
+				"error code %i.", vi->i_ino,
+				(unsigned)le32_to_cpu(ni->type), err);
+	if (err == -ENOENT)
+		err = -EIO;
+	ntfs_attr_reinit_search_ctx(ctx);
+	if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
+			allocated_size >> vol->cluster_size_bits, NULL, 0,
+			ctx)) {
+		ntfs_error(vol->sb, "Failed to find last attribute extent of "
+				"attribute in error code path.  Run chkdsk to "
+				"recover.");
+		write_lock_irqsave(&ni->size_lock, flags);
+		ni->allocated_size = new_alloc_size;
+		/*
+		 * FIXME: This would fail if @ni is a directory...  See above.
+		 * FIXME: The calculation is wrong if we created a hole above.
+		 * For now it does not matter as we never create holes.
+		 */
+		if (NInoSparse(ni) || NInoCompressed(ni)) {
+			ni->itype.compressed.size += new_alloc_size -
+					allocated_size;
+			vi->i_blocks = ni->itype.compressed.size >> 9;
+		} else
+			vi->i_blocks = new_alloc_size >> 9;
+		write_unlock_irqrestore(&ni->size_lock, flags);
+		ntfs_attr_put_search_ctx(ctx);
+		unmap_mft_record(base_ni);
+		up_write(&ni->runlist.lock);
+		/*
+		 * The only thing that is now wrong is the allocated size of the
+		 * base attribute extent which chkdsk should be able to fix.
+		 */
+		NVolSetErrors(vol);
+		return err;
+	}
+	ctx->attr->data.non_resident.highest_vcn = cpu_to_sle64(
+			(allocated_size >> vol->cluster_size_bits) - 1);
+undo_alloc:
+	ll = allocated_size >> vol->cluster_size_bits;
+	if (ntfs_cluster_free(ni, ll, -1, ctx) < 0) {
+		ntfs_error(vol->sb, "Failed to release allocated cluster(s) "
+				"in error code path.  Run chkdsk to recover "
+				"the lost cluster(s).");
+		NVolSetErrors(vol);
+	}
+	m = ctx->mrec;
+	a = ctx->attr;
+	/*
+	 * If the runlist truncation fails and/or the search context is no
+	 * longer valid, we cannot resize the attribute record or build the
+	 * mapping pairs array thus we mark the inode bad so that no access to
+	 * the freed clusters can happen.
+	 */
+	if (ntfs_rl_truncate_nolock(vol, &ni->runlist, ll) || IS_ERR(m)) {
+		ntfs_error(vol->sb, "Failed to %s in error code path.  Run "
+				"chkdsk to recover.", IS_ERR(m) ?
+				"restore attribute search context" :
+				"truncate attribute runlist");
+		make_bad_inode(vi);
+		make_bad_inode(VFS_I(base_ni));
+		NVolSetErrors(vol);
+	} else if (mp_rebuilt) {
+		if (ntfs_attr_record_resize(m, a, attr_len)) {
+			ntfs_error(vol->sb, "Failed to restore attribute "
+					"record in error code path.  Run "
+					"chkdsk to recover.");
+			make_bad_inode(vi);
+			make_bad_inode(VFS_I(base_ni));
+			NVolSetErrors(vol);
+		} else /* if (success) */ {
+			if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
+					a->data.non_resident.
+					mapping_pairs_offset), attr_len -
+					le16_to_cpu(a->data.non_resident.
+					mapping_pairs_offset), rl2, ll, -1,
+					NULL)) {
+				ntfs_error(vol->sb, "Failed to restore "
+						"mapping pairs array in error "
+						"code path.  Run chkdsk to "
+						"recover.");
+				make_bad_inode(vi);
+				make_bad_inode(VFS_I(base_ni));
+				NVolSetErrors(vol);
+			}
+			flush_dcache_mft_record_page(ctx->ntfs_ino);
+			mark_mft_record_dirty(ctx->ntfs_ino);
+		}
+	}
+err_out:
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+conv_err_out:
+	ntfs_debug("Failed.  Returning error code %i.", err);
+	return err;
+}
+
 /**
  * ntfs_attr_set - fill (a part of) an attribute with a byte
  * @ni:		ntfs inode describing the attribute to fill
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index a959af9cef12..9074886b44ba 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -105,6 +105,9 @@ extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
 
 extern int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size);
 
+extern s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
+		const s64 new_data_size, const s64 data_start);
+
 extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt,
 		const u8 val);
 
-- 
cgit v1.2.3


From dd072330d1a60be11a5c284fa1e645350750a4fc Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 4 Oct 2005 15:39:02 +0100
Subject: NTFS: Implement fs/ntfs/inode.[hc]::ntfs_truncate().  It only
 supports uncompressed and unencrypted files.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  20 +--
 fs/ntfs/inode.c   | 491 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 468 insertions(+), 43 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 6c5bdfbb7bbf..70ad4be7a7fe 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -3,16 +3,14 @@ ToDo/Notes:
 	- In between ntfs_prepare/commit_write, need exclusion between
 	  simultaneous file extensions.  This is given to us by holding i_sem
 	  on the inode.  The only places in the kernel when a file is resized
-	  are prepare/commit write and truncate for both of which i_sem is
-	  held.  Just have to be careful in readpage/writepage and all other
-	  helpers not running under i_sem that we play nice...
-	  Also need to be careful with initialized_size extention in
-	  ntfs_prepare_write. Basically, just be _very_ careful in this code...
-	  UPDATE: The only things that need to be checked are read/writepage
-	  which do not hold i_sem.  Note writepage cannot change i_size but it
-	  needs to cope with a concurrent i_size change, just like readpage.
-	  Also both need to cope with concurrent changes to the other sizes,
-	  i.e. initialized/allocated/compressed size, as well.
+	  are prepare/commit write and ntfs_truncate() for both of which i_sem
+	  is held.  Just have to be careful in read-/writepage and other helpers
+	  not running under i_sem that we play nice...  Also need to be careful
+	  with initialized_size extension in ntfs_prepare_write and writepage.
+	  UPDATE: The only things that need to be checked are
+	  prepare/commit_write as well as the compressed write and the other
+	  attribute resize/write cases like index attributes, etc.  For now
+	  none of these are implemented so are safe.
 	- Implement mft.c::sync_mft_mirror_umount().  We currently will just
 	  leave the volume dirty on umount if the final iput(vol->mft_ino)
 	  causes a write of any mirrored mft records due to the mft mirror
@@ -50,6 +48,8 @@ ToDo/Notes:
 	- Add fs/ntfs/attrib.[hc]::ntfs_attr_extend_allocation(), a function to
 	  extend the allocation of an attributes.  Optionally, the data size,
 	  but not the initialized size can be extended, too.
+	- Implement fs/ntfs/inode.[hc]::ntfs_truncate().  It only supports
+	  uncompressed and unencrypted files.
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 7ec045131808..a1682342baa6 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -30,6 +30,7 @@
 #include "debug.h"
 #include "inode.h"
 #include "attrib.h"
+#include "lcnalloc.h"
 #include "malloc.h"
 #include "mft.h"
 #include "time.h"
@@ -2291,11 +2292,16 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
 
 #ifdef NTFS_RW
 
+static const char *es = "  Leaving inconsistent metadata.  Unmount and run "
+		"chkdsk.";
+
 /**
  * ntfs_truncate - called when the i_size of an ntfs inode is changed
  * @vi:		inode for which the i_size was changed
  *
- * We do not support i_size changes yet.
+ * We only support i_size changes for normal files at present, i.e. not
+ * compressed and not encrypted.  This is enforced in ntfs_setattr(), see
+ * below.
  *
  * The kernel guarantees that @vi is a regular file (S_ISREG() is true) and
  * that the change is allowed.
@@ -2306,80 +2312,499 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
  * Returns 0 on success or -errno on error.
  *
  * Called with ->i_sem held.  In all but one case ->i_alloc_sem is held for
- * writing.  The only case where ->i_alloc_sem is not held is
+ * writing.  The only case in the kernel where ->i_alloc_sem is not held is
  * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
- * with the current i_size as the offset which means that it is a noop as far
- * as ntfs_truncate() is concerned.
+ * with the current i_size as the offset.  The analogous place in NTFS is in
+ * fs/ntfs/file.c::ntfs_file_buffered_write() where we call vmtruncate() again
+ * without holding ->i_alloc_sem.
  */
 int ntfs_truncate(struct inode *vi)
 {
-	ntfs_inode *ni = NTFS_I(vi);
+	s64 new_size, old_size, nr_freed, new_alloc_size, old_alloc_size;
+	VCN highest_vcn;
+	unsigned long flags;
+	ntfs_inode *base_ni, *ni = NTFS_I(vi);
 	ntfs_volume *vol = ni->vol;
 	ntfs_attr_search_ctx *ctx;
 	MFT_RECORD *m;
 	ATTR_RECORD *a;
 	const char *te = "  Leaving file length out of sync with i_size.";
-	int err;
+	int err, mp_size, size_change, alloc_change;
+	u32 attr_len;
 
 	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
 	BUG_ON(NInoAttr(ni));
+	BUG_ON(S_ISDIR(vi->i_mode));
+	BUG_ON(NInoMstProtected(ni));
 	BUG_ON(ni->nr_extents < 0);
-	m = map_mft_record(ni);
+retry_truncate:
+	/*
+	 * Lock the runlist for writing and map the mft record to ensure it is
+	 * safe to mess with the attribute runlist and sizes.
+	 */
+	down_write(&ni->runlist.lock);
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	m = map_mft_record(base_ni);
 	if (IS_ERR(m)) {
 		err = PTR_ERR(m);
 		ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx "
 				"(error code %d).%s", vi->i_ino, err, te);
 		ctx = NULL;
 		m = NULL;
-		goto err_out;
+		goto old_bad_out;
 	}
-	ctx = ntfs_attr_get_search_ctx(ni, m);
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
 	if (unlikely(!ctx)) {
 		ntfs_error(vi->i_sb, "Failed to allocate a search context for "
 				"inode 0x%lx (not enough memory).%s",
 				vi->i_ino, te);
 		err = -ENOMEM;
-		goto err_out;
+		goto old_bad_out;
 	}
 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
 			CASE_SENSITIVE, 0, NULL, 0, ctx);
 	if (unlikely(err)) {
-		if (err == -ENOENT)
+		if (err == -ENOENT) {
 			ntfs_error(vi->i_sb, "Open attribute is missing from "
 					"mft record.  Inode 0x%lx is corrupt.  "
-					"Run chkdsk.", vi->i_ino);
-		else
+					"Run chkdsk.%s", vi->i_ino, te);
+			err = -EIO;
+		} else
 			ntfs_error(vi->i_sb, "Failed to lookup attribute in "
-					"inode 0x%lx (error code %d).",
-					vi->i_ino, err);
-		goto err_out;
+					"inode 0x%lx (error code %d).%s",
+					vi->i_ino, err, te);
+		goto old_bad_out;
 	}
+	m = ctx->mrec;
 	a = ctx->attr;
-	/* If the size has not changed there is nothing to do. */
-	if (ntfs_attr_size(a) == i_size_read(vi))
-		goto done;
-	// TODO: Implement the truncate...
-	ntfs_error(vi->i_sb, "Inode size has changed but this is not "
-			"implemented yet.  Resetting inode size to old value. "
-			" This is most likely a bug in the ntfs driver!");
-	i_size_write(vi, ntfs_attr_size(a)); 
-done:
+	/*
+	 * The i_size of the vfs inode is the new size for the attribute value.
+	 */
+	new_size = i_size_read(vi);
+	/* The current size of the attribute value is the old size. */
+	old_size = ntfs_attr_size(a);
+	/* Calculate the new allocated size. */
+	if (NInoNonResident(ni))
+		new_alloc_size = (new_size + vol->cluster_size - 1) &
+				~(s64)vol->cluster_size_mask;
+	else
+		new_alloc_size = (new_size + 7) & ~7;
+	/* The current allocated size is the old allocated size. */
+	read_lock_irqsave(&ni->size_lock, flags);
+	old_alloc_size = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	/*
+	 * The change in the file size.  This will be 0 if no change, >0 if the
+	 * size is growing, and <0 if the size is shrinking.
+	 */
+	size_change = -1;
+	if (new_size - old_size >= 0) {
+		size_change = 1;
+		if (new_size == old_size)
+			size_change = 0;
+	}
+	/* As above for the allocated size. */
+	alloc_change = -1;
+	if (new_alloc_size - old_alloc_size >= 0) {
+		alloc_change = 1;
+		if (new_alloc_size == old_alloc_size)
+			alloc_change = 0;
+	}
+	/*
+	 * If neither the size nor the allocation are being changed there is
+	 * nothing to do.
+	 */
+	if (!size_change && !alloc_change)
+		goto unm_done;
+	/* If the size is changing, check if new size is allowed in $AttrDef. */
+	if (size_change) {
+		err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
+		if (unlikely(err)) {
+			if (err == -ERANGE) {
+				ntfs_error(vol->sb, "Truncate would cause the "
+						"inode 0x%lx to %simum size "
+						"for its attribute type "
+						"(0x%x).  Aborting truncate.",
+						vi->i_ino,
+						new_size > old_size ? "exceed "
+						"the max" : "go under the min",
+						le32_to_cpu(ni->type));
+				err = -EFBIG;
+			} else {
+				ntfs_error(vol->sb, "Inode 0x%lx has unknown "
+						"attribute type 0x%x.  "
+						"Aborting truncate.",
+						vi->i_ino,
+						le32_to_cpu(ni->type));
+				err = -EIO;
+			}
+			/* Reset the vfs inode size to the old size. */
+			i_size_write(vi, old_size);
+			goto err_out;
+		}
+	}
+	if (NInoCompressed(ni) || NInoEncrypted(ni)) {
+		ntfs_warning(vi->i_sb, "Changes in inode size are not "
+				"supported yet for %s files, ignoring.",
+				NInoCompressed(ni) ? "compressed" :
+				"encrypted");
+		err = -EOPNOTSUPP;
+		goto bad_out;
+	}
+	if (a->non_resident)
+		goto do_non_resident_truncate;
+	BUG_ON(NInoNonResident(ni));
+	/* Resize the attribute record to best fit the new attribute size. */
+	if (new_size < vol->mft_record_size &&
+			!ntfs_resident_attr_value_resize(m, a, new_size)) {
+		unsigned long flags;
+
+		/* The resize succeeded! */
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		mark_mft_record_dirty(ctx->ntfs_ino);
+		write_lock_irqsave(&ni->size_lock, flags);
+		/* Update the sizes in the ntfs inode and all is done. */
+		ni->allocated_size = le32_to_cpu(a->length) -
+				le16_to_cpu(a->data.resident.value_offset);
+		/*
+		 * Note ntfs_resident_attr_value_resize() has already done any
+		 * necessary data clearing in the attribute record.  When the
+		 * file is being shrunk vmtruncate() will already have cleared
+		 * the top part of the last partial page, i.e. since this is
+		 * the resident case this is the page with index 0.  However,
+		 * when the file is being expanded, the page cache page data
+		 * between the old data_size, i.e. old_size, and the new_size
+		 * has not been zeroed.  Fortunately, we do not need to zero it
+		 * either since on one hand it will either already be zero due
+		 * to both readpage and writepage clearing partial page data
+		 * beyond i_size in which case there is nothing to do or in the
+		 * case of the file being mmap()ped at the same time, POSIX
+		 * specifies that the behaviour is unspecified thus we do not
+		 * have to do anything.  This means that in our implementation
+		 * in the rare case that the file is mmap()ped and a write
+		 * occurred into the mmap()ped region just beyond the file size
+		 * and writepage has not yet been called to write out the page
+		 * (which would clear the area beyond the file size) and we now
+		 * extend the file size to incorporate this dirty region
+		 * outside the file size, a write of the page would result in
+		 * this data being written to disk instead of being cleared.
+		 * Given both POSIX and the Linux mmap(2) man page specify that
+		 * this corner case is undefined, we choose to leave it like
+		 * that as this is much simpler for us as we cannot lock the
+		 * relevant page now since we are holding too many ntfs locks
+		 * which would result in a lock reversal deadlock.
+		 */
+		ni->initialized_size = new_size;
+		write_unlock_irqrestore(&ni->size_lock, flags);
+		goto unm_done;
+	}
+	/* If the above resize failed, this must be an attribute extension. */
+	BUG_ON(size_change < 0);
+	/*
+	 * We have to drop all the locks so we can call
+	 * ntfs_attr_make_non_resident().  This could be optimised by try-
+	 * locking the first page cache page and only if that fails dropping
+	 * the locks, locking the page, and redoing all the locking and
+	 * lookups.  While this would be a huge optimisation, it is not worth
+	 * it as this is definitely a slow code path as it only ever can happen
+	 * once for any given file.
+	 */
 	ntfs_attr_put_search_ctx(ctx);
-	unmap_mft_record(ni);
-	NInoClearTruncateFailed(ni);
-	ntfs_debug("Done.");
-	return 0;
-err_out:
-	if (err != -ENOMEM) {
+	unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+	/*
+	 * Not enough space in the mft record, try to make the attribute
+	 * non-resident and if successful restart the truncation process.
+	 */
+	err = ntfs_attr_make_non_resident(ni, old_size);
+	if (likely(!err))
+		goto retry_truncate;
+	/*
+	 * Could not make non-resident.  If this is due to this not being
+	 * permitted for this attribute type or there not being enough space,
+	 * try to make other attributes non-resident.  Otherwise fail.
+	 */
+	if (unlikely(err != -EPERM && err != -ENOSPC)) {
+		ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, attribute "
+				"type 0x%x, because the conversion from "
+				"resident to non-resident attribute failed "
+				"with error code %i.", vi->i_ino,
+				(unsigned)le32_to_cpu(ni->type), err);
+		if (err != -ENOMEM)
+			err = -EIO;
+		goto conv_err_out;
+	}
+	/* TODO: Not implemented from here, abort. */
+	if (err == -ENOSPC)
+		ntfs_error(vol->sb, "Not enough space in the mft record/on "
+				"disk for the non-resident attribute value.  "
+				"This case is not implemented yet.");
+	else /* if (err == -EPERM) */
+		ntfs_error(vol->sb, "This attribute type may not be "
+				"non-resident.  This case is not implemented "
+				"yet.");
+	err = -EOPNOTSUPP;
+	goto conv_err_out;
+#if 0
+	// TODO: Attempt to make other attributes non-resident.
+	if (!err)
+		goto do_resident_extend;
+	/*
+	 * Both the attribute list attribute and the standard information
+	 * attribute must remain in the base inode.  Thus, if this is one of
+	 * these attributes, we have to try to move other attributes out into
+	 * extent mft records instead.
+	 */
+	if (ni->type == AT_ATTRIBUTE_LIST ||
+			ni->type == AT_STANDARD_INFORMATION) {
+		// TODO: Attempt to move other attributes into extent mft
+		// records.
+		err = -EOPNOTSUPP;
+		if (!err)
+			goto do_resident_extend;
+		goto err_out;
+	}
+	// TODO: Attempt to move this attribute to an extent mft record, but
+	// only if it is not already the only attribute in an mft record in
+	// which case there would be nothing to gain.
+	err = -EOPNOTSUPP;
+	if (!err)
+		goto do_resident_extend;
+	/* There is nothing we can do to make enough space. )-: */
+	goto err_out;
+#endif
+do_non_resident_truncate:
+	BUG_ON(!NInoNonResident(ni));
+	if (alloc_change < 0) {
+		highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
+		if (highest_vcn > 0 &&
+				old_alloc_size >> vol->cluster_size_bits >
+				highest_vcn + 1) {
+			/*
+			 * This attribute has multiple extents.  Not yet
+			 * supported.
+			 */
+			ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, "
+					"attribute type 0x%x, because the "
+					"attribute is highly fragmented (it "
+					"consists of multiple extents) and "
+					"this case is not implemented yet.",
+					vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type));
+			err = -EOPNOTSUPP;
+			goto bad_out;
+		}
+	}
+	/*
+	 * If the size is shrinking, need to reduce the initialized_size and
+	 * the data_size before reducing the allocation.
+	 */
+	if (size_change < 0) {
+		/*
+		 * Make the valid size smaller (i_size is already up-to-date).
+		 */
+		write_lock_irqsave(&ni->size_lock, flags);
+		if (new_size < ni->initialized_size) {
+			ni->initialized_size = new_size;
+			a->data.non_resident.initialized_size =
+					cpu_to_sle64(new_size);
+		}
+		a->data.non_resident.data_size = cpu_to_sle64(new_size);
+		write_unlock_irqrestore(&ni->size_lock, flags);
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		mark_mft_record_dirty(ctx->ntfs_ino);
+		/* If the allocated size is not changing, we are done. */
+		if (!alloc_change)
+			goto unm_done;
+		/*
+		 * If the size is shrinking it makes no sense for the
+		 * allocation to be growing.
+		 */
+		BUG_ON(alloc_change > 0);
+	} else /* if (size_change >= 0) */ {
+		/*
+		 * The file size is growing or staying the same but the
+		 * allocation can be shrinking, growing or staying the same.
+		 */
+		if (alloc_change > 0) {
+			/*
+			 * We need to extend the allocation and possibly update
+			 * the data size.  If we are updating the data size,
+			 * since we are not touching the initialized_size we do
+			 * not need to worry about the actual data on disk.
+			 * And as far as the page cache is concerned, there
+			 * will be no pages beyond the old data size and any
+			 * partial region in the last page between the old and
+			 * new data size (or the end of the page if the new
+			 * data size is outside the page) does not need to be
+			 * modified as explained above for the resident
+			 * attribute truncate case.  To do this, we simply drop
+			 * the locks we hold and leave all the work to our
+			 * friendly helper ntfs_attr_extend_allocation().
+			 */
+			ntfs_attr_put_search_ctx(ctx);
+			unmap_mft_record(base_ni);
+			up_write(&ni->runlist.lock);
+			new_alloc_size = ntfs_attr_extend_allocation(ni,
+					new_size, size_change > 0 ? new_size :
+					-1, -1);
+			/* Errors were already reported by the callee. */
+			/* Success returns the allocated size, not zero. */
+			err = new_alloc_size < 0 ? (int)new_alloc_size : 0;
+			goto done;
+		}
+		if (!alloc_change)
+			goto alloc_done;
+	}
+	/* alloc_change < 0 */
+	/* Free the clusters. */
+	nr_freed = ntfs_cluster_free(ni, new_alloc_size >>
+			vol->cluster_size_bits, -1, ctx);
+	m = ctx->mrec;
+	a = ctx->attr;
+	if (unlikely(nr_freed < 0)) {
+		ntfs_error(vol->sb, "Failed to release cluster(s) (error code "
+				"%lli).  Unmount and run chkdsk to recover "
+				"the lost cluster(s).", (long long)nr_freed);
 		NVolSetErrors(vol);
+		nr_freed = 0;
+	}
+	/* Truncate the runlist. */
+	err = ntfs_rl_truncate_nolock(vol, &ni->runlist,
+			new_alloc_size >> vol->cluster_size_bits);
+	/*
+	 * If the runlist truncation failed and/or the search context is no
+	 * longer valid, we cannot resize the attribute record or build the
+	 * mapping pairs array thus we mark the inode bad so that no access to
+	 * the freed clusters can happen.
+	 */
+	if (unlikely(err || IS_ERR(m))) {
+		ntfs_error(vol->sb, "Failed to %s (error code %li).%s",
+				IS_ERR(m) ?
+				"restore attribute search context" :
+				"truncate attribute runlist",
+				IS_ERR(m) ? PTR_ERR(m) : err, es);
+		err = -EIO;
+		goto bad_out;
+	}
+	/* Get the size for the shrunk mapping pairs array for the runlist. */
+	mp_size = ntfs_get_size_for_mapping_pairs(vol, ni->runlist.rl, 0, -1);
+	if (unlikely(mp_size <= 0)) {
+		ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
+				"attribute type 0x%x, because determining the "
+				"size for the mapping pairs failed with error "
+				"code %i.%s", vi->i_ino,
+				(unsigned)le32_to_cpu(ni->type), mp_size, es);
+		err = -EIO;
+		goto bad_out;
+	}
+	/*
+	 * Shrink the attribute record for the new mapping pairs array.  Note,
+	 * this cannot fail since we are making the attribute smaller thus by
+	 * definition there is enough space to do so.
+	 */
+	attr_len = le32_to_cpu(a->length);
+	err = ntfs_attr_record_resize(m, a, mp_size +
+			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
+	BUG_ON(err);
+	/*
+	 * Generate the mapping pairs array directly into the attribute record.
+	 */
+	err = ntfs_mapping_pairs_build(vol, (u8*)a +
+			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
+			mp_size, ni->runlist.rl, 0, -1, NULL);
+	if (unlikely(err)) {
+		ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
+				"attribute type 0x%x, because building the "
+				"mapping pairs failed with error code %i.%s",
+				vi->i_ino, (unsigned)le32_to_cpu(ni->type),
+				err, es);
+		err = -EIO;
+		goto bad_out;
+	}
+	/* Update the allocated/compressed size as well as the highest vcn. */
+	a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
+			vol->cluster_size_bits) - 1);
+	write_lock_irqsave(&ni->size_lock, flags);
+	ni->allocated_size = new_alloc_size;
+	a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
+	if (NInoSparse(ni) || NInoCompressed(ni)) {
+		if (nr_freed) {
+			ni->itype.compressed.size -= nr_freed <<
+					vol->cluster_size_bits;
+			BUG_ON(ni->itype.compressed.size < 0);
+			a->data.non_resident.compressed_size = cpu_to_sle64(
+					ni->itype.compressed.size);
+			vi->i_blocks = ni->itype.compressed.size >> 9;
+		}
+	} else
+		vi->i_blocks = new_alloc_size >> 9;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+	/*
+	 * We have shrunk the allocation.  If this is a shrinking truncate we
+	 * have already dealt with the initialized_size and the data_size above
+	 * and we are done.  If the truncate is only changing the allocation
+	 * and not the data_size, we are also done.  If this is an extending
+	 * truncate, need to extend the data_size now which is ensured by the
+	 * fact that @size_change is positive.
+	 */
+alloc_done:
+	/*
+	 * If the size is growing, need to update it now.  If it is shrinking,
+	 * we have already updated it above (before the allocation change).
+	 */
+	if (size_change > 0)
+		a->data.non_resident.data_size = cpu_to_sle64(new_size);
+	/* Ensure the modified mft record is written out. */
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+unm_done:
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+done:
+	/* Update the mtime and ctime on the base inode. */
+	inode_update_time(VFS_I(base_ni), 1);
+	if (likely(!err)) {
+		NInoClearTruncateFailed(ni);
+		ntfs_debug("Done.");
+	}
+	return err;
+old_bad_out:
+	old_size = -1;
+bad_out:
+	if (err != -ENOMEM && err != -EOPNOTSUPP) {
 		make_bad_inode(vi);
+		make_bad_inode(VFS_I(base_ni));
+		NVolSetErrors(vol);
 	}
+	if (err != -EOPNOTSUPP)
+		NInoSetTruncateFailed(ni);
+	else if (old_size >= 0)
+		i_size_write(vi, old_size);
+err_out:
 	if (ctx)
 		ntfs_attr_put_search_ctx(ctx);
 	if (m)
-		unmap_mft_record(ni);
-	NInoSetTruncateFailed(ni);
+		unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+out:
+	ntfs_debug("Failed.  Returning error code %i.", err);
 	return err;
+conv_err_out:
+	if (err != -ENOMEM && err != -EOPNOTSUPP) {
+		make_bad_inode(vi);
+		make_bad_inode(VFS_I(base_ni));
+		NVolSetErrors(vol);
+	}
+	if (err != -EOPNOTSUPP)
+		NInoSetTruncateFailed(ni);
+	else
+		i_size_write(vi, old_size);
+	goto out;
 }
 
 /**
-- 
cgit v1.2.3


From e9438250b635f7832e99a8c8d2e394dd1522ce65 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 4 Oct 2005 16:01:06 +0100
Subject: NTFS: Enable ATTR_SIZE attribute changes in ntfs_setattr().  This
 completes the initial implementation of file truncation.  Now both
 open(2)ing a file with the O_TRUNC flag and the {,f}truncate(2) system
 calls will resize a file appropriately.  The limitations are that only
 uncompressed and unencrypted files are supported.  Also, there is only
 very limited support for highly fragmented files (the ones whose $DATA
 attribute is split into multiple attribute extents).

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  7 +++++++
 fs/ntfs/inode.c   | 23 +++++++++++++++--------
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 70ad4be7a7fe..9f4674a026f2 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -50,6 +50,13 @@ ToDo/Notes:
 	  but not the initialized size can be extended, too.
 	- Implement fs/ntfs/inode.[hc]::ntfs_truncate().  It only supports
 	  uncompressed and unencrypted files.
+	- Enable ATTR_SIZE attribute changes in ntfs_setattr().  This completes
+	  the initial implementation of file truncation.  Now both open(2)ing
+	  a file with the O_TRUNC flag and the {,f}truncate(2) system calls
+	  will resize a file appropriately.  The limitations are that only
+	  uncompressed and unencrypted files are supported.  Also, there is
+	  only very limited support for highly fragmented files (the ones whose
+	  $DATA attribute is split into multiple attribute extents).
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index a1682342baa6..b24f4c4b2c5c 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2845,8 +2845,7 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	err = inode_change_ok(vi, attr);
 	if (err)
-		return err;
-
+		goto out;
 	/* We do not support NTFS ACLs yet. */
 	if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) {
 		ntfs_warning(vi->i_sb, "Changes in user/group/mode are not "
@@ -2854,14 +2853,22 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 		err = -EOPNOTSUPP;
 		goto out;
 	}
-
 	if (ia_valid & ATTR_SIZE) {
 		if (attr->ia_size != i_size_read(vi)) {
-			ntfs_warning(vi->i_sb, "Changes in inode size are not "
-					"supported yet, ignoring.");
-			err = -EOPNOTSUPP;
-			// TODO: Implement...
-			// err = vmtruncate(vi, attr->ia_size);
+			ntfs_inode *ni = NTFS_I(vi);
+			/*
+			 * FIXME: For now we do not support resizing of
+			 * compressed or encrypted files yet.
+			 */
+			if (NInoCompressed(ni) || NInoEncrypted(ni)) {
+				ntfs_warning(vi->i_sb, "Changes in inode size "
+						"are not supported yet for "
+						"%s files, ignoring.",
+						NInoCompressed(ni) ?
+						"compressed" : "encrypted");
+				err = -EOPNOTSUPP;
+			} else
+				err = vmtruncate(vi, attr->ia_size);
 			if (err || ia_valid == ATTR_SIZE)
 				goto out;
 		} else {
-- 
cgit v1.2.3


From 29b8990513b077dc388b0756acd31465e5c21441 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 11 Oct 2005 14:54:42 +0100
Subject: NTFS: In attrib.c::ntfs_attr_set() call
 balance_dirty_pages_ratelimited() and cond_resched() in the main loop
 as we could be dirtying a lot of pages and this ensures we play nice
 with the VM and the system as a whole.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 4 ++++
 fs/ntfs/attrib.c  | 4 ++++
 fs/ntfs/malloc.h  | 3 +--
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 9f4674a026f2..3b8ff2318085 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -57,6 +57,10 @@ ToDo/Notes:
 	  uncompressed and unencrypted files are supported.  Also, there is
 	  only very limited support for highly fragmented files (the ones whose
 	  $DATA attribute is split into multiple attribute extents).
+	- In attrib.c::ntfs_attr_set() call balance_dirty_pages_ratelimited()
+	  and cond_resched() in the main loop as we could be dirtying a lot of
+	  pages and this ensures we play nice with the VM and the system as a
+	  whole.
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index bc25e88ad468..338e47144fc9 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -21,7 +21,9 @@
  */
 
 #include <linux/buffer_head.h>
+#include <linux/sched.h>
 #include <linux/swap.h>
+#include <linux/writeback.h>
 
 #include "attrib.h"
 #include "debug.h"
@@ -2590,6 +2592,8 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
 		/* Finally unlock and release the page. */
 		unlock_page(page);
 		page_cache_release(page);
+		balance_dirty_pages_ratelimited(mapping);
+		cond_resched();
 	}
 	/* If there is a last partial page, need to do it the slow way. */
 	if (end_ofs) {
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index 590887b943f5..e38e402e4103 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -39,8 +39,7 @@
  * If there was insufficient memory to complete the request, return NULL.
  * Depending on @gfp_mask the allocation may be guaranteed to succeed.
  */
-static inline void *__ntfs_malloc(unsigned long size,
-		gfp_t gfp_mask)
+static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask)
 {
 	if (likely(size <= PAGE_SIZE)) {
 		BUG_ON(!size);
-- 
cgit v1.2.3


From 29f5f3c141c58b0a4c0765c77da612271875bcce Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 11 Oct 2005 14:59:40 +0100
Subject: NTFS: Remove address space operations ->prepare_write and
 ->commit_write in preparation for the big rewrite of write(2) support
 in ntfs.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/aops.c | 827 ---------------------------------------------------------
 1 file changed, 827 deletions(-)

diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 5e80c07c6a4d..8f23c60030c0 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1542,830 +1542,6 @@ err_out:
 	return err;
 }
 
-/**
- * ntfs_prepare_nonresident_write -
- *
- */
-static int ntfs_prepare_nonresident_write(struct page *page,
-		unsigned from, unsigned to)
-{
-	VCN vcn;
-	LCN lcn;
-	s64 initialized_size;
-	loff_t i_size;
-	sector_t block, ablock, iblock;
-	struct inode *vi;
-	ntfs_inode *ni;
-	ntfs_volume *vol;
-	runlist_element *rl;
-	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
-	unsigned long flags;
-	unsigned int vcn_ofs, block_start, block_end, blocksize;
-	int err;
-	BOOL is_retry;
-	unsigned char blocksize_bits;
-
-	vi = page->mapping->host;
-	ni = NTFS_I(vi);
-	vol = ni->vol;
-
-	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
-			"0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
-			page->index, from, to);
-
-	BUG_ON(!NInoNonResident(ni));
-
-	blocksize_bits = vi->i_blkbits;
-	blocksize = 1 << blocksize_bits;
-
-	/*
-	 * create_empty_buffers() will create uptodate/dirty buffers if the
-	 * page is uptodate/dirty.
-	 */
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	bh = head = page_buffers(page);
-	if (unlikely(!bh))
-		return -ENOMEM;
-
-	/* The first block in the page. */
-	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
-
-	read_lock_irqsave(&ni->size_lock, flags);
-	/*
-	 * The first out of bounds block for the allocated size.  No need to
-	 * round up as allocated_size is in multiples of cluster size and the
-	 * minimum cluster size is 512 bytes, which is equal to the smallest
-	 * blocksize.
-	 */
-	ablock = ni->allocated_size >> blocksize_bits;
-	i_size = i_size_read(vi);
-	initialized_size = ni->initialized_size;
-	read_unlock_irqrestore(&ni->size_lock, flags);
-
-	/* The last (fully or partially) initialized block. */
-	iblock = initialized_size >> blocksize_bits;
-
-	/* Loop through all the buffers in the page. */
-	block_start = 0;
-	rl = NULL;
-	err = 0;
-	do {
-		block_end = block_start + blocksize;
-		/*
-		 * If buffer @bh is outside the write, just mark it uptodate
-		 * if the page is uptodate and continue with the next buffer.
-		 */
-		if (block_end <= from || block_start >= to) {
-			if (PageUptodate(page)) {
-				if (!buffer_uptodate(bh))
-					set_buffer_uptodate(bh);
-			}
-			continue;
-		}
-		/*
-		 * @bh is at least partially being written to.
-		 * Make sure it is not marked as new.
-		 */
-		//if (buffer_new(bh))
-		//	clear_buffer_new(bh);
-
-		if (block >= ablock) {
-			// TODO: block is above allocated_size, need to
-			// allocate it. Best done in one go to accommodate not
-			// only block but all above blocks up to and including:
-			// ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
-			// - 1) >> blobksize_bits. Obviously will need to round
-			// up to next cluster boundary, too. This should be
-			// done with a helper function, so it can be reused.
-			ntfs_error(vol->sb, "Writing beyond allocated size "
-					"is not supported yet. Sorry.");
-			err = -EOPNOTSUPP;
-			goto err_out;
-			// Need to update ablock.
-			// Need to set_buffer_new() on all block bhs that are
-			// newly allocated.
-		}
-		/*
-		 * Now we have enough allocated size to fulfill the whole
-		 * request, i.e. block < ablock is true.
-		 */
-		if (unlikely((block >= iblock) &&
-				(initialized_size < i_size))) {
-			/*
-			 * If this page is fully outside initialized size, zero
-			 * out all pages between the current initialized size
-			 * and the current page. Just use ntfs_readpage() to do
-			 * the zeroing transparently.
-			 */
-			if (block > iblock) {
-				// TODO:
-				// For each page do:
-				// - read_cache_page()
-				// Again for each page do:
-				// - wait_on_page_locked()
-				// - Check (PageUptodate(page) &&
-				//			!PageError(page))
-				// Update initialized size in the attribute and
-				// in the inode.
-				// Again, for each page do:
-				//	__set_page_dirty_buffers();
-				// page_cache_release()
-				// We don't need to wait on the writes.
-				// Update iblock.
-			}
-			/*
-			 * The current page straddles initialized size. Zero
-			 * all non-uptodate buffers and set them uptodate (and
-			 * dirty?). Note, there aren't any non-uptodate buffers
-			 * if the page is uptodate.
-			 * FIXME: For an uptodate page, the buffers may need to
-			 * be written out because they were not initialized on
-			 * disk before.
-			 */
-			if (!PageUptodate(page)) {
-				// TODO:
-				// Zero any non-uptodate buffers up to i_size.
-				// Set them uptodate and dirty.
-			}
-			// TODO:
-			// Update initialized size in the attribute and in the
-			// inode (up to i_size).
-			// Update iblock.
-			// FIXME: This is inefficient. Try to batch the two
-			// size changes to happen in one go.
-			ntfs_error(vol->sb, "Writing beyond initialized size "
-					"is not supported yet. Sorry.");
-			err = -EOPNOTSUPP;
-			goto err_out;
-			// Do NOT set_buffer_new() BUT DO clear buffer range
-			// outside write request range.
-			// set_buffer_uptodate() on complete buffers as well as
-			// set_buffer_dirty().
-		}
-
-		/* Need to map unmapped buffers. */
-		if (!buffer_mapped(bh)) {
-			/* Unmapped buffer. Need to map it. */
-			bh->b_bdev = vol->sb->s_bdev;
-
-			/* Convert block into corresponding vcn and offset. */
-			vcn = (VCN)block << blocksize_bits >>
-					vol->cluster_size_bits;
-			vcn_ofs = ((VCN)block << blocksize_bits) &
-					vol->cluster_size_mask;
-
-			is_retry = FALSE;
-			if (!rl) {
-lock_retry_remap:
-				down_read(&ni->runlist.lock);
-				rl = ni->runlist.rl;
-			}
-			if (likely(rl != NULL)) {
-				/* Seek to element containing target vcn. */
-				while (rl->length && rl[1].vcn <= vcn)
-					rl++;
-				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
-			} else
-				lcn = LCN_RL_NOT_MAPPED;
-			if (unlikely(lcn < 0)) {
-				/*
-				 * We extended the attribute allocation above.
-				 * If we hit an ENOENT here it means that the
-				 * allocation was insufficient which is a bug.
-				 */
-				BUG_ON(lcn == LCN_ENOENT);
-
-				/* It is a hole, need to instantiate it. */
-				if (lcn == LCN_HOLE) {
-					// TODO: Instantiate the hole.
-					// clear_buffer_new(bh);
-					// unmap_underlying_metadata(bh->b_bdev,
-					//		bh->b_blocknr);
-					// For non-uptodate buffers, need to
-					// zero out the region outside the
-					// request in this bh or all bhs,
-					// depending on what we implemented
-					// above.
-					// Need to flush_dcache_page().
-					// Or could use set_buffer_new()
-					// instead?
-					ntfs_error(vol->sb, "Writing into "
-							"sparse regions is "
-							"not supported yet. "
-							"Sorry.");
-					err = -EOPNOTSUPP;
-					if (!rl)
-						up_read(&ni->runlist.lock);
-					goto err_out;
-				} else if (!is_retry &&
-						lcn == LCN_RL_NOT_MAPPED) {
-					is_retry = TRUE;
-					/*
-					 * Attempt to map runlist, dropping
-					 * lock for the duration.
-					 */
-					up_read(&ni->runlist.lock);
-					err = ntfs_map_runlist(ni, vcn);
-					if (likely(!err))
-						goto lock_retry_remap;
-					rl = NULL;
-				} else if (!rl)
-					up_read(&ni->runlist.lock);
-				/*
-				 * Failed to map the buffer, even after
-				 * retrying.
-				 */
-				if (!err)
-					err = -EIO;
-				bh->b_blocknr = -1;
-				ntfs_error(vol->sb, "Failed to write to inode "
-						"0x%lx, attribute type 0x%x, "
-						"vcn 0x%llx, offset 0x%x "
-						"because its location on disk "
-						"could not be determined%s "
-						"(error code %i).",
-						ni->mft_no, ni->type,
-						(unsigned long long)vcn,
-						vcn_ofs, is_retry ? " even "
-						"after retrying" : "", err);
-				goto err_out;
-			}
-			/* We now have a successful remap, i.e. lcn >= 0. */
-
-			/* Setup buffer head to correct block. */
-			bh->b_blocknr = ((lcn << vol->cluster_size_bits)
-					+ vcn_ofs) >> blocksize_bits;
-			set_buffer_mapped(bh);
-
-			// FIXME: Something analogous to this is needed for
-			// each newly allocated block, i.e. BH_New.
-			// FIXME: Might need to take this out of the
-			// if (!buffer_mapped(bh)) {}, depending on how we
-			// implement things during the allocated_size and
-			// initialized_size extension code above.
-			if (buffer_new(bh)) {
-				clear_buffer_new(bh);
-				unmap_underlying_metadata(bh->b_bdev,
-						bh->b_blocknr);
-				if (PageUptodate(page)) {
-					set_buffer_uptodate(bh);
-					continue;
-				}
-				/*
-				 * Page is _not_ uptodate, zero surrounding
-				 * region. NOTE: This is how we decide if to
-				 * zero or not!
-				 */
-				if (block_end > to || block_start < from) {
-					void *kaddr;
-
-					kaddr = kmap_atomic(page, KM_USER0);
-					if (block_end > to)
-						memset(kaddr + to, 0,
-								block_end - to);
-					if (block_start < from)
-						memset(kaddr + block_start, 0,
-								from -
-								block_start);
-					flush_dcache_page(page);
-					kunmap_atomic(kaddr, KM_USER0);
-				}
-				continue;
-			}
-		}
-		/* @bh is mapped, set it uptodate if the page is uptodate. */
-		if (PageUptodate(page)) {
-			if (!buffer_uptodate(bh))
-				set_buffer_uptodate(bh);
-			continue;
-		}
-		/*
-		 * The page is not uptodate. The buffer is mapped. If it is not
-		 * uptodate, and it is only partially being written to, we need
-		 * to read the buffer in before the write, i.e. right now.
-		 */
-		if (!buffer_uptodate(bh) &&
-				(block_start < from || block_end > to)) {
-			ll_rw_block(READ, 1, &bh);
-			*wait_bh++ = bh;
-		}
-	} while (block++, block_start = block_end,
-			(bh = bh->b_this_page) != head);
-
-	/* Release the lock if we took it. */
-	if (rl) {
-		up_read(&ni->runlist.lock);
-		rl = NULL;
-	}
-
-	/* If we issued read requests, let them complete. */
-	while (wait_bh > wait) {
-		wait_on_buffer(*--wait_bh);
-		if (!buffer_uptodate(*wait_bh))
-			return -EIO;
-	}
-
-	ntfs_debug("Done.");
-	return 0;
-err_out:
-	/*
-	 * Zero out any newly allocated blocks to avoid exposing stale data.
-	 * If BH_New is set, we know that the block was newly allocated in the
-	 * above loop.
-	 * FIXME: What about initialized_size increments? Have we done all the
-	 * required zeroing above? If not this error handling is broken, and
-	 * in particular the if (block_end <= from) check is completely bogus.
-	 */
-	bh = head;
-	block_start = 0;
-	is_retry = FALSE;
-	do {
-		block_end = block_start + blocksize;
-		if (block_end <= from)
-			continue;
-		if (block_start >= to)
-			break;
-		if (buffer_new(bh)) {
-			void *kaddr;
-
-			clear_buffer_new(bh);
-			kaddr = kmap_atomic(page, KM_USER0);
-			memset(kaddr + block_start, 0, bh->b_size);
-			kunmap_atomic(kaddr, KM_USER0);
-			set_buffer_uptodate(bh);
-			mark_buffer_dirty(bh);
-			is_retry = TRUE;
-		}
-	} while (block_start = block_end, (bh = bh->b_this_page) != head);
-	if (is_retry)
-		flush_dcache_page(page);
-	if (rl)
-		up_read(&ni->runlist.lock);
-	return err;
-}
-
-/**
- * ntfs_prepare_write - prepare a page for receiving data
- *
- * This is called from generic_file_write() with i_sem held on the inode
- * (@page->mapping->host).  The @page is locked but not kmap()ped.  The source
- * data has not yet been copied into the @page.
- *
- * Need to extend the attribute/fill in holes if necessary, create blocks and
- * make partially overwritten blocks uptodate,
- *
- * i_size is not to be modified yet.
- *
- * Return 0 on success or -errno on error.
- *
- * Should be using block_prepare_write() [support for sparse files] or
- * cont_prepare_write() [no support for sparse files].  Cannot do that due to
- * ntfs specifics but can look at them for implementation guidance.
- *
- * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
- * the first byte in the page that will be written to and @to is the first byte
- * after the last byte that will be written to.
- */
-static int ntfs_prepare_write(struct file *file, struct page *page,
-		unsigned from, unsigned to)
-{
-	s64 new_size;
-	loff_t i_size;
-	struct inode *vi = page->mapping->host;
-	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
-	ntfs_volume *vol = ni->vol;
-	ntfs_attr_search_ctx *ctx = NULL;
-	MFT_RECORD *m = NULL;
-	ATTR_RECORD *a;
-	u8 *kaddr;
-	u32 attr_len;
-	int err;
-
-	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
-			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
-			page->index, from, to);
-	BUG_ON(!PageLocked(page));
-	BUG_ON(from > PAGE_CACHE_SIZE);
-	BUG_ON(to > PAGE_CACHE_SIZE);
-	BUG_ON(from > to);
-	BUG_ON(NInoMstProtected(ni));
-	/*
-	 * If a previous ntfs_truncate() failed, repeat it and abort if it
-	 * fails again.
-	 */
-	if (unlikely(NInoTruncateFailed(ni))) {
-		down_write(&vi->i_alloc_sem);
-		err = ntfs_truncate(vi);
-		up_write(&vi->i_alloc_sem);
-		if (err || NInoTruncateFailed(ni)) {
-			if (!err)
-				err = -EIO;
-			goto err_out;
-		}
-	}
-	/* If the attribute is not resident, deal with it elsewhere. */
-	if (NInoNonResident(ni)) {
-		/*
-		 * Only unnamed $DATA attributes can be compressed, encrypted,
-		 * and/or sparse.
-		 */
-		if (ni->type == AT_DATA && !ni->name_len) {
-			/* If file is encrypted, deny access, just like NT4. */
-			if (NInoEncrypted(ni)) {
-				ntfs_debug("Denying write access to encrypted "
-						"file.");
-				return -EACCES;
-			}
-			/* Compressed data streams are handled in compress.c. */
-			if (NInoCompressed(ni)) {
-				// TODO: Implement and replace this check with
-				// return ntfs_write_compressed_block(page);
-				ntfs_error(vi->i_sb, "Writing to compressed "
-						"files is not supported yet. "
-						"Sorry.");
-				return -EOPNOTSUPP;
-			}
-			// TODO: Implement and remove this check.
-			if (NInoSparse(ni)) {
-				ntfs_error(vi->i_sb, "Writing to sparse files "
-						"is not supported yet. Sorry.");
-				return -EOPNOTSUPP;
-			}
-		}
-		/* Normal data stream. */
-		return ntfs_prepare_nonresident_write(page, from, to);
-	}
-	/*
-	 * Attribute is resident, implying it is not compressed, encrypted, or
-	 * sparse.
-	 */
-	BUG_ON(page_has_buffers(page));
-	new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
-	/* If we do not need to resize the attribute allocation we are done. */
-	if (new_size <= i_size_read(vi))
-		goto done;
-	/* Map, pin, and lock the (base) mft record. */
-	if (!NInoAttr(ni))
-		base_ni = ni;
-	else
-		base_ni = ni->ext.base_ntfs_ino;
-	m = map_mft_record(base_ni);
-	if (IS_ERR(m)) {
-		err = PTR_ERR(m);
-		m = NULL;
-		ctx = NULL;
-		goto err_out;
-	}
-	ctx = ntfs_attr_get_search_ctx(base_ni, m);
-	if (unlikely(!ctx)) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
-			CASE_SENSITIVE, 0, NULL, 0, ctx);
-	if (unlikely(err)) {
-		if (err == -ENOENT)
-			err = -EIO;
-		goto err_out;
-	}
-	m = ctx->mrec;
-	a = ctx->attr;
-	/* The total length of the attribute value. */
-	attr_len = le32_to_cpu(a->data.resident.value_length);
-	/* Fix an eventual previous failure of ntfs_commit_write(). */
-	i_size = i_size_read(vi);
-	if (unlikely(attr_len > i_size)) {
-		attr_len = i_size;
-		a->data.resident.value_length = cpu_to_le32(attr_len);
-	}
-	/* If we do not need to resize the attribute allocation we are done. */
-	if (new_size <= attr_len)
-		goto done_unm;
-	/* Check if new size is allowed in $AttrDef. */
-	err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
-	if (unlikely(err)) {
-		if (err == -ERANGE) {
-			ntfs_error(vol->sb, "Write would cause the inode "
-					"0x%lx to exceed the maximum size for "
-					"its attribute type (0x%x).  Aborting "
-					"write.", vi->i_ino,
-					le32_to_cpu(ni->type));
-		} else {
-			ntfs_error(vol->sb, "Inode 0x%lx has unknown "
-					"attribute type 0x%x.  Aborting "
-					"write.", vi->i_ino,
-					le32_to_cpu(ni->type));
-			err = -EIO;
-		}
-		goto err_out2;
-	}
-	/*
-	 * Extend the attribute record to be able to store the new attribute
-	 * size.
-	 */
-	if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a,
-			le16_to_cpu(a->data.resident.value_offset) +
-			new_size)) {
-		/* Not enough space in the mft record. */
-		ntfs_error(vol->sb, "Not enough space in the mft record for "
-				"the resized attribute value.  This is not "
-				"supported yet.  Aborting write.");
-		err = -EOPNOTSUPP;
-		goto err_out2;
-	}
-	/*
-	 * We have enough space in the mft record to fit the write.  This
-	 * implies the attribute is smaller than the mft record and hence the
-	 * attribute must be in a single page and hence page->index must be 0.
-	 */
-	BUG_ON(page->index);
-	/*
-	 * If the beginning of the write is past the old size, enlarge the
-	 * attribute value up to the beginning of the write and fill it with
-	 * zeroes.
-	 */
-	if (from > attr_len) {
-		memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
-				attr_len, 0, from - attr_len);
-		a->data.resident.value_length = cpu_to_le32(from);
-		/* Zero the corresponding area in the page as well. */
-		if (PageUptodate(page)) {
-			kaddr = kmap_atomic(page, KM_USER0);
-			memset(kaddr + attr_len, 0, from - attr_len);
-			kunmap_atomic(kaddr, KM_USER0);
-			flush_dcache_page(page);
-		}
-	}
-	flush_dcache_mft_record_page(ctx->ntfs_ino);
-	mark_mft_record_dirty(ctx->ntfs_ino);
-done_unm:
-	ntfs_attr_put_search_ctx(ctx);
-	unmap_mft_record(base_ni);
-	/*
-	 * Because resident attributes are handled by memcpy() to/from the
-	 * corresponding MFT record, and because this form of i/o is byte
-	 * aligned rather than block aligned, there is no need to bring the
-	 * page uptodate here as in the non-resident case where we need to
-	 * bring the buffers straddled by the write uptodate before
-	 * generic_file_write() does the copying from userspace.
-	 *
-	 * We thus defer the uptodate bringing of the page region outside the
-	 * region written to to ntfs_commit_write(), which makes the code
-	 * simpler and saves one atomic kmap which is good.
-	 */
-done:
-	ntfs_debug("Done.");
-	return 0;
-err_out:
-	if (err == -ENOMEM)
-		ntfs_warning(vi->i_sb, "Error allocating memory required to "
-				"prepare the write.");
-	else {
-		ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
-				"with error %i.", err);
-		NVolSetErrors(vol);
-		make_bad_inode(vi);
-	}
-err_out2:
-	if (ctx)
-		ntfs_attr_put_search_ctx(ctx);
-	if (m)
-		unmap_mft_record(base_ni);
-	return err;
-}
-
-/**
- * ntfs_commit_nonresident_write -
- *
- */
-static int ntfs_commit_nonresident_write(struct page *page,
-		unsigned from, unsigned to)
-{
-	s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
-	struct inode *vi = page->mapping->host;
-	struct buffer_head *bh, *head;
-	unsigned int block_start, block_end, blocksize;
-	BOOL partial;
-
-	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
-			"0x%lx, from = %u, to = %u.", vi->i_ino,
-			NTFS_I(vi)->type, page->index, from, to);
-	blocksize = 1 << vi->i_blkbits;
-
-	// FIXME: We need a whole slew of special cases in here for compressed
-	// files for example...
-	// For now, we know ntfs_prepare_write() would have failed so we can't
-	// get here in any of the cases which we have to special case, so we
-	// are just a ripped off, unrolled generic_commit_write().
-
-	bh = head = page_buffers(page);
-	block_start = 0;
-	partial = FALSE;
-	do {
-		block_end = block_start + blocksize;
-		if (block_end <= from || block_start >= to) {
-			if (!buffer_uptodate(bh))
-				partial = TRUE;
-		} else {
-			set_buffer_uptodate(bh);
-			mark_buffer_dirty(bh);
-		}
-	} while (block_start = block_end, (bh = bh->b_this_page) != head);
-	/*
-	 * If this is a partial write which happened to make all buffers
-	 * uptodate then we can optimize away a bogus ->readpage() for the next
-	 * read().  Here we 'discover' whether the page went uptodate as a
-	 * result of this (potentially partial) write.
-	 */
-	if (!partial)
-		SetPageUptodate(page);
-	/*
-	 * Not convinced about this at all.  See disparity comment above.  For
-	 * now we know ntfs_prepare_write() would have failed in the write
-	 * exceeds i_size case, so this will never trigger which is fine.
-	 */
-	if (pos > i_size_read(vi)) {
-		ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
-				"not supported yet.  Sorry.");
-		return -EOPNOTSUPP;
-		// vi->i_size = pos;
-		// mark_inode_dirty(vi);
-	}
-	ntfs_debug("Done.");
-	return 0;
-}
-
-/**
- * ntfs_commit_write - commit the received data
- *
- * This is called from generic_file_write() with i_sem held on the inode
- * (@page->mapping->host).  The @page is locked but not kmap()ped.  The source
- * data has already been copied into the @page.  ntfs_prepare_write() has been
- * called before the data copied and it returned success so we can take the
- * results of various BUG checks and some error handling for granted.
- *
- * Need to mark modified blocks dirty so they get written out later when
- * ntfs_writepage() is invoked by the VM.
- *
- * Return 0 on success or -errno on error.
- *
- * Should be using generic_commit_write().  This marks buffers uptodate and
- * dirty, sets the page uptodate if all buffers in the page are uptodate, and
- * updates i_size if the end of io is beyond i_size.  In that case, it also
- * marks the inode dirty.
- *
- * Cannot use generic_commit_write() due to ntfs specialities but can look at
- * it for implementation guidance.
- *
- * If things have gone as outlined in ntfs_prepare_write(), then we do not
- * need to do any page content modifications here at all, except in the write
- * to resident attribute case, where we need to do the uptodate bringing here
- * which we combine with the copying into the mft record which means we save
- * one atomic kmap.
- */
-static int ntfs_commit_write(struct file *file, struct page *page,
-		unsigned from, unsigned to)
-{
-	struct inode *vi = page->mapping->host;
-	ntfs_inode *base_ni, *ni = NTFS_I(vi);
-	char *kaddr, *kattr;
-	ntfs_attr_search_ctx *ctx;
-	MFT_RECORD *m;
-	ATTR_RECORD *a;
-	u32 attr_len;
-	int err;
-
-	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
-			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
-			page->index, from, to);
-	/* If the attribute is not resident, deal with it elsewhere. */
-	if (NInoNonResident(ni)) {
-		/* Only unnamed $DATA attributes can be compressed/encrypted. */
-		if (ni->type == AT_DATA && !ni->name_len) {
-			/* Encrypted files need separate handling. */
-			if (NInoEncrypted(ni)) {
-				// We never get here at present!
-				BUG();
-			}
-			/* Compressed data streams are handled in compress.c. */
-			if (NInoCompressed(ni)) {
-				// TODO: Implement this!
-				// return ntfs_write_compressed_block(page);
-				// We never get here at present!
-				BUG();
-			}
-		}
-		/* Normal data stream. */
-		return ntfs_commit_nonresident_write(page, from, to);
-	}
-	/*
-	 * Attribute is resident, implying it is not compressed, encrypted, or
-	 * sparse.
-	 */
-	if (!NInoAttr(ni))
-		base_ni = ni;
-	else
-		base_ni = ni->ext.base_ntfs_ino;
-	/* Map, pin, and lock the mft record. */
-	m = map_mft_record(base_ni);
-	if (IS_ERR(m)) {
-		err = PTR_ERR(m);
-		m = NULL;
-		ctx = NULL;
-		goto err_out;
-	}
-	ctx = ntfs_attr_get_search_ctx(base_ni, m);
-	if (unlikely(!ctx)) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
-			CASE_SENSITIVE, 0, NULL, 0, ctx);
-	if (unlikely(err)) {
-		if (err == -ENOENT)
-			err = -EIO;
-		goto err_out;
-	}
-	a = ctx->attr;
-	/* The total length of the attribute value. */
-	attr_len = le32_to_cpu(a->data.resident.value_length);
-	BUG_ON(from > attr_len);
-	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
-	kaddr = kmap_atomic(page, KM_USER0);
-	/* Copy the received data from the page to the mft record. */
-	memcpy(kattr + from, kaddr + from, to - from);
-	/* Update the attribute length if necessary. */
-	if (to > attr_len) {
-		attr_len = to;
-		a->data.resident.value_length = cpu_to_le32(attr_len);
-	}
-	/*
-	 * If the page is not uptodate, bring the out of bounds area(s)
-	 * uptodate by copying data from the mft record to the page.
-	 */
-	if (!PageUptodate(page)) {
-		if (from > 0)
-			memcpy(kaddr, kattr, from);
-		if (to < attr_len)
-			memcpy(kaddr + to, kattr + to, attr_len - to);
-		/* Zero the region outside the end of the attribute value. */
-		if (attr_len < PAGE_CACHE_SIZE)
-			memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
-		/*
-		 * The probability of not having done any of the above is
-		 * extremely small, so we just flush unconditionally.
-		 */
-		flush_dcache_page(page);
-		SetPageUptodate(page);
-	}
-	kunmap_atomic(kaddr, KM_USER0);
-	/* Update i_size if necessary. */
-	if (i_size_read(vi) < attr_len) {
-		unsigned long flags;
-
-		write_lock_irqsave(&ni->size_lock, flags);
-		ni->allocated_size = ni->initialized_size = attr_len;
-		i_size_write(vi, attr_len);
-		write_unlock_irqrestore(&ni->size_lock, flags);
-	}
-	/* Mark the mft record dirty, so it gets written back. */
-	flush_dcache_mft_record_page(ctx->ntfs_ino);
-	mark_mft_record_dirty(ctx->ntfs_ino);
-	ntfs_attr_put_search_ctx(ctx);
-	unmap_mft_record(base_ni);
-	ntfs_debug("Done.");
-	return 0;
-err_out:
-	if (err == -ENOMEM) {
-		ntfs_warning(vi->i_sb, "Error allocating memory required to "
-				"commit the write.");
-		if (PageUptodate(page)) {
-			ntfs_warning(vi->i_sb, "Page is uptodate, setting "
-					"dirty so the write will be retried "
-					"later on by the VM.");
-			/*
-			 * Put the page on mapping->dirty_pages, but leave its
-			 * buffers' dirty state as-is.
-			 */
-			__set_page_dirty_nobuffers(page);
-			err = 0;
-		} else
-			ntfs_error(vi->i_sb, "Page is not uptodate.  Written "
-					"data has been lost.");
-	} else {
-		ntfs_error(vi->i_sb, "Resident attribute commit write failed "
-				"with error %i.", err);
-		NVolSetErrors(ni->vol);
-		make_bad_inode(vi);
-	}
-	if (ctx)
-		ntfs_attr_put_search_ctx(ctx);
-	if (m)
-		unmap_mft_record(base_ni);
-	return err;
-}
-
 #endif	/* NTFS_RW */
 
 /**
@@ -2377,9 +1553,6 @@ struct address_space_operations ntfs_aops = {
 						   disk request queue. */
 #ifdef NTFS_RW
 	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
-	.prepare_write	= ntfs_prepare_write,	/* Prepare page and buffers
-						   ready to receive data. */
-	.commit_write	= ntfs_commit_write,	/* Commit received data. */
 #endif /* NTFS_RW */
 };
 
-- 
cgit v1.2.3


From 98b270362bb9ea6629732e7f5b65b8a6ce4743c7 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 11 Oct 2005 15:40:40 +0100
Subject: NTFS: The big ntfs write(2) rewrite has arrived.  We now implement
 our own file operations ->write(), ->aio_write(), and ->writev() for
 regular files.  This replaces the old use of generic_file_write(), et
 al. and the address space operations ->prepare_write and ->commit_write.
 This means that both sparse and non-sparse (unencrypted and
 uncompressed) files can now be extended using the normal write(2) code
 path.  There are two limitations at present and these are that we never
 create sparse files and that we only have limited support for highly
 fragmented files, i.e. ones whose data attribute is split across
 multiple extents.  When such a case is encountered, EOPNOTSUPP is
 returned.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 Documentation/filesystems/ntfs.txt |   42 +-
 fs/ntfs/ChangeLog                  |   38 +-
 fs/ntfs/Makefile                   |    2 +-
 fs/ntfs/file.c                     | 2247 +++++++++++++++++++++++++++++++++++-
 4 files changed, 2280 insertions(+), 49 deletions(-)

diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index a5fbc8e897fa..614de3124901 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -50,9 +50,14 @@ userspace utilities, etc.
 Features
 ========
 
-- This is a complete rewrite of the NTFS driver that used to be in the kernel.
-  This new driver implements NTFS read support and is functionally equivalent
-  to the old ntfs driver.
+- This is a complete rewrite of the NTFS driver that used to be in the 2.4 and
+  earlier kernels.  This new driver implements NTFS read support and is
+  functionally equivalent to the old ntfs driver and it also implements limited
+  write support.  The biggest limitation at present is that files/directories
+  cannot be created or deleted.  See below for the list of write features that
+  are so far supported.  Another limitation is that writing to compressed files
+  is not implemented at all.  Also, neither read nor write access to encrypted
+  files is so far implemented.
 - The new driver has full support for sparse files on NTFS 3.x volumes which
   the old driver isn't happy with.
 - The new driver supports execution of binaries due to mmap() now being
@@ -78,7 +83,20 @@ Features
 - The new driver supports fsync(2), fdatasync(2), and msync(2).
 - The new driver supports readv(2) and writev(2).
 - The new driver supports access time updates (including mtime and ctime).
-
+- The new driver supports truncate(2) and open(2) with O_TRUNC.  But at present
+  only very limited support for highly fragmented files, i.e. ones which have
+  their data attribute split across multiple extents, is included.  Another
+  limitation is that at present truncate(2) will never create sparse files,
+  since to mark a file sparse we need to modify the directory entry for the
+  file and we do not implement directory modifications yet.
+- The new driver supports write(2) which can both overwrite existing data and
+  extend the file size so that you can write beyond the existing data.  Also,
+  writing into sparse regions is supported and the holes are filled in with
+  clusters.  But at present only limited support for highly fragmented files,
+  i.e. ones which have their data attribute split across multiple extents, is
+  included.  Another limitation is that write(2) will never create sparse
+  files, since to mark a file sparse we need to modify the directory entry for
+  the file and we do not implement directory modifications yet.
 
 Supported mount options
 =======================
@@ -439,6 +457,22 @@ ChangeLog
 
 Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
 
+2.1.25:
+	- Write support is now extended with write(2) being able to both
+	  overwrite existing file data and to extend files.  Also, if a write
+	  to a sparse region occurs, write(2) will fill in the hole.  Note,
+	  mmap(2) based writes still do not support writing into holes or
+	  writing beyond the initialized size.
+	- Write support has a new feature: truncate(2) and open(2) with
+	  O_TRUNC are now implemented, thus files can be made both smaller
+	  and larger.
+	- Note: Both write(2) and truncate(2)/open(2) with O_TRUNC still have
+	  limitations in that they
+	  - only provide limited support for highly fragmented files.
+	  - only work on regular, i.e. uncompressed and unencrypted files.
+	  - never create sparse files although this will change once directory
+	    operations are implemented.
+	- Lots of bug fixes and enhancements across the board.
 2.1.24:
 	- Support journals ($LogFile) which have been modified by chkdsk.  This
 	  means users can boot into Windows after we marked the volume dirty.
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 3b8ff2318085..03015c7b236c 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -1,16 +1,15 @@
 ToDo/Notes:
 	- Find and fix bugs.
-	- In between ntfs_prepare/commit_write, need exclusion between
-	  simultaneous file extensions.  This is given to us by holding i_sem
-	  on the inode.  The only places in the kernel when a file is resized
-	  are prepare/commit write and ntfs_truncate() for both of which i_sem
-	  is held.  Just have to be careful in read-/writepage and other helpers
+	- The only places in the kernel where a file is resized are
+	  ntfs_file_write*() and ntfs_truncate() for both of which i_sem is
+	  held.  Just have to be careful in read-/writepage and other helpers
 	  not running under i_sem that we play nice...  Also need to be careful
-	  with initialized_size extention in ntfs_prepare_write and writepage.
-	  UPDATE: The only things that need to be checked are
-	  prepare/commit_write as well as the compressed write and the other
-	  attribute resize/write cases like index attributes, etc.  For now
-	  none of these are implemented so are safe.
+	  with initialized_size extension in ntfs_file_write*() and writepage.
+	  UPDATE: The only things that need to be checked are the compressed
+	  write and the other attribute resize/write cases like index
+	  attributes, etc.  For now none of these are implemented so are safe.
+	- Implement filling in of holes in aops.c::ntfs_writepage() and its
+	  helpers.
 	- Implement mft.c::sync_mft_mirror_umount().  We currently will just
 	  leave the volume dirty on umount if the final iput(vol->mft_ino)
 	  causes a write of any mirrored mft records due to the mft mirror
@@ -20,7 +19,7 @@ ToDo/Notes:
 	- Enable the code for setting the NT4 compatibility flag when we start
 	  making NTFS 1.2 specific modifications.
 
-2.1.25-WIP
+2.1.25 - (Almost) fully implement write(2) and truncate(2).
 
 	- Change ntfs_map_runlist_nolock(), ntfs_attr_find_vcn_nolock() and
 	  {__,}ntfs_cluster_free() to also take an optional attribute search
@@ -49,7 +48,12 @@ ToDo/Notes:
 	  extend the allocation of an attributes.  Optionally, the data size,
 	  but not the initialized size can be extended, too.
 	- Implement fs/ntfs/inode.[hc]::ntfs_truncate().  It only supports
-	  uncompressed and unencrypted files.
+	  uncompressed and unencrypted files and it never creates sparse files
+	  at least for the moment (making a file sparse requires us to modify
+	  its directory entries and we do not support directory operations at
+	  the moment).  Also, support for highly fragmented files, i.e. ones
+	  whose data attribute is split across multiple extents, is severely
+	  limited.  When such a case is encountered, EOPNOTSUPP is returned.
 	- Enable ATTR_SIZE attribute changes in ntfs_setattr().  This completes
 	  the initial implementation of file truncation.  Now both open(2)ing
 	  a file with the O_TRUNC flag and the {,f}truncate(2) system calls
@@ -61,6 +65,16 @@ ToDo/Notes:
 	  and cond_resched() in the main loop as we could be dirtying a lot of
 	  pages and this ensures we play nice with the VM and the system as a
 	  whole.
+	- Implement file operations ->write, ->aio_write, ->writev for regular
+	  files.  This replaces the old use of generic_file_write(), et al and
+	  the address space operations ->prepare_write and ->commit_write.
+	  This means that both sparse and non-sparse (unencrypted and
+	  uncompressed) files can now be extended using the normal write(2)
+	  code path.  There are two limitations at present and these are that
+	  we never create sparse files and that we only have limited support
+	  for highly fragmented files, i.e. ones whose data attribute is split
+	  across multiple extents.   When such a case is encountered,
+	  EOPNOTSUPP is returned.
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index a3ce2c0e7dd9..d0d45d1c853a 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
 	     index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
 	     unistr.o upcase.o
 
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.25-WIP\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.25\"
 
 ifeq ($(CONFIG_NTFS_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index be9fd1dd423d..cf2a0e2330df 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -19,11 +19,24 @@
  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/pagemap.h>
 #include <linux/buffer_head.h>
+#include <linux/pagemap.h>
+#include <linux/pagevec.h>
+#include <linux/sched.h>
+#include <linux/swap.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
 
+#include <asm/page.h>
+#include <asm/uaccess.h>
+
+#include "attrib.h"
+#include "bitmap.h"
 #include "inode.h"
 #include "debug.h"
+#include "lcnalloc.h"
+#include "malloc.h"
+#include "mft.h"
 #include "ntfs.h"
 
 /**
@@ -55,6 +68,2176 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
 
 #ifdef NTFS_RW
 
+/**
+ * ntfs_attr_extend_initialized - extend the initialized size of an attribute
+ * @ni:			ntfs inode of the attribute to extend
+ * @new_init_size:	requested new initialized size in bytes
+ * @cached_page:	store any allocated but unused page here
+ * @lru_pvec:		lru-buffering pagevec of the caller
+ *
+ * Extend the initialized size of an attribute described by the ntfs inode @ni
+ * to @new_init_size bytes.  This involves zeroing any non-sparse space between
+ * the old initialized size and @new_init_size both in the page cache and on
+ * disk (if relevant complete pages are zeroed in the page cache then these may
+ * simply be marked dirty for later writeout).  There is one caveat and that is
+ * that if any uptodate page cache pages between the old initialized size and
+ * the smaller of @new_init_size and the file size (vfs inode->i_size) are in
+ * memory, these need to be marked dirty without being zeroed since they could
+ * be non-zero due to mmap() based writes.
+ *
+ * As a side-effect, the file size (vfs inode->i_size) may be incremented as,
+ * in the resident attribute case, it is tied to the initialized size and, in
+ * the non-resident attribute case, it may not fall below the initialized size.
+ *
+ * Note that if the attribute is resident, we do not need to touch the page
+ * cache at all.  This is because if the page cache page is not uptodate we
+ * bring it uptodate later, when doing the write to the mft record since we
+ * then already have the page mapped.  And if the page is uptodate, the
+ * non-initialized region will already have been zeroed when the page was
+ * brought uptodate and the region may in fact already have been overwritten
+ * with new data via mmap() based writes, so we cannot just zero it.  And since
+ * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped
+ * is unspecified, we choose not to do zeroing and thus we do not need to touch
+ * the page at all.  For a more detailed explanation see ntfs_truncate() which
+ * is in fs/ntfs/inode.c.
+ *
+ * @cached_page and @lru_pvec are just optimisations for dealing with multiple
+ * pages.
+ *
+ * Return 0 on success and -errno on error.  In the case that an error is
+ * encountered it is possible that the initialized size will already have been
+ * incremented some way towards @new_init_size but it is guaranteed that if
+ * this is the case, the necessary zeroing will also have happened and that all
+ * metadata is self-consistent.
+ *
+ * Locking: This function locks the mft record of the base ntfs inode and
+ * maintains the lock throughout execution of the function.  This is required
+ * so that the initialized size of the attribute can be modified safely.
+ */
+static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size,
+		struct page **cached_page, struct pagevec *lru_pvec)
+{
+	s64 old_init_size;
+	loff_t old_i_size;
+	pgoff_t index, end_index;
+	unsigned long flags;
+	struct inode *vi = VFS_I(ni);
+	ntfs_inode *base_ni;
+	MFT_RECORD *m = NULL;
+	ATTR_RECORD *a;
+	ntfs_attr_search_ctx *ctx = NULL;
+	struct address_space *mapping;
+	struct page *page = NULL;
+	u8 *kattr;
+	int err;
+	u32 attr_len;
+
+	read_lock_irqsave(&ni->size_lock, flags);
+	old_init_size = ni->initialized_size;
+	old_i_size = i_size_read(vi);
+	BUG_ON(new_init_size > ni->allocated_size);
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
+			"old_initialized_size 0x%llx, "
+			"new_initialized_size 0x%llx, i_size 0x%llx.",
+			vi->i_ino, (unsigned)le32_to_cpu(ni->type),
+			(unsigned long long)old_init_size,
+			(unsigned long long)new_init_size, old_i_size);
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	/* Use goto to reduce indentation and we need the label below anyway. */
+	if (NInoNonResident(ni))
+		goto do_non_resident_extend;
+	BUG_ON(old_init_size != old_i_size);
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		m = NULL;
+		goto err_out;
+	}
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, 0, NULL, 0, ctx);
+	if (unlikely(err)) {
+		if (err == -ENOENT)
+			err = -EIO;
+		goto err_out;
+	}
+	m = ctx->mrec;
+	a = ctx->attr;
+	BUG_ON(a->non_resident);
+	/* The total length of the attribute value. */
+	attr_len = le32_to_cpu(a->data.resident.value_length);
+	BUG_ON(old_i_size != (loff_t)attr_len);
+	/*
+	 * Do the zeroing in the mft record and update the attribute size in
+	 * the mft record.
+	 */
+	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
+	memset(kattr + attr_len, 0, new_init_size - attr_len);
+	a->data.resident.value_length = cpu_to_le32((u32)new_init_size);
+	/* Finally, update the sizes in the vfs and ntfs inodes. */
+	write_lock_irqsave(&ni->size_lock, flags);
+	i_size_write(vi, new_init_size);
+	ni->initialized_size = new_init_size;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+	goto done;
+do_non_resident_extend:
+	/*
+	 * If the new initialized size @new_init_size exceeds the current file
+	 * size (vfs inode->i_size), we need to extend the file size to the
+	 * new initialized size.
+	 */
+	if (new_init_size > old_i_size) {
+		m = map_mft_record(base_ni);
+		if (IS_ERR(m)) {
+			err = PTR_ERR(m);
+			m = NULL;
+			goto err_out;
+		}
+		ctx = ntfs_attr_get_search_ctx(base_ni, m);
+		if (unlikely(!ctx)) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+				CASE_SENSITIVE, 0, NULL, 0, ctx);
+		if (unlikely(err)) {
+			if (err == -ENOENT)
+				err = -EIO;
+			goto err_out;
+		}
+		m = ctx->mrec;
+		a = ctx->attr;
+		BUG_ON(!a->non_resident);
+		BUG_ON(old_i_size != (loff_t)
+				sle64_to_cpu(a->data.non_resident.data_size));
+		a->data.non_resident.data_size = cpu_to_sle64(new_init_size);
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		mark_mft_record_dirty(ctx->ntfs_ino);
+		/* Update the file size in the vfs inode. */
+		i_size_write(vi, new_init_size);
+		ntfs_attr_put_search_ctx(ctx);
+		ctx = NULL;
+		unmap_mft_record(base_ni);
+		m = NULL;
+	}
+	mapping = vi->i_mapping;
+	index = old_init_size >> PAGE_CACHE_SHIFT;
+	end_index = (new_init_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	do {
+		/*
+		 * Read the page.  If the page is not present, this will zero
+		 * the uninitialized regions for us.
+		 */
+		page = read_cache_page(mapping, index,
+				(filler_t*)mapping->a_ops->readpage, NULL);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto init_err_out;
+		}
+		wait_on_page_locked(page);
+		if (unlikely(!PageUptodate(page) || PageError(page))) {
+			page_cache_release(page);
+			err = -EIO;
+			goto init_err_out;
+		}
+		/*
+		 * Update the in-memory initialized size; this is enough for
+		 * ntfs_writepage().  Cast to s64 first: pgoff_t may be 32-bit.
+		 */
+		write_lock_irqsave(&ni->size_lock, flags);
+		ni->initialized_size = (s64)(index + 1) << PAGE_CACHE_SHIFT;
+		if (ni->initialized_size > new_init_size)
+			ni->initialized_size = new_init_size;
+		write_unlock_irqrestore(&ni->size_lock, flags);
+		/* Set the page dirty so it gets written out. */
+		set_page_dirty(page);
+		page_cache_release(page);
+		/*
+		 * Play nice with the vm and the rest of the system.  This is
+		 * very much needed as we can potentially be modifying the
+		 * initialised size from a very small value to a really huge
+		 * value, e.g.
+		 *	f = open(somefile, O_TRUNC);
+		 *	truncate(f, 10GiB);
+		 *	seek(f, 10GiB);
+		 *	write(f, 1);
+		 * And this would mean we would be marking dirty hundreds of
+		 * thousands of pages or as in the above example more than
+		 * two and a half million pages!
+		 *
+		 * TODO: For sparse pages could optimize this workload by using
+		 * the FsMisc / MiscFs page bit as a "PageIsSparse" bit.  This
+		 * would be set in readpage for sparse pages and here we would
+		 * not need to mark dirty any pages which have this bit set.
+		 * The only caveat is that we have to clear the bit everywhere
+		 * where we allocate any clusters that lie in the page or that
+		 * contain the page.
+		 *
+		 * TODO: An even greater optimization would be for us to only
+		 * call readpage() on pages which are not in sparse regions as
+		 * determined from the runlist.  This would greatly reduce the
+		 * number of pages we read and make dirty in the case of sparse
+		 * files.
+		 */
+		balance_dirty_pages_ratelimited(mapping);
+		cond_resched();
+	} while (++index < end_index);
+	read_lock_irqsave(&ni->size_lock, flags);
+	BUG_ON(ni->initialized_size != new_init_size);
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	/* Now bring in sync the initialized_size in the mft record. */
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		m = NULL;
+		goto init_err_out;
+	}
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto init_err_out;
+	}
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, 0, NULL, 0, ctx);
+	if (unlikely(err)) {
+		if (err == -ENOENT)
+			err = -EIO;
+		goto init_err_out;
+	}
+	m = ctx->mrec;
+	a = ctx->attr;
+	BUG_ON(!a->non_resident);
+	a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size);
+done:
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.",
+			(unsigned long long)new_init_size, i_size_read(vi));
+	return 0;
+init_err_out:
+	write_lock_irqsave(&ni->size_lock, flags);
+	ni->initialized_size = old_init_size;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+err_out:
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	ntfs_debug("Failed.  Returning error code %i.", err);
+	return err;
+}
+
+/**
+ * ntfs_fault_in_pages_readable - fault userspace pages in for reading
+ * @uaddr:	userspace address at which the buffer begins
+ * @bytes:	length of the userspace buffer in bytes
+ *
+ * Fault a number of userspace pages into pagetables by reading one byte at
+ * @uaddr and then one byte every PAGE_SIZE bytes after it.  The walk stops at
+ * the first __get_user() failure or once it reaches the page aligned end of
+ * the buffer.
+ *
+ * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
+ * with more than two userspace pages as well as handling the single page case
+ * elegantly.
+ *
+ * Note, the final __get_user() may well run out-of-bounds of the user buffer,
+ * but _not_ out-of-bounds of the page the user buffer belongs to, and since
+ * this is only a read and not a write, and since it is still in the same page,
+ * it should not matter and this makes the code much simpler.
+ */
+static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
+		int bytes)
+{
+	/* The first byte outside the last page we care about. */
+	const char __user *limit = (const char __user*)PAGE_ALIGN(
+			(ptrdiff_t __user)uaddr + bytes);
+	volatile char byte;
+
+	/* One read per page is enough; stop as soon as a read fails. */
+	do {
+		if (__get_user(byte, uaddr))
+			break;
+		uaddr += PAGE_SIZE;
+	} while (uaddr < limit);
+}
+
+/**
+ * ntfs_fault_in_pages_readable_iovec - fault in pages described by iovecs
+ * @iov:	iovec in which the userspace data begins
+ * @iov_ofs:	byte offset into @iov at which the data begins
+ * @bytes:	number of bytes to fault in, possibly spanning several iovecs
+ *
+ * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
+ */
+static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
+		size_t iov_ofs, int bytes)
+{
+	do {
+		unsigned seg_len = iov->iov_len - iov_ofs;
+
+		if (seg_len > bytes)
+			seg_len = bytes;
+		ntfs_fault_in_pages_readable(iov->iov_base + iov_ofs, seg_len);
+		bytes -= seg_len;
+		iov_ofs = 0;
+		iov++;
+	} while (bytes);
+}
+
+/**
+ * __ntfs_grab_cache_pages - obtain a number of locked pages
+ * @mapping:	address space mapping from which to obtain page cache pages
+ * @index:	starting index in @mapping at which to begin obtaining pages
+ * @nr_pages:	number of page cache pages to obtain
+ * @pages:	array of pages in which to return the obtained page cache pages
+ * @cached_page: allocated but as yet unused page
+ * @lru_pvec:	lru-buffering pagevec of caller
+ *
+ * Obtain @nr_pages locked page cache pages from @mapping starting at @index.
+ *
+ * If a page is newly created, increment its refcount and add it to the
+ * caller's lru-buffering pagevec @lru_pvec.
+ *
+ * This is the same as mm/filemap.c::__grab_cache_page(), except that @nr_pages
+ * are obtained at once instead of just one page and that 0 is returned on
+ * success and -errno on error, in which case no pages are left locked.
+ *
+ * Note, the page locks are obtained in ascending page index order.
+ */
+static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
+		pgoff_t index, const unsigned nr_pages, struct page **pages,
+		struct page **cached_page, struct pagevec *lru_pvec)
+{
+	int err = 0, nr = 0;
+
+	BUG_ON(!nr_pages);
+	do {
+		pages[nr] = find_lock_page(mapping, index);
+		if (!pages[nr]) {
+			if (!*cached_page) {
+				*cached_page = page_cache_alloc(mapping);
+				if (unlikely(!*cached_page)) {
+					err = -ENOMEM;
+					goto err_out;
+				}
+			}
+			err = add_to_page_cache(*cached_page, mapping, index,
+					GFP_KERNEL);
+			if (unlikely(err)) {
+				if (err != -EEXIST)
+					goto err_out;
+				/* Page now exists; clear error and retry. */
+				err = 0;
+				continue;
+			}
+			pages[nr] = *cached_page;
+			page_cache_get(*cached_page);
+			if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
+				__pagevec_lru_add(lru_pvec);
+			*cached_page = NULL;
+		}
+		index++;
+		nr++;
+	} while (nr < nr_pages);
+out:
+	return err;
+err_out:
+	while (nr > 0) {
+		unlock_page(pages[--nr]);
+		page_cache_release(pages[nr]);
+	}
+	goto out;
+}
+
+static inline int ntfs_submit_bh_for_read(struct buffer_head *bh)
+{
+	lock_buffer(bh);	/* Buffer is locked before it is submitted. */
+	get_bh(bh);		/* Extra reference, presumably for the i/o. */
+	bh->b_end_io = end_buffer_read_sync;	/* I/O completion callback. */
+	return submit_bh(READ, bh);	/* Queue the read; return result. */
+}
+
+/**
+ * ntfs_prepare_pages_for_non_resident_write - prepare pages for receiving data
+ * @pages:	array of destination pages
+ * @nr_pages:	number of pages in @pages
+ * @pos:	byte position in file at which the write begins
+ * @bytes:	number of bytes to be written
+ *
+ * This is called for non-resident attributes from ntfs_file_buffered_write()
+ * with i_sem held on the inode (@pages[0]->mapping->host).  There are
+ * @nr_pages pages in @pages which are locked but not kmap()ped.  The source
+ * data has not yet been copied into the @pages.
+ * 
+ * Need to fill any holes with actual clusters, allocate buffers if necessary,
+ * ensure all the buffers are mapped, and bring uptodate any buffers that are
+ * only partially being written to.
+ *
+ * If @nr_pages is greater than one, we are guaranteed that the cluster size is
+ * greater than PAGE_CACHE_SIZE, that all pages in @pages are entirely inside
+ * the same cluster and that they are the entirety of that cluster, and that
+ * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole.
+ *
+ * i_size is not to be modified yet.
+ *
+ * Return 0 on success or -errno on error.
+ */
+static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
+		unsigned nr_pages, s64 pos, size_t bytes)
+{
+	VCN vcn, highest_vcn = 0, cpos, cend, bh_cpos, bh_cend;
+	LCN lcn;
+	s64 bh_pos, vcn_len, end, initialized_size;
+	sector_t lcn_block;
+	struct page *page;
+	struct inode *vi;
+	ntfs_inode *ni, *base_ni = NULL;
+	ntfs_volume *vol;
+	runlist_element *rl, *rl2;
+	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
+	ntfs_attr_search_ctx *ctx = NULL;
+	MFT_RECORD *m = NULL;
+	ATTR_RECORD *a = NULL;
+	unsigned long flags;
+	u32 attr_rec_len = 0;
+	unsigned blocksize, u;
+	int err, mp_size;
+	BOOL rl_write_locked, was_hole, is_retry;
+	unsigned char blocksize_bits;
+	struct {
+		u8 runlist_merged:1;
+		u8 mft_attr_mapped:1;
+		u8 mp_rebuilt:1;
+		u8 attr_switched:1;
+	} status = { 0, 0, 0, 0 };
+
+	BUG_ON(!nr_pages);
+	BUG_ON(!pages);
+	BUG_ON(!*pages);
+	vi = pages[0]->mapping->host;
+	ni = NTFS_I(vi);
+	vol = ni->vol;
+	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
+			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%x.",
+			vi->i_ino, ni->type, pages[0]->index, nr_pages,
+			(long long)pos, bytes);
+	blocksize_bits = vi->i_blkbits;
+	blocksize = 1 << blocksize_bits;
+	u = 0;
+	do {
+		struct page *page = pages[u];
+		/*
+		 * create_empty_buffers() will create uptodate/dirty buffers if
+		 * the page is uptodate/dirty.
+		 */
+		if (!page_has_buffers(page)) {
+			create_empty_buffers(page, blocksize, 0);
+			if (unlikely(!page_has_buffers(page)))
+				return -ENOMEM;
+		}
+	} while (++u < nr_pages);
+	rl_write_locked = FALSE;
+	rl = NULL;
+	err = 0;
+	vcn = lcn = -1;
+	vcn_len = 0;
+	lcn_block = -1;
+	was_hole = FALSE;
+	cpos = pos >> vol->cluster_size_bits;
+	end = pos + bytes;
+	cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits;
+	/*
+	 * Loop over each page and for each page over each buffer.  Use goto to
+	 * reduce indentation.
+	 */
+	u = 0;
+do_next_page:
+	page = pages[u];
+	bh_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+	bh = head = page_buffers(page);
+	do {
+		VCN cdelta;
+		s64 bh_end;
+		unsigned bh_cofs;
+
+		/* Clear buffer_new on all buffers to reinitialise state. */
+		if (buffer_new(bh))
+			clear_buffer_new(bh);
+		bh_end = bh_pos + blocksize;
+		bh_cpos = bh_pos >> vol->cluster_size_bits;
+		bh_cofs = bh_pos & vol->cluster_size_mask;
+		if (buffer_mapped(bh)) {
+			/*
+			 * The buffer is already mapped.  If it is uptodate,
+			 * ignore it.
+			 */
+			if (buffer_uptodate(bh))
+				continue;
+			/*
+			 * The buffer is not uptodate.  If the page is uptodate
+			 * set the buffer uptodate and otherwise ignore it.
+			 */
+			if (PageUptodate(page)) {
+				set_buffer_uptodate(bh);
+				continue;
+			}
+			/*
+			 * Neither the page nor the buffer are uptodate.  If
+			 * the buffer is only partially being written to, we
+			 * need to read it in before the write, i.e. now.
+			 */
+			if ((bh_pos < pos && bh_end > pos) ||
+					(bh_pos < end && bh_end > end)) {
+				/*
+				 * If the buffer is fully or partially within
+				 * the initialized size, do an actual read.
+				 * Otherwise, simply zero the buffer.
+				 */
+				read_lock_irqsave(&ni->size_lock, flags);
+				initialized_size = ni->initialized_size;
+				read_unlock_irqrestore(&ni->size_lock, flags);
+				if (bh_pos < initialized_size) {
+					ntfs_submit_bh_for_read(bh);
+					*wait_bh++ = bh;
+				} else {
+					u8 *kaddr = kmap_atomic(page, KM_USER0);
+					memset(kaddr + bh_offset(bh), 0,
+							blocksize);
+					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(page);
+					set_buffer_uptodate(bh);
+				}
+			}
+			continue;
+		}
+		/* Unmapped buffer.  Need to map it. */
+		bh->b_bdev = vol->sb->s_bdev;
+		/*
+		 * If the current buffer is in the same clusters as the map
+		 * cache, there is no need to check the runlist again.  The
+		 * map cache is made up of @vcn, which is the first cached file
+		 * cluster, @vcn_len which is the number of cached file
+		 * clusters, @lcn is the device cluster corresponding to @vcn,
+		 * and @lcn_block is the block number corresponding to @lcn.
+		 */
+		cdelta = bh_cpos - vcn;
+		if (likely(!cdelta || (cdelta > 0 && cdelta < vcn_len))) {
+map_buffer_cached:
+			BUG_ON(lcn < 0);
+			bh->b_blocknr = lcn_block +
+					(cdelta << (vol->cluster_size_bits -
+					blocksize_bits)) +
+					(bh_cofs >> blocksize_bits);
+			set_buffer_mapped(bh);
+			/*
+			 * If the page is uptodate so is the buffer.  If the
+			 * buffer is fully outside the write, we ignore it if
+			 * it was already allocated and we mark it dirty so it
+			 * gets written out if we allocated it.  On the other
+			 * hand, if we allocated the buffer but we are not
+			 * marking it dirty we set buffer_new so we can do
+			 * error recovery.
+			 */
+			if (PageUptodate(page)) {
+				if (!buffer_uptodate(bh))
+					set_buffer_uptodate(bh);
+				if (unlikely(was_hole)) {
+					/* We allocated the buffer. */
+					unmap_underlying_metadata(bh->b_bdev,
+							bh->b_blocknr);
+					if (bh_end <= pos || bh_pos >= end)
+						mark_buffer_dirty(bh);
+					else
+						set_buffer_new(bh);
+				}
+				continue;
+			}
+			/* Page is _not_ uptodate. */
+			if (likely(!was_hole)) {
+				/*
+				 * Buffer was already allocated.  If it is not
+				 * uptodate and is only partially being written
+				 * to, we need to read it in before the write,
+				 * i.e. now.
+				 */
+				if (!buffer_uptodate(bh) && ((bh_pos < pos &&
+						bh_end > pos) ||
+						(bh_end > end &&
+						bh_end > end))) {
+					/*
+					 * If the buffer is fully or partially
+					 * within the initialized size, do an
+					 * actual read.  Otherwise, simply zero
+					 * the buffer.
+					 */
+					read_lock_irqsave(&ni->size_lock,
+							flags);
+					initialized_size = ni->initialized_size;
+					read_unlock_irqrestore(&ni->size_lock,
+							flags);
+					if (bh_pos < initialized_size) {
+						ntfs_submit_bh_for_read(bh);
+						*wait_bh++ = bh;
+					} else {
+						u8 *kaddr = kmap_atomic(page,
+								KM_USER0);
+						memset(kaddr + bh_offset(bh),
+								0, blocksize);
+						kunmap_atomic(kaddr, KM_USER0);
+						flush_dcache_page(page);
+						set_buffer_uptodate(bh);
+					}
+				}
+				continue;
+			}
+			/* We allocated the buffer. */
+			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+			/*
+			 * If the buffer is fully outside the write, zero it,
+			 * set it uptodate, and mark it dirty so it gets
+			 * written out.  If it is partially being written to,
+			 * zero region surrounding the write but leave it to
+			 * commit write to do anything else.  Finally, if the
+			 * buffer is fully being overwritten, do nothing.
+			 */
+			if (bh_end <= pos || bh_pos >= end) {
+				if (!buffer_uptodate(bh)) {
+					u8 *kaddr = kmap_atomic(page, KM_USER0);
+					memset(kaddr + bh_offset(bh), 0,
+							blocksize);
+					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(page);
+					set_buffer_uptodate(bh);
+				}
+				mark_buffer_dirty(bh);
+				continue;
+			}
+			set_buffer_new(bh);
+			if (!buffer_uptodate(bh) &&
+					(bh_pos < pos || bh_end > end)) {
+				u8 *kaddr;
+				unsigned pofs;
+					
+				kaddr = kmap_atomic(page, KM_USER0);
+				if (bh_pos < pos) {
+					pofs = bh_pos & ~PAGE_CACHE_MASK;
+					memset(kaddr + pofs, 0, pos - bh_pos);
+				}
+				if (bh_end > end) {
+					pofs = end & ~PAGE_CACHE_MASK;
+					memset(kaddr + pofs, 0, bh_end - end);
+				}
+				kunmap_atomic(kaddr, KM_USER0);
+				flush_dcache_page(page);
+			}
+			continue;
+		}
+		/*
+		 * Slow path: this is the first buffer in the cluster.  If it
+		 * is outside allocated size and is not uptodate, zero it and
+		 * set it uptodate.
+		 */
+		read_lock_irqsave(&ni->size_lock, flags);
+		initialized_size = ni->allocated_size;
+		read_unlock_irqrestore(&ni->size_lock, flags);
+		if (bh_pos > initialized_size) {
+			if (PageUptodate(page)) {
+				if (!buffer_uptodate(bh))
+					set_buffer_uptodate(bh);
+			} else if (!buffer_uptodate(bh)) {
+				u8 *kaddr = kmap_atomic(page, KM_USER0);
+				memset(kaddr + bh_offset(bh), 0, blocksize);
+				kunmap_atomic(kaddr, KM_USER0);
+				flush_dcache_page(page);
+				set_buffer_uptodate(bh);
+			}
+			continue;
+		}
+		is_retry = FALSE;
+		if (!rl) {
+			down_read(&ni->runlist.lock);
+retry_remap:
+			rl = ni->runlist.rl;
+		}
+		if (likely(rl != NULL)) {
+			/* Seek to element containing target cluster. */
+			while (rl->length && rl[1].vcn <= bh_cpos)
+				rl++;
+			lcn = ntfs_rl_vcn_to_lcn(rl, bh_cpos);
+			if (likely(lcn >= 0)) {
+				/*
+				 * Successful remap, setup the map cache and
+				 * use that to deal with the buffer.
+				 */
+				was_hole = FALSE;
+				vcn = bh_cpos;
+				vcn_len = rl[1].vcn - vcn;
+				lcn_block = lcn << (vol->cluster_size_bits -
+						blocksize_bits);
+				/*
+				 * If the number of remaining clusters in the
+				 * @pages is smaller or equal to the number of
+				 * cached clusters, unlock the runlist as the
+				 * map cache will be used from now on.
+				 */
+				if (likely(vcn + vcn_len >= cend)) {
+					if (rl_write_locked) {
+						up_write(&ni->runlist.lock);
+						rl_write_locked = FALSE;
+					} else
+						up_read(&ni->runlist.lock);
+					rl = NULL;
+				}
+				goto map_buffer_cached;
+			}
+		} else
+			lcn = LCN_RL_NOT_MAPPED;
+		/*
+		 * If it is not a hole and not out of bounds, the runlist is
+		 * probably unmapped so try to map it now.
+		 */
+		if (unlikely(lcn != LCN_HOLE && lcn != LCN_ENOENT)) {
+			if (likely(!is_retry && lcn == LCN_RL_NOT_MAPPED)) {
+				/* Attempt to map runlist. */
+				if (!rl_write_locked) {
+					/*
+					 * We need the runlist locked for
+					 * writing, so if it is locked for
+					 * reading relock it now and retry in
+					 * case it changed whilst we dropped
+					 * the lock.
+					 */
+					up_read(&ni->runlist.lock);
+					down_write(&ni->runlist.lock);
+					rl_write_locked = TRUE;
+					goto retry_remap;
+				}
+				err = ntfs_map_runlist_nolock(ni, bh_cpos,
+						NULL);
+				if (likely(!err)) {
+					is_retry = TRUE;
+					goto retry_remap;
+				}
+				/*
+				 * If @vcn is out of bounds, pretend @lcn is
+				 * LCN_ENOENT.  As long as the buffer is out
+				 * of bounds this will work fine.
+				 */
+				if (err == -ENOENT) {
+					lcn = LCN_ENOENT;
+					err = 0;
+					goto rl_not_mapped_enoent;
+				}
+			} else
+				err = -EIO;
+			/* Failed to map the buffer, even after retrying. */
+			bh->b_blocknr = -1;
+			ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
+					"attribute type 0x%x, vcn 0x%llx, "
+					"vcn offset 0x%x, because its "
+					"location on disk could not be "
+					"determined%s (error code %i).",
+					ni->mft_no, ni->type,
+					(unsigned long long)bh_cpos,
+					(unsigned)bh_pos &
+					vol->cluster_size_mask,
+					is_retry ? " even after retrying" : "",
+					err);
+			break;
+		}
+rl_not_mapped_enoent:
+		/*
+		 * The buffer is in a hole or out of bounds.  We need to fill
+		 * the hole, unless the buffer is in a cluster which is not
+		 * touched by the write, in which case we just leave the buffer
+		 * unmapped.  This can only happen when the cluster size is
+		 * less than the page cache size.
+		 */
+		if (unlikely(vol->cluster_size < PAGE_CACHE_SIZE)) {
+			bh_cend = (bh_end + vol->cluster_size - 1) >>
+					vol->cluster_size_bits;
+			if ((bh_cend <= cpos || bh_cpos >= cend)) {
+				bh->b_blocknr = -1;
+				/*
+				 * If the buffer is uptodate we skip it.  If it
+				 * is not but the page is uptodate, we can set
+				 * the buffer uptodate.  If the page is not
+				 * uptodate, we can clear the buffer and set it
+				 * uptodate.  Whether this is worthwhile is
+				 * debatable and this could be removed.
+				 */
+				if (PageUptodate(page)) {
+					if (!buffer_uptodate(bh))
+						set_buffer_uptodate(bh);
+				} else if (!buffer_uptodate(bh)) {
+					u8 *kaddr = kmap_atomic(page, KM_USER0);
+					memset(kaddr + bh_offset(bh), 0,
+							blocksize);
+					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(page);
+					set_buffer_uptodate(bh);
+				}
+				continue;
+			}
+		}
+		/*
+		 * Out of bounds buffer is invalid if it was not really out of
+		 * bounds.
+		 */
+		BUG_ON(lcn != LCN_HOLE);
+		/*
+		 * We need the runlist locked for writing, so if it is locked
+		 * for reading relock it now and retry in case it changed
+		 * whilst we dropped the lock.
+		 */
+		BUG_ON(!rl);
+		if (!rl_write_locked) {
+			up_read(&ni->runlist.lock);
+			down_write(&ni->runlist.lock);
+			rl_write_locked = TRUE;
+			goto retry_remap;
+		}
+		/* Find the previous last allocated cluster. */
+		BUG_ON(rl->lcn != LCN_HOLE);
+		lcn = -1;
+		rl2 = rl;
+		while (--rl2 >= ni->runlist.rl) {
+			if (rl2->lcn >= 0) {
+				lcn = rl2->lcn + rl2->length;
+				break;
+			}
+		}
+		rl2 = ntfs_cluster_alloc(vol, bh_cpos, 1, lcn, DATA_ZONE,
+				FALSE);
+		if (IS_ERR(rl2)) {
+			err = PTR_ERR(rl2);
+			ntfs_debug("Failed to allocate cluster, error code %i.",
+					err);
+			break;
+		}
+		lcn = rl2->lcn;
+		rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
+		if (IS_ERR(rl)) {
+			err = PTR_ERR(rl);
+			if (err != -ENOMEM)
+				err = -EIO;
+			if (ntfs_cluster_free_from_rl(vol, rl2)) {
+				ntfs_error(vol->sb, "Failed to release "
+						"allocated cluster in error "
+						"code path.  Run chkdsk to "
+						"recover the lost cluster.");
+				NVolSetErrors(vol);
+			}
+			ntfs_free(rl2);
+			break;
+		}
+		ni->runlist.rl = rl;
+		status.runlist_merged = 1;
+		ntfs_debug("Allocated cluster, lcn 0x%llx.", lcn);
+		/* Map and lock the mft record and get the attribute record. */
+		if (!NInoAttr(ni))
+			base_ni = ni;
+		else
+			base_ni = ni->ext.base_ntfs_ino;
+		m = map_mft_record(base_ni);
+		if (IS_ERR(m)) {
+			err = PTR_ERR(m);
+			break;
+		}
+		ctx = ntfs_attr_get_search_ctx(base_ni, m);
+		if (unlikely(!ctx)) {
+			err = -ENOMEM;
+			unmap_mft_record(base_ni);
+			break;
+		}
+		status.mft_attr_mapped = 1;
+		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+				CASE_SENSITIVE, bh_cpos, NULL, 0, ctx);
+		if (unlikely(err)) {
+			if (err == -ENOENT)
+				err = -EIO;
+			break;
+		}
+		m = ctx->mrec;
+		a = ctx->attr;
+		/*
+		 * Find the runlist element with which the attribute extent
+		 * starts.  Note, we cannot use the _attr_ version because we
+		 * have mapped the mft record.  That is ok because we know the
+		 * runlist fragment must be mapped already to have ever gotten
+		 * here, so we can just use the _rl_ version.
+		 */
+		vcn = sle64_to_cpu(a->data.non_resident.lowest_vcn);
+		rl2 = ntfs_rl_find_vcn_nolock(rl, vcn);
+		BUG_ON(!rl2);
+		BUG_ON(!rl2->length);
+		BUG_ON(rl2->lcn < LCN_HOLE);
+		highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
+		/*
+		 * If @highest_vcn is zero, calculate the real highest_vcn
+		 * (which can really be zero).
+		 */
+		if (!highest_vcn)
+			highest_vcn = (sle64_to_cpu(
+					a->data.non_resident.allocated_size) >>
+					vol->cluster_size_bits) - 1;
+		/*
+		 * Determine the size of the mapping pairs array for the new
+		 * extent, i.e. the old extent with the hole filled.
+		 */
+		mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, vcn,
+				highest_vcn);
+		if (unlikely(mp_size <= 0)) {
+			if (!(err = mp_size))
+				err = -EIO;
+			ntfs_debug("Failed to get size for mapping pairs "
+					"array, error code %i.", err);
+			break;
+		}
+		/*
+		 * Resize the attribute record to fit the new mapping pairs
+		 * array.
+		 */
+		attr_rec_len = le32_to_cpu(a->length);
+		err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu(
+				a->data.non_resident.mapping_pairs_offset));
+		if (unlikely(err)) {
+			BUG_ON(err != -ENOSPC);
+			// TODO: Deal with this by using the current attribute
+			// and fill it with as much of the mapping pairs
+			// array as possible.  Then loop over each attribute
+			// extent rewriting the mapping pairs arrays as we go
+			// along and if when we reach the end we have not
+			// enough space, try to resize the last attribute
+			// extent and if even that fails, add a new attribute
+			// extent.
+			// We could also try to resize at each step in the hope
+			// that we will not need to rewrite every single extent.
+			// Note, we may need to decompress some extents to fill
+			// the runlist as we are walking the extents...
+			ntfs_error(vol->sb, "Not enough space in the mft "
+					"record for the extended attribute "
+					"record.  This case is not "
+					"implemented yet.");
+			err = -EOPNOTSUPP;
+			break ;
+		}
+		status.mp_rebuilt = 1;
+		/*
+		 * Generate the mapping pairs array directly into the attribute
+		 * record.
+		 */
+		err = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
+				a->data.non_resident.mapping_pairs_offset),
+				mp_size, rl2, vcn, highest_vcn, NULL);
+		if (unlikely(err)) {
+			ntfs_error(vol->sb, "Cannot fill hole in inode 0x%lx, "
+					"attribute type 0x%x, because building "
+					"the mapping pairs failed with error "
+					"code %i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+			err = -EIO;
+			break;
+		}
+		/* Update the highest_vcn but only if it was not set. */
+		if (unlikely(!a->data.non_resident.highest_vcn))
+			a->data.non_resident.highest_vcn =
+					cpu_to_sle64(highest_vcn);
+		/*
+		 * If the attribute is sparse/compressed, update the compressed
+		 * size in the ntfs_inode structure and the attribute record.
+		 */
+		if (likely(NInoSparse(ni) || NInoCompressed(ni))) {
+			/*
+			 * If we are not in the first attribute extent, switch
+			 * to it, but first ensure the changes will make it to
+			 * disk later.
+			 */
+			if (a->data.non_resident.lowest_vcn) {
+				flush_dcache_mft_record_page(ctx->ntfs_ino);
+				mark_mft_record_dirty(ctx->ntfs_ino);
+				ntfs_attr_reinit_search_ctx(ctx);
+				err = ntfs_attr_lookup(ni->type, ni->name,
+						ni->name_len, CASE_SENSITIVE,
+						0, NULL, 0, ctx);
+				if (unlikely(err)) {
+					status.attr_switched = 1;
+					break;
+				}
+				/* @m is not used any more so do not set it. */
+				a = ctx->attr;
+			}
+			write_lock_irqsave(&ni->size_lock, flags);
+			ni->itype.compressed.size += vol->cluster_size;
+			a->data.non_resident.compressed_size =
+					cpu_to_sle64(ni->itype.compressed.size);
+			write_unlock_irqrestore(&ni->size_lock, flags);
+		}
+		/* Ensure the changes make it to disk. */
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		mark_mft_record_dirty(ctx->ntfs_ino);
+		ntfs_attr_put_search_ctx(ctx);
+		unmap_mft_record(base_ni);
+		/* Successfully filled the hole. */
+		status.runlist_merged = 0;
+		status.mft_attr_mapped = 0;
+		status.mp_rebuilt = 0;
+		/* Setup the map cache and use that to deal with the buffer. */
+		was_hole = TRUE;
+		vcn = bh_cpos;
+		vcn_len = 1;
+		lcn_block = lcn << (vol->cluster_size_bits - blocksize_bits);
+		cdelta = 0;
+		/*
+		 * If the number of remaining clusters in the @pages is smaller
+		 * or equal to the number of cached clusters, unlock the
+		 * runlist as the map cache will be used from now on.
+		 */
+		if (likely(vcn + vcn_len >= cend)) {
+			up_write(&ni->runlist.lock);
+			rl_write_locked = FALSE;
+			rl = NULL;
+		}
+		goto map_buffer_cached;
+	} while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
+	/* If there are no errors, do the next page. */
+	if (likely(!err && ++u < nr_pages))
+		goto do_next_page;
+	/* If there are no errors, release the runlist lock if we took it. */
+	if (likely(!err)) {
+		if (unlikely(rl_write_locked)) {
+			up_write(&ni->runlist.lock);
+			rl_write_locked = FALSE;
+		} else if (unlikely(rl))
+			up_read(&ni->runlist.lock);
+		rl = NULL;
+	}
+	/* If we issued read requests, let them complete. */
+	read_lock_irqsave(&ni->size_lock, flags);
+	initialized_size = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	while (wait_bh > wait) {
+		bh = *--wait_bh;
+		wait_on_buffer(bh);
+		if (likely(buffer_uptodate(bh))) {
+			page = bh->b_page;
+			bh_pos = ((s64)page->index << PAGE_CACHE_SHIFT) +
+					bh_offset(bh);
+			/*
+			 * If the buffer overflows the initialized size, need
+			 * to zero the overflowing region.
+			 */
+			if (unlikely(bh_pos + blocksize > initialized_size)) {
+				u8 *kaddr;
+				int ofs = 0;
+
+				if (likely(bh_pos < initialized_size))
+					ofs = initialized_size - bh_pos;
+				kaddr = kmap_atomic(page, KM_USER0);
+				memset(kaddr + bh_offset(bh) + ofs, 0,
+						blocksize - ofs);
+				kunmap_atomic(kaddr, KM_USER0);
+				flush_dcache_page(page);
+			}
+		} else /* if (unlikely(!buffer_uptodate(bh))) */
+			err = -EIO;
+	}
+	if (likely(!err)) {
+		/* Clear buffer_new on all buffers. */
+		u = 0;
+		do {
+			bh = head = page_buffers(pages[u]);
+			do {
+				if (buffer_new(bh))
+					clear_buffer_new(bh);
+			} while ((bh = bh->b_this_page) != head);
+		} while (++u < nr_pages);
+		ntfs_debug("Done.");
+		return err;
+	}
+	if (status.attr_switched) {
+		/* Get back to the attribute extent we modified. */
+		ntfs_attr_reinit_search_ctx(ctx);
+		if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+				CASE_SENSITIVE, bh_cpos, NULL, 0, ctx)) {
+			ntfs_error(vol->sb, "Failed to find required "
+					"attribute extent of attribute in "
+					"error code path.  Run chkdsk to "
+					"recover.");
+			write_lock_irqsave(&ni->size_lock, flags);
+			ni->itype.compressed.size += vol->cluster_size;
+			write_unlock_irqrestore(&ni->size_lock, flags);
+			flush_dcache_mft_record_page(ctx->ntfs_ino);
+			mark_mft_record_dirty(ctx->ntfs_ino);
+			/*
+			 * The only thing that is now wrong is the compressed
+			 * size of the base attribute extent which chkdsk
+			 * should be able to fix.
+			 */
+			NVolSetErrors(vol);
+		} else {
+			m = ctx->mrec;
+			a = ctx->attr;
+			status.attr_switched = 0;
+		}
+	}
+	/*
+	 * If the runlist has been modified, need to restore it by punching a
+	 * hole into it and we then need to deallocate the on-disk cluster as
+	 * well.  Note, we only modify the runlist if we are able to generate a
+	 * new mapping pairs array, i.e. only when the mapped attribute extent
+	 * is not switched.
+	 */
+	if (status.runlist_merged && !status.attr_switched) {
+		BUG_ON(!rl_write_locked);
+		/* Make the file cluster we allocated sparse in the runlist. */
+		if (ntfs_rl_punch_nolock(vol, &ni->runlist, bh_cpos, 1)) {
+			ntfs_error(vol->sb, "Failed to punch hole into "
+					"attribute runlist in error code "
+					"path.  Run chkdsk to recover the "
+					"lost cluster.");
+			make_bad_inode(vi);
+			make_bad_inode(VFS_I(base_ni));
+			NVolSetErrors(vol);
+		} else /* if (success) */ {
+			status.runlist_merged = 0;
+			/*
+			 * Deallocate the on-disk cluster we allocated but only
+			 * if we succeeded in punching its vcn out of the
+			 * runlist.
+			 */
+			down_write(&vol->lcnbmp_lock);
+			if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) {
+				ntfs_error(vol->sb, "Failed to release "
+						"allocated cluster in error "
+						"code path.  Run chkdsk to "
+						"recover the lost cluster.");
+				NVolSetErrors(vol);
+			}
+			up_write(&vol->lcnbmp_lock);
+		}
+	}
+	/*
+	 * Resize the attribute record to its old size and rebuild the mapping
+	 * pairs array.  Note, we only can do this if the runlist has been
+	 * restored to its old state which also implies that the mapped
+	 * attribute extent is not switched.
+	 */
+	if (status.mp_rebuilt && !status.runlist_merged) {
+		if (ntfs_attr_record_resize(m, a, attr_rec_len)) {
+			ntfs_error(vol->sb, "Failed to restore attribute "
+					"record in error code path.  Run "
+					"chkdsk to recover.");
+			make_bad_inode(vi);
+			make_bad_inode(VFS_I(base_ni));
+			NVolSetErrors(vol);
+		} else /* if (success) */ {
+			if (ntfs_mapping_pairs_build(vol, (u8*)a +
+					le16_to_cpu(a->data.non_resident.
+					mapping_pairs_offset), attr_rec_len -
+					le16_to_cpu(a->data.non_resident.
+					mapping_pairs_offset), ni->runlist.rl,
+					vcn, highest_vcn, NULL)) {
+				ntfs_error(vol->sb, "Failed to restore "
+						"mapping pairs array in error "
+						"code path.  Run chkdsk to "
+						"recover.");
+				make_bad_inode(vi);
+				make_bad_inode(VFS_I(base_ni));
+				NVolSetErrors(vol);
+			}
+			flush_dcache_mft_record_page(ctx->ntfs_ino);
+			mark_mft_record_dirty(ctx->ntfs_ino);
+		}
+	}
+	/* Release the mft record and the attribute. */
+	if (status.mft_attr_mapped) {
+		ntfs_attr_put_search_ctx(ctx);
+		unmap_mft_record(base_ni);
+	}
+	/* Release the runlist lock. */
+	if (rl_write_locked)
+		up_write(&ni->runlist.lock);
+	else if (rl)
+		up_read(&ni->runlist.lock);
+	/*
+	 * Zero out any newly allocated blocks to avoid exposing stale data.
+	 * If BH_New is set, we know that the block was newly allocated above
+	 * and that it has not been fully zeroed and marked dirty yet.
+	 */
+	nr_pages = u;
+	u = 0;
+	end = bh_cpos << vol->cluster_size_bits;
+	do {
+		page = pages[u];
+		bh = head = page_buffers(page);
+		do {
+			if (u == nr_pages &&
+					((s64)page->index << PAGE_CACHE_SHIFT) +
+					bh_offset(bh) >= end)
+				break;
+			if (!buffer_new(bh))
+				continue;
+			clear_buffer_new(bh);
+			if (!buffer_uptodate(bh)) {
+				if (PageUptodate(page))
+					set_buffer_uptodate(bh);
+				else {
+					u8 *kaddr = kmap_atomic(page, KM_USER0);
+					memset(kaddr + bh_offset(bh), 0,
+							blocksize);
+					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(page);
+					set_buffer_uptodate(bh);
+				}
+			}
+			mark_buffer_dirty(bh);
+		} while ((bh = bh->b_this_page) != head);
+	} while (++u <= nr_pages);
+	ntfs_error(vol->sb, "Failed.  Returning error code %i.", err);
+	return err;
+}
+
+/*
+ * Copy @bytes bytes from the user buffer @buf into @pages, beginning @ofs
+ * bytes into the first page and continuing at offset zero of each following
+ * page, for at most @nr_pages pages.
+ *
+ * Each page is first tried under an atomic kmap and, if that copy comes up
+ * short, retried in full under a regular kmap().  If the retry is short as
+ * well, the bytes that did arrive are counted, the following pages are zeroed
+ * up to the end of the requested range, and copying stops.
+ *
+ * Return the number of bytes which were successfully copied.
+ */
+static inline size_t ntfs_copy_from_user(struct page **pages,
+		unsigned nr_pages, unsigned ofs, const char __user *buf,
+		size_t bytes)
+{
+	struct page **const pages_end = pages + nr_pages;
+	size_t done = 0;
+	unsigned chunk;
+	int uncopied;
+	char *addr;
+
+	for (;;) {
+		/* Never copy past the end of the current page or of @bytes. */
+		chunk = PAGE_CACHE_SIZE - ofs;
+		if (chunk > bytes)
+			chunk = bytes;
+		/* First attempt, under an atomic kmap. */
+		addr = kmap_atomic(*pages, KM_USER0);
+		uncopied = __copy_from_user_inatomic(addr + ofs, buf, chunk);
+		kunmap_atomic(addr, KM_USER0);
+		if (unlikely(uncopied)) {
+			/* Second attempt, the slow way, under kmap(). */
+			addr = kmap(*pages);
+			uncopied = __copy_from_user(addr + ofs, buf, chunk);
+			kunmap(*pages);
+			if (unlikely(uncopied))
+				break;
+		}
+		done += chunk;
+		bytes -= chunk;
+		if (!bytes)
+			return done;
+		buf += chunk;
+		ofs = 0;
+		if (++pages >= pages_end)
+			return done;
+	}
+	/* Only reached on a fault: count what did make it into the page. */
+	done += chunk - uncopied;
+	/*
+	 * Zero the rest of the target like __copy_from_user().  Only the pages
+	 * after the faulting one are cleared here.
+	 * NOTE(review): the tail of the faulting page is presumably zeroed by
+	 * __copy_from_user() itself - confirm.
+	 */
+	while (++pages < pages_end) {
+		/* Drop the previous page's share of the request. */
+		bytes -= chunk;
+		if (!bytes)
+			break;
+		chunk = PAGE_CACHE_SIZE;
+		if (chunk > bytes)
+			chunk = bytes;
+		addr = kmap_atomic(*pages, KM_USER0);
+		memset(addr, 0, chunk);
+		kunmap_atomic(addr, KM_USER0);
+	}
+	return done;
+}
+
+/*
+ * Copy @bytes bytes into the kernel buffer @vaddr from the user iovec array
+ * @iov, starting @iov_ofs bytes into the first segment and moving on to the
+ * next segment whenever the current one has been used up.
+ *
+ * Every segment is copied with __copy_from_user_inatomic().  If a segment
+ * copy comes up short, the part of the target that lies beyond the current
+ * segment's chunk is zeroed and copying stops.
+ *
+ * Return the number of bytes copied.
+ */
+static size_t __ntfs_copy_from_user_iovec(char *vaddr,
+		const struct iovec *iov, size_t iov_ofs, size_t bytes)
+{
+	size_t total = 0;
+
+	while (1) {
+		const char __user *buf = iov->iov_base + iov_ofs;
+		/*
+		 * @len must be as wide as iov_len.  A narrower type would
+		 * truncate the remaining segment length, making us believe
+		 * the segment is exhausted and skip to the next one early.
+		 */
+		size_t len, left;
+
+		/* Take what is left of this segment, but at most @bytes. */
+		len = iov->iov_len - iov_ofs;
+		if (len > bytes)
+			len = bytes;
+		left = __copy_from_user_inatomic(vaddr, buf, len);
+		/* Account for the whole chunk; a short copy is fixed below. */
+		total += len;
+		bytes -= len;
+		vaddr += len;
+		if (unlikely(left)) {
+			/*
+			 * Zero the rest of the target like __copy_from_user().
+			 */
+			memset(vaddr, 0, bytes);
+			total -= left;
+			break;
+		}
+		if (!bytes)
+			break;
+		/* Segment used up, continue at the start of the next one. */
+		iov++;
+		iov_ofs = 0;
+	}
+	return total;
+}
+
+/*
+ * Advance the iovec cursor (*@iovp, *@iov_ofsp) by @bytes bytes.
+ *
+ * On return *@iovp points to the segment containing the new position and
+ * *@iov_ofsp is the byte offset into that segment.  A segment that has been
+ * consumed exactly is stepped over, so the cursor then sits at offset zero of
+ * the next segment.
+ */
+static inline void ntfs_set_next_iovec(const struct iovec **iovp,
+		size_t *iov_ofsp, size_t bytes)
+{
+	const struct iovec *iov = *iovp;
+	size_t iov_ofs = *iov_ofsp;
+
+	while (bytes) {
+		/*
+		 * @len must be as wide as iov_len.  With a narrower type the
+		 * remaining length could truncate to zero while the segment
+		 * is not consumed, and this loop would never make progress.
+		 */
+		size_t len;
+
+		/* Consume what is left of this segment, at most @bytes. */
+		len = iov->iov_len - iov_ofs;
+		if (len > bytes)
+			len = bytes;
+		bytes -= len;
+		iov_ofs += len;
+		if (iov->iov_len == iov_ofs) {
+			iov++;
+			iov_ofs = 0;
+		}
+	}
+	*iovp = iov;
+	*iov_ofsp = iov_ofs;
+}
+
+/*
+ * Multi-segment variant of ntfs_copy_from_user(): copy @bytes bytes from the
+ * user iovec cursor (*@iov, *@iov_ofs) into @pages, beginning @ofs bytes into
+ * the first page, for at most @nr_pages pages.  The side-effects and the
+ * return value (the number of bytes successfully copied) are the same as for
+ * ntfs_copy_from_user().
+ *
+ * After each fully copied page, unless that page completed the request, the
+ * iovec cursor is advanced by the amount copied.  If a page cannot be copied
+ * in full even on the second attempt, the remainder of the pages (out to
+ * offset + bytes) is zeroed to emulate the single-segment behaviour of
+ * ntfs_copy_from_user().
+ *
+ * Both the atomic and the non-atomic attempt go through the same helper,
+ * __ntfs_copy_from_user_iovec(), which calls __copy_from_user_inatomic().
+ * Calling that when non-atomic is ok: the only difference between it and
+ * __copy_from_user() is that the latter calls might_sleep(), and on many
+ * architectures __copy_from_user_inatomic() is just defined to
+ * __copy_from_user() so it makes no difference at all there.
+ */
+static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
+		unsigned nr_pages, unsigned ofs, const struct iovec **iov,
+		size_t *iov_ofs, size_t bytes)
+{
+	struct page **const pages_end = pages + nr_pages;
+	size_t done = 0, chunk, got;
+	char *addr;
+
+	for (;;) {
+		/* Never copy past the end of the current page or of @bytes. */
+		chunk = PAGE_CACHE_SIZE - ofs;
+		if (chunk > bytes)
+			chunk = bytes;
+		/* First attempt, under an atomic kmap. */
+		addr = kmap_atomic(*pages, KM_USER0);
+		got = __ntfs_copy_from_user_iovec(addr + ofs, *iov, *iov_ofs,
+				chunk);
+		kunmap_atomic(addr, KM_USER0);
+		if (unlikely(got != chunk)) {
+			/* Second attempt, the slow way, under kmap(). */
+			addr = kmap(*pages);
+			got = __ntfs_copy_from_user_iovec(addr + ofs, *iov,
+					*iov_ofs, chunk);
+			kunmap(*pages);
+			if (unlikely(got != chunk))
+				break;
+		}
+		done += chunk;
+		bytes -= chunk;
+		if (!bytes)
+			return done;
+		/* More to copy: move the iovec cursor past this page's data. */
+		ntfs_set_next_iovec(iov, iov_ofs, chunk);
+		ofs = 0;
+		if (++pages >= pages_end)
+			return done;
+	}
+	/* Only reached on a fault: count what did make it into the page. */
+	done += got;
+	/* Zero the rest of the target like __copy_from_user(). */
+	while (++pages < pages_end) {
+		/* Drop the previous page's share of the request. */
+		bytes -= chunk;
+		if (!bytes)
+			break;
+		chunk = PAGE_CACHE_SIZE;
+		if (chunk > bytes)
+			chunk = bytes;
+		addr = kmap_atomic(*pages, KM_USER0);
+		memset(addr, 0, chunk);
+		kunmap_atomic(addr, KM_USER0);
+	}
+	return done;
+}
+
+/*
+ * Call flush_dcache_page() on each of the @nr_pages pages in @pages, working
+ * from the last page down to the first.  @nr_pages must not be zero.
+ */
+static inline void ntfs_flush_dcache_pages(struct page **pages,
+		unsigned nr_pages)
+{
+	BUG_ON(!nr_pages);
+	/*
+	 * Warning: Do not do the decrement at the same time as the call to
+	 * flush_dcache_page() because it is a NULL macro on i386 and hence the
+	 * decrement never happens so the loop never terminates.
+	 *
+	 * The decrement has to come before the call so that the valid indices
+	 * nr_pages - 1 down to 0 are used; indexing with the undecremented
+	 * count would touch one element past the array and miss pages[0].
+	 */
+	do {
+		--nr_pages;
+		flush_dcache_page(pages[nr_pages]);
+	} while (nr_pages > 0);
+}
+
+/**
+ * ntfs_commit_pages_after_non_resident_write - commit the received data
+ * @pages:	array of destination pages
+ * @nr_pages:	number of pages in @pages
+ * @pos:	byte position in file at which the write begins
+ * @bytes:	number of bytes to be written
+ *
+ * See description of ntfs_commit_pages_after_write(), below.
+ *
+ * Mark every buffer overlapping the write [@pos, @pos + @bytes) uptodate and
+ * dirty, and set a page uptodate once none of its buffers outside the write
+ * is left non-uptodate.  If the write ends beyond the initialized size, set
+ * the initialized size to the end of the write, and do the same for the
+ * i_size/data size if the write also ends beyond i_size, both in the inode
+ * and in the attribute record.
+ *
+ * Return 0 on success or -errno on error.  On any error other than -ENOMEM
+ * the volume is flagged as having errors and the inodes are made bad.
+ */
+static inline int ntfs_commit_pages_after_non_resident_write(
+		struct page **pages, const unsigned nr_pages,
+		s64 pos, size_t bytes)
+{
+	s64 end, initialized_size;
+	struct inode *vi;
+	ntfs_inode *ni, *base_ni;
+	struct buffer_head *bh, *head;
+	ntfs_attr_search_ctx *ctx;
+	MFT_RECORD *m;
+	ATTR_RECORD *a;
+	unsigned long flags;
+	unsigned blocksize, u;
+	int err;
+
+	vi = pages[0]->mapping->host;
+	ni = NTFS_I(vi);
+	blocksize = 1 << vi->i_blkbits;
+	/* The first byte after the write. */
+	end = pos + bytes;
+	u = 0;
+	/* Walk all buffers of all pages and commit the ones that were written. */
+	do {
+		s64 bh_pos;
+		struct page *page;
+		BOOL partial;
+
+		page = pages[u];
+		/* File position of the first buffer in this page. */
+		bh_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+		bh = head = page_buffers(page);
+		/* Set if a buffer outside the write is not uptodate. */
+		partial = FALSE;
+		do {
+			s64 bh_end;
+
+			bh_end = bh_pos + blocksize;
+			if (bh_end <= pos || bh_pos >= end) {
+				/*
+				 * Buffer lies fully outside the write, leave
+				 * it alone but remember if it is not uptodate.
+				 */
+				if (!buffer_uptodate(bh))
+					partial = TRUE;
+			} else {
+				/* Buffer was (at least partly) written to. */
+				set_buffer_uptodate(bh);
+				mark_buffer_dirty(bh);
+			}
+		} while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
+		/*
+		 * If all buffers are now uptodate but the page is not, set the
+		 * page uptodate.
+		 */
+		if (!partial && !PageUptodate(page))
+			SetPageUptodate(page);
+	} while (++u < nr_pages);
+	/*
+	 * Finally, if we do not need to update initialized_size or i_size we
+	 * are finished.
+	 */
+	read_lock_irqsave(&ni->size_lock, flags);
+	initialized_size = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (end <= initialized_size) {
+		ntfs_debug("Done.");
+		return 0;
+	}
+	/*
+	 * Update initialized_size/i_size as appropriate, both in the inode and
+	 * the mft record.
+	 */
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	/* Map, pin, and lock the mft record. */
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		/* Nothing was acquired, make err_out skip the releases. */
+		m = NULL;
+		ctx = NULL;
+		goto err_out;
+	}
+	BUG_ON(!NInoNonResident(ni));
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		/* @ctx is NULL here so err_out only unmaps the mft record. */
+		err = -ENOMEM;
+		goto err_out;
+	}
+	/* Look up the attribute extent starting at vcn 0. */
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, 0, NULL, 0, ctx);
+	if (unlikely(err)) {
+		/* The attribute must exist so not finding it is an i/o error. */
+		if (err == -ENOENT)
+			err = -EIO;
+		goto err_out;
+	}
+	a = ctx->attr;
+	BUG_ON(!a->non_resident);
+	/* Update the in-memory and the on-disk sizes under the size lock. */
+	write_lock_irqsave(&ni->size_lock, flags);
+	BUG_ON(end > ni->allocated_size);
+	ni->initialized_size = end;
+	a->data.non_resident.initialized_size = cpu_to_sle64(end);
+	if (end > i_size_read(vi)) {
+		/* The write extended the file, the data size follows. */
+		i_size_write(vi, end);
+		a->data.non_resident.data_size =
+				a->data.non_resident.initialized_size;
+	}
+	write_unlock_irqrestore(&ni->size_lock, flags);
+	/* Mark the mft record dirty, so it gets written back. */
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	ntfs_debug("Done.");
+	return 0;
+err_out:
+	/* Release whatever was acquired before the failure. */
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error "
+			"code %i).", err);
+	/* Running out of memory does not make the volume or inode bad. */
+	if (err != -ENOMEM) {
+		NVolSetErrors(ni->vol);
+		make_bad_inode(VFS_I(base_ni));
+		make_bad_inode(vi);
+	}
+	return err;
+}
+
+/**
+ * ntfs_commit_pages_after_write - commit the received data
+ * @pages:	array of destination pages
+ * @nr_pages:	number of pages in @pages
+ * @pos:	byte position in file at which the write begins
+ * @bytes:	number of bytes to be written
+ *
+ * This is called from ntfs_file_buffered_write() with i_sem held on the inode
+ * (@pages[0]->mapping->host).  There are @nr_pages pages in @pages which are
+ * locked but not kmap()ped.  The source data has already been copied into the
+ * @pages.  ntfs_prepare_pages_for_non_resident_write() has been called before
+ * the data was copied (for non-resident attributes only) and it returned
+ * success.
+ *
+ * Need to set uptodate and mark dirty all buffers within the boundary of the
+ * write.  If all buffers in a page are uptodate we set the page uptodate, too.
+ *
+ * Setting the buffers dirty ensures that they get written out later when
+ * ntfs_writepage() is invoked by the VM.
+ *
+ * Finally, we need to update i_size and initialized_size as appropriate both
+ * in the inode and the mft record.
+ *
+ * This is modelled after fs/buffer.c::generic_commit_write(), which marks
+ * buffers uptodate and dirty, sets the page uptodate if all buffers in the
+ * page are uptodate, and updates i_size if the end of io is beyond i_size.  In
+ * that case, it also marks the inode dirty.
+ *
+ * If things have gone as outlined in
+ * ntfs_prepare_pages_for_non_resident_write(), we do not need to do any page
+ * content modifications here for non-resident attributes.  For resident
+ * attributes we need to do the uptodate bringing here which we combine with
+ * the copying into the mft record which means we save one atomic kmap.
+ *
+ * Non-resident attributes are handed to
+ * ntfs_commit_pages_after_non_resident_write().  For resident attributes
+ * @nr_pages must be one.  If committing a resident write fails with -ENOMEM
+ * but the page is uptodate, the page is redirtied so the write is retried
+ * later and success is returned.
+ *
+ * Return 0 on success or -errno on error.
+ */
+static int ntfs_commit_pages_after_write(struct page **pages,
+		const unsigned nr_pages, s64 pos, size_t bytes)
+{
+	s64 end, initialized_size;
+	loff_t i_size;
+	struct inode *vi;
+	ntfs_inode *ni, *base_ni;
+	struct page *page;
+	ntfs_attr_search_ctx *ctx;
+	MFT_RECORD *m;
+	ATTR_RECORD *a;
+	char *kattr, *kaddr;
+	unsigned long flags;
+	u32 attr_len;
+	int err;
+
+	BUG_ON(!nr_pages);
+	BUG_ON(!pages);
+	page = pages[0];
+	BUG_ON(!page);
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
+	/* @bytes is a size_t, hence the z length modifier. */
+	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
+			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
+			vi->i_ino, ni->type, page->index, nr_pages,
+			(long long)pos, bytes);
+	if (NInoNonResident(ni))
+		return ntfs_commit_pages_after_non_resident_write(pages,
+				nr_pages, pos, bytes);
+	BUG_ON(nr_pages > 1);
+	/*
+	 * Attribute is resident, implying it is not compressed, encrypted, or
+	 * sparse.
+	 */
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	BUG_ON(NInoNonResident(ni));
+	/* Map, pin, and lock the mft record. */
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		/* Nothing was acquired, make err_out skip the releases. */
+		m = NULL;
+		ctx = NULL;
+		goto err_out;
+	}
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		/* @ctx is NULL here so err_out only unmaps the mft record. */
+		err = -ENOMEM;
+		goto err_out;
+	}
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, 0, NULL, 0, ctx);
+	if (unlikely(err)) {
+		/* The attribute must exist so not finding it is an i/o error. */
+		if (err == -ENOENT)
+			err = -EIO;
+		goto err_out;
+	}
+	a = ctx->attr;
+	BUG_ON(a->non_resident);
+	/* The total length of the attribute value. */
+	attr_len = le32_to_cpu(a->data.resident.value_length);
+	i_size = i_size_read(vi);
+	BUG_ON(attr_len != i_size);
+	BUG_ON(pos > attr_len);
+	/* The first byte after the write. */
+	end = pos + bytes;
+	/* The write must fit in the space the attribute record has for it. */
+	BUG_ON(end > le32_to_cpu(a->length) -
+			le16_to_cpu(a->data.resident.value_offset));
+	/* Start of the attribute value inside the mft record. */
+	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
+	kaddr = kmap_atomic(page, KM_USER0);
+	/*
+	 * Copy the received data from the page to the mft record.  Note @pos
+	 * is used as the offset into the page as well as into the attribute
+	 * value.
+	 */
+	memcpy(kattr + pos, kaddr + pos, bytes);
+	/* Update the attribute length if necessary. */
+	if (end > attr_len) {
+		attr_len = end;
+		a->data.resident.value_length = cpu_to_le32(attr_len);
+	}
+	/*
+	 * If the page is not uptodate, bring the out of bounds area(s)
+	 * uptodate by copying data from the mft record to the page.
+	 */
+	if (!PageUptodate(page)) {
+		/* The attribute data in front of the write. */
+		if (pos > 0)
+			memcpy(kaddr, kattr, pos);
+		/* The attribute data following the write. */
+		if (end < attr_len)
+			memcpy(kaddr + end, kattr + end, attr_len - end);
+		/* Zero the region outside the end of the attribute value. */
+		memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	}
+	kunmap_atomic(kaddr, KM_USER0);
+	/* Update initialized_size/i_size if necessary. */
+	read_lock_irqsave(&ni->size_lock, flags);
+	initialized_size = ni->initialized_size;
+	BUG_ON(end > ni->allocated_size);
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	BUG_ON(initialized_size != i_size);
+	if (end > initialized_size) {
+		/* The write extended the attribute, grow both sizes. */
+		write_lock_irqsave(&ni->size_lock, flags);
+		ni->initialized_size = end;
+		i_size_write(vi, end);
+		write_unlock_irqrestore(&ni->size_lock, flags);
+	}
+	/* Mark the mft record dirty, so it gets written back. */
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	ntfs_debug("Done.");
+	return 0;
+err_out:
+	if (err == -ENOMEM) {
+		ntfs_warning(vi->i_sb, "Error allocating memory required to "
+				"commit the write.");
+		if (PageUptodate(page)) {
+			ntfs_warning(vi->i_sb, "Page is uptodate, setting "
+					"dirty so the write will be retried "
+					"later on by the VM.");
+			/*
+			 * Put the page on mapping->dirty_pages, but leave its
+			 * buffers' dirty state as-is.
+			 */
+			__set_page_dirty_nobuffers(page);
+			/* The data is safe in the page so report success. */
+			err = 0;
+		} else
+			ntfs_error(vi->i_sb, "Page is not uptodate.  Written "
+					"data has been lost.");
+	} else {
+		ntfs_error(vi->i_sb, "Resident attribute commit write failed "
+				"with error %i.", err);
+		NVolSetErrors(ni->vol);
+		make_bad_inode(VFS_I(base_ni));
+		make_bad_inode(vi);
+	}
+	/* Release whatever was acquired before the failure. */
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	return err;
+}
+
+/**
+ * ntfs_file_buffered_write - write @count bytes at @pos via the page cache
+ *
+ * Locking: The vfs is holding ->i_sem on the inode.
+ */
+static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
+		const struct iovec *iov, unsigned long nr_segs,
+		loff_t pos, loff_t *ppos, size_t count)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *vi = mapping->host;
+	ntfs_inode *ni = NTFS_I(vi);
+	ntfs_volume *vol = ni->vol;
+	struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
+	struct page *cached_page = NULL;
+	char __user *buf = NULL;
+	s64 end, ll;
+	VCN last_vcn;
+	LCN lcn;
+	unsigned long flags;
+	size_t bytes, iov_ofs;
+	ssize_t status, written;
+	unsigned nr_pages;
+	int err;
+	struct pagevec lru_pvec;
+
+	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
+			"pos 0x%llx, count 0x%lx.",
+			vi->i_ino, (unsigned)le32_to_cpu(ni->type),
+			(unsigned long long)pos, (unsigned long)count);
+	if (unlikely(!count))
+		return 0;
+	BUG_ON(NInoMstProtected(ni));
+	/*
+	 * If the attribute is not an index root and it is encrypted or
+	 * compressed, we cannot write to it yet.  Note we need to check for
+	 * AT_INDEX_ALLOCATION since this is the type of both directory and
+	 * index inodes.
+	 */
+	if (ni->type != AT_INDEX_ALLOCATION) {
+		/* If file is encrypted, deny access, just like NT4. */
+		if (NInoEncrypted(ni)) {
+			ntfs_debug("Denying write access to encrypted file.");
+			return -EACCES;
+		}
+		if (NInoCompressed(ni)) {
+			ntfs_error(vi->i_sb, "Writing to compressed files is "
+					"not implemented yet.  Sorry.");
+			return -EOPNOTSUPP;
+		}
+	}
+	/*
+	 * If a previous ntfs_truncate() failed, repeat it and abort if it
+	 * fails again.
+	 */
+	if (unlikely(NInoTruncateFailed(ni))) {
+		down_write(&vi->i_alloc_sem);
+		err = ntfs_truncate(vi);
+		up_write(&vi->i_alloc_sem);
+		if (err || NInoTruncateFailed(ni)) {
+			if (!err)
+				err = -EIO;
+			ntfs_error(vol->sb, "Cannot perform write to inode "
+					"0x%lx, attribute type 0x%x, because "
+					"ntfs_truncate() failed (error code "
+					"%i).", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+			return err;
+		}
+	}
+	/* The first byte after the write. */
+	end = pos + count;
+	/*
+	 * If the write goes beyond the allocated size, extend the allocation
+	 * to cover the whole of the write, rounded up to the nearest cluster.
+	 */
+	read_lock_irqsave(&ni->size_lock, flags);
+	ll = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (end > ll) {
+		/* Extend the allocation without changing the data size. */
+		ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
+		if (likely(ll >= 0)) {
+			BUG_ON(pos >= ll);
+			/* If the extension was partial truncate the write. */
+			if (end > ll) {
+				ntfs_debug("Truncating write to inode 0x%lx, "
+						"attribute type 0x%x, because "
+						"the allocation was only "
+						"partially extended.",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type));
+				end = ll;
+				count = ll - pos;
+			}
+		} else {
+			err = ll;
+			read_lock_irqsave(&ni->size_lock, flags);
+			ll = ni->allocated_size;
+			read_unlock_irqrestore(&ni->size_lock, flags);
+			/* Perform a partial write if possible or fail. */
+			if (pos < ll) {
+				ntfs_debug("Truncating write to inode 0x%lx, "
+						"attribute type 0x%x, because "
+						"extending the allocation "
+						"failed (error code %i).",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type), err);
+				end = ll;
+				count = ll - pos;
+			} else {
+				ntfs_error(vol->sb, "Cannot perform write to "
+						"inode 0x%lx, attribute type "
+						"0x%x, because extending the "
+						"allocation failed (error "
+						"code %i).", vi->i_ino,
+						(unsigned)
+						le32_to_cpu(ni->type), err);
+				return err;
+			}
+		}
+	}
+	pagevec_init(&lru_pvec, 0);
+	written = 0;
+	/*
+	 * If the write starts beyond the initialized size, extend it up to the
+	 * beginning of the write and initialize all non-sparse space between
+	 * the old initialized size and the new one.  This automatically also
+	 * increments the vfs inode->i_size to keep it above or equal to the
+	 * initialized_size.
+	 */
+	read_lock_irqsave(&ni->size_lock, flags);
+	ll = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (pos > ll) {
+		err = ntfs_attr_extend_initialized(ni, pos, &cached_page,
+				&lru_pvec);
+		if (err < 0) {
+			ntfs_error(vol->sb, "Cannot perform write to inode "
+					"0x%lx, attribute type 0x%x, because "
+					"extending the initialized size "
+					"failed (error code %i).", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+			status = err;
+			goto err_out;
+		}
+	}
+	/*
+	 * Determine the number of pages per cluster for non-resident
+	 * attributes.
+	 */
+	nr_pages = 1;
+	if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
+		nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
+	/* Finally, perform the actual write. */
+	last_vcn = -1;	/* No cluster has been looked up yet. */
+	if (likely(nr_segs == 1))
+		buf = iov->iov_base;
+	else
+		iov_ofs = 0;	/* Offset in the current iovec. */
+	do {
+		VCN vcn;
+		pgoff_t idx, start_idx;
+		unsigned ofs, do_pages, u;
+		size_t copied;
+
+		start_idx = idx = pos >> PAGE_CACHE_SHIFT;
+		ofs = pos & ~PAGE_CACHE_MASK;
+		bytes = PAGE_CACHE_SIZE - ofs;
+		do_pages = 1;
+		if (nr_pages > 1) {
+			vcn = pos >> vol->cluster_size_bits;
+			if (vcn != last_vcn) {
+				last_vcn = vcn;
+				/*
+				 * Get the lcn of the vcn the write is in.  If
+				 * it is a hole, need to lock down all pages in
+				 * the cluster.
+				 */
+				down_read(&ni->runlist.lock);
+				lcn = ntfs_attr_vcn_to_lcn_nolock(ni, pos >>
+						vol->cluster_size_bits, FALSE);
+				up_read(&ni->runlist.lock);
+				if (unlikely(lcn < LCN_HOLE)) {
+					status = -EIO;
+					if (lcn == LCN_ENOMEM)
+						status = -ENOMEM;
+					else
+						ntfs_error(vol->sb, "Cannot "
+							"perform write to "
+							"inode 0x%lx, "
+							"attribute type 0x%x, "
+							"because the attribute "
+							"is corrupt.",
+							vi->i_ino, (unsigned)
+							le32_to_cpu(ni->type));
+					break;
+				}
+				if (lcn == LCN_HOLE) {
+					start_idx = (pos & ~(s64)
+							vol->cluster_size_mask)
+							>> PAGE_CACHE_SHIFT;
+					bytes = vol->cluster_size - (pos &
+							vol->cluster_size_mask);
+					do_pages = nr_pages;
+				}
+			}
+		}
+		if (bytes > count)
+			bytes = count;
+		/*
+		 * Bring in the user page(s) that we will copy from _first_.
+		 * Otherwise there is a nasty deadlock on copying from the same
+		 * page(s) as we are writing to, without it/them being marked
+		 * up-to-date.  Note, at present there is nothing to stop the
+		 * pages being swapped out between us bringing them into memory
+		 * and doing the actual copying.
+		 */
+		if (likely(nr_segs == 1))
+			ntfs_fault_in_pages_readable(buf, bytes);
+		else
+			ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes);
+		/* Get and lock @do_pages starting at index @start_idx. */
+		status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
+				pages, &cached_page, &lru_pvec);
+		if (unlikely(status))
+			break;
+		/*
+		 * For non-resident attributes, we need to fill any holes with
+		 * actual clusters and ensure all buffers are mapped.  We also
+		 * need to bring uptodate any buffers that are only partially
+		 * being written to.
+		 */
+		if (NInoNonResident(ni)) {
+			status = ntfs_prepare_pages_for_non_resident_write(
+					pages, do_pages, pos, bytes);
+			if (unlikely(status)) {
+				loff_t i_size;
+
+				do {
+					unlock_page(pages[--do_pages]);
+					page_cache_release(pages[do_pages]);
+				} while (do_pages);
+				/*
+				 * The write preparation may have instantiated
+				 * allocated space outside i_size.  Trim this
+				 * off again.  We can ignore any errors in this
+				 * case as we will just be wasting a bit of
+				 * allocated space, which is not a disaster.
+				 */
+				i_size = i_size_read(vi);
+				if (pos + bytes > i_size)
+					vmtruncate(vi, i_size);
+				break;
+			}
+		}
+		u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
+		if (likely(nr_segs == 1)) {
+			copied = ntfs_copy_from_user(pages + u, do_pages - u,
+					ofs, buf, bytes);
+			buf += copied;
+		} else
+			copied = ntfs_copy_from_user_iovec(pages + u,
+					do_pages - u, ofs, &iov, &iov_ofs,
+					bytes);
+		ntfs_flush_dcache_pages(pages + u, do_pages - u);
+		status = ntfs_commit_pages_after_write(pages, do_pages, pos,
+				bytes);
+		if (likely(!status)) {
+			written += copied;
+			count -= copied;
+			pos += copied;
+			if (unlikely(copied != bytes))
+				status = -EFAULT;
+		}
+		do {
+			unlock_page(pages[--do_pages]);
+			mark_page_accessed(pages[do_pages]);
+			page_cache_release(pages[do_pages]);
+		} while (do_pages);
+		if (unlikely(status))
+			break;
+		balance_dirty_pages_ratelimited(mapping);
+		cond_resched();
+	} while (count);
+err_out:
+	*ppos = pos;	/* Tell the caller where we stopped. */
+	if (cached_page)
+		page_cache_release(cached_page);
+	/* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */
+	if (likely(!status)) {
+		if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) {
+			if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb))
+				status = generic_osync_inode(vi, mapping,
+						OSYNC_METADATA|OSYNC_DATA);
+		}
+  	}
+	pagevec_lru_add(&lru_pvec);
+	ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
+			written ? "written" : "status", (unsigned long)written,
+			(long)status);
+	return written ? written : status;
+}
+
+/**
+ * ntfs_file_aio_write_nolock - check the iovec, then do a buffered write
+ */
+static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
+		const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	loff_t pos;
+	unsigned long seg;
+	size_t count;		/* after file limit checks */
+	ssize_t written, err;
+
+	count = 0;
+	for (seg = 0; seg < nr_segs; seg++) {
+		const struct iovec *iv = &iov[seg];
+		/*
+		 * If any segment has a negative length, or the cumulative
+		 * length ever wraps negative then return -EINVAL.
+		 */
+		count += iv->iov_len;
+		if (unlikely((ssize_t)(count|iv->iov_len) < 0))
+			return -EINVAL;
+		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
+			continue;
+		if (!seg)
+			return -EFAULT;
+		nr_segs = seg;	/* Only use the segments before it. */
+		count -= iv->iov_len;	/* This segment is no good */
+		break;
+	}
+	pos = *ppos;
+	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+	/* We can write back this queue in page reclaim. */
+	current->backing_dev_info = mapping->backing_dev_info;
+	written = 0;
+	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+	if (err)
+		goto out;
+	if (!count)
+		goto out;
+	err = remove_suid(file->f_dentry);
+	if (err)
+		goto out;
+	inode_update_time(inode, 1);
+	written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
+			count);
+out:
+	current->backing_dev_info = NULL;
+	return written ? written : err;
+}
+
+/**
+ * ntfs_file_aio_write - write under ->i_sem, syncing if O_SYNC or IS_SYNC
+ */
+static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf,
+		size_t count, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+	struct iovec local_iov = { .iov_base = (void __user *)buf,
+				   .iov_len = count };
+
+	BUG_ON(iocb->ki_pos != pos);
+
+	down(&inode->i_sem);
+	ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+	up(&inode->i_sem);
+	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
+		int err = sync_page_range(inode, mapping, pos, ret);
+		if (err < 0)
+			ret = err;
+	}
+	return ret;
+}
+
+/**
+ * ntfs_file_writev - synchronous vectored write using a local sync kiocb
+ *
+ * Basically the same as generic_file_writev() except that it ends up calling
+ * ntfs_file_aio_write_nolock() instead of __generic_file_aio_write_nolock().
+ */
+static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
+		unsigned long nr_segs, loff_t *ppos)
+{
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct kiocb kiocb;
+	ssize_t ret;
+
+	down(&inode->i_sem);
+	init_sync_kiocb(&kiocb, file);
+	ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
+	if (ret == -EIOCBQUEUED)
+		ret = wait_on_sync_kiocb(&kiocb);
+	up(&inode->i_sem);
+	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
+		int err = sync_page_range(inode, mapping, *ppos - ret, ret);
+		if (err < 0)
+			ret = err;
+	}
+	return ret;
+}
+
+/**
+ * ntfs_file_write - wrap @buf/@count in one iovec for ntfs_file_writev()
+ */
+static ssize_t ntfs_file_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	struct iovec local_iov = { .iov_base = (void __user *)buf,
+				   .iov_len = count };
+
+	return ntfs_file_writev(file, &local_iov, 1, ppos);
+}
+
 /**
  * ntfs_file_fsync - sync a file to disk
  * @filp:	file to be synced
@@ -113,39 +2296,39 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
 #endif /* NTFS_RW */
 
 struct file_operations ntfs_file_ops = {
-	.llseek		= generic_file_llseek,	  /* Seek inside file. */
-	.read		= generic_file_read,	  /* Read from file. */
-	.aio_read	= generic_file_aio_read,  /* Async read from file. */
-	.readv		= generic_file_readv,	  /* Read from file. */
+	.llseek		= generic_file_llseek,	 /* Seek inside file. */
+	.read		= generic_file_read,	 /* Read from file. */
+	.aio_read	= generic_file_aio_read, /* Async read from file. */
+	.readv		= generic_file_readv,	 /* Read from file. */
 #ifdef NTFS_RW
-	.write		= generic_file_write,	  /* Write to file. */
-	.aio_write	= generic_file_aio_write, /* Async write to file. */
-	.writev		= generic_file_writev,	  /* Write to file. */
-	/*.release	= ,*/			  /* Last file is closed.  See
-						     fs/ext2/file.c::
-						     ext2_release_file() for
-						     how to use this to discard
-						     preallocated space for
-						     write opened files. */
-	.fsync		= ntfs_file_fsync,	  /* Sync a file to disk. */
-	/*.aio_fsync	= ,*/			  /* Sync all outstanding async
-						     i/o operations on a
-						     kiocb. */
+	.write		= ntfs_file_write,	 /* Write to file. */
+	.aio_write	= ntfs_file_aio_write,	 /* Async write to file. */
+	.writev		= ntfs_file_writev,	 /* Write to file. */
+	/*.release	= ,*/			 /* Last file is closed.  See
+						    fs/ext2/file.c::
+						    ext2_release_file() for
+						    how to use this to discard
+						    preallocated space for
+						    write opened files. */
+	.fsync		= ntfs_file_fsync,	 /* Sync a file to disk. */
+	/*.aio_fsync	= ,*/			 /* Sync all outstanding async
+						    i/o operations on a
+						    kiocb. */
 #endif /* NTFS_RW */
-	/*.ioctl	= ,*/			  /* Perform function on the
-						     mounted filesystem. */
-	.mmap		= generic_file_mmap,	  /* Mmap file. */
-	.open		= ntfs_file_open,	  /* Open file. */
-	.sendfile	= generic_file_sendfile,  /* Zero-copy data send with
-						     the data source being on
-						     the ntfs partition.  We
-						     do not need to care about
-						     the data destination. */
-	/*.sendpage	= ,*/			  /* Zero-copy data send with
-						     the data destination being
-						     on the ntfs partition.  We
-						     do not need to care about
-						     the data source. */
+	/*.ioctl	= ,*/			 /* Perform function on the
+						    mounted filesystem. */
+	.mmap		= generic_file_mmap,	 /* Mmap file. */
+	.open		= ntfs_file_open,	 /* Open file. */
+	.sendfile	= generic_file_sendfile, /* Zero-copy data send with
+						    the data source being on
+						    the ntfs partition.  We do
+						    not need to care about the
+						    data destination. */
+	/*.sendpage	= ,*/			 /* Zero-copy data send with
+						    the data destination being
+						    on the ntfs partition.  We
+						    do not need to care about
+						    the data source. */
 };
 
 struct inode_operations ntfs_file_inode_ops = {
-- 
cgit v1.2.3


From 7d0ffdb279105d9a87b447758ce4a634496abfd1 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 19 Oct 2005 12:21:19 +0100
Subject: NTFS: $EA attributes can be both resident and non-resident.  Minor
 tidying.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  1 +
 fs/ntfs/aops.c    |  5 ++---
 fs/ntfs/attrib.c  |  2 +-
 fs/ntfs/file.c    | 14 ++++++++++++++
 fs/ntfs/layout.h  | 27 +++++++++++++++++----------
 5 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 03015c7b236c..bc6ec16ad1f8 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -75,6 +75,7 @@ ToDo/Notes:
 	  for highly fragmented files, i.e. ones whose data attribute is split
 	  across multiple extents.   When such a case is encountered,
 	  EOPNOTSUPP is returned.
+	- $EA attributes can be both resident non-resident.
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 8f23c60030c0..1c0a4315876a 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1391,8 +1391,7 @@ retry_writepage:
 		if (NInoEncrypted(ni)) {
 			unlock_page(page);
 			BUG_ON(ni->type != AT_DATA);
-			ntfs_debug("Denying write access to encrypted "
-					"file.");
+			ntfs_debug("Denying write access to encrypted file.");
 			return -EACCES;
 		}
 		/* Compressed data streams are handled in compress.c. */
@@ -1508,8 +1507,8 @@ retry_writepage:
 	/* Zero out of bounds area in the page cache page. */
 	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
 	kunmap_atomic(kaddr, KM_USER0);
-	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	flush_dcache_page(page);
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	/* We are done with the page. */
 	end_page_writeback(page);
 	/* Finally, mark the mft record dirty, so it gets written back. */
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 338e47144fc9..df2e2091f936 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1411,7 +1411,7 @@ int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
  */
 int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type)
 {
-	if (type == AT_INDEX_ALLOCATION || type == AT_EA)
+	if (type == AT_INDEX_ALLOCATION)
 		return -EPERM;
 	return 0;
 }
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index cf2a0e2330df..5fb341a16b52 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1857,10 +1857,24 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 	if (ni->type != AT_INDEX_ALLOCATION) {
 		/* If file is encrypted, deny access, just like NT4. */
 		if (NInoEncrypted(ni)) {
+			/*
+			 * Reminder for later: Encrypted files are _always_
+			 * non-resident so that the content can always be
+			 * encrypted.
+			 */
 			ntfs_debug("Denying write access to encrypted file.");
 			return -EACCES;
 		}
 		if (NInoCompressed(ni)) {
+			/* Only unnamed $DATA attribute can be compressed. */
+			BUG_ON(ni->type != AT_DATA);
+			BUG_ON(ni->name_len);
+			/*
+			 * Reminder for later: If resident, the data is not
+			 * actually compressed.  Only on the switch to non-
+			 * resident does compression kick in.  This is in
+			 * contrast to encrypted files (see above).
+			 */
 			ntfs_error(vi->i_sb, "Writing to compressed files is "
 					"not implemented yet.  Sorry.");
 			return -EOPNOTSUPP;
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 5c248d404f05..71b25dab8199 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -1021,10 +1021,17 @@ enum {
 	FILE_NAME_POSIX		= 0x00,
 	/* This is the largest namespace. It is case sensitive and allows all
 	   Unicode characters except for: '\0' and '/'.  Beware that in
-	   WinNT/2k files which eg have the same name except for their case
-	   will not be distinguished by the standard utilities and thus a "del
-	   filename" will delete both "filename" and "fileName" without
-	   warning. */
+	   WinNT/2k/2003 by default files which eg have the same name except
+	   for their case will not be distinguished by the standard utilities
+	   and thus a "del filename" will delete both "filename" and "fileName"
+	   without warning.  However if for example Services For Unix (SFU) are
+	   installed and the case sensitive option was enabled at installation
+	   time, then you can create/access/delete such files.
+	   Note that even SFU places restrictions on the filenames beyond the
+	   '\0' and '/' and in particular the following set of characters is
+	   not allowed: '"', '/', '<', '>', '\'.  All other characters,
+	   including the ones no allowed in WIN32 namespace are allowed.
+	   Tested with SFU 3.5 (this is now free) running on Windows XP. */
 	FILE_NAME_WIN32		= 0x01,
 	/* The standard WinNT/2k NTFS long filenames. Case insensitive.  All
 	   Unicode chars except: '\0', '"', '*', '/', ':', '<', '>', '?', '\',
@@ -2375,20 +2382,20 @@ typedef u8 EA_FLAGS;
 /*
  * Attribute: Extended attribute (EA) (0xe0).
  *
- * NOTE: Always non-resident. (Is this true?)
+ * NOTE: Can be resident or non-resident.
  *
  * Like the attribute list and the index buffer list, the EA attribute value is
  * a sequence of EA_ATTR variable length records.
- *
- * FIXME: It appears weird that the EA name is not unicode. Is it true?
  */
 typedef struct {
 	le32 next_entry_offset;	/* Offset to the next EA_ATTR. */
 	EA_FLAGS flags;		/* Flags describing the EA. */
-	u8 ea_name_length;	/* Length of the name of the EA in bytes. */
+	u8 ea_name_length;	/* Length of the name of the EA in bytes
+				   excluding the '\0' byte terminator. */
 	le16 ea_value_length;	/* Byte size of the EA's value. */
-	u8 ea_name[0];		/* Name of the EA. */
-	u8 ea_value[0];		/* The value of the EA. Immediately follows
+	u8 ea_name[0];		/* Name of the EA.  Note this is ASCII, not
+				   Unicode and it is zero terminated. */
+	u8 ea_value[0];		/* The value of the EA.  Immediately follows
 				   the name. */
 } __attribute__ ((__packed__)) EA_ATTR;
 
-- 
cgit v1.2.3


From d5aeaef37dc9cb009ab5cb8abf325338d21d2b1a Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 19 Oct 2005 12:23:10 +0100
Subject: NTFS: Fix serious data corruption issue when writing.       Many
 thanks to Alberto Patino for testing and reporting the data       corruption.
  And many apologies for corrupting his partition.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/file.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5fb341a16b52..a142bf3ba1a5 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -787,6 +787,7 @@ retry_remap:
 				vcn_len = rl[1].vcn - vcn;
 				lcn_block = lcn << (vol->cluster_size_bits -
 						blocksize_bits);
+				cdelta = 0;
 				/*
 				 * If the number of remaining clusters in the
 				 * @pages is smaller or equal to the number of
-- 
cgit v1.2.3


From d04bd1fb60252f30f4f41a56613ade48df130588 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Mon, 24 Oct 2005 08:41:24 +0100
Subject: NTFS: Use %z for size_t to fix compilation warnings.  (Andrew Morton)

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 3 ++-
 fs/ntfs/file.c    | 4 ++--
 fs/ntfs/super.c   | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index bc6ec16ad1f8..2a76b1fbbfc0 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -75,7 +75,8 @@ ToDo/Notes:
 	  for highly fragmented files, i.e. ones whose data attribute is split
 	  across multiple extents.   When such a case is encountered,
 	  EOPNOTSUPP is returned.
-	- $EA attributes can be both resident non-resident.
+	- $EA attributes can be both resident and non-resident.
+	- Use %z for size_t to fix compilation warnings.  (Andrew Morton)
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index a142bf3ba1a5..cdedc84e1372 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -531,7 +531,7 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
 	ni = NTFS_I(vi);
 	vol = ni->vol;
 	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
-			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%x.",
+			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
 			vi->i_ino, ni->type, pages[0]->index, nr_pages,
 			(long long)pos, bytes);
 	blocksize_bits = vi->i_blkbits;
@@ -1693,7 +1693,7 @@ static int ntfs_commit_pages_after_write(struct page **pages,
 	vi = page->mapping->host;
 	ni = NTFS_I(vi);
 	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
-			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%x.",
+			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
 			vi->i_ino, ni->type, page->index, nr_pages,
 			(long long)pos, bytes);
 	if (NInoNonResident(ni))
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 453d0d51ea4b..6c16db9e1a8a 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1447,7 +1447,7 @@ not_enabled:
 	if (unlikely(i_size_read(tmp_ino) < sizeof(USN_HEADER))) {
 		ntfs_error(vol->sb, "Found corrupt $UsnJrnl/$DATA/$Max "
 				"attribute (size is 0x%llx but should be at "
-				"least 0x%x bytes).", i_size_read(tmp_ino),
+				"least 0x%zx bytes).", i_size_read(tmp_ino),
 				sizeof(USN_HEADER));
 		return FALSE;
 	}
-- 
cgit v1.2.3


From dda65b941f992ab10fda3d9f09539c68206b7114 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Mon, 24 Oct 2005 08:57:59 +0100
Subject: NTFS: Fix compilation warnings with gcc-4.0.2 on SUSE 10.0.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  1 +
 fs/ntfs/attrib.c  |  2 +-
 fs/ntfs/file.c    | 23 ++++++++---------------
 3 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 2a76b1fbbfc0..dea742405161 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -77,6 +77,7 @@ ToDo/Notes:
 	  EOPNOTSUPP is returned.
 	- $EA attributes can be both resident and non-resident.
 	- Use %z for size_t to fix compilation warnings.  (Andrew Morton)
+	- Fix compilation warnings with gcc-4.0.2 on SUSE 10.0.
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index df2e2091f936..eda056bac256 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -91,7 +91,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx)
 	struct page *put_this_page = NULL;
 	int err = 0;
 	BOOL ctx_is_temporary, ctx_needs_reset;
-	ntfs_attr_search_ctx old_ctx;
+	ntfs_attr_search_ctx old_ctx = { NULL, };
 
 	ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
 			(unsigned long long)vcn);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index cdedc84e1372..cf3e6ced2d01 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -78,12 +78,8 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
  * Extend the initialized size of an attribute described by the ntfs inode @ni
  * to @new_init_size bytes.  This involves zeroing any non-sparse space between
  * the old initialized size and @new_init_size both in the page cache and on
- * disk (if relevant complete pages are zeroed in the page cache then these may
- * simply be marked dirty for later writeout).  There is one caveat and that is
- * that if any uptodate page cache pages between the old initialized size and
- * the smaller of @new_init_size and the file size (vfs inode->i_size) are in
- * memory, these need to be marked dirty without being zeroed since they could
- * be non-zero due to mmap() based writes.
+ * disk (if relevant complete pages are already uptodate in the page cache then
+ * these are simply marked dirty).
  *
  * As a side-effect, the file size (vfs inode->i_size) may be incremented as,
  * in the resident attribute case, it is tied to the initialized size and, in
@@ -98,10 +94,10 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
  * with new data via mmap() based writes, so we cannot just zero it.  And since
  * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped
  * is unspecified, we choose not to do zeroing and thus we do not need to touch
- * the page at all.  For a more detailed explanation see ntfs_truncate() which
- * is in fs/ntfs/inode.c.
+ * the page at all.  For a more detailed explanation see ntfs_truncate() in
+ * fs/ntfs/inode.c.
  *
- * @cached_page and @lru_pvec are just optimisations for dealing with multiple
+ * @cached_page and @lru_pvec are just optimizations for dealing with multiple
  * pages.
  *
  * Return 0 on success and -errno on error.  In the case that an error is
@@ -110,9 +106,8 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
  * this is the case, the necessary zeroing will also have happened and that all
  * metadata is self-consistent.
  *
- * Locking: This function locks the mft record of the base ntfs inode and
- * maintains the lock throughout execution of the function.  This is required
- * so that the initialized size of the attribute can be modified safely.
+ * Locking: i_sem on the vfs inode corrseponsind to the ntfs inode @ni must be
+ *	    held by the caller.
  */
 static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size,
 		struct page **cached_page, struct pagevec *lru_pvec)
@@ -1836,7 +1831,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 	VCN last_vcn;
 	LCN lcn;
 	unsigned long flags;
-	size_t bytes, iov_ofs;
+	size_t bytes, iov_ofs = 0;	/* Offset in the current iovec. */
 	ssize_t status, written;
 	unsigned nr_pages;
 	int err;
@@ -1988,8 +1983,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 	last_vcn = -1;
 	if (likely(nr_segs == 1))
 		buf = iov->iov_base;
-	else
-		iov_ofs = 0;	/* Offset in the current iovec. */
 	do {
 		VCN vcn;
 		pgoff_t idx, start_idx;
-- 
cgit v1.2.3


From c9c2009a4e915db17f32701d1f0535b400e61b58 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Mon, 24 Oct 2005 09:00:51 +0100
Subject: NTFS: Document extended attribute ($EA) NEED_EA flag.  (Based on
 libntfs       patch by Yura Pakhuchiy.)

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 2 ++
 fs/ntfs/layout.h  | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index dea742405161..50a7749cfca1 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -78,6 +78,8 @@ ToDo/Notes:
 	- $EA attributes can be both resident and non-resident.
 	- Use %z for size_t to fix compilation warnings.  (Andrew Morton)
 	- Fix compilation warnings with gcc-4.0.2 on SUSE 10.0.
+	- Document extended attribute ($EA) NEED_EA flag.  (Based on libntfs
+	  patch by Yura Pakhuchiy.)
 
 2.1.24 - Lots of bug fixes and support more clean journal states.
 
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 71b25dab8199..f5678d5d7919 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -2374,7 +2374,9 @@ typedef struct {
  * Extended attribute flags (8-bit).
  */
 enum {
-	NEED_EA	= 0x80
+	NEED_EA	= 0x80		/* If set the file to which the EA belongs
+				   cannot be interpreted without understanding
+				   the associated extended attributes. */
 } __attribute__ ((__packed__));
 
 typedef u8 EA_FLAGS;
-- 
cgit v1.2.3


From d052d1beff706920e82c5d55006b08e256b5df09 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 29 Oct 2005 19:07:23 +0100
Subject: Create platform_device.h to contain all the platform device details.
 Convert everyone who uses platform_bus_type to include
 linux/platform_device.h.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/common/locomo.c                 |  2 +-
 arch/arm/common/sa1111.c                 |  2 +-
 arch/arm/common/scoop.c                  |  2 +-
 arch/arm/mach-aaec2000/core.c            |  2 +-
 arch/arm/mach-h720x/h7202-eval.c         |  2 +-
 arch/arm/mach-imx/generic.c              |  2 +-
 arch/arm/mach-imx/mx1ads.c               |  1 +
 arch/arm/mach-integrator/integrator_ap.c |  2 +-
 arch/arm/mach-integrator/integrator_cp.c |  2 +-
 arch/arm/mach-iop3xx/iop321-setup.c      |  2 +-
 arch/arm/mach-iop3xx/iop331-setup.c      |  2 +-
 arch/arm/mach-ixp2000/enp2611.c          |  2 +-
 arch/arm/mach-ixp2000/ixdp2x00.c         |  2 +-
 arch/arm/mach-ixp2000/ixdp2x01.c         |  2 +-
 arch/arm/mach-ixp4xx/common.c            |  1 +
 arch/arm/mach-lh7a40x/arch-lpd7a40x.c    |  2 +-
 arch/arm/mach-omap1/board-h2.c           |  2 +-
 arch/arm/mach-omap1/board-h3.c           |  2 +-
 arch/arm/mach-omap1/board-innovator.c    |  2 +-
 arch/arm/mach-omap1/board-netstar.c      |  2 +-
 arch/arm/mach-omap1/board-osk.c          |  2 +-
 arch/arm/mach-omap1/board-perseus2.c     |  2 +-
 arch/arm/mach-omap1/board-voiceblue.c    |  2 +-
 arch/arm/mach-omap1/devices.c            |  2 +-
 arch/arm/mach-pxa/corgi.c                |  2 +-
 arch/arm/mach-pxa/corgi_lcd.c            |  2 +-
 arch/arm/mach-pxa/corgi_ssp.c            |  2 +-
 arch/arm/mach-pxa/generic.c              |  2 +-
 arch/arm/mach-pxa/idp.c                  |  2 +-
 arch/arm/mach-pxa/lubbock.c              |  2 +-
 arch/arm/mach-pxa/mainstone.c            |  2 +-
 arch/arm/mach-pxa/poodle.c               |  2 +-
 arch/arm/mach-pxa/pxa27x.c               |  2 +-
 arch/arm/mach-pxa/spitz.c                |  2 +-
 arch/arm/mach-s3c2410/clock.c            |  2 +-
 arch/arm/mach-s3c2410/cpu.c              |  2 +-
 arch/arm/mach-s3c2410/devs.c             |  2 +-
 arch/arm/mach-s3c2410/devs.h             |  1 +
 arch/arm/mach-s3c2410/mach-anubis.c      |  2 +-
 arch/arm/mach-s3c2410/mach-bast.c        |  2 +-
 arch/arm/mach-s3c2410/mach-h1940.c       |  1 +
 arch/arm/mach-s3c2410/mach-n30.c         |  2 +-
 arch/arm/mach-s3c2410/mach-nexcoder.c    |  2 +-
 arch/arm/mach-s3c2410/mach-otom.c        |  2 +-
 arch/arm/mach-s3c2410/mach-rx3715.c      |  1 +
 arch/arm/mach-s3c2410/mach-smdk2410.c    |  1 +
 arch/arm/mach-s3c2410/mach-smdk2440.c    |  1 +
 arch/arm/mach-s3c2410/s3c2410.c          |  2 +-
 arch/arm/mach-s3c2410/s3c2440.c          |  2 +-
 arch/arm/mach-sa1100/badge4.c            |  2 +-
 arch/arm/mach-sa1100/cerf.c              |  2 +-
 arch/arm/mach-sa1100/collie.c            |  2 +-
 arch/arm/mach-sa1100/generic.c           |  1 +
 arch/arm/mach-sa1100/jornada720.c        |  2 +-
 arch/arm/mach-sa1100/neponset.c          |  2 +-
 arch/arm/mach-sa1100/pleb.c              |  2 +-
 arch/arm/mach-sa1100/simpad.c            |  2 +-
 arch/arm/mach-versatile/core.c           |  1 +
 arch/arm/plat-omap/usb.c                 |  2 +-
 arch/m32r/kernel/setup_m32700ut.c        |  2 +-
 arch/m32r/kernel/setup_mappi.c           |  2 +-
 arch/m32r/kernel/setup_mappi2.c          |  2 +-
 arch/m32r/kernel/setup_mappi3.c          |  2 +-
 arch/m32r/kernel/setup_opsput.c          |  2 +-
 arch/mips/au1000/common/platform.c       |  2 +-
 arch/ppc/platforms/4xx/ibm440ep.c        |  1 +
 arch/ppc/platforms/4xx/ibmstb4.c         |  1 +
 arch/ppc/platforms/4xx/redwood5.c        |  2 +-
 arch/ppc/platforms/4xx/redwood6.c        |  2 +-
 arch/ppc/platforms/chrp_pegasos_eth.c    |  2 +-
 arch/ppc/platforms/cpci690.c             |  1 +
 arch/ppc/platforms/ev64260.c             |  1 +
 arch/ppc/platforms/ev64360.c             |  1 +
 arch/ppc/platforms/hdpu.c                |  1 +
 arch/ppc/platforms/katana.c              |  1 +
 arch/ppc/platforms/radstone_ppc7d.c      |  1 +
 arch/ppc/syslib/mpc52xx_devices.c        |  1 +
 arch/ppc/syslib/mv64x60.c                |  1 +
 arch/ppc/syslib/pq2_devices.c            |  2 +-
 arch/sh/boards/superh/microdev/setup.c   |  2 +-
 arch/um/drivers/net_kern.c               |  1 +
 arch/um/drivers/ubd_kern.c               |  1 +
 arch/xtensa/platform-iss/network.c       |  1 +
 drivers/base/platform.c                  |  2 +-
 drivers/block/floppy.c                   |  2 +-
 drivers/char/s3c2410-rtc.c               |  2 +-
 drivers/char/sonypi.c                    |  1 +
 drivers/char/tb0219.c                    |  2 +-
 drivers/char/vr41xx_giu.c                |  2 +-
 drivers/char/vr41xx_rtc.c                |  2 +-
 drivers/char/watchdog/mpcore_wdt.c       |  2 +-
 drivers/char/watchdog/mv64x60_wdt.c      |  2 ++
 drivers/char/watchdog/s3c2410_wdt.c      |  2 +-
 drivers/eisa/virtual_root.c              |  2 +-
 drivers/firmware/dcdbas.c                |  2 +-
 drivers/firmware/dell_rbu.c              |  2 +-
 drivers/hwmon/hdaps.c                    |  2 +-
 drivers/i2c/busses/i2c-iop3xx.c          |  2 +-
 drivers/i2c/busses/i2c-isa.c             |  1 +
 drivers/i2c/busses/i2c-ixp2000.c         |  2 +-
 drivers/i2c/busses/i2c-ixp4xx.c          |  2 +-
 drivers/i2c/busses/i2c-mpc.c             |  2 ++
 drivers/i2c/busses/i2c-mv64xxx.c         |  2 ++
 drivers/i2c/busses/i2c-pxa.c             |  1 +
 drivers/i2c/busses/i2c-s3c2410.c         |  2 +-
 drivers/i2c/chips/isp1301_omap.c         |  2 +-
 drivers/i2c/i2c-core.c                   |  1 +
 drivers/i2c/i2c-dev.c                    |  1 +
 drivers/input/keyboard/corgikbd.c        |  2 +-
 drivers/input/keyboard/spitzkbd.c        |  2 +-
 drivers/input/serio/ct82c710.c           |  1 +
 drivers/input/serio/i8042.c              |  1 +
 drivers/input/serio/maceps2.c            |  2 +-
 drivers/input/serio/q40kbd.c             |  1 +
 drivers/input/serio/rpckbd.c             |  1 +
 drivers/input/touchscreen/corgi_ts.c     |  2 +-
 drivers/mfd/mcp-sa11x0.c                 |  2 +-
 drivers/misc/hdpuftrs/hdpu_cpustate.c    |  2 +-
 drivers/misc/hdpuftrs/hdpu_nexus.c       |  2 +-
 drivers/mmc/pxamci.c                     |  2 +-
 drivers/mmc/wbsd.c                       |  2 +-
 drivers/mtd/maps/bast-flash.c            |  2 +-
 drivers/mtd/maps/integrator-flash.c      |  2 +-
 drivers/mtd/maps/ixp2000.c               |  2 +-
 drivers/mtd/maps/ixp4xx.c                |  2 +-
 drivers/mtd/maps/omap_nor.c              |  2 +-
 drivers/mtd/maps/plat-ram.c              |  2 +-
 drivers/mtd/maps/sa1100-flash.c          |  2 +-
 drivers/mtd/nand/s3c2410.c               |  2 +-
 drivers/net/depca.c                      |  2 +-
 drivers/net/dm9000.c                     |  1 +
 drivers/net/gianfar.c                    |  2 +-
 drivers/net/gianfar_mii.c                |  1 +
 drivers/net/irda/pxaficp_ir.c            |  1 +
 drivers/net/irda/sa1100_ir.c             |  2 +-
 drivers/net/irda/smsc-ircc2.c            |  1 +
 drivers/net/jazzsonic.c                  |  2 +-
 drivers/net/macsonic.c                   |  2 +-
 drivers/net/mipsnet.c                    |  1 +
 drivers/net/mv643xx_eth.c                |  2 ++
 drivers/net/smc91x.c                     |  2 +-
 drivers/net/tokenring/proteon.c          |  1 +
 drivers/net/tokenring/skisa.c            |  1 +
 drivers/pcmcia/au1000_generic.c          |  2 +-
 drivers/pcmcia/hd64465_ss.c              |  2 +-
 drivers/pcmcia/i82365.c                  |  2 +-
 drivers/pcmcia/m32r_cfc.c                |  2 +-
 drivers/pcmcia/m32r_pcc.c                |  2 +-
 drivers/pcmcia/omap_cf.c                 |  2 +-
 drivers/pcmcia/pxa2xx_base.c             |  1 +
 drivers/pcmcia/pxa2xx_mainstone.c        |  2 +-
 drivers/pcmcia/pxa2xx_sharpsl.c          |  2 +-
 drivers/pcmcia/sa1100_generic.c          |  1 +
 drivers/pcmcia/tcic.c                    |  2 +-
 drivers/pcmcia/vrc4171_card.c            |  1 +
 drivers/scsi/hosts.c                     |  1 +
 drivers/serial/8250.c                    |  2 +-
 drivers/serial/imx.c                     |  2 +-
 drivers/serial/mpc52xx_uart.c            |  2 +-
 drivers/serial/mpsc.c                    |  2 ++
 drivers/serial/pxa.c                     |  2 +-
 drivers/serial/s3c2410.c                 |  2 +-
 drivers/serial/sa1100.c                  |  2 +-
 drivers/serial/vr41xx_siu.c              |  2 +-
 drivers/usb/gadget/dummy_hcd.c           |  2 +-
 drivers/usb/gadget/lh7a40x_udc.c         |  2 ++
 drivers/usb/gadget/omap_udc.c            |  2 +-
 drivers/usb/gadget/pxa2xx_udc.c          |  2 +-
 drivers/usb/host/isp116x-hcd.c           |  1 +
 drivers/usb/host/ohci-au1xxx.c           |  2 ++
 drivers/usb/host/ohci-lh7a404.c          |  2 ++
 drivers/usb/host/ohci-omap.c             |  2 ++
 drivers/usb/host/ohci-ppc-soc.c          |  2 ++
 drivers/usb/host/ohci-pxa27x.c           |  2 +-
 drivers/usb/host/ohci-s3c2410.c          |  2 ++
 drivers/usb/host/sl811-hcd.c             |  1 +
 drivers/usb/host/sl811_cs.c              |  1 +
 drivers/video/acornfb.c                  |  2 +-
 drivers/video/arcfb.c                    |  1 +
 drivers/video/backlight/corgi_bl.c       |  2 +-
 drivers/video/dnfb.c                     |  2 ++
 drivers/video/epson1355fb.c              |  2 ++
 drivers/video/gbefb.c                    |  2 +-
 drivers/video/imxfb.c                    |  2 +-
 drivers/video/pxafb.c                    |  2 +-
 drivers/video/q40fb.c                    |  1 +
 drivers/video/s1d13xxxfb.c               |  2 +-
 drivers/video/s3c2410fb.c                |  1 +
 drivers/video/sa1100fb.c                 |  2 +-
 drivers/video/sgivwfb.c                  |  2 ++
 drivers/video/vesafb.c                   |  2 ++
 drivers/video/vfb.c                      |  2 ++
 drivers/video/w100fb.c                   |  2 +-
 include/asm-ppc/ppc_sys.h                |  2 +-
 include/linux/device.h                   | 26 --------------------------
 include/linux/serial_8250.h              |  2 +-
 sound/arm/pxa2xx-ac97.c                  |  2 +-
 sound/core/init.c                        |  2 ++
 198 files changed, 214 insertions(+), 158 deletions(-)

diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index 5cdb4122f057..ad55680726ed 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -22,7 +22,7 @@
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 21e2a518ad3a..174aa86ee816 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -22,7 +22,7 @@
 #include <linux/ptrace.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/dma-mapping.h>
diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c
index e8356b76d7c6..68b06d16f253 100644
--- a/arch/arm/common/scoop.c
+++ b/arch/arm/common/scoop.c
@@ -11,7 +11,7 @@
  *
  */
 
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <asm/io.h>
 #include <asm/hardware/scoop.h>
 
diff --git a/arch/arm/mach-aaec2000/core.c b/arch/arm/mach-aaec2000/core.c
index 0c53dab80905..4e706d9ad368 100644
--- a/arch/arm/mach-aaec2000/core.c
+++ b/arch/arm/mach-aaec2000/core.c
@@ -13,7 +13,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/list.h>
 #include <linux/errno.h>
 #include <linux/dma-mapping.h>
diff --git a/arch/arm/mach-h720x/h7202-eval.c b/arch/arm/mach-h720x/h7202-eval.c
index db9078ad008c..d75c8221d2a5 100644
--- a/arch/arm/mach-h720x/h7202-eval.c
+++ b/arch/arm/mach-h720x/h7202-eval.c
@@ -18,7 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/string.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/setup.h>
 #include <asm/types.h>
diff --git a/arch/arm/mach-imx/generic.c b/arch/arm/mach-imx/generic.c
index cb14b0682cef..60e2361e98e8 100644
--- a/arch/arm/mach-imx/generic.c
+++ b/arch/arm/mach-imx/generic.c
@@ -22,7 +22,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
  */
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
diff --git a/arch/arm/mach-imx/mx1ads.c b/arch/arm/mach-imx/mx1ads.c
index 4cbdc1fe04b1..708e1b3faa14 100644
--- a/arch/arm/mach-imx/mx1ads.c
+++ b/arch/arm/mach-imx/mx1ads.c
@@ -14,6 +14,7 @@
 
 #include <linux/device.h>
 #include <linux/init.h>
+#include <linux/platform_device.h>
 #include <asm/system.h>
 #include <asm/hardware.h>
 #include <asm/irq.h>
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index f368b85f0447..1f9061ca7ef4 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -21,7 +21,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/list.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/sysdev.h>
diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index aa34c58b96c4..93f7ccb22c27 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -11,7 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/list.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/string.h>
diff --git a/arch/arm/mach-iop3xx/iop321-setup.c b/arch/arm/mach-iop3xx/iop321-setup.c
index bb5091223b63..80770233b8d4 100644
--- a/arch/arm/mach-iop3xx/iop321-setup.c
+++ b/arch/arm/mach-iop3xx/iop321-setup.c
@@ -16,7 +16,7 @@
 #include <linux/init.h>
 #include <linux/major.h>
 #include <linux/fs.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/serial.h>
 #include <linux/tty.h>
 #include <linux/serial_core.h>
diff --git a/arch/arm/mach-iop3xx/iop331-setup.c b/arch/arm/mach-iop3xx/iop331-setup.c
index a2533c3ab42f..53f60614498b 100644
--- a/arch/arm/mach-iop3xx/iop331-setup.c
+++ b/arch/arm/mach-iop3xx/iop331-setup.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/major.h>
 #include <linux/fs.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/serial.h>
 #include <linux/tty.h>
 #include <linux/serial_core.h>
diff --git a/arch/arm/mach-ixp2000/enp2611.c b/arch/arm/mach-ixp2000/enp2611.c
index 9aa54de44740..643f5e1c3d93 100644
--- a/arch/arm/mach-ixp2000/enp2611.c
+++ b/arch/arm/mach-ixp2000/enp2611.c
@@ -32,7 +32,7 @@
 #include <linux/serial.h>
 #include <linux/tty.h>
 #include <linux/serial_core.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c
index 8b4a839b6279..05dfcb48c2b6 100644
--- a/arch/arm/mach-ixp2000/ixdp2x00.c
+++ b/arch/arm/mach-ixp2000/ixdp2x00.c
@@ -20,7 +20,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/bitops.h>
 #include <linux/pci.h>
 #include <linux/ioport.h>
diff --git a/arch/arm/mach-ixp2000/ixdp2x01.c b/arch/arm/mach-ixp2000/ixdp2x01.c
index fee1d7b73503..b21249908ae4 100644
--- a/arch/arm/mach-ixp2000/ixdp2x01.c
+++ b/arch/arm/mach-ixp2000/ixdp2x01.c
@@ -29,7 +29,7 @@
 #include <linux/serial.h>
 #include <linux/tty.h>
 #include <linux/serial_core.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 6c396447c4e0..f3c687cf0071 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -20,6 +20,7 @@
 #include <linux/serial.h>
 #include <linux/sched.h>
 #include <linux/tty.h>
+#include <linux/platform_device.h>
 #include <linux/serial_core.h>
 #include <linux/bootmem.h>
 #include <linux/interrupt.h>
diff --git a/arch/arm/mach-lh7a40x/arch-lpd7a40x.c b/arch/arm/mach-lh7a40x/arch-lpd7a40x.c
index a20eabc132b0..4eb962fdb3a8 100644
--- a/arch/arm/mach-lh7a40x/arch-lpd7a40x.c
+++ b/arch/arm/mach-lh7a40x/arch-lpd7a40x.c
@@ -10,7 +10,7 @@
 
 #include <linux/tty.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/interrupt.h>
 
 #include <asm/hardware.h>
diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index d46a70063b0c..4ee6bd8a50b8 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -21,7 +21,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c
index 2798613696fa..fc824361430d 100644
--- a/arch/arm/mach-omap1/board-h3.c
+++ b/arch/arm/mach-omap1/board-h3.c
@@ -19,7 +19,7 @@
 #include <linux/init.h>
 #include <linux/major.h>
 #include <linux/kernel.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/errno.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/mach-omap1/board-innovator.c b/arch/arm/mach-omap1/board-innovator.c
index fd9183ff2ed5..a2eac853b2da 100644
--- a/arch/arm/mach-omap1/board-innovator.c
+++ b/arch/arm/mach-omap1/board-innovator.c
@@ -18,7 +18,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/mach-omap1/board-netstar.c b/arch/arm/mach-omap1/board-netstar.c
index d904e643f5ec..c851c2e4dfcb 100644
--- a/arch/arm/mach-omap1/board-netstar.c
+++ b/arch/arm/mach-omap1/board-netstar.c
@@ -11,7 +11,7 @@
  */
 
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c
index 21103df50415..a88524e7c315 100644
--- a/arch/arm/mach-omap1/board-osk.c
+++ b/arch/arm/mach-omap1/board-osk.c
@@ -28,7 +28,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/interrupt.h>
 
 #include <linux/mtd/mtd.h>
diff --git a/arch/arm/mach-omap1/board-perseus2.c b/arch/arm/mach-omap1/board-perseus2.c
index 2ba26e239108..354b157acb3a 100644
--- a/arch/arm/mach-omap1/board-perseus2.c
+++ b/arch/arm/mach-omap1/board-perseus2.c
@@ -13,7 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/mach-omap1/board-voiceblue.c b/arch/arm/mach-omap1/board-voiceblue.c
index bf30b1acda0b..3f018b296861 100644
--- a/arch/arm/mach-omap1/board-voiceblue.c
+++ b/arch/arm/mach-omap1/board-voiceblue.c
@@ -13,7 +13,7 @@
  */
 
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/arch/arm/mach-omap1/devices.c b/arch/arm/mach-omap1/devices.c
index e8b3981444cd..3c5d901efeaa 100644
--- a/arch/arm/mach-omap1/devices.c
+++ b/arch/arm/mach-omap1/devices.c
@@ -13,7 +13,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/hardware.h>
 #include <asm/io.h>
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 60c8b9d8bb9c..247147f29b93 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -14,7 +14,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/major.h>
 #include <linux/fs.h>
 #include <linux/interrupt.h>
diff --git a/arch/arm/mach-pxa/corgi_lcd.c b/arch/arm/mach-pxa/corgi_lcd.c
index 370df113dc06..54162ba95414 100644
--- a/arch/arm/mach-pxa/corgi_lcd.c
+++ b/arch/arm/mach-pxa/corgi_lcd.c
@@ -17,7 +17,7 @@
 
 #include <linux/delay.h>
 #include <linux/kernel.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/module.h>
 #include <asm/arch/akita.h>
 #include <asm/arch/corgi.h>
diff --git a/arch/arm/mach-pxa/corgi_ssp.c b/arch/arm/mach-pxa/corgi_ssp.c
index 136c269db0b7..591e5f32dbec 100644
--- a/arch/arm/mach-pxa/corgi_ssp.c
+++ b/arch/arm/mach-pxa/corgi_ssp.c
@@ -15,7 +15,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <asm/hardware.h>
 #include <asm/mach-types.h>
 
diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c
index 3248bc9b9495..afd5063b0ebe 100644
--- a/arch/arm/mach-pxa/generic.c
+++ b/arch/arm/mach-pxa/generic.c
@@ -20,7 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/ioport.h>
 #include <linux/pm.h>
 
diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
index 01a83ab09ac3..7de159e2ab42 100644
--- a/arch/arm/mach-pxa/idp.c
+++ b/arch/arm/mach-pxa/idp.c
@@ -18,7 +18,7 @@
 
 #include <linux/init.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/fb.h>
 
 #include <asm/setup.h>
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index beccf455f796..1f6857d7747d 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -14,7 +14,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/sysdev.h>
 #include <linux/major.h>
 #include <linux/fb.h>
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index a48c64026e1f..887a8cb7b721 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -14,7 +14,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index f25638810017..86326307ab9f 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -16,7 +16,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/fb.h>
 
 #include <asm/hardware.h>
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index 09a5d593f04b..c722a9a91fcc 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -16,7 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/pm.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/hardware.h>
 #include <asm/irq.h>
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index d0ab428c2d7d..4182ddf330da 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -14,7 +14,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/major.h>
 #include <linux/fs.h>
diff --git a/arch/arm/mach-s3c2410/clock.c b/arch/arm/mach-s3c2410/clock.c
index 8b3d5dc35de5..82e8253b1fa0 100644
--- a/arch/arm/mach-s3c2410/clock.c
+++ b/arch/arm/mach-s3c2410/clock.c
@@ -32,7 +32,7 @@
 #include <linux/list.h>
 #include <linux/errno.h>
 #include <linux/err.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/sysdev.h>
 
 #include <linux/interrupt.h>
diff --git a/arch/arm/mach-s3c2410/cpu.c b/arch/arm/mach-s3c2410/cpu.c
index ca366e9e264d..687fe371369d 100644
--- a/arch/arm/mach-s3c2410/cpu.c
+++ b/arch/arm/mach-s3c2410/cpu.c
@@ -26,7 +26,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/hardware.h>
 #include <asm/irq.h>
diff --git a/arch/arm/mach-s3c2410/devs.c b/arch/arm/mach-s3c2410/devs.c
index 08bc7d95a45d..f58406e6ef5a 100644
--- a/arch/arm/mach-s3c2410/devs.c
+++ b/arch/arm/mach-s3c2410/devs.c
@@ -24,7 +24,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-s3c2410/devs.h b/arch/arm/mach-s3c2410/devs.h
index d6328f96728b..52c4bab5c761 100644
--- a/arch/arm/mach-s3c2410/devs.h
+++ b/arch/arm/mach-s3c2410/devs.h
@@ -15,6 +15,7 @@
  *	10-Feb-2005 BJD	 Added camera from guillaume.gourat@nexvision.tv
 */
 #include <linux/config.h>
+#include <linux/platform_device.h>
 
 extern struct platform_device *s3c24xx_uart_devs[];
 
diff --git a/arch/arm/mach-s3c2410/mach-anubis.c b/arch/arm/mach-s3c2410/mach-anubis.c
index 5ae80f4e3e67..8390b685c2b6 100644
--- a/arch/arm/mach-s3c2410/mach-anubis.c
+++ b/arch/arm/mach-s3c2410/mach-anubis.c
@@ -21,7 +21,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index c1b5c63ec24a..0b71c896bbd1 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -41,7 +41,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dm9000.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c
index 7efeaaad2361..0aa8760598f7 100644
--- a/arch/arm/mach-s3c2410/mach-h1940.c
+++ b/arch/arm/mach-s3c2410/mach-h1940.c
@@ -34,6 +34,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-s3c2410/mach-n30.c b/arch/arm/mach-s3c2410/mach-n30.c
index 5c0f2b091f95..378d640ab00b 100644
--- a/arch/arm/mach-s3c2410/mach-n30.c
+++ b/arch/arm/mach-s3c2410/mach-n30.c
@@ -20,7 +20,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/kthread.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-s3c2410/mach-nexcoder.c b/arch/arm/mach-s3c2410/mach-nexcoder.c
index c22f8216032d..42b0eeff2e0f 100644
--- a/arch/arm/mach-s3c2410/mach-nexcoder.c
+++ b/arch/arm/mach-s3c2410/mach-nexcoder.c
@@ -19,7 +19,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/string.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <linux/mtd/map.h>
 
diff --git a/arch/arm/mach-s3c2410/mach-otom.c b/arch/arm/mach-s3c2410/mach-otom.c
index ad1459e402e2..a2eb9ed48fcd 100644
--- a/arch/arm/mach-s3c2410/mach-otom.c
+++ b/arch/arm/mach-s3c2410/mach-otom.c
@@ -15,7 +15,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-s3c2410/mach-rx3715.c b/arch/arm/mach-s3c2410/mach-rx3715.c
index 22d9e070fd68..8f2a90bf940b 100644
--- a/arch/arm/mach-s3c2410/mach-rx3715.c
+++ b/arch/arm/mach-s3c2410/mach-rx3715.c
@@ -27,6 +27,7 @@
 #include <linux/init.h>
 #include <linux/tty.h>
 #include <linux/console.h>
+#include <linux/platform_device.h>
 #include <linux/serial_core.h>
 #include <linux/serial.h>
 
diff --git a/arch/arm/mach-s3c2410/mach-smdk2410.c b/arch/arm/mach-s3c2410/mach-smdk2410.c
index 2eda55a6b678..2c91965ee1c8 100644
--- a/arch/arm/mach-s3c2410/mach-smdk2410.c
+++ b/arch/arm/mach-s3c2410/mach-smdk2410.c
@@ -38,6 +38,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-s3c2410/mach-smdk2440.c b/arch/arm/mach-s3c2410/mach-smdk2440.c
index 6950e61b7914..d666c621ad06 100644
--- a/arch/arm/mach-s3c2410/mach-smdk2440.c
+++ b/arch/arm/mach-s3c2410/mach-smdk2440.c
@@ -28,6 +28,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c
index a8bf5ec82602..0a2013a76549 100644
--- a/arch/arm/mach-s3c2410/s3c2410.c
+++ b/arch/arm/mach-s3c2410/s3c2410.c
@@ -27,7 +27,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-s3c2410/s3c2440.c b/arch/arm/mach-s3c2410/s3c2440.c
index 833fa36bce05..4d63e7133b48 100644
--- a/arch/arm/mach-s3c2410/s3c2440.c
+++ b/arch/arm/mach-s3c2410/s3c2440.c
@@ -26,7 +26,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/sysdev.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c
index c92cebff7f8e..edccd5eb06be 100644
--- a/arch/arm/mach-sa1100/badge4.c
+++ b/arch/arm/mach-sa1100/badge4.c
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/tty.h>
 #include <linux/mtd/mtd.h>
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index 23cb74885275..508593722bc7 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/tty.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 7fd6e29c36b7..522abc036d3a 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -21,7 +21,7 @@
 #include <linux/kernel.h>
 #include <linux/tty.h>
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/timer.h>
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 93619497779c..976380bde417 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -17,6 +17,7 @@
 #include <linux/pm.h>
 #include <linux/cpufreq.h>
 #include <linux/ioport.h>
+#include <linux/platform_device.h>
 
 #include <asm/div64.h>
 #include <asm/hardware.h>
diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c
index 9c363bfcf310..9fb65cffa578 100644
--- a/arch/arm/mach-sa1100/jornada720.c
+++ b/arch/arm/mach-sa1100/jornada720.c
@@ -6,7 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/tty.h>
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/ioport.h>
 
 #include <asm/hardware.h>
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index 052e4caedb89..69f1970646c6 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -8,7 +8,7 @@
 #include <linux/tty.h>
 #include <linux/ioport.h>
 #include <linux/serial_core.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 #include <asm/hardware.h>
diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c
index e17b58fb9c9c..58c18f9e9b7b 100644
--- a/arch/arm/mach-sa1100/pleb.c
+++ b/arch/arm/mach-sa1100/pleb.c
@@ -6,7 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/tty.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <linux/mtd/partitions.h>
 
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index cfb6658e5cdf..439ddc9b06d6 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -10,7 +10,7 @@
 #include <linux/proc_fs.h>
 #include <linux/string.h> 
 #include <linux/pm.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 7e4bdd07f4af..a1ca46630dda 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
 #include <linux/sysdev.h>
 #include <linux/interrupt.h>
 
diff --git a/arch/arm/plat-omap/usb.c b/arch/arm/plat-omap/usb.c
index 14a836d7ac25..205e2d0b826d 100644
--- a/arch/arm/plat-omap/usb.c
+++ b/arch/arm/plat-omap/usb.c
@@ -26,7 +26,7 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/usb_otg.h>
 
 #include <asm/io.h>
diff --git a/arch/m32r/kernel/setup_m32700ut.c b/arch/m32r/kernel/setup_m32700ut.c
index 708634b685e4..cb76916b014d 100644
--- a/arch/m32r/kernel/setup_m32700ut.c
+++ b/arch/m32r/kernel/setup_m32700ut.c
@@ -15,7 +15,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/system.h>
 #include <asm/m32r.h>
diff --git a/arch/m32r/kernel/setup_mappi.c b/arch/m32r/kernel/setup_mappi.c
index 4e709809efc5..501d798cf050 100644
--- a/arch/m32r/kernel/setup_mappi.c
+++ b/arch/m32r/kernel/setup_mappi.c
@@ -11,7 +11,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/system.h>
 #include <asm/m32r.h>
diff --git a/arch/m32r/kernel/setup_mappi2.c b/arch/m32r/kernel/setup_mappi2.c
index a1d801598aa4..7f2db5bfd626 100644
--- a/arch/m32r/kernel/setup_mappi2.c
+++ b/arch/m32r/kernel/setup_mappi2.c
@@ -11,7 +11,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/system.h>
 #include <asm/m32r.h>
diff --git a/arch/m32r/kernel/setup_mappi3.c b/arch/m32r/kernel/setup_mappi3.c
index a76412e883e8..9c79341a7b45 100644
--- a/arch/m32r/kernel/setup_mappi3.c
+++ b/arch/m32r/kernel/setup_mappi3.c
@@ -11,7 +11,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/system.h>
 #include <asm/m32r.h>
diff --git a/arch/m32r/kernel/setup_opsput.c b/arch/m32r/kernel/setup_opsput.c
index d7b7ec6d30f8..1fbb140854e7 100644
--- a/arch/m32r/kernel/setup_opsput.c
+++ b/arch/m32r/kernel/setup_opsput.c
@@ -16,7 +16,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/system.h>
 #include <asm/m32r.h>
diff --git a/arch/mips/au1000/common/platform.c b/arch/mips/au1000/common/platform.c
index 0776b2db5641..3c778d0f58a6 100644
--- a/arch/mips/au1000/common/platform.c
+++ b/arch/mips/au1000/common/platform.c
@@ -7,7 +7,7 @@
  * License version 2.  This program is licensed "as is" without any
  * warranty of any kind, whether express or implied.
  */
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/resource.h>
diff --git a/arch/ppc/platforms/4xx/ibm440ep.c b/arch/ppc/platforms/4xx/ibm440ep.c
index 4712de8ff80f..65ac0b9c2d05 100644
--- a/arch/ppc/platforms/4xx/ibm440ep.c
+++ b/arch/ppc/platforms/4xx/ibm440ep.c
@@ -14,6 +14,7 @@
  */
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/platform_device.h>
 #include <platforms/4xx/ibm440ep.h>
 #include <asm/ocp.h>
 #include <asm/ppc4xx_pic.h>
diff --git a/arch/ppc/platforms/4xx/ibmstb4.c b/arch/ppc/platforms/4xx/ibmstb4.c
index d90627b68faa..7e33bb635443 100644
--- a/arch/ppc/platforms/4xx/ibmstb4.c
+++ b/arch/ppc/platforms/4xx/ibmstb4.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/platform_device.h>
 #include <asm/ocp.h>
 #include <asm/ppc4xx_pic.h>
 #include <platforms/4xx/ibmstb4.h>
diff --git a/arch/ppc/platforms/4xx/redwood5.c b/arch/ppc/platforms/4xx/redwood5.c
index bee8b4ac8afd..611ac861804d 100644
--- a/arch/ppc/platforms/4xx/redwood5.c
+++ b/arch/ppc/platforms/4xx/redwood5.c
@@ -14,7 +14,7 @@
 #include <linux/config.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/ioport.h>
 #include <asm/io.h>
 #include <asm/machdep.h>
diff --git a/arch/ppc/platforms/4xx/redwood6.c b/arch/ppc/platforms/4xx/redwood6.c
index 8b1012994dfc..b13116691289 100644
--- a/arch/ppc/platforms/4xx/redwood6.c
+++ b/arch/ppc/platforms/4xx/redwood6.c
@@ -12,7 +12,7 @@
 #include <linux/config.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/ioport.h>
 #include <asm/io.h>
 #include <asm/ppc4xx_pic.h>
diff --git a/arch/ppc/platforms/chrp_pegasos_eth.c b/arch/ppc/platforms/chrp_pegasos_eth.c
index cad5bfa153b2..d1af11c73ea1 100644
--- a/arch/ppc/platforms/chrp_pegasos_eth.c
+++ b/arch/ppc/platforms/chrp_pegasos_eth.c
@@ -13,7 +13,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/mv643xx.h>
 #include <linux/pci.h>
 
diff --git a/arch/ppc/platforms/cpci690.c b/arch/ppc/platforms/cpci690.c
index f64ac2acb603..6ca7bcac9474 100644
--- a/arch/ppc/platforms/cpci690.c
+++ b/arch/ppc/platforms/cpci690.c
@@ -21,6 +21,7 @@
 #include <linux/initrd.h>
 #include <linux/root_dev.h>
 #include <linux/mv643xx.h>
+#include <linux/platform_device.h>
 #include <asm/bootinfo.h>
 #include <asm/machdep.h>
 #include <asm/todc.h>
diff --git a/arch/ppc/platforms/ev64260.c b/arch/ppc/platforms/ev64260.c
index aa50637a5cfb..32358b3fb236 100644
--- a/arch/ppc/platforms/ev64260.c
+++ b/arch/ppc/platforms/ev64260.c
@@ -33,6 +33,7 @@
 #include <linux/console.h>
 #include <linux/initrd.h>
 #include <linux/root_dev.h>
+#include <linux/platform_device.h>
 #if !defined(CONFIG_SERIAL_MPSC_CONSOLE)
 #include <linux/serial.h>
 #include <linux/tty.h>
diff --git a/arch/ppc/platforms/ev64360.c b/arch/ppc/platforms/ev64360.c
index 9811a8a52c25..4e6cc64b3efd 100644
--- a/arch/ppc/platforms/ev64360.c
+++ b/arch/ppc/platforms/ev64360.c
@@ -25,6 +25,7 @@
 #include <linux/bootmem.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mv643xx.h>
+#include <linux/platform_device.h>
 #ifdef CONFIG_BOOTIMG
 #include <linux/bootimg.h>
 #endif
diff --git a/arch/ppc/platforms/hdpu.c b/arch/ppc/platforms/hdpu.c
index ff3796860123..0f07e963de3c 100644
--- a/arch/ppc/platforms/hdpu.c
+++ b/arch/ppc/platforms/hdpu.c
@@ -22,6 +22,7 @@
 #include <linux/irq.h>
 #include <linux/ide.h>
 #include <linux/seq_file.h>
+#include <linux/platform_device.h>
 
 #include <linux/initrd.h>
 #include <linux/root_dev.h>
diff --git a/arch/ppc/platforms/katana.c b/arch/ppc/platforms/katana.c
index 2b53afae0e9c..beb617141456 100644
--- a/arch/ppc/platforms/katana.c
+++ b/arch/ppc/platforms/katana.c
@@ -29,6 +29,7 @@
 #include <linux/seq_file.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mv643xx.h>
+#include <linux/platform_device.h>
 #ifdef CONFIG_BOOTIMG
 #include <linux/bootimg.h>
 #endif
diff --git a/arch/ppc/platforms/radstone_ppc7d.c b/arch/ppc/platforms/radstone_ppc7d.c
index 0376c8cff5d1..7e65b7f1f626 100644
--- a/arch/ppc/platforms/radstone_ppc7d.c
+++ b/arch/ppc/platforms/radstone_ppc7d.c
@@ -40,6 +40,7 @@
 #include <linux/serial_core.h>
 #include <linux/mv643xx.h>
 #include <linux/netdevice.h>
+#include <linux/platform_device.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
diff --git a/arch/ppc/syslib/mpc52xx_devices.c b/arch/ppc/syslib/mpc52xx_devices.c
index ad5182efca1d..da3c74bfdc92 100644
--- a/arch/ppc/syslib/mpc52xx_devices.c
+++ b/arch/ppc/syslib/mpc52xx_devices.c
@@ -15,6 +15,7 @@
 
 #include <linux/fsl_devices.h>
 #include <linux/resource.h>
+#include <linux/platform_device.h>
 #include <asm/mpc52xx.h>
 #include <asm/ppc_sys.h>
 
diff --git a/arch/ppc/syslib/mv64x60.c b/arch/ppc/syslib/mv64x60.c
index 4849850a59ed..1227521c0da2 100644
--- a/arch/ppc/syslib/mv64x60.c
+++ b/arch/ppc/syslib/mv64x60.c
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/mv643xx.h>
+#include <linux/platform_device.h>
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
diff --git a/arch/ppc/syslib/pq2_devices.c b/arch/ppc/syslib/pq2_devices.c
index 1d3869768f96..61668aad86e2 100644
--- a/arch/ppc/syslib/pq2_devices.c
+++ b/arch/ppc/syslib/pq2_devices.c
@@ -13,7 +13,7 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/ioport.h>
 #include <asm/cpm2.h>
 #include <asm/irq.h>
diff --git a/arch/sh/boards/superh/microdev/setup.c b/arch/sh/boards/superh/microdev/setup.c
index c18919941ec0..1c1d65fb12df 100644
--- a/arch/sh/boards/superh/microdev/setup.c
+++ b/arch/sh/boards/superh/microdev/setup.c
@@ -13,7 +13,7 @@
 
 #include <linux/config.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/ioport.h>
 #include <asm/io.h>
 #include <asm/mach/irq.h>
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 1495007bf6c0..721e2601a75d 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -20,6 +20,7 @@
 #include "linux/ctype.h"
 #include "linux/bootmem.h"
 #include "linux/ethtool.h"
+#include "linux/platform_device.h"
 #include "asm/uaccess.h"
 #include "user_util.h"
 #include "kern_util.h"
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index f73134333f64..b2c86257b0f8 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -35,6 +35,7 @@
 #include "linux/blkpg.h"
 #include "linux/genhd.h"
 #include "linux/spinlock.h"
+#include "linux/platform_device.h"
 #include "asm/segment.h"
 #include "asm/uaccess.h"
 #include "asm/irq.h"
diff --git a/arch/xtensa/platform-iss/network.c b/arch/xtensa/platform-iss/network.c
index 498d7dced1f4..0682ffd38175 100644
--- a/arch/xtensa/platform-iss/network.c
+++ b/arch/xtensa/platform-iss/network.c
@@ -33,6 +33,7 @@
 #include <linux/ethtool.h>
 #include <linux/rtnetlink.h>
 #include <linux/timer.h>
+#include <linux/platform_device.h>
 
 #include <xtensa/simcall.h>
 
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 75ce8711bca5..95f2af322c8f 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -10,7 +10,7 @@
  * information.
  */
 
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 00895477155e..5eadbb9d4d71 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -177,7 +177,7 @@ static int print_unex = 1;
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/devfs_fs_kernel.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/buffer_head.h>	/* for invalidate_buffers() */
 
 /*
diff --git a/drivers/char/s3c2410-rtc.c b/drivers/char/s3c2410-rtc.c
index 887b8b2d7882..d724c0de4f28 100644
--- a/drivers/char/s3c2410-rtc.c
+++ b/drivers/char/s3c2410-rtc.c
@@ -20,7 +20,7 @@
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/rtc.h>
 #include <linux/bcd.h>
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index f86c15587238..d05067dcea01 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -48,6 +48,7 @@
 #include <linux/dmi.h>
 #include <linux/err.h>
 #include <linux/kfifo.h>
+#include <linux/platform_device.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c
index eb7058cbf015..24355b23b2ca 100644
--- a/drivers/char/tb0219.c
+++ b/drivers/char/tb0219.c
@@ -17,7 +17,7 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/module.h>
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c
index 683278bc5241..94641085faf8 100644
--- a/drivers/char/vr41xx_giu.c
+++ b/drivers/char/vr41xx_giu.c
@@ -19,7 +19,7 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/init.h>
diff --git a/drivers/char/vr41xx_rtc.c b/drivers/char/vr41xx_rtc.c
index a6dbe4da030c..5e3292df69d8 100644
--- a/drivers/char/vr41xx_rtc.c
+++ b/drivers/char/vr41xx_rtc.c
@@ -17,7 +17,7 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
diff --git a/drivers/char/watchdog/mpcore_wdt.c b/drivers/char/watchdog/mpcore_wdt.c
index 75ca84ed4adf..47a5f6ab4879 100644
--- a/drivers/char/watchdog/mpcore_wdt.c
+++ b/drivers/char/watchdog/mpcore_wdt.c
@@ -29,7 +29,7 @@
 #include <linux/reboot.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/hardware/arm_twd.h>
 #include <asm/uaccess.h>
diff --git a/drivers/char/watchdog/mv64x60_wdt.c b/drivers/char/watchdog/mv64x60_wdt.c
index 6d3ff0836c44..04e0d7e9680d 100644
--- a/drivers/char/watchdog/mv64x60_wdt.c
+++ b/drivers/char/watchdog/mv64x60_wdt.c
@@ -22,6 +22,8 @@
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/watchdog.h>
+#include <linux/platform_device.h>
+
 #include <asm/mv64x60.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
diff --git a/drivers/char/watchdog/s3c2410_wdt.c b/drivers/char/watchdog/s3c2410_wdt.c
index b732020acadb..e7e20a6d64b0 100644
--- a/drivers/char/watchdog/s3c2410_wdt.c
+++ b/drivers/char/watchdog/s3c2410_wdt.c
@@ -44,7 +44,7 @@
 #include <linux/watchdog.h>
 #include <linux/fs.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/interrupt.h>
 
 #include <asm/uaccess.h>
diff --git a/drivers/eisa/virtual_root.c b/drivers/eisa/virtual_root.c
index 15677f20bd85..0f97a0cb0ff4 100644
--- a/drivers/eisa/virtual_root.c
+++ b/drivers/eisa/virtual_root.c
@@ -9,7 +9,7 @@
 
 #include <linux/config.h>
 #include <linux/kernel.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/eisa.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index 955537fe9958..8ed6ddbb9c5d 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -20,7 +20,7 @@
  *  GNU General Public License for more details.
  */
 
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
 #include <linux/init.h>
diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
index 4f4ba9b6d182..125929c9048f 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -41,7 +41,7 @@
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/blkdev.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/moduleparam.h>
 #include <linux/firmware.h>
diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c
index 0015da5668a1..1e5dfc7805e2 100644
--- a/drivers/hwmon/hdaps.c
+++ b/drivers/hwmon/hdaps.c
@@ -27,7 +27,7 @@
  */
 
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/input.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c
index 9888fae1f37a..13752bcb2afd 100644
--- a/drivers/i2c/busses/i2c-iop3xx.c
+++ b/drivers/i2c/busses/i2c-iop3xx.c
@@ -35,7 +35,7 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/i2c.h>
 
 #include <asm/io.h>
diff --git a/drivers/i2c/busses/i2c-isa.c b/drivers/i2c/busses/i2c-isa.c
index 4fdc02411609..03672c9ca409 100644
--- a/drivers/i2c/busses/i2c-isa.c
+++ b/drivers/i2c/busses/i2c-isa.c
@@ -38,6 +38,7 @@
 #include <linux/errno.h>
 #include <linux/i2c.h>
 #include <linux/i2c-isa.h>
+#include <linux/platform_device.h>
 
 static u32 isa_func(struct i2c_adapter *adapter);
 
diff --git a/drivers/i2c/busses/i2c-ixp2000.c b/drivers/i2c/busses/i2c-ixp2000.c
index 42016ee6ef13..64552a376f2d 100644
--- a/drivers/i2c/busses/i2c-ixp2000.c
+++ b/drivers/i2c/busses/i2c-ixp2000.c
@@ -28,7 +28,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
diff --git a/drivers/i2c/busses/i2c-ixp4xx.c b/drivers/i2c/busses/i2c-ixp4xx.c
index 69303ab65e04..cc652c350814 100644
--- a/drivers/i2c/busses/i2c-ixp4xx.c
+++ b/drivers/i2c/busses/i2c-ixp4xx.c
@@ -28,7 +28,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c
index 8491633005b8..65b939a059e9 100644
--- a/drivers/i2c/busses/i2c-mpc.c
+++ b/drivers/i2c/busses/i2c-mpc.c
@@ -19,6 +19,8 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/platform_device.h>
+
 #include <asm/io.h>
 #include <linux/fsl_devices.h>
 #include <linux/i2c.h>
diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index d0d2a6f1386e..6b48027b2ee3 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -17,6 +17,8 @@
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/mv643xx.h>
+#include <linux/platform_device.h>
+
 #include <asm/io.h>
 
 /* Register defines */
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 44b595d90a4a..67ccbea24ba4 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -30,6 +30,7 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/i2c-pxa.h>
+#include <linux/platform_device.h>
 
 #include <asm/hardware.h>
 #include <asm/irq.h>
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 6ced28e90070..a1268e534254 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -33,7 +33,7 @@
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/err.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/hardware.h>
 #include <asm/irq.h>
diff --git a/drivers/i2c/chips/isp1301_omap.c b/drivers/i2c/chips/isp1301_omap.c
index eaa4742e04fa..9dbb72fffbe2 100644
--- a/drivers/i2c/chips/isp1301_omap.c
+++ b/drivers/i2c/chips/isp1301_omap.c
@@ -27,7 +27,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/usb_ch9.h>
 #include <linux/usb_gadget.h>
 #include <linux/usb.h>
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 02e335a04f09..82ea1b7ec914 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -30,6 +30,7 @@
 #include <linux/init.h>
 #include <linux/idr.h>
 #include <linux/seq_file.h>
+#include <linux/platform_device.h>
 #include <asm/uaccess.h>
 
 
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index ea14c8f1c82b..8af0bd1424d2 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -34,6 +34,7 @@
 #include <linux/init.h>
 #include <linux/i2c.h>
 #include <linux/i2c-dev.h>
+#include <linux/platform_device.h>
 #include <asm/uaccess.h>
 
 static struct i2c_client i2cdev_client_template;
diff --git a/drivers/input/keyboard/corgikbd.c b/drivers/input/keyboard/corgikbd.c
index 3210d298b3bc..d00d14bb637a 100644
--- a/drivers/input/keyboard/corgikbd.c
+++ b/drivers/input/keyboard/corgikbd.c
@@ -12,7 +12,7 @@
  */
 
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
diff --git a/drivers/input/keyboard/spitzkbd.c b/drivers/input/keyboard/spitzkbd.c
index cee9c734a048..0fa38a559cdf 100644
--- a/drivers/input/keyboard/spitzkbd.c
+++ b/drivers/input/keyboard/spitzkbd.c
@@ -12,7 +12,7 @@
  */
 
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
diff --git a/drivers/input/serio/ct82c710.c b/drivers/input/serio/ct82c710.c
index dd0f5bd90241..4da6c86b5d76 100644
--- a/drivers/input/serio/ct82c710.c
+++ b/drivers/input/serio/ct82c710.c
@@ -37,6 +37,7 @@
 #include <linux/serio.h>
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 4bc40f159996..01e186422021 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -20,6 +20,7 @@
 #include <linux/serio.h>
 #include <linux/err.h>
 #include <linux/rcupdate.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 
diff --git a/drivers/input/serio/maceps2.c b/drivers/input/serio/maceps2.c
index 9880fc145d90..d857f7081adb 100644
--- a/drivers/input/serio/maceps2.c
+++ b/drivers/input/serio/maceps2.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/err.h>
diff --git a/drivers/input/serio/q40kbd.c b/drivers/input/serio/q40kbd.c
index 46093c507988..b44d255596c2 100644
--- a/drivers/input/serio/q40kbd.c
+++ b/drivers/input/serio/q40kbd.c
@@ -37,6 +37,7 @@
 #include <linux/interrupt.h>
 #include <linux/err.h>
 #include <linux/bitops.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
diff --git a/drivers/input/serio/rpckbd.c b/drivers/input/serio/rpckbd.c
index 106f5eefd89a..52c49258f8a4 100644
--- a/drivers/input/serio/rpckbd.c
+++ b/drivers/input/serio/rpckbd.c
@@ -34,6 +34,7 @@
 #include <linux/init.h>
 #include <linux/serio.h>
 #include <linux/err.h>
+#include <linux/platform_device.h>
 
 #include <asm/irq.h>
 #include <asm/hardware.h>
diff --git a/drivers/input/touchscreen/corgi_ts.c b/drivers/input/touchscreen/corgi_ts.c
index 0ba3e6562bff..15e88eeae8d6 100644
--- a/drivers/input/touchscreen/corgi_ts.c
+++ b/drivers/input/touchscreen/corgi_ts.c
@@ -11,7 +11,7 @@
 
 
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 720e7a326308..7daa0ed7331c 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -18,7 +18,7 @@
 #include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/dma.h>
 #include <asm/hardware.h>
diff --git a/drivers/misc/hdpuftrs/hdpu_cpustate.c b/drivers/misc/hdpuftrs/hdpu_cpustate.c
index 46de5c940555..9c4dd682ac74 100644
--- a/drivers/misc/hdpuftrs/hdpu_cpustate.c
+++ b/drivers/misc/hdpuftrs/hdpu_cpustate.c
@@ -21,7 +21,7 @@
 #include <linux/miscdevice.h>
 #include <linux/pci.h>
 #include <linux/proc_fs.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <asm/uaccess.h>
 #include <linux/hdpu_features.h>
 
diff --git a/drivers/misc/hdpuftrs/hdpu_nexus.c b/drivers/misc/hdpuftrs/hdpu_nexus.c
index c203b27269ea..165f3405df27 100644
--- a/drivers/misc/hdpuftrs/hdpu_nexus.c
+++ b/drivers/misc/hdpuftrs/hdpu_nexus.c
@@ -21,7 +21,7 @@
 #include <linux/hdpu_features.h>
 #include <linux/pci.h>
 
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 static int hdpu_nexus_probe(struct device *ddev);
 static int hdpu_nexus_remove(struct device *ddev);
diff --git a/drivers/mmc/pxamci.c b/drivers/mmc/pxamci.c
index 8eba373d42d7..4da4a98bd590 100644
--- a/drivers/mmc/pxamci.c
+++ b/drivers/mmc/pxamci.c
@@ -20,7 +20,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c
index 3ace875decc4..942668e93a74 100644
--- a/drivers/mmc/wbsd.c
+++ b/drivers/mmc/wbsd.c
@@ -26,7 +26,7 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
diff --git a/drivers/mtd/maps/bast-flash.c b/drivers/mtd/maps/bast-flash.c
index 0ba0ff7d43b9..5f248ebe68e0 100644
--- a/drivers/mtd/maps/bast-flash.c
+++ b/drivers/mtd/maps/bast-flash.c
@@ -32,7 +32,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
diff --git a/drivers/mtd/maps/integrator-flash.c b/drivers/mtd/maps/integrator-flash.c
index e39a98a0171c..d14a0185b8f4 100644
--- a/drivers/mtd/maps/integrator-flash.c
+++ b/drivers/mtd/maps/integrator-flash.c
@@ -32,7 +32,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/init.h>
 
 #include <linux/mtd/mtd.h>
diff --git a/drivers/mtd/maps/ixp2000.c b/drivers/mtd/maps/ixp2000.c
index a9f86c7fbd52..6815baee89d7 100644
--- a/drivers/mtd/maps/ixp2000.c
+++ b/drivers/mtd/maps/ixp2000.c
@@ -26,7 +26,7 @@
 #include <linux/mtd/map.h>
 #include <linux/mtd/partitions.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/hardware.h>
diff --git a/drivers/mtd/maps/ixp4xx.c b/drivers/mtd/maps/ixp4xx.c
index 3fcc32884074..06e1c7fffed3 100644
--- a/drivers/mtd/maps/ixp4xx.c
+++ b/drivers/mtd/maps/ixp4xx.c
@@ -24,7 +24,7 @@
 #include <linux/mtd/map.h>
 #include <linux/mtd/partitions.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <asm/io.h>
 #include <asm/mach/flash.h>
 
diff --git a/drivers/mtd/maps/omap_nor.c b/drivers/mtd/maps/omap_nor.c
index b17bca657daf..9c9f4116e50a 100644
--- a/drivers/mtd/maps/omap_nor.c
+++ b/drivers/mtd/maps/omap_nor.c
@@ -30,7 +30,7 @@
  * 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
diff --git a/drivers/mtd/maps/plat-ram.c b/drivers/mtd/maps/plat-ram.c
index 118b04544cad..e751e05fcc65 100644
--- a/drivers/mtd/maps/plat-ram.c
+++ b/drivers/mtd/maps/plat-ram.c
@@ -29,7 +29,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index 6a8e0caf9fdc..66b4c2780adc 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/err.h>
 
 #include <linux/mtd/mtd.h>
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index b47ebcb31e0f..bf2325df80c7 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -48,7 +48,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/err.h>
 
diff --git a/drivers/net/depca.c b/drivers/net/depca.c
index c4aa5fe2840e..4d26e5e7d18b 100644
--- a/drivers/net/depca.c
+++ b/drivers/net/depca.c
@@ -254,7 +254,7 @@
 #include <linux/unistd.h>
 #include <linux/ctype.h>
 #include <linux/moduleparam.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/bitops.h>
 
 #include <asm/uaccess.h>
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index abce1f730d00..c0af6fb1fbba 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -66,6 +66,7 @@
 #include <linux/mii.h>
 #include <linux/dm9000.h>
 #include <linux/delay.h>
+#include <linux/platform_device.h>
 
 #include <asm/delay.h>
 #include <asm/irq.h>
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index ae5a2ed3b264..962580f2c4ab 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -81,7 +81,7 @@
 #include <linux/if_vlan.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
diff --git a/drivers/net/gianfar_mii.c b/drivers/net/gianfar_mii.c
index 1eca1dbca7f1..5a74d3d3dbe1 100644
--- a/drivers/net/gianfar_mii.c
+++ b/drivers/net/gianfar_mii.c
@@ -33,6 +33,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/version.h>
+#include <linux/platform_device.h>
 #include <asm/ocp.h>
 #include <linux/crc32.h>
 #include <linux/mii.h>
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index aef80f5e7c9c..9571145c2090 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -22,6 +22,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
 #include <linux/pm.h>
 
 #include <net/irda/irda.h>
diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/net/irda/sa1100_ir.c
index 06883309916d..76e0b9fb5e96 100644
--- a/drivers/net/irda/sa1100_ir.c
+++ b/drivers/net/irda/sa1100_ir.c
@@ -29,7 +29,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <net/irda/irda.h>
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index bbac720cca63..424515d35932 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -53,6 +53,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/serial_reg.h>
 #include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/dma.h>
diff --git a/drivers/net/jazzsonic.c b/drivers/net/jazzsonic.c
index 8423cb6875f0..a74a5cfaf5bc 100644
--- a/drivers/net/jazzsonic.c
+++ b/drivers/net/jazzsonic.c
@@ -33,7 +33,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/bootinfo.h>
diff --git a/drivers/net/macsonic.c b/drivers/net/macsonic.c
index 405e18365ede..e9c999d7eb39 100644
--- a/drivers/net/macsonic.c
+++ b/drivers/net/macsonic.c
@@ -47,7 +47,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/bootinfo.h>
diff --git a/drivers/net/mipsnet.c b/drivers/net/mipsnet.c
index f79f7ee72ab8..bbffb585b3b3 100644
--- a/drivers/net/mipsnet.c
+++ b/drivers/net/mipsnet.c
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/etherdevice.h>
 #include <linux/netdevice.h>
+#include <linux/platform_device.h>
 #include <asm/io.h>
 #include <asm/mips-boards/simint.h>
 
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 25c9a99c377b..6fe948c10e72 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -39,6 +39,8 @@
 #include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/ethtool.h>
+#include <linux/platform_device.h>
+
 #include <asm/io.h>
 #include <asm/types.h>
 #include <asm/pgtable.h>
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c
index 0ddaa611cc61..901c960d342a 100644
--- a/drivers/net/smc91x.c
+++ b/drivers/net/smc91x.c
@@ -77,7 +77,7 @@ static const char version[] =
 #include <linux/errno.h>
 #include <linux/ioport.h>
 #include <linux/crc32.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/ethtool.h>
 #include <linux/mii.h>
diff --git a/drivers/net/tokenring/proteon.c b/drivers/net/tokenring/proteon.c
index eb1423ede75c..d04c918ebef8 100644
--- a/drivers/net/tokenring/proteon.c
+++ b/drivers/net/tokenring/proteon.c
@@ -29,6 +29,7 @@ static const char version[] = "proteon.c: v1.00 02/01/2003 by Jochen Friedrich\n
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/trdevice.h>
+#include <linux/platform_device.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
diff --git a/drivers/net/tokenring/skisa.c b/drivers/net/tokenring/skisa.c
index 3c7c66204f74..72cf708396be 100644
--- a/drivers/net/tokenring/skisa.c
+++ b/drivers/net/tokenring/skisa.c
@@ -36,6 +36,7 @@ static const char version[] = "skisa.c: v1.03 09/12/2002 by Jochen Friedrich\n";
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/trdevice.h>
+#include <linux/platform_device.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
diff --git a/drivers/pcmcia/au1000_generic.c b/drivers/pcmcia/au1000_generic.c
index d90a634cebf5..f591839ab9cd 100644
--- a/drivers/pcmcia/au1000_generic.c
+++ b/drivers/pcmcia/au1000_generic.c
@@ -42,7 +42,7 @@
 #include <linux/notifier.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/pcmcia/hd64465_ss.c b/drivers/pcmcia/hd64465_ss.c
index b57a0b98b4d6..561706ba4499 100644
--- a/drivers/pcmcia/hd64465_ss.c
+++ b/drivers/pcmcia/hd64465_ss.c
@@ -37,7 +37,7 @@
 #include <asm/errno.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/hd64465/hd64465.h>
diff --git a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c
index 4a41f67d185d..7ce455d01cc9 100644
--- a/drivers/pcmcia/i82365.c
+++ b/drivers/pcmcia/i82365.c
@@ -47,7 +47,7 @@
 #include <linux/delay.h>
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/bitops.h>
 #include <asm/irq.h>
 #include <asm/io.h>
diff --git a/drivers/pcmcia/m32r_cfc.c b/drivers/pcmcia/m32r_cfc.c
index c6ed70ea4812..2c22b4b3619d 100644
--- a/drivers/pcmcia/m32r_cfc.c
+++ b/drivers/pcmcia/m32r_cfc.c
@@ -23,7 +23,7 @@
 #include <linux/delay.h>
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/bitops.h>
 #include <asm/irq.h>
 #include <asm/io.h>
diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c
index 3397ff28de6a..356a6fb416a1 100644
--- a/drivers/pcmcia/m32r_pcc.c
+++ b/drivers/pcmcia/m32r_pcc.c
@@ -23,7 +23,7 @@
 #include <linux/delay.h>
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <asm/irq.h>
 #include <asm/io.h>
 #include <asm/bitops.h>
diff --git a/drivers/pcmcia/omap_cf.c b/drivers/pcmcia/omap_cf.c
index 2558c3cc91ec..47b5ade95bde 100644
--- a/drivers/pcmcia/omap_cf.c
+++ b/drivers/pcmcia/omap_cf.c
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/delay.h>
diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c
index c2a12d53f6c7..7fa18fb814bc 100644
--- a/drivers/pcmcia/pxa2xx_base.c
+++ b/drivers/pcmcia/pxa2xx_base.c
@@ -23,6 +23,7 @@
 #include <linux/ioport.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
+#include <linux/platform_device.h>
 
 #include <asm/hardware.h>
 #include <asm/io.h>
diff --git a/drivers/pcmcia/pxa2xx_mainstone.c b/drivers/pcmcia/pxa2xx_mainstone.c
index bbe69b07ce50..5209d8c7764f 100644
--- a/drivers/pcmcia/pxa2xx_mainstone.c
+++ b/drivers/pcmcia/pxa2xx_mainstone.c
@@ -17,7 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <pcmcia/ss.h>
 
diff --git a/drivers/pcmcia/pxa2xx_sharpsl.c b/drivers/pcmcia/pxa2xx_sharpsl.c
index a1178a600e3c..b54a8b8c0fca 100644
--- a/drivers/pcmcia/pxa2xx_sharpsl.c
+++ b/drivers/pcmcia/pxa2xx_sharpsl.c
@@ -16,7 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <asm/hardware.h>
 #include <asm/irq.h>
diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c
index b768fa81f043..122fb29b1e34 100644
--- a/drivers/pcmcia/sa1100_generic.c
+++ b/drivers/pcmcia/sa1100_generic.c
@@ -33,6 +33,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/config.h>
+#include <linux/platform_device.h>
 
 #include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c
index f158b67f6610..e31263864377 100644
--- a/drivers/pcmcia/tcic.c
+++ b/drivers/pcmcia/tcic.c
@@ -44,7 +44,7 @@
 #include <linux/ioport.h>
 #include <linux/delay.h>
 #include <linux/workqueue.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/bitops.h>
 
 #include <asm/io.h>
diff --git a/drivers/pcmcia/vrc4171_card.c b/drivers/pcmcia/vrc4171_card.c
index 3d2dca675e02..38a028c725d4 100644
--- a/drivers/pcmcia/vrc4171_card.c
+++ b/drivers/pcmcia/vrc4171_card.c
@@ -24,6 +24,7 @@
 #include <linux/spinlock.h>
 #include <linux/sched.h>
 #include <linux/types.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index f24d84538fd5..71dd1ebbe58f 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -30,6 +30,7 @@
 #include <linux/init.h>
 #include <linux/completion.h>
 #include <linux/transport_class.h>
+#include <linux/platform_device.h>
 
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index afb7ddf200e0..f47d2c454e33 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -33,7 +33,7 @@
 #include <linux/sysrq.h>
 #include <linux/mca.h>
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include <linux/serial_reg.h>
diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 5b3933b0c997..b9a1f523c9a8 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -36,7 +36,7 @@
 #include <linux/init.h>
 #include <linux/console.h>
 #include <linux/sysrq.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include <linux/serial_core.h>
diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
index 8a79968f8ce1..0dd08a09e7e6 100644
--- a/drivers/serial/mpc52xx_uart.c
+++ b/drivers/serial/mpc52xx_uart.c
@@ -45,7 +45,7 @@
  */
 
 #include <linux/config.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/tty.h>
 #include <linux/serial.h>
diff --git a/drivers/serial/mpsc.c b/drivers/serial/mpsc.c
index efe79b1fd431..f4c709bc464b 100644
--- a/drivers/serial/mpsc.c
+++ b/drivers/serial/mpsc.c
@@ -52,6 +52,8 @@
  * 4) AFAICT, hardware flow control isn't supported by the controller --MAG.
  */
 
+#include <linux/platform_device.h>
+
 #include "mpsc.h"
 
 /*
diff --git a/drivers/serial/pxa.c b/drivers/serial/pxa.c
index 8cc4cedadd99..16b2f9417af9 100644
--- a/drivers/serial/pxa.c
+++ b/drivers/serial/pxa.c
@@ -39,7 +39,7 @@
 #include <linux/circ_buf.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include <linux/serial_core.h>
diff --git a/drivers/serial/s3c2410.c b/drivers/serial/s3c2410.c
index 06a17dff1a73..036792328d49 100644
--- a/drivers/serial/s3c2410.c
+++ b/drivers/serial/s3c2410.c
@@ -63,7 +63,7 @@
 
 #include <linux/module.h>
 #include <linux/ioport.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/sysrq.h>
 #include <linux/console.h>
diff --git a/drivers/serial/sa1100.c b/drivers/serial/sa1100.c
index c4a789e6af44..ed618cc7ae96 100644
--- a/drivers/serial/sa1100.c
+++ b/drivers/serial/sa1100.c
@@ -35,7 +35,7 @@
 #include <linux/init.h>
 #include <linux/console.h>
 #include <linux/sysrq.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include <linux/serial_core.h>
diff --git a/drivers/serial/vr41xx_siu.c b/drivers/serial/vr41xx_siu.c
index 2b623ab0e36e..01696b3e3f61 100644
--- a/drivers/serial/vr41xx_siu.c
+++ b/drivers/serial/vr41xx_siu.c
@@ -26,7 +26,7 @@
 #endif
 
 #include <linux/console.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/err.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c
index 02106bebd5c1..975ace3f5b1e 100644
--- a/drivers/usb/gadget/dummy_hcd.c
+++ b/drivers/usb/gadget/dummy_hcd.c
@@ -50,7 +50,7 @@
 #include <linux/list.h>
 #include <linux/interrupt.h>
 #include <linux/version.h>
-
+#include <linux/platform_device.h>
 #include <linux/usb.h>
 #include <linux/usb_gadget.h>
 
diff --git a/drivers/usb/gadget/lh7a40x_udc.c b/drivers/usb/gadget/lh7a40x_udc.c
index 9b3673904daf..bc6269f10cbb 100644
--- a/drivers/usb/gadget/lh7a40x_udc.c
+++ b/drivers/usb/gadget/lh7a40x_udc.c
@@ -21,6 +21,8 @@
  *
  */
 
+#include <linux/platform_device.h>
+
 #include "lh7a40x_udc.h"
 
 //#define DEBUG printk
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 41c96b0afbb3..387692a3611e 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -38,7 +38,7 @@
 #include <linux/proc_fs.h>
 #include <linux/mm.h>
 #include <linux/moduleparam.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/usb_ch9.h>
 #include <linux/usb_gadget.h>
 #include <linux/usb_otg.h>
diff --git a/drivers/usb/gadget/pxa2xx_udc.c b/drivers/usb/gadget/pxa2xx_udc.c
index f83a9262f953..ee9cd7869d92 100644
--- a/drivers/usb/gadget/pxa2xx_udc.c
+++ b/drivers/usb/gadget/pxa2xx_udc.c
@@ -43,7 +43,7 @@
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/mm.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/byteorder.h>
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
index ddb8fc591466..f9c3f5b8dd1c 100644
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -70,6 +70,7 @@
 #include <linux/interrupt.h>
 #include <linux/usb.h>
 #include <linux/usb_isp116x.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c
index a277e258eb6c..f0c78cf14b6c 100644
--- a/drivers/usb/host/ohci-au1xxx.c
+++ b/drivers/usb/host/ohci-au1xxx.c
@@ -18,6 +18,8 @@
  * This file is licenced under the GPL.
  */
 
+#include <linux/platform_device.h>
+
 #include <asm/mach-au1x00/au1000.h>
 
 #define USBH_ENABLE_BE (1<<0)
diff --git a/drivers/usb/host/ohci-lh7a404.c b/drivers/usb/host/ohci-lh7a404.c
index 238fa4ade615..336c766c6e29 100644
--- a/drivers/usb/host/ohci-lh7a404.c
+++ b/drivers/usb/host/ohci-lh7a404.c
@@ -16,6 +16,8 @@
  * This file is licenced under the GPL.
  */
 
+#include <linux/platform_device.h>
+
 #include <asm/hardware.h>
 
 
diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c
index 45efeed1fcc3..277bcb902d3b 100644
--- a/drivers/usb/host/ohci-omap.c
+++ b/drivers/usb/host/ohci-omap.c
@@ -14,6 +14,8 @@
  * This file is licenced under the GPL.
  */
 
+#include <linux/platform_device.h>
+
 #include <asm/hardware.h>
 #include <asm/io.h>
 #include <asm/mach-types.h>
diff --git a/drivers/usb/host/ohci-ppc-soc.c b/drivers/usb/host/ohci-ppc-soc.c
index 4832e57ae579..92cf6f4a1374 100644
--- a/drivers/usb/host/ohci-ppc-soc.c
+++ b/drivers/usb/host/ohci-ppc-soc.c
@@ -14,6 +14,8 @@
  * This file is licenced under the GPL.
  */
 
+#include <linux/platform_device.h>
+
 /* configure so an HC device and id are always provided */
 /* always called with process context; sleeping is OK */
 
diff --git a/drivers/usb/host/ohci-pxa27x.c b/drivers/usb/host/ohci-pxa27x.c
index d287dcccd415..5181999c56c9 100644
--- a/drivers/usb/host/ohci-pxa27x.c
+++ b/drivers/usb/host/ohci-pxa27x.c
@@ -19,7 +19,7 @@
  * This file is licenced under the GPL.
  */
 
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <asm/mach-types.h>
 #include <asm/hardware.h>
 #include <asm/arch/pxa-regs.h>
diff --git a/drivers/usb/host/ohci-s3c2410.c b/drivers/usb/host/ohci-s3c2410.c
index fab420a2ce71..ee1fc605b402 100644
--- a/drivers/usb/host/ohci-s3c2410.c
+++ b/drivers/usb/host/ohci-s3c2410.c
@@ -19,6 +19,8 @@
  * This file is licenced under the GPL.
 */
 
+#include <linux/platform_device.h>
+
 #include <asm/hardware.h>
 #include <asm/hardware/clock.h>
 #include <asm/arch/usb-control.h>
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index 40169d9cf2b1..5607c0ae6835 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -54,6 +54,7 @@
 #include <linux/interrupt.h>
 #include <linux/usb.h>
 #include <linux/usb_sl811.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c
index 38aebe361ca1..e73faf831b24 100644
--- a/drivers/usb/host/sl811_cs.c
+++ b/drivers/usb/host/sl811_cs.c
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/timer.h>
 #include <linux/ioport.h>
+#include <linux/platform_device.h>
 
 #include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c
index f02965f39501..9b6a39348f81 100644
--- a/drivers/video/acornfb.c
+++ b/drivers/video/acornfb.c
@@ -26,7 +26,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/fb.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/hardware.h>
diff --git a/drivers/video/arcfb.c b/drivers/video/arcfb.c
index d28457e0c063..126daff1c848 100644
--- a/drivers/video/arcfb.c
+++ b/drivers/video/arcfb.c
@@ -47,6 +47,7 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/arcfb.h>
+#include <linux/platform_device.h>
 
 #include <asm/uaccess.h>
 
diff --git a/drivers/video/backlight/corgi_bl.c b/drivers/video/backlight/corgi_bl.c
index 1991fdb32dfb..4867498f68e8 100644
--- a/drivers/video/backlight/corgi_bl.c
+++ b/drivers/video/backlight/corgi_bl.c
@@ -14,7 +14,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/fb.h>
 #include <linux/backlight.h>
diff --git a/drivers/video/dnfb.c b/drivers/video/dnfb.c
index 1dbb82dca40b..1785686a7f11 100644
--- a/drivers/video/dnfb.c
+++ b/drivers/video/dnfb.c
@@ -6,6 +6,8 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/platform_device.h>
+
 #include <asm/setup.h>
 #include <asm/system.h>
 #include <asm/irq.h>
diff --git a/drivers/video/epson1355fb.c b/drivers/video/epson1355fb.c
index 116e808d71cd..7363d0b25fdf 100644
--- a/drivers/video/epson1355fb.c
+++ b/drivers/video/epson1355fb.c
@@ -54,6 +54,8 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
+#include <linux/platform_device.h>
+
 #include <asm/types.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c
index d3c1922cb13a..fc0a1beef968 100644
--- a/drivers/video/gbefb.c
+++ b/drivers/video/gbefb.c
@@ -11,7 +11,7 @@
 
 #include <linux/config.h>
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
 #include <linux/fb.h>
diff --git a/drivers/video/imxfb.c b/drivers/video/imxfb.c
index 0b9301facbd3..64d9bcc38da3 100644
--- a/drivers/video/imxfb.c
+++ b/drivers/video/imxfb.c
@@ -31,7 +31,7 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/cpufreq.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/hardware.h>
diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index 6206da9dd5da..efd9333b05c2 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -36,7 +36,7 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/cpufreq.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/hardware.h>
diff --git a/drivers/video/q40fb.c b/drivers/video/q40fb.c
index 162012bb9264..8416b2e2b501 100644
--- a/drivers/video/q40fb.c
+++ b/drivers/video/q40fb.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/platform_device.h>
 
 #include <asm/uaccess.h>
 #include <asm/setup.h>
diff --git a/drivers/video/s1d13xxxfb.c b/drivers/video/s1d13xxxfb.c
index cb2f7a1de947..f4437430dc5f 100644
--- a/drivers/video/s1d13xxxfb.c
+++ b/drivers/video/s1d13xxxfb.c
@@ -30,7 +30,7 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/delay.h>
 
 #include <linux/types.h>
diff --git a/drivers/video/s3c2410fb.c b/drivers/video/s3c2410fb.c
index 3862d3cb1fb2..3cef90456a4b 100644
--- a/drivers/video/s3c2410fb.c
+++ b/drivers/video/s3c2410fb.c
@@ -86,6 +86,7 @@
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
 #include <linux/wait.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
diff --git a/drivers/video/sa1100fb.c b/drivers/video/sa1100fb.c
index 78e5f194b0df..3d35b28aaac7 100644
--- a/drivers/video/sa1100fb.c
+++ b/drivers/video/sa1100fb.c
@@ -173,7 +173,7 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/cpufreq.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/hardware.h>
diff --git a/drivers/video/sgivwfb.c b/drivers/video/sgivwfb.c
index 8413907b379a..cf5106eab2d5 100644
--- a/drivers/video/sgivwfb.c
+++ b/drivers/video/sgivwfb.c
@@ -18,6 +18,8 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
+#include <linux/platform_device.h>
+
 #include <asm/io.h>
 #include <asm/mtrr.h>
 
diff --git a/drivers/video/vesafb.c b/drivers/video/vesafb.c
index b1243da55fc5..3cc23106641d 100644
--- a/drivers/video/vesafb.c
+++ b/drivers/video/vesafb.c
@@ -19,6 +19,8 @@
 #include <linux/fb.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
+#include <linux/platform_device.h>
+
 #include <video/vga.h>
 #include <asm/io.h>
 #include <asm/mtrr.h>
diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c
index b137a3fe0752..92d46555dd86 100644
--- a/drivers/video/vfb.c
+++ b/drivers/video/vfb.c
@@ -20,6 +20,8 @@
 #include <linux/vmalloc.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/platform_device.h>
+
 #include <asm/uaccess.h>
 #include <linux/fb.h>
 #include <linux/init.h>
diff --git a/drivers/video/w100fb.c b/drivers/video/w100fb.c
index 752bf88906a9..cf8cdb108fd9 100644
--- a/drivers/video/w100fb.c
+++ b/drivers/video/w100fb.c
@@ -25,7 +25,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <asm/io.h>
diff --git a/include/asm-ppc/ppc_sys.h b/include/asm-ppc/ppc_sys.h
index 549f44843c5e..bba5305c29ed 100644
--- a/include/asm-ppc/ppc_sys.h
+++ b/include/asm-ppc/ppc_sys.h
@@ -18,7 +18,7 @@
 #define __ASM_PPC_SYS_H
 
 #include <linux/init.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/types.h>
 
 #if defined(CONFIG_8260)
diff --git a/include/linux/device.h b/include/linux/device.h
index a9e72ac3fb9f..17cbc6db67b4 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -396,32 +396,6 @@ extern struct device * get_device(struct device * dev);
 extern void put_device(struct device * dev);
 
 
-/* drivers/base/platform.c */
-
-struct platform_device {
-	const char	* name;
-	u32		id;
-	struct device	dev;
-	u32		num_resources;
-	struct resource	* resource;
-};
-
-#define to_platform_device(x) container_of((x), struct platform_device, dev)
-
-extern int platform_device_register(struct platform_device *);
-extern void platform_device_unregister(struct platform_device *);
-
-extern struct bus_type platform_bus_type;
-extern struct device platform_bus;
-
-extern struct resource *platform_get_resource(struct platform_device *, unsigned int, unsigned int);
-extern int platform_get_irq(struct platform_device *, unsigned int);
-extern struct resource *platform_get_resource_byname(struct platform_device *, unsigned int, char *);
-extern int platform_get_irq_byname(struct platform_device *, char *);
-extern int platform_add_devices(struct platform_device **, int);
-
-extern struct platform_device *platform_device_register_simple(char *, unsigned int, struct resource *, unsigned int);
-
 /* drivers/base/power.c */
 extern void device_shutdown(void);
 
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 317a979b24de..2b799d40d669 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -12,7 +12,7 @@
 #define _LINUX_SERIAL_8250_H
 
 #include <linux/serial_core.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 
 /*
  * This is the platform device platform_data structure
diff --git a/sound/arm/pxa2xx-ac97.c b/sound/arm/pxa2xx-ac97.c
index 877bb00d3295..d1f9da498729 100644
--- a/sound/arm/pxa2xx-ac97.c
+++ b/sound/arm/pxa2xx-ac97.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/wait.h>
 #include <linux/delay.h>
diff --git a/sound/core/init.c b/sound/core/init.c
index 59202de1d2ce..41e224986f35 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -28,6 +28,8 @@
 #include <linux/ctype.h>
 #include <linux/pci.h>
 #include <linux/pm.h>
+#include <linux/platform_device.h>
+
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/info.h>
-- 
cgit v1.2.3


From bbbf508d6403f9dfeeb040d9cd7366e395632e59 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 29 Oct 2005 22:17:58 +0100
Subject: [DRIVER MODEL] Add missing platform_device.h header.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/platform_device.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 include/linux/platform_device.h

diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
new file mode 100644
index 000000000000..a726225e0afe
--- /dev/null
+++ b/include/linux/platform_device.h
@@ -0,0 +1,40 @@
+/*
+ * platform_device.h - generic, centralized driver model
+ *
+ * Copyright (c) 2001-2003 Patrick Mochel <mochel@osdl.org>
+ *
+ * This file is released under the GPLv2
+ *
+ * See Documentation/driver-model/ for more information.
+ */
+
+#ifndef _PLATFORM_DEVICE_H_
+#define _PLATFORM_DEVICE_H_
+
+#include <linux/device.h>
+
+struct platform_device {
+	const char	* name;
+	u32		id;
+	struct device	dev;
+	u32		num_resources;
+	struct resource	* resource;
+};
+
+#define to_platform_device(x) container_of((x), struct platform_device, dev)
+
+extern int platform_device_register(struct platform_device *);
+extern void platform_device_unregister(struct platform_device *);
+
+extern struct bus_type platform_bus_type;
+extern struct device platform_bus;
+
+extern struct resource *platform_get_resource(struct platform_device *, unsigned int, unsigned int);
+extern int platform_get_irq(struct platform_device *, unsigned int);
+extern struct resource *platform_get_resource_byname(struct platform_device *, unsigned int, char *);
+extern int platform_get_irq_byname(struct platform_device *, char *);
+extern int platform_add_devices(struct platform_device **, int);
+
+extern struct platform_device *platform_device_register_simple(char *, unsigned int, struct resource *, unsigned int);
+
+#endif /* _PLATFORM_DEVICE_H_ */
-- 
cgit v1.2.3


From e32e28edc3d894201e15b19df627af66023aa91f Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 30 Oct 2005 16:32:27 +0000
Subject: [DRIVER MODEL] Add missing driver_unregister in i2c-s3c2410 failure
 path.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/i2c/busses/i2c-s3c2410.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index a1268e534254..1b582262e677 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -918,8 +918,11 @@ static int __init i2c_adap_s3c_init(void)
 	int ret;
 
 	ret = driver_register(&s3c2410_i2c_driver);
-	if (ret == 0)
-		ret = driver_register(&s3c2440_i2c_driver); 
+	if (ret == 0) {
+		ret = driver_register(&s3c2440_i2c_driver);
+		if (ret)
+			driver_unregister(&s3c2410_i2c_driver);
+	}
 
 	return ret;
 }
-- 
cgit v1.2.3


From 8576762ff5d109b841fcf4e7d3883e0cf794f3cf Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 30 Oct 2005 16:33:11 +0000
Subject: [DRIVER MODEL] Add missing driver_unregister to IMX serial driver

Fix the IMX serial driver to unregister its driver structure
when it is unloaded.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/serial/imx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index b9a1f523c9a8..4a54ff584700 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -995,6 +995,7 @@ static int __init imx_serial_init(void)
 static void __exit imx_serial_exit(void)
 {
 	uart_unregister_driver(&imx_reg);
+	driver_unregister(&serial_imx_driver);
 }
 
 module_init(imx_serial_init);
-- 
cgit v1.2.3


From e533825447dcb60a82b7cc9d73d06423c849b9a2 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@pobox.com>
Date: Sun, 30 Oct 2005 21:37:17 -0500
Subject: [libata] ata_tf_to_host cleanups

Integrate ata_exec() and ata_tf_to_host() into their only caller,
ata_bus_edd().

Rename ata_tf_to_host_nolock() to ata_tf_to_host().

This makes locking a bit easier to review, and may help pave the way for
future changes.
---
 drivers/scsi/libata-core.c | 57 ++++++++--------------------------------------
 drivers/scsi/libata.h      |  1 -
 2 files changed, 10 insertions(+), 48 deletions(-)

diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 8be7dc0b47b8..a17e12032f65 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -294,28 +294,6 @@ void ata_exec_command(struct ata_port *ap, const struct ata_taskfile *tf)
 		ata_exec_command_pio(ap, tf);
 }
 
-/**
- *	ata_exec - issue ATA command to host controller
- *	@ap: port to which command is being issued
- *	@tf: ATA taskfile register set
- *
- *	Issues PIO/MMIO write to ATA command register, with proper
- *	synchronization with interrupt handler / other threads.
- *
- *	LOCKING:
- *	Obtains host_set lock.
- */
-
-static inline void ata_exec(struct ata_port *ap, const struct ata_taskfile *tf)
-{
-	unsigned long flags;
-
-	DPRINTK("ata%u: cmd 0x%X\n", ap->id, tf->command);
-	spin_lock_irqsave(&ap->host_set->lock, flags);
-	ap->ops->exec_command(ap, tf);
-	spin_unlock_irqrestore(&ap->host_set->lock, flags);
-}
-
 /**
  *	ata_tf_to_host - issue ATA taskfile to host controller
  *	@ap: port to which command is being issued
@@ -326,30 +304,11 @@ static inline void ata_exec(struct ata_port *ap, const struct ata_taskfile *tf)
  *	other threads.
  *
  *	LOCKING:
- *	Obtains host_set lock.
- */
-
-static void ata_tf_to_host(struct ata_port *ap, const struct ata_taskfile *tf)
-{
-	ap->ops->tf_load(ap, tf);
-
-	ata_exec(ap, tf);
-}
-
-/**
- *	ata_tf_to_host_nolock - issue ATA taskfile to host controller
- *	@ap: port to which command is being issued
- *	@tf: ATA taskfile register set
- *
- *	Issues ATA taskfile register set to ATA host controller,
- *	with proper synchronization with interrupt handler and
- *	other threads.
- *
- *	LOCKING:
  *	spin_lock_irqsave(host_set lock)
  */
 
-void ata_tf_to_host_nolock(struct ata_port *ap, const struct ata_taskfile *tf)
+static inline void ata_tf_to_host(struct ata_port *ap,
+				  const struct ata_taskfile *tf)
 {
 	ap->ops->tf_load(ap, tf);
 	ap->ops->exec_command(ap, tf);
@@ -1912,12 +1871,14 @@ static void ata_bus_post_reset(struct ata_port *ap, unsigned int devmask)
  *
  *	LOCKING:
  *	PCI/etc. bus probe sem.
+ *	Obtains host_set lock.
  *
  */
 
 static unsigned int ata_bus_edd(struct ata_port *ap)
 {
 	struct ata_taskfile tf;
+	unsigned long flags;
 
 	/* set up execute-device-diag (bus reset) taskfile */
 	/* also, take interrupts to a known state (disabled) */
@@ -1928,7 +1889,9 @@ static unsigned int ata_bus_edd(struct ata_port *ap)
 	tf.protocol = ATA_PROT_NODATA;
 
 	/* do bus reset */
+	spin_lock_irqsave(&ap->host_set->lock, flags);
 	ata_tf_to_host(ap, &tf);
+	spin_unlock_irqrestore(&ap->host_set->lock, flags);
 
 	/* spec says at least 2ms.  but who knows with those
 	 * crazy ATAPI devices...
@@ -3555,7 +3518,7 @@ int ata_qc_issue_prot(struct ata_queued_cmd *qc)
 
 	switch (qc->tf.protocol) {
 	case ATA_PROT_NODATA:
-		ata_tf_to_host_nolock(ap, &qc->tf);
+		ata_tf_to_host(ap, &qc->tf);
 		break;
 
 	case ATA_PROT_DMA:
@@ -3566,20 +3529,20 @@ int ata_qc_issue_prot(struct ata_queued_cmd *qc)
 
 	case ATA_PROT_PIO: /* load tf registers, initiate polling pio */
 		ata_qc_set_polling(qc);
-		ata_tf_to_host_nolock(ap, &qc->tf);
+		ata_tf_to_host(ap, &qc->tf);
 		ap->hsm_task_state = HSM_ST;
 		queue_work(ata_wq, &ap->pio_task);
 		break;
 
 	case ATA_PROT_ATAPI:
 		ata_qc_set_polling(qc);
-		ata_tf_to_host_nolock(ap, &qc->tf);
+		ata_tf_to_host(ap, &qc->tf);
 		queue_work(ata_wq, &ap->packet_task);
 		break;
 
 	case ATA_PROT_ATAPI_NODATA:
 		ap->flags |= ATA_FLAG_NOINTR;
-		ata_tf_to_host_nolock(ap, &qc->tf);
+		ata_tf_to_host(ap, &qc->tf);
 		queue_work(ata_wq, &ap->packet_task);
 		break;
 
diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h
index 10ecd9e15e4f..fad051ca4672 100644
--- a/drivers/scsi/libata.h
+++ b/drivers/scsi/libata.h
@@ -48,7 +48,6 @@ extern int ata_qc_issue(struct ata_queued_cmd *qc);
 extern int ata_check_atapi_dma(struct ata_queued_cmd *qc);
 extern void ata_dev_select(struct ata_port *ap, unsigned int device,
                            unsigned int wait, unsigned int can_sleep);
-extern void ata_tf_to_host_nolock(struct ata_port *ap, const struct ata_taskfile *tf);
 extern void swap_buf_le16(u16 *buf, unsigned int buf_words);
 extern int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg);
 extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg);
-- 
cgit v1.2.3


From 005a5a06a6dd13a0ca3f2c6a0218e8d94ed36d8a Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@pobox.com>
Date: Sun, 30 Oct 2005 23:31:48 -0500
Subject: [libata] locking rewrite (== fix)

A lot of power packed into a little patch.

This change eliminates the sharing between our controller-wide spinlock
and the SCSI core's Scsi_Host lock.  This works because the locking in
libata was already highly compartmentalized, always referencing our own
lock and never scsi_host::host_lock.

As a side effect, this change eliminates a deadlock from calling
scsi_finish_command() while inside our spinlock.
---
 drivers/scsi/libata-core.c | 2 --
 drivers/scsi/libata-scsi.c | 9 ++++++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index a17e12032f65..ff18fa7044c5 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -4089,8 +4089,6 @@ static void ata_host_init(struct ata_port *ap, struct Scsi_Host *host,
 	host->unique_id = ata_unique_id++;
 	host->max_cmd_len = 12;
 
-	scsi_assign_lock(host, &host_set->lock);
-
 	ap->flags = ATA_FLAG_PORT_DISABLED;
 	ap->id = host->unique_id;
 	ap->host = host;
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index 1e3792f86fcf..248baae96486 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -39,6 +39,7 @@
 #include <scsi/scsi.h>
 #include "scsi.h"
 #include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
 #include <linux/libata.h>
 #include <linux/hdreg.h>
 #include <asm/uaccess.h>
@@ -2405,8 +2406,12 @@ int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 	struct ata_port *ap;
 	struct ata_device *dev;
 	struct scsi_device *scsidev = cmd->device;
+	struct Scsi_Host *shost = scsidev->host;
 
-	ap = (struct ata_port *) &scsidev->host->hostdata[0];
+	ap = (struct ata_port *) &shost->hostdata[0];
+
+	spin_unlock(shost->host_lock);
+	spin_lock(&ap->host_set->lock);
 
 	ata_scsi_dump_cdb(ap, cmd);
 
@@ -2429,6 +2434,8 @@ int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 		ata_scsi_translate(ap, dev, cmd, done, atapi_xlat);
 
 out_unlock:
+	spin_unlock(&ap->host_set->lock);
+	spin_lock(shost->host_lock);
 	return 0;
 }
 
-- 
cgit v1.2.3


From e0be618d1e8ed0a1a94ee14025ec5e3022d2ec5b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Mon, 31 Oct 2005 01:29:23 -0500
Subject: Input: evdev - allow querying EV_SW bits from compat_ioctl

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/evdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index a4696cd0978c..d64d0290a7c7 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -565,6 +565,7 @@ static long evdev_ioctl_compat(struct file *file, unsigned int cmd, unsigned lon
 						case EV_LED: bits = dev->ledbit; max = LED_MAX; break;
 						case EV_SND: bits = dev->sndbit; max = SND_MAX; break;
 						case EV_FF:  bits = dev->ffbit;  max = FF_MAX;  break;
+						case EV_SW:  bits = dev->swbit;  max = SW_MAX;  break;
 						default: return -EINVAL;
 					}
 					bit_to_user(bits, max);
-- 
cgit v1.2.3


From 7972720aaa044d0bca40e2e1d4c176076a9f0e00 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Mon, 31 Oct 2005 01:29:37 -0500
Subject: Input: evdev - allow querying SW state from compat ioctl

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/evdev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index d64d0290a7c7..9f2352bd8348 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -580,6 +580,9 @@ static long evdev_ioctl_compat(struct file *file, unsigned int cmd, unsigned lon
 				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0)))
 					bit_to_user(dev->snd, SND_MAX);
 
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSW(0)))
+					bit_to_user(dev->sw, SW_MAX);
+
 				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) {
 					int len;
 					if (!dev->name) return -ENOENT;
-- 
cgit v1.2.3


From 995fc4df0bc8d88417742e4b1d17889fc64b6ef3 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Mon, 31 Oct 2005 01:29:51 -0500
Subject: Input: fix input_dev registration message

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/input.c b/drivers/input/input.c
index 3b1685ff9d10..1a1654caedd5 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -730,7 +730,7 @@ static void input_register_classdevice(struct input_dev *dev)
 		 "input%ld", (unsigned long) atomic_inc_return(&input_no) - 1);
 
 	path = kobject_get_path(&dev->cdev.class->subsys.kset.kobj, GFP_KERNEL);
-	printk(KERN_INFO "input: %s/%s as %s\n",
+	printk(KERN_INFO "input: %s as %s/%s\n",
 		dev->name ? dev->name : "Unspecified device",
 		path ? path : "", dev->cdev.class_id);
 	kfree(path);
-- 
cgit v1.2.3


From 1259f2b3657e21f18a7f31f9b1c027e304838b18 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Mon, 31 Oct 2005 01:30:05 -0500
Subject: Input: pcspkr - fix setting name and phys for the device

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/pcspkr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c
index e34633c37fdd..68ac97f101b0 100644
--- a/drivers/input/misc/pcspkr.c
+++ b/drivers/input/misc/pcspkr.c
@@ -71,7 +71,7 @@ static int __init pcspkr_init(void)
 		return -ENOMEM;
 
 	pcspkr_dev->name = "PC Speaker";
-	pcspkr_dev->name = "isa0061/input0";
+	pcspkr_dev->phys = "isa0061/input0";
 	pcspkr_dev->id.bustype = BUS_ISA;
 	pcspkr_dev->id.vendor = 0x001f;
 	pcspkr_dev->id.product = 0x0001;
-- 
cgit v1.2.3


From 76440d5e13d7dacd5763394ddb2071424e0b6921 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Mon, 31 Oct 2005 01:30:19 -0500
Subject: Input: lkkbd - fix debug message in lkkbd_interrupt()

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/lkkbd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/keyboard/lkkbd.c b/drivers/input/keyboard/lkkbd.c
index 7f06780a437f..9481132532d0 100644
--- a/drivers/input/keyboard/lkkbd.c
+++ b/drivers/input/keyboard/lkkbd.c
@@ -441,7 +441,7 @@ lkkbd_interrupt (struct serio *serio, unsigned char data, unsigned int flags,
 			input_sync (lk->dev);
 			break;
 		case LK_METRONOME:
-			DBG (KERN_INFO "Got %#d and don't "
+			DBG (KERN_INFO "Got LK_METRONOME and don't "
 					"know how to handle...\n");
 			break;
 		case LK_OUTPUT_ERROR:
-- 
cgit v1.2.3


From eb16292ba8a6655a560ab10a7d73a7816f0c0ac0 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 31 Oct 2005 01:30:32 -0500
Subject: Input: adbhid - fix OOPS introduced by dynalloc conversion

The problem is that adbhid[]->input is NULL, so the kernel oopses with
a null pointer dereference as soon as a key is pressed.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/macintosh/adbhid.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index cdb6d0283195..8f02c155fdc0 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -723,6 +723,7 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
 
 	sprintf(hid->phys, "adb%d:%d.%02x/input", id, default_id, original_handler_id);
 
+	hid->input = input_dev;
 	hid->id = default_id;
 	hid->original_handler_id = original_handler_id;
 	hid->current_handler_id = current_handler_id;
-- 
cgit v1.2.3


From 4fc207419d95e0e16fdc78e2f48f7c0b3640ffe5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 31 Oct 2005 13:51:33 +0100
Subject: [PATCH] Fix on-the-fly switch from cfq i/o scheduler

Don't clear ->elevator_data on exit: if we are switching queues, we are
overwriting the data of the new io scheduler.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/block/cfq-iosched.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index 5281f8e70510..ecacca9c877e 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -2059,10 +2059,8 @@ static void cfq_put_cfqd(struct cfq_data *cfqd)
 	if (!atomic_dec_and_test(&cfqd->ref))
 		return;
 
-	blk_put_queue(q);
-
 	cfq_shutdown_timer_wq(cfqd);
-	q->elevator->elevator_data = NULL;
+	blk_put_queue(q);
 
 	mempool_destroy(cfqd->crq_pool);
 	kfree(cfqd->crq_hash);
-- 
cgit v1.2.3


From 581c1b14394aee60aff46ea67d05483261ed6527 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 31 Oct 2005 09:23:54 +0100
Subject: [PATCH] noop-iosched: avoid corrupted request merging

Tejun Heo notes:

   "I'm currently debugging this.  The problem is that we are using the
    generic dispatch queue directly in the noop sched and merging is NOT
    allowed on dispatch queues but generic handling of last_merge tries
    to merge requests.  I'm still trying to verify this, so I'll be back
    with results soon."

In the meantime, disable merging for noop by setting REQ_NOMERGE in
elevator_noop_add_request().

Eventually, we should add a noop_list and do the dispatching like in the
other io schedulers.  Merging is still beneficial for noop (and it has
always done it).

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/block/noop-iosched.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/noop-iosched.c b/drivers/block/noop-iosched.c
index f56b8edb06e4..e54f006e7e60 100644
--- a/drivers/block/noop-iosched.c
+++ b/drivers/block/noop-iosched.c
@@ -9,6 +9,7 @@
 
 static void elevator_noop_add_request(request_queue_t *q, struct request *rq)
 {
+	rq->flags |= REQ_NOMERGE;
 	elv_dispatch_add_tail(q, rq);
 }
 
-- 
cgit v1.2.3


From f2c84c0e84bfa637a7161eac10157cf3b05b4a73 Mon Sep 17 00:00:00 2001
From: Arthur Othieno <a.othieno@bluewin.ch>
Date: Sun, 30 Oct 2005 23:04:05 -0500
Subject: [PATCH] i386: CONFIG_PC removal

CONFIG_PC is left-over cruft after the introduction of CONFIG_X86_PC with
the subarch split.  Remove it, and fixup the remaining users to depend on
CONFIG_X86_PC instead.

Signed-off-by: Arthur Othieno <a.othieno@bluewin.ch>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/Kconfig              | 5 -----
 drivers/input/keyboard/Kconfig | 4 ++--
 drivers/input/mouse/Kconfig    | 2 +-
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 5383e5e2d9b7..bac0da731ee3 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1042,8 +1042,3 @@ config X86_TRAMPOLINE
 	bool
 	depends on X86_SMP || (X86_VOYAGER && SMP)
 	default y
-
-config PC
-	bool
-	depends on X86 && !EMBEDDED
-	default y
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 571a68691a4a..4a917748fd9f 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -13,11 +13,11 @@ menuconfig INPUT_KEYBOARD
 if INPUT_KEYBOARD
 
 config KEYBOARD_ATKBD
-	tristate "AT keyboard" if !PC
+	tristate "AT keyboard" if !X86_PC
 	default y
 	select SERIO
 	select SERIO_LIBPS2
-	select SERIO_I8042 if PC
+	select SERIO_I8042 if X86_PC
 	select SERIO_GSCPS2 if GSC
 	help
 	  Say Y here if you want to use a standard AT or PS/2 keyboard. Usually
diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig
index 537154dd7a87..574b18a523af 100644
--- a/drivers/input/mouse/Kconfig
+++ b/drivers/input/mouse/Kconfig
@@ -17,7 +17,7 @@ config MOUSE_PS2
 	default y
 	select SERIO
 	select SERIO_LIBPS2
-	select SERIO_I8042 if PC
+	select SERIO_I8042 if X86_PC
 	select SERIO_GSCPS2 if GSC
 	---help---
 	  Say Y here if you have a PS/2 mouse connected to your system. This
-- 
cgit v1.2.3


From a717f77362d4fe044721c126c89e2a38e731a576 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 31 Oct 2005 14:08:53 -0800
Subject: [PATCH] revert ide-scsi highmem cleanup

Jeff Garzik <jgarzik@pobox.com> points out that this was wrong: we need to
disable local interrupts while holding KM_IRQ0 due to IRQ sharing.

And holding interrupts off during a big PIO operation is expensive, so we only
want to do that if we know the page was highmem.

So revert commit 17fd47ab4d33e764216b87006d8118fa050b4c92

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/scsi/ide-scsi.c | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 00d6a6657ebc..a440ea38efaa 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -180,12 +180,22 @@ static void idescsi_input_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsigne
 			return;
 		}
 		count = min(pc->sg->length - pc->b_count, bcount);
-		buf = kmap_atomic(pc->sg->page, KM_IRQ0);
-		drive->hwif->atapi_input_bytes(drive,
-				buf + pc->b_count + pc->sg->offset, count);
-		kunmap_atomic(buf, KM_IRQ0);
-		bcount -= count;
-		pc->b_count += count;
+		if (PageHighMem(pc->sg->page)) {
+			unsigned long flags;
+
+			local_irq_save(flags);
+			buf = kmap_atomic(pc->sg->page, KM_IRQ0) +
+					pc->sg->offset;
+			drive->hwif->atapi_input_bytes(drive,
+						buf + pc->b_count, count);
+			kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
+			local_irq_restore(flags);
+		} else {
+			buf = page_address(pc->sg->page) + pc->sg->offset;
+			drive->hwif->atapi_input_bytes(drive,
+						buf + pc->b_count, count);
+		}
+		bcount -= count; pc->b_count += count;
 		if (pc->b_count == pc->sg->length) {
 			pc->sg++;
 			pc->b_count = 0;
@@ -205,12 +215,22 @@ static void idescsi_output_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsign
 			return;
 		}
 		count = min(pc->sg->length - pc->b_count, bcount);
-		buf = kmap_atomic(pc->sg->page, KM_IRQ0);
-		drive->hwif->atapi_output_bytes(drive,
-				buf + pc->b_count + pc->sg->offset, count);
-		kunmap_atomic(buf, KM_IRQ0);
-		bcount -= count;
-		pc->b_count += count;
+		if (PageHighMem(pc->sg->page)) {
+			unsigned long flags;
+
+			local_irq_save(flags);
+			buf = kmap_atomic(pc->sg->page, KM_IRQ0) +
+						pc->sg->offset;
+			drive->hwif->atapi_output_bytes(drive,
+						buf + pc->b_count, count);
+			kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
+			local_irq_restore(flags);
+		} else {
+			buf = page_address(pc->sg->page) + pc->sg->offset;
+			drive->hwif->atapi_output_bytes(drive,
+						buf + pc->b_count, count);
+		}
+		bcount -= count; pc->b_count += count;
 		if (pc->b_count == pc->sg->length) {
 			pc->sg++;
 			pc->b_count = 0;
-- 
cgit v1.2.3


From 659603ef692d3f6c7c216e80310990253864bf2e Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@suse.de>
Date: Mon, 31 Oct 2005 14:08:54 -0800
Subject: [PATCH] fix __writeback_single_inode WARN_ON

When the inode count is zero in inode writeback, the

	WARN_ON(!(inode->i_state & I_WILL_FREE));

is broken, and needs to test for either I_WILL_FREE|I_FREEING.

When the inode is in I_FREEING state, it's already out of the visibility
of the vm, so it can't be freed and doesn't require the __iget; the
generic_delete_inode path can call the sync from within the lowlevel
fs callback during the last iput. So the inode being in I_FREEING is
also a valid condition for calling the sync with i_count == 0.

The specific stack trace is this:

  0xc00000007b8fb6e0  0xc00000000010118c  .__writeback_single_inode +0x5c
  0xc00000007b8fb6e0  0xc0000000001014dc (lr) .sync_inode +0x3c
  0xc00000007b8fb790  0xc0000000001014dc  .sync_inode +0x3c
  0xc00000007b8fb820  0xc0000000001a5020  .ext2_sync_inode +0x64
  0xc00000007b8fb8f0  0xc0000000001a65b4  .ext2_truncate +0x3f8
  0xc00000007b8fba40  0xc0000000001a6940  .ext2_delete_inode +0xdc
  0xc00000007b8fbac0  0xc0000000000f7a5c  .generic_delete_inode +0x124
  0xc00000007b8fbb50  0xc0000000000f5fe0  .iput +0xb8
  0xc00000007b8fbbe0  0xc0000000000e9fd4  .sys_unlink +0x2a8
  0xc00000007b8fbd10  0xc00000000001048c  .ret_from_syscall_1 +0x0

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fs-writeback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ffab4783ac64..c27f8d4098be 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -247,7 +247,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	wait_queue_head_t *wqh;
 
 	if (!atomic_read(&inode->i_count))
-		WARN_ON(!(inode->i_state & I_WILL_FREE));
+		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
 	else
 		WARN_ON(inode->i_state & I_WILL_FREE);
 
-- 
cgit v1.2.3


From d83c671fb7023f69a9582e622d01525054f23b66 Mon Sep 17 00:00:00 2001
From: James Courtier-Dutton <James@superbug.co.uk>
Date: Mon, 31 Oct 2005 10:27:41 +0000
Subject: [PATCH] Creative Audigy 2 cardbus: Add IO window wakeup magic

This adds the magic IO wakeup code for the CardBus version of the
Creative Labs Audigy 2 to the snd-emu10k1 driver.

Without the magic IO enable sequence, reading from the IO region of the
card will fail spectacularly, and the machine will hang.

My next task will be getting the driver to actually play sound without
distortion.

Signed-off-by: James Courtier-Dutton <James@superbug.co.uk>

[ This is a work-in-progress, but since it avoids a total lockup
  if the emu10k module is loaded on a machine with the cardbus
  card inserted, we're better off with it than without it, even
  if sound quality is bad right now ]

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/sound/emu10k1.h          |  1 +
 sound/pci/emu10k1/emu10k1_main.c | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h
index 14cb2718cb77..46e3c0bf3c94 100644
--- a/include/sound/emu10k1.h
+++ b/include/sound/emu10k1.h
@@ -1055,6 +1055,7 @@ typedef struct {
 	unsigned char emu10k2_chip; /* Audigy 1 or Audigy 2. */
 	unsigned char ca0102_chip;  /* Audigy 1 or Audigy 2. Not SB Audigy 2 Value. */
 	unsigned char ca0108_chip;  /* Audigy 2 Value */
+	unsigned char ca_cardbus_chip; /* Audigy 2 ZS Notebook */
 	unsigned char ca0151_chip;  /* P16V */
 	unsigned char spk71;        /* Has 7.1 speakers */
 	unsigned char sblive51;	    /* SBLive! 5.1 - extout 0x11 -> center, 0x12 -> lfe */
diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c
index e9cd8e054f25..53aeff0b783a 100644
--- a/sound/pci/emu10k1/emu10k1_main.c
+++ b/sound/pci/emu10k1/emu10k1_main.c
@@ -579,6 +579,30 @@ static int __devinit snd_emu10k1_ecard_init(emu10k1_t * emu)
 	return 0;
 }
 
+static int __devinit snd_emu10k1_cardbus_init(emu10k1_t * emu)
+{
+	unsigned long special_port;
+	unsigned int value;
+
+	/* Special initialisation routine
+	 * before the rest of the IO-Ports become active.
+	 */
+	special_port = emu->port + 0x38;
+	value = inl(special_port);
+	outl(0x00d00000, special_port);
+	value = inl(special_port);
+	outl(0x00d00001, special_port);
+	value = inl(special_port);
+	outl(0x00d0005f, special_port);
+	value = inl(special_port);
+	outl(0x00d0007f, special_port);
+	value = inl(special_port);
+	outl(0x0090007f, special_port);
+	value = inl(special_port);
+
+	return 0;
+}
+
 /*
  *  Create the EMU10K1 instance
  */
@@ -624,6 +648,16 @@ static emu_chip_details_t emu_chip_details[] = {
 	 .ca0108_chip = 1,
 	 .spk71 = 1,
 	 .ac97_chip = 1} ,
+	/* Audigy 2 ZS Notebook Cardbus card.*/
+	/* Tested by James@superbug.co.uk 30th October 2005 */
+	/* Not working yet, but progressing. */
+	{.vendor = 0x1102, .device = 0x0008, .subsystem = 0x20011102,
+	 .driver = "Audigy2", .name = "Audigy 2 ZS Notebook [SB0530]", 
+	 .id = "Audigy2",
+	 .emu10k2_chip = 1,
+	 .ca0108_chip = 1,
+	 .ca_cardbus_chip = 1,
+	 .spk71 = 1} ,
 	{.vendor = 0x1102, .device = 0x0008, 
 	 .driver = "Audigy2", .name = "Audigy 2 Value [Unknown]", 
 	 .id = "Audigy2",
@@ -1011,6 +1045,11 @@ int __devinit snd_emu10k1_create(snd_card_t * card,
 			snd_emu10k1_free(emu);
 			return err;
 		}
+	} else if (emu->card_capabilities->ca_cardbus_chip) {
+		if ((err = snd_emu10k1_cardbus_init(emu)) < 0) {
+			snd_emu10k1_free(emu);
+			return err;
+		}
 	} else {
 		/* 5.1: Enable the additional AC97 Slots. If the emu10k1 version
 			does not support this, it shouldn't do any harm */
-- 
cgit v1.2.3


From 1e4c85f97fe26fbd70da12148b3992c0e00361fd Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Mon, 31 Oct 2005 19:16:17 -0800
Subject: Revert "i386: move apic init in init_IRQs"

Commit f2b36db692b7ff6972320ad9839ae656a3b0ee3e causes a bootup hang on
at least one machine.  Revert for now until we understand why.  The old
code may be ugly, but it works.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/apic.c                       | 77 ++++++---------------------
 arch/i386/kernel/i8259.c                      |  4 --
 arch/i386/kernel/io_apic.c                    |  6 +--
 arch/i386/kernel/smpboot.c                    | 68 +++++++++++++++++------
 arch/i386/kernel/time.c                       | 12 +----
 include/asm-i386/apic.h                       |  3 +-
 include/asm-i386/hw_irq.h                     |  1 -
 include/asm-i386/mach-default/smpboot_hooks.h | 15 ++++++
 include/asm-i386/mach-visws/smpboot_hooks.h   |  7 +++
 init/main.c                                   | 11 ++++
 10 files changed, 104 insertions(+), 100 deletions(-)

diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 9204be6eedb3..7c724ffa08bb 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -803,7 +803,6 @@ no_apic:
 
 void __init init_apic_mappings(void)
 {
-	unsigned int orig_apicid;
 	unsigned long apic_phys;
 
 	/*
@@ -825,11 +824,8 @@ void __init init_apic_mappings(void)
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	orig_apicid = boot_cpu_physical_apicid;
-	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
-	if ((orig_apicid != -1U) && (orig_apicid != boot_cpu_physical_apicid))
-		printk(KERN_WARNING "Boot APIC ID in local APIC unexpected (%d vs %d)",
-			orig_apicid, boot_cpu_physical_apicid);
+	if (boot_cpu_physical_apicid == -1U)
+		boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
 
 #ifdef CONFIG_X86_IO_APIC
 	{
@@ -1259,81 +1255,40 @@ fastcall void smp_error_interrupt(struct pt_regs *regs)
 }
 
 /*
- * This initializes the IO-APIC and APIC hardware.
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
  */
-int __init APIC_init(void)
+int __init APIC_init_uniprocessor (void)
 {
-	if (enable_local_apic < 0) {
-		printk(KERN_INFO "APIC disabled\n");
-		return -1;
-	}
+	if (enable_local_apic < 0)
+		clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
 
-	/* See if we have a SMP configuration or have forced enabled
-	 * the local apic.
-	 */
-	if (!smp_found_config && !acpi_lapic && !cpu_has_apic) {
-		enable_local_apic = -1;
+	if (!smp_found_config && !cpu_has_apic)
 		return -1;
-	}
 
 	/*
-	 * Complain if the BIOS pretends there is an apic.
-	 * Then get out because we don't have an a local apic.
+	 * Complain if the BIOS pretends there is one.
 	 */
 	if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
 		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
 			boot_cpu_physical_apicid);
-		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
-		enable_local_apic = -1;
 		return -1;
 	}
 
 	verify_local_APIC();
 
-	/*
-	 * Should not be necessary because the MP table should list the boot
-	 * CPU too, but we do it for the sake of robustness anyway.
-	 * Makes no sense to do this check in clustered apic mode, so skip it
-	 */
-	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
-		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
-				boot_cpu_physical_apicid);
-		physid_set(boot_cpu_physical_apicid, phys_cpu_present_map);
-	}
-
-	/*
-	 * Switch from PIC to APIC mode.
-	 */
 	connect_bsp_APIC();
-	setup_local_APIC();
 
-#ifdef CONFIG_X86_IO_APIC
-	/*
-	 * Now start the IO-APICs
-	 */
-	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-		setup_IO_APIC();
-#endif
-	return 0;
-}
+	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
 
-void __init APIC_late_time_init(void)
-{
-	/* Improve our loops per jiffy estimate */
-	loops_per_jiffy = ((1000 + HZ - 1)/HZ)*cpu_khz;
-	boot_cpu_data.loops_per_jiffy = loops_per_jiffy;
-	cpu_data[0].loops_per_jiffy = loops_per_jiffy;
-
-	/* setup_apic_nmi_watchdog doesn't work properly before cpu_khz is
-	 * initialized.  So redo it here to ensure the boot cpu is setup
-	 * properly.
-	 */
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		setup_apic_nmi_watchdog();
+	setup_local_APIC();
 
 #ifdef CONFIG_X86_IO_APIC
-	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-		IO_APIC_late_time_init();
+	if (smp_found_config)
+		if (!skip_ioapic_setup && nr_ioapics)
+			setup_IO_APIC();
 #endif
 	setup_boot_APIC_clock();
+
+	return 0;
 }
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index d86f24909284..323ef8ab3244 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -435,8 +435,4 @@ void __init init_IRQ(void)
 		setup_irq(FPU_IRQ, &fpu_irq);
 
 	irq_ctx_init(smp_processor_id());
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	APIC_init();
-#endif
 }
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 5a77c52b20a9..cc5d7ac5b2e7 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -2387,15 +2387,11 @@ void __init setup_IO_APIC(void)
 	sync_Arb_IDs();
 	setup_IO_APIC_irqs();
 	init_IO_APIC_traps();
+	check_timer();
 	if (!acpi_ioapic)
 		print_IO_APIC();
 }
 
-void __init IO_APIC_late_time_init(void)
-{
-	check_timer();
-}
-
 /*
  *	Called after all the initialization is done. If we didnt find any
  *	APIC bugs then we can allow the modify fast path
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 5a2bbe0c4fff..01b618e73ecd 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -1078,16 +1078,6 @@ void *xquad_portio;
 EXPORT_SYMBOL(xquad_portio);
 #endif
 
-/*
- * Fall back to non SMP mode after errors.
- *
- */
-static __init void disable_smp(void)
-{
-	cpu_set(0, cpu_sibling_map[0]);
-	cpu_set(0, cpu_core_map[0]);
-}
-
 static void __init smp_boot_cpus(unsigned int max_cpus)
 {
 	int apicid, cpu, bit, kicked;
@@ -1100,6 +1090,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	printk("CPU%d: ", 0);
 	print_cpu_info(&cpu_data[0]);
 
+	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
 	boot_cpu_logical_apicid = logical_smp_processor_id();
 	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
 
@@ -1111,27 +1102,68 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	cpus_clear(cpu_core_map[0]);
 	cpu_set(0, cpu_core_map[0]);
 
-	map_cpu_to_logical_apicid();
-
 	/*
 	 * If we couldn't find an SMP configuration at boot time,
 	 * get out of here now!
 	 */
 	if (!smp_found_config && !acpi_lapic) {
 		printk(KERN_NOTICE "SMP motherboard not detected.\n");
-		disable_smp();
+		smpboot_clear_io_apic_irqs();
+		phys_cpu_present_map = physid_mask_of_physid(0);
+		if (APIC_init_uniprocessor())
+			printk(KERN_NOTICE "Local APIC not detected."
+					   " Using dummy APIC emulation.\n");
+		map_cpu_to_logical_apicid();
+		cpu_set(0, cpu_sibling_map[0]);
+		cpu_set(0, cpu_core_map[0]);
+		return;
+	}
+
+	/*
+	 * Should not be necessary because the MP table should list the boot
+	 * CPU too, but we do it for the sake of robustness anyway.
+	 * Makes no sense to do this check in clustered apic mode, so skip it
+	 */
+	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
+		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+				boot_cpu_physical_apicid);
+		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+	}
+
+	/*
+	 * If we couldn't find a local APIC, then get out of here now!
+	 */
+	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
+		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+			boot_cpu_physical_apicid);
+		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+		smpboot_clear_io_apic_irqs();
+		phys_cpu_present_map = physid_mask_of_physid(0);
+		cpu_set(0, cpu_sibling_map[0]);
+		cpu_set(0, cpu_core_map[0]);
 		return;
 	}
 
+	verify_local_APIC();
+
 	/*
 	 * If SMP should be disabled, then really disable it!
 	 */
-	if (!max_cpus || (enable_local_apic < 0)) {
-		printk(KERN_INFO "SMP mode deactivated.\n");
-		disable_smp();
+	if (!max_cpus) {
+		smp_found_config = 0;
+		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+		smpboot_clear_io_apic_irqs();
+		phys_cpu_present_map = physid_mask_of_physid(0);
+		cpu_set(0, cpu_sibling_map[0]);
+		cpu_set(0, cpu_core_map[0]);
 		return;
 	}
 
+	connect_bsp_APIC();
+	setup_local_APIC();
+	map_cpu_to_logical_apicid();
+
+
 	setup_portio_remap();
 
 	/*
@@ -1212,6 +1244,10 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	cpu_set(0, cpu_sibling_map[0]);
 	cpu_set(0, cpu_core_map[0]);
 
+	smpboot_setup_io_apic();
+
+	setup_boot_APIC_clock();
+
 	/*
 	 * Synchronize the TSC with the AP
 	 */
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 07471bba2dc6..41c5b2dc6200 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -440,8 +440,8 @@ static int time_init_device(void)
 
 device_initcall(time_init_device);
 
-extern void (*late_time_init)(void);
 #ifdef CONFIG_HPET_TIMER
+extern void (*late_time_init)(void);
 /* Duplicate of time_init() below, with hpet_enable part added */
 static void __init hpet_time_init(void)
 {
@@ -458,11 +458,6 @@ static void __init hpet_time_init(void)
 	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
 
 	time_init_hook();
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	if (enable_local_apic >= 0)
-		APIC_late_time_init();
-#endif
 }
 #endif
 
@@ -487,9 +482,4 @@ void __init time_init(void)
 	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
 
 	time_init_hook();
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	if (enable_local_apic >= 0)
-		late_time_init = APIC_late_time_init;
-#endif
 }
diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h
index a515e2aed829..8c454aa58ac6 100644
--- a/include/asm-i386/apic.h
+++ b/include/asm-i386/apic.h
@@ -118,8 +118,7 @@ extern void release_lapic_nmi(void);
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
 extern void nmi_watchdog_tick (struct pt_regs * regs);
-extern int APIC_init(void);
-extern void APIC_late_time_init(void);
+extern int APIC_init_uniprocessor (void);
 extern void disable_APIC_timer(void);
 extern void enable_APIC_timer(void);
 
diff --git a/include/asm-i386/hw_irq.h b/include/asm-i386/hw_irq.h
index 9139b89497a1..622815bf3243 100644
--- a/include/asm-i386/hw_irq.h
+++ b/include/asm-i386/hw_irq.h
@@ -55,7 +55,6 @@ void init_8259A(int aeoi);
 void FASTCALL(send_IPI_self(int vector));
 void init_VISWS_APIC_irqs(void);
 void setup_IO_APIC(void);
-void IO_APIC_late_time_init(void);
 void disable_IO_APIC(void);
 void print_IO_APIC(void);
 int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
diff --git a/include/asm-i386/mach-default/smpboot_hooks.h b/include/asm-i386/mach-default/smpboot_hooks.h
index d7c70c144f9f..7f45f6311059 100644
--- a/include/asm-i386/mach-default/smpboot_hooks.h
+++ b/include/asm-i386/mach-default/smpboot_hooks.h
@@ -1,6 +1,11 @@
 /* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
  * which needs to alter them. */
 
+static inline void smpboot_clear_io_apic_irqs(void)
+{
+	io_apic_irqs = 0;
+}
+
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
 	CMOS_WRITE(0xa, 0xf);
@@ -27,3 +32,13 @@ static inline void smpboot_restore_warm_reset_vector(void)
 
 	*((volatile long *) phys_to_virt(0x467)) = 0;
 }
+
+static inline void smpboot_setup_io_apic(void)
+{
+	/*
+	 * Here we can be sure that there is an IO-APIC in the system. Let's
+	 * go and set it up:
+	 */
+	if (!skip_ioapic_setup && nr_ioapics)
+		setup_IO_APIC();
+}
diff --git a/include/asm-i386/mach-visws/smpboot_hooks.h b/include/asm-i386/mach-visws/smpboot_hooks.h
index 14d8e0375f7a..d926471fa359 100644
--- a/include/asm-i386/mach-visws/smpboot_hooks.h
+++ b/include/asm-i386/mach-visws/smpboot_hooks.h
@@ -11,7 +11,14 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 
 /* for visws do nothing for any of these */
 
+static inline void smpboot_clear_io_apic_irqs(void)
+{
+}
+
 static inline void smpboot_restore_warm_reset_vector(void)
 {
 }
 
+static inline void smpboot_setup_io_apic(void)
+{
+}
diff --git a/init/main.c b/init/main.c
index 4075d97e94b1..f142d4035341 100644
--- a/init/main.c
+++ b/init/main.c
@@ -64,6 +64,10 @@
 #endif
 #endif
 
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/smp.h>
+#endif
+
 /*
  * Versions of gcc older than that listed below may actually compile
  * and link okay, but the end product can have subtle run time bugs.
@@ -310,7 +314,14 @@ extern void setup_arch(char **);
 
 #ifndef CONFIG_SMP
 
+#ifdef CONFIG_X86_LOCAL_APIC
+static void __init smp_init(void)
+{
+	APIC_init_uniprocessor();
+}
+#else
 #define smp_init()	do { } while (0)
+#endif
 
 static inline void setup_per_cpu_areas(void) { }
 static inline void smp_prepare_cpus(unsigned int maxcpus) { }
-- 
cgit v1.2.3


From 541ab4af11d5b41b95cd633e9b1d96cea9947ac2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Mon, 31 Oct 2005 21:12:40 -0800
Subject: Don't touch USB controller IO registers when they are disabled

The USB "handoff" code is an early PCI quirk to make sure we own the USB
controller (as opposed to the BIOS/SMM).  But if the controller isn't
even enabled yet, don't try to access it.

Acked-by: Paul Mackerras <paulus@samba.org> (who had an alternate patch)
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/usb/host/pci-quirks.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index b7fd3f644e1e..b1aa350fd32f 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -138,11 +138,23 @@ reset_needed:
 }
 EXPORT_SYMBOL_GPL(uhci_check_and_reset_hc);
 
+static inline int io_type_enabled(struct pci_dev *pdev, unsigned int mask)
+{
+	u16 cmd;
+	return !pci_read_config_word(pdev, PCI_COMMAND, &cmd) && (cmd & mask);
+}
+
+#define pio_enabled(dev) io_type_enabled(dev, PCI_COMMAND_IO)
+#define mmio_enabled(dev) io_type_enabled(dev, PCI_COMMAND_MEMORY)
+
 static void __devinit quirk_usb_handoff_uhci(struct pci_dev *pdev)
 {
 	unsigned long base = 0;
 	int i;
 
+	if (!pio_enabled(pdev))
+		return;
+
 	for (i = 0; i < PCI_ROM_RESOURCE; i++)
 		if ((pci_resource_flags(pdev, i) & IORESOURCE_IO)) {
 			base = pci_resource_start(pdev, i);
@@ -153,12 +165,20 @@ static void __devinit quirk_usb_handoff_uhci(struct pci_dev *pdev)
 		uhci_check_and_reset_hc(pdev, base);
 }
 
+static int __devinit mmio_resource_enabled(struct pci_dev *pdev, int idx)
+{
+	return pci_resource_start(pdev, idx) && mmio_enabled(pdev);
+}
+
 static void __devinit quirk_usb_handoff_ohci(struct pci_dev *pdev)
 {
 	void __iomem *base;
 	int wait_time;
 	u32 control;
 
+	if (!mmio_resource_enabled(pdev, 0))
+		return;
+
 	base = ioremap_nocache(pci_resource_start(pdev, 0),
 				     pci_resource_len(pdev, 0));
 	if (base == NULL) return;
@@ -201,6 +221,9 @@ static void __devinit quirk_usb_disable_ehci(struct pci_dev *pdev)
 	u32 hcc_params, val, temp;
 	u8 cap_length;
 
+	if (!mmio_resource_enabled(pdev, 0))
+		return;
+
 	base = ioremap_nocache(pci_resource_start(pdev, 0),
 				pci_resource_len(pdev, 0));
 	if (base == NULL) return;
-- 
cgit v1.2.3


From 3aebf25bdcf030f3e4afeb9340486d5b46deb46e Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 1 Nov 2005 15:49:31 +0000
Subject: NTFS: Fix a stupid bug causing writes to non-initialized pages to
 segfault.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/file.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index cf3e6ced2d01..727533891813 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -668,10 +668,10 @@ map_buffer_cached:
 				 * to, we need to read it in before the write,
 				 * i.e. now.
 				 */
-				if (!buffer_uptodate(bh) && ((bh_pos < pos &&
-						bh_end > pos) ||
-						(bh_end > end &&
-						bh_end > end))) {
+				if (!buffer_uptodate(bh) && bh_pos < end &&
+						bh_end > pos &&
+						(bh_pos < pos ||
+						bh_end > end)) {
 					/*
 					 * If the buffer is fully or partially
 					 * within the initialized size, do an
@@ -784,10 +784,11 @@ retry_remap:
 						blocksize_bits);
 				cdelta = 0;
 				/*
-				 * If the number of remaining clusters in the
-				 * @pages is smaller or equal to the number of
-				 * cached clusters, unlock the runlist as the
-				 * map cache will be used from now on.
+				 * If the number of remaining clusters touched
+				 * by the write is smaller or equal to the
+				 * number of cached clusters, unlock the
+				 * runlist as the map cache will be used from
+				 * now on.
 				 */
 				if (likely(vcn + vcn_len >= cend)) {
 					if (rl_write_locked) {
-- 
cgit v1.2.3