From dc7a08166f3a5f23e79e839a8a88849bd3397c32 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 27 Oct 2009 14:41:35 -0400 Subject: nfs: new subdir Documentation/filesystems/nfs We're adding enough nfs documentation that it may as well have its own subdirectory. Acked-by: Randy Dunlap Signed-off-by: J. Bruce Fields --- include/linux/exportfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 27e772cefb6a..dc12f416a49f 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -97,7 +97,7 @@ struct fid { * @get_name: find the name for a given inode in a given directory * @get_parent: find the parent of a given directory * - * See Documentation/filesystems/Exporting for details on how to use + * See Documentation/filesystems/nfs/Exporting for details on how to use * this interface correctly. * * encode_fh: -- cgit v1.2.3 From c017b4be3e84176cab10eca5e6c4faeb8cfc6f3e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 28 Oct 2009 13:33:09 +0000 Subject: kmemleak: Simplify the kmemleak_scan_area() function prototype This function was taking non-necessary arguments which can be determined by kmemleak. The patch also modifies the calling sites. Signed-off-by: Catalin Marinas Cc: Pekka Enberg Cc: Christoph Lameter Cc: Rusty Russell --- include/linux/kmemleak.h | 6 ++---- kernel/module.c | 7 ++----- mm/kmemleak.c | 49 +++++++++++++++++++++--------------------------- mm/slab.c | 4 ++-- 4 files changed, 27 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 3c7497d46ee9..99d9a6766f7e 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -32,8 +32,7 @@ extern void kmemleak_padding(const void *ptr, unsigned long offset, size_t size) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; -extern void kmemleak_scan_area(const void *ptr, unsigned long offset, - size_t length, gfp_t gfp) __ref; +extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, @@ -84,8 +83,7 @@ static inline void kmemleak_not_leak(const void *ptr) static inline void kmemleak_ignore(const void *ptr) { } -static inline void kmemleak_scan_area(const void *ptr, unsigned long offset, - size_t length, gfp_t gfp) +static inline void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) { } static inline void kmemleak_erase(void **ptr) diff --git a/kernel/module.c b/kernel/module.c index 8b7d8805819d..1eb952097077 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2043,9 +2043,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, unsigned int i; /* only scan the sections containing data */ - kmemleak_scan_area(mod->module_core, (unsigned long)mod - - (unsigned long)mod->module_core, - sizeof(struct module), GFP_KERNEL); + kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); for (i = 1; i < hdr->e_shnum; i++) { if (!(sechdrs[i].sh_flags & SHF_ALLOC)) @@ -2054,8 +2052,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) continue; - kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr - - (unsigned long)mod->module_core, + kmemleak_scan_area((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size, GFP_KERNEL); } } 
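For orientation, the simplified convention can be seen in the converted call sites: callers now pass the address of the sub-region to scan directly, instead of an offset into the enclosing object. Below is a minimal sketch of a caller using the new prototype; the structure and its fields are hypothetical, not taken from the patch.

/*
 * Illustrative only: register a scan area covering just the pointer-
 * bearing field of a kmalloc()ed object, using the simplified
 * kmemleak_scan_area(ptr, size, gfp) prototype introduced here.
 */
#include <linux/kmemleak.h>
#include <linux/list.h>
#include <linux/slab.h>

struct my_obj {				/* hypothetical structure */
	struct list_head list;		/* only field holding pointers */
	char payload[256];		/* bulk data, no need to scan */
};

static struct my_obj *my_obj_alloc(gfp_t gfp)
{
	struct my_obj *obj = kmalloc(sizeof(*obj), gfp);

	if (!obj)
		return NULL;
	/* Old API: kmemleak_scan_area(obj, offsetof(struct my_obj, list),
	 *          sizeof(obj->list), gfp); now the interior pointer and
	 *          size are passed directly. */
	kmemleak_scan_area(&obj->list, sizeof(obj->list), gfp);
	return obj;
}
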
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 8bf765c4f58d..96106358e042 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -119,8 +119,8 @@ /* scanning area inside a memory block */ struct kmemleak_scan_area { struct hlist_node node; - unsigned long offset; - size_t length; + unsigned long start; + size_t size; }; #define KMEMLEAK_GREY 0 @@ -241,8 +241,6 @@ struct early_log { const void *ptr; /* allocated/freed memory block */ size_t size; /* memory block size */ int min_count; /* minimum reference count */ - unsigned long offset; /* scan area offset */ - size_t length; /* scan area length */ unsigned long trace[MAX_TRACE]; /* stack trace */ unsigned int trace_len; /* stack trace length */ }; @@ -720,14 +718,13 @@ static void make_black_object(unsigned long ptr) * Add a scanning area to the object. If at least one such area is added, * kmemleak will only scan these ranges rather than the whole memory block. */ -static void add_scan_area(unsigned long ptr, unsigned long offset, - size_t length, gfp_t gfp) +static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) { unsigned long flags; struct kmemleak_object *object; struct kmemleak_scan_area *area; - object = find_and_get_object(ptr, 0); + object = find_and_get_object(ptr, 1); if (!object) { kmemleak_warn("Adding scan area to unknown object at 0x%08lx\n", ptr); @@ -741,7 +738,7 @@ static void add_scan_area(unsigned long ptr, unsigned long offset, } spin_lock_irqsave(&object->lock, flags); - if (offset + length > object->size) { + if (ptr + size > object->pointer + object->size) { kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr); dump_object_info(object); kmem_cache_free(scan_area_cache, area); @@ -749,8 +746,8 @@ static void add_scan_area(unsigned long ptr, unsigned long offset, } INIT_HLIST_NODE(&area->node); - area->offset = offset; - area->length = length; + area->start = ptr; + area->size = size; hlist_add_head(&area->node, &object->area_list); out_unlock: @@ -786,7 +783,7 @@ static void object_no_scan(unsigned long ptr) * processed later once kmemleak is fully initialized. 
*/ static void __init log_early(int op_type, const void *ptr, size_t size, - int min_count, unsigned long offset, size_t length) + int min_count) { unsigned long flags; struct early_log *log; @@ -808,8 +805,6 @@ static void __init log_early(int op_type, const void *ptr, size_t size, log->ptr = ptr; log->size = size; log->min_count = min_count; - log->offset = offset; - log->length = length; if (op_type == KMEMLEAK_ALLOC) log->trace_len = __save_stack_trace(log->trace); crt_early_log++; @@ -858,7 +853,7 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count, if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) create_object((unsigned long)ptr, size, min_count, gfp); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0); + log_early(KMEMLEAK_ALLOC, ptr, size, min_count); } EXPORT_SYMBOL_GPL(kmemleak_alloc); @@ -873,7 +868,7 @@ void __ref kmemleak_free(const void *ptr) if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) delete_object_full((unsigned long)ptr); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); + log_early(KMEMLEAK_FREE, ptr, 0, 0); } EXPORT_SYMBOL_GPL(kmemleak_free); @@ -888,7 +883,7 @@ void __ref kmemleak_free_part(const void *ptr, size_t size) if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) delete_object_part((unsigned long)ptr, size); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0); + log_early(KMEMLEAK_FREE_PART, ptr, size, 0); } EXPORT_SYMBOL_GPL(kmemleak_free_part); @@ -903,7 +898,7 @@ void __ref kmemleak_not_leak(const void *ptr) if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) make_gray_object((unsigned long)ptr); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0); + log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0); } EXPORT_SYMBOL(kmemleak_not_leak); @@ -919,22 +914,21 @@ void __ref kmemleak_ignore(const void *ptr) if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) make_black_object((unsigned long)ptr); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0); + log_early(KMEMLEAK_IGNORE, ptr, 0, 0); } EXPORT_SYMBOL(kmemleak_ignore); /* * Limit the range to be scanned in an allocated memory block. 
*/ -void __ref kmemleak_scan_area(const void *ptr, unsigned long offset, - size_t length, gfp_t gfp) +void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) { pr_debug("%s(0x%p)\n", __func__, ptr); if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) - add_scan_area((unsigned long)ptr, offset, length, gfp); + add_scan_area((unsigned long)ptr, size, gfp); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length); + log_early(KMEMLEAK_SCAN_AREA, ptr, size, 0); } EXPORT_SYMBOL(kmemleak_scan_area); @@ -948,7 +942,7 @@ void __ref kmemleak_no_scan(const void *ptr) if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) object_no_scan((unsigned long)ptr); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0); + log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0); } EXPORT_SYMBOL(kmemleak_no_scan); @@ -1075,9 +1069,9 @@ static void scan_object(struct kmemleak_object *object) } } else hlist_for_each_entry(area, elem, &object->area_list, node) - scan_block((void *)(object->pointer + area->offset), - (void *)(object->pointer + area->offset - + area->length), object, 0); + scan_block((void *)area->start, + (void *)(area->start + area->size), + object, 0); out: spin_unlock_irqrestore(&object->lock, flags); } @@ -1642,8 +1636,7 @@ void __init kmemleak_init(void) kmemleak_ignore(log->ptr); break; case KMEMLEAK_SCAN_AREA: - kmemleak_scan_area(log->ptr, log->offset, log->length, - GFP_KERNEL); + kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL); break; case KMEMLEAK_NO_SCAN: kmemleak_no_scan(log->ptr); diff --git a/mm/slab.c b/mm/slab.c index 646db3085193..d2713a944ebd 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2584,8 +2584,8 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, * kmemleak does not treat the ->s_mem pointer as a reference * to the object. Otherwise we will not report the leak. */ - kmemleak_scan_area(slabp, offsetof(struct slab, list), - sizeof(struct list_head), local_flags); + kmemleak_scan_area(&slabp->list, sizeof(struct list_head), + local_flags); if (!slabp) return NULL; } else { -- cgit v1.2.3 From 8c10cbdb4af642d9a2efb45ea89251aaab905360 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 19 Oct 2009 12:04:53 +0200 Subject: nfsd: use STATEID_FMT and STATEID_VAL for printing stateids Signed-off-by: Benny Halevy Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 46 ++++++++++++++++------------------------------ include/linux/nfsd/state.h | 7 +++++++ 2 files changed, 23 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 42dab9587afe..c8b621a120cd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2416,11 +2416,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); - dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n", - dp->dl_stateid.si_boot, - dp->dl_stateid.si_stateownerid, - dp->dl_stateid.si_fileid, - dp->dl_stateid.si_generation); + dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", + STATEID_VAL(&dp->dl_stateid)); out: if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && flag == NFS4_OPEN_DELEGATE_NONE @@ -2510,9 +2507,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs_ok; - dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n", - stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, - stp->st_stateid.si_fileid, stp->st_stateid.si_generation); + dprintk("%s: stateid=" STATEID_FMT "\n", __func__, + STATEID_VAL(&stp->st_stateid)); out: if (fp) put_nfs4_file(fp); @@ -2678,9 +2674,8 @@ STALE_STATEID(stateid_t *stateid) { if (time_after((unsigned long)boot_time, (unsigned long)stateid->si_boot)) { - dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", - stateid->si_boot, stateid->si_stateownerid, - stateid->si_fileid, stateid->si_generation); + dprintk("NFSD: stale stateid " STATEID_FMT "!\n", + STATEID_VAL(stateid)); return 1; } return 0; @@ -2692,9 +2687,8 @@ EXPIRED_STATEID(stateid_t *stateid) if (time_before((unsigned long)boot_time, ((unsigned long)stateid->si_boot)) && time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) { - dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n", - stateid->si_boot, stateid->si_stateownerid, - stateid->si_fileid, stateid->si_generation); + dprintk("NFSD: expired stateid " STATEID_FMT "!\n", + STATEID_VAL(stateid)); return 1; } return 0; @@ -2708,9 +2702,8 @@ stateid_error_map(stateid_t *stateid) if (EXPIRED_STATEID(stateid)) return nfserr_expired; - dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n", - stateid->si_boot, stateid->si_stateownerid, - stateid->si_fileid, stateid->si_generation); + dprintk("NFSD: bad stateid " STATEID_FMT "!\n", + STATEID_VAL(stateid)); return nfserr_bad_stateid; } @@ -2896,10 +2889,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, struct svc_fh *current_fh = &cstate->current_fh; __be32 status; - dprintk("NFSD: preprocess_seqid_op: seqid=%d " - "stateid = (%08x/%08x/%08x/%08x)\n", seqid, - stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, - stateid->si_generation); + dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, + seqid, STATEID_VAL(stateid)); *stpp = NULL; *sopp = NULL; @@ -3031,12 +3022,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, sop->so_confirmed = 1; update_stateid(&stp->st_stateid); memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); - dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " - "stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid, - stp->st_stateid.si_boot, - stp->st_stateid.si_stateownerid, - stp->st_stateid.si_fileid, - stp->st_stateid.si_generation); + dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", + 
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid)); nfsd4_create_clid_dir(sop->so_client); out: @@ -3295,9 +3282,8 @@ find_delegation_stateid(struct inode *ino, stateid_t *stid) struct nfs4_file *fp; struct nfs4_delegation *dl; - dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n", - stid->si_boot, stid->si_stateownerid, - stid->si_fileid, stid->si_generation); + dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__, + STATEID_VAL(stid)); fp = find_file(ino); if (!fp) diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index b38d11324189..5aadf8aa3a97 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -60,6 +60,13 @@ typedef struct { #define si_stateownerid si_opaque.so_stateownerid #define si_fileid si_opaque.so_fileid +#define STATEID_FMT "(%08x/%08x/%08x/%08x)" +#define STATEID_VAL(s) \ + (s)->si_boot, \ + (s)->si_stateownerid, \ + (s)->si_fileid, \ + (s)->si_generation + struct nfsd4_cb_sequence { /* args/res */ u32 cbs_minorversion; -- cgit v1.2.3 From d81c45e1c9369855901420f79114852eba2ea16a Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 16 Oct 2009 09:20:41 +0800 Subject: ACPI: Notify the _PPC evaluation status to the platform According to the ACPI spec(section 8.4.4.3) OSPM should convey the _PPC evaluations status to the platform if there exists the _OST object. The _OST contains two arguments: The first is the PERFORMANCE notificatin event. The second is the status of _PPC object. OSPM will convey the _PPC evaluation status to the platform. Of course when the module parameter of "ignore_ppc" is added, OSPM won't evaluate the _PPC object. But it will call the _OST object. At the same time the _OST object will be evaluated only when the PERFORMANCE notification event is received. 
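Before the diff, a condensed sketch of the _OST convention described above: the method takes two integer arguments, the PERFORMANCE notification value (0x80) and the _PPC evaluation status (0 if the new limit was applied, 1 otherwise). The fragment mirrors the acpi_processor_ppc_ost() helper added by the patch below and is shown only for orientation, not as additional code.

/* Orientation sketch; the real helper is acpi_processor_ppc_ost() below. */
#include <linux/acpi.h>

static void ppc_notify_status(acpi_handle handle, int status)
{
	union acpi_object params[2] = {
		{ .type = ACPI_TYPE_INTEGER, },
		{ .type = ACPI_TYPE_INTEGER, },
	};
	struct acpi_object_list arg_list = { 2, params };
	acpi_handle temp;

	params[0].integer.value = 0x80;		/* PERFORMANCE notification event */
	params[1].integer.value = status;	/* 0: _PPC result applied, 1: not */

	/* _OST is optional; do nothing when the platform does not provide it. */
	if (ACPI_FAILURE(acpi_get_handle(handle, "_OST", &temp)))
		return;
	acpi_evaluate_object(handle, "_OST", &arg_list, NULL);
}
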
Signed-off-by: Zhao Yakui Signed-off-by: Len Brown --- drivers/acpi/processor_core.c | 6 ++--- drivers/acpi/processor_perflib.c | 50 +++++++++++++++++++++++++++++++++++++--- include/acpi/processor.h | 5 ++-- 3 files changed, 53 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index c567b46dfa0f..773d7e76f301 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -722,7 +722,7 @@ static void acpi_processor_notify(struct acpi_device *device, u32 event) switch (event) { case ACPI_PROCESSOR_NOTIFY_PERFORMANCE: saved = pr->performance_platform_limit; - acpi_processor_ppc_has_changed(pr); + acpi_processor_ppc_has_changed(pr, 1); if (saved == pr->performance_platform_limit) break; acpi_bus_generate_proc_event(device, event, @@ -758,7 +758,7 @@ static int acpi_cpu_soft_notify(struct notifier_block *nfb, struct acpi_processor *pr = per_cpu(processors, cpu); if (action == CPU_ONLINE && pr) { - acpi_processor_ppc_has_changed(pr); + acpi_processor_ppc_has_changed(pr, 0); acpi_processor_cst_has_changed(pr); acpi_processor_tstate_has_changed(pr); } @@ -830,7 +830,7 @@ static int acpi_processor_add(struct acpi_device *device) arch_acpi_processor_cleanup_pdc(pr); #ifdef CONFIG_CPU_FREQ - acpi_processor_ppc_has_changed(pr); + acpi_processor_ppc_has_changed(pr, 0); #endif acpi_processor_get_throttling_info(pr); acpi_processor_get_limit_info(pr); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 8ba0ed0b9ddb..fc1f49bbbeef 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -152,15 +152,59 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) return 0; } -int acpi_processor_ppc_has_changed(struct acpi_processor *pr) +#define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 +/* + * acpi_processor_ppc_ost: Notify firmware the _PPC evaluation status + * @handle: ACPI processor handle + * @status: the status code of _PPC evaluation + * 0: success. OSPM is now using the performance state specificed. + * 1: failure. OSPM has not changed the number of P-states in use + */ +static void acpi_processor_ppc_ost(acpi_handle handle, int status) +{ + union acpi_object params[2] = { + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + }; + struct acpi_object_list arg_list = {2, params}; + acpi_handle temp; + + params[0].integer.value = ACPI_PROCESSOR_NOTIFY_PERFORMANCE; + params[1].integer.value = status; + + /* when there is no _OST , skip it */ + if (ACPI_FAILURE(acpi_get_handle(handle, "_OST", &temp))) + return; + + acpi_evaluate_object(handle, "_OST", &arg_list, NULL); + return; +} + +int acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag) { int ret; - if (ignore_ppc) + if (ignore_ppc) { + /* + * Only when it is notification event, the _OST object + * will be evaluated. Otherwise it is skipped. + */ + if (event_flag) + acpi_processor_ppc_ost(pr->handle, 1); return 0; + } ret = acpi_processor_get_platform_limit(pr); - + /* + * Only when it is notification event, the _OST object + * will be evaluated. Otherwise it is skipped. 
+ */ + if (event_flag) { + if (ret < 0) + acpi_processor_ppc_ost(pr->handle, 1); + else + acpi_processor_ppc_ost(pr->handle, 0); + } if (ret < 0) return (ret); else diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 740ac3ad8fd0..a920237f7c62 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -294,7 +294,7 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx #ifdef CONFIG_CPU_FREQ void acpi_processor_ppc_init(void); void acpi_processor_ppc_exit(void); -int acpi_processor_ppc_has_changed(struct acpi_processor *pr); +int acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag); #else static inline void acpi_processor_ppc_init(void) { @@ -304,7 +304,8 @@ static inline void acpi_processor_ppc_exit(void) { return; } -static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr) +static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr, + int event_flag) { static unsigned int printout = 1; if (printout) { -- cgit v1.2.3 From 417608c20a4c8397bc5307d949ec01ea0a0dd8e5 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 12 Nov 2009 11:19:44 -0800 Subject: IB/mlx4: Remove limitation on LSO header size Current code has a limitation: an LSO header is not allowed to cross a 64 byte boundary. This patch removes this limitation by setting the WQE RR for large headers thus allowing LSO headers of any size. The extra buffer reserved for MLX4_IB_QP_LSO QPs has been doubled, from 64 to 128 bytes, assuming this is reasonable upper limit for header length. Also, this patch will cause IB_DEVICE_UD_TSO to be set only for HCA FW versions that set MLX4_DEV_CAP_FLAG_BLH; e.g. FW version 2.6.000 and higher. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx4/qp.c | 24 ++++++++++++------------ drivers/net/mlx4/fw.c | 1 + include/linux/mlx4/device.h | 1 + 4 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3cb3f47a10b8..e596537ff353 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; - if (dev->dev->caps.max_gso_sz) + if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH) props->device_cap_flags |= IB_DEVICE_UD_TSO; if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 518d561970aa..847030c89a8d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -54,7 +54,8 @@ enum { /* * Largest possible UD header: send with GRH and immediate data. */ - MLX4_IB_UD_HEADER_SIZE = 72 + MLX4_IB_UD_HEADER_SIZE = 72, + MLX4_IB_LSO_HEADER_SPARE = 128, }; struct mlx4_ib_sqp { @@ -67,7 +68,8 @@ struct mlx4_ib_sqp { }; enum { - MLX4_IB_MIN_SQ_STRIDE = 6 + MLX4_IB_MIN_SQ_STRIDE = 6, + MLX4_IB_CACHE_LINE_SIZE = 64, }; static const __be32 mlx4_ib_opcode[] = { @@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags) case IB_QPT_UD: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg) + - ((flags & MLX4_IB_QP_LSO) ? 
64 : 0); + ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); case IB_QPT_UC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_raddr_seg); @@ -1466,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, struct mlx4_ib_qp *qp, unsigned *lso_seg_len, - __be32 *lso_hdr_sz) + __be32 *lso_hdr_sz, __be32 *blh) { unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); - /* - * This is a temporary limitation and will be removed in - * a forthcoming FW release: - */ - if (unlikely(halign > 64)) - return -EINVAL; + if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) + *blh = cpu_to_be32(1 << 6); if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && wr->num_sge > qp->sq.max_gs - (halign >> 4))) @@ -1521,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 dummy; __be32 *lso_wqe; __be32 uninitialized_var(lso_hdr_sz); + __be32 blh; int i; spin_lock_irqsave(&qp->sq.lock, flags); @@ -1529,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, for (nreq = 0; wr; ++nreq, wr = wr->next) { lso_wqe = &dummy; + blh = 0; if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { err = -ENOMEM; @@ -1615,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_datagram_seg) / 16; if (wr->opcode == IB_WR_LSO) { - err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz); + err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -1686,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | - (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); + (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 3c16602172fc..7194be3a2894 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -90,6 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags) [ 9] = "Q_Key violation counter", [10] = "VMM", [12] = "DPDP", + [15] = "Big LSO headers", [16] = "MW support", [17] = "APM support", [18] = "Atomic ops support", diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ce7cc6c7bcbb..e92d1bfdb330 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -61,6 +61,7 @@ enum { MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8, MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9, MLX4_DEV_CAP_FLAG_DPDP = 1 << 12, + MLX4_DEV_CAP_FLAG_BLH = 1 << 15, MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16, MLX4_DEV_CAP_FLAG_APM = 1 << 17, MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18, -- cgit v1.2.3 From 0a3adadee42f2865bb867b8c5f4955b7def9baad Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 4 Nov 2009 18:12:35 -0500 Subject: nfsd: make fs/nfsd/vfs.h for common includes None of this stuff is used outside nfsd, so move it out of the common linux include directory. Actually, probably none of the stuff in include/linux/nfsd/nfsd.h really belongs there, so later we may remove that file entirely. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/lockd.c | 1 + fs/nfsd/nfs2acl.c | 1 + fs/nfsd/nfs3acl.c | 1 + fs/nfsd/nfs3proc.c | 1 + fs/nfsd/nfs4proc.c | 1 + fs/nfsd/nfs4recover.c | 1 + fs/nfsd/nfs4state.c | 1 + fs/nfsd/nfs4xdr.c | 1 + fs/nfsd/nfsfh.c | 1 + fs/nfsd/nfsproc.c | 1 + fs/nfsd/nfssvc.c | 1 + fs/nfsd/vfs.c | 1 + fs/nfsd/vfs.h | 98 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfsd/nfsd.h | 87 +---------------------------------------- 14 files changed, 111 insertions(+), 86 deletions(-) create mode 100644 fs/nfsd/vfs.h (limited to 'include') diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index b2786a5f9afe..812bc64874f6 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -16,6 +16,7 @@ #include #include #include +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_LOCKD diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index e2a17f0a96a7..38c883d48b02 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -14,6 +14,7 @@ #include #include #include +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC #define RETURN_STATUS(st) { resp->status = (st); return (st); } diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index ff73596eb550..526d85a65f76 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -13,6 +13,7 @@ #include #include #include +#include "vfs.h" #define RETURN_STATUS(st) { resp->status = (st); return (st); } diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index a713c418a922..1a259d313e14 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -25,6 +25,7 @@ #include #include #include +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bebc0c2e1b0a..60a93cdefef5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -48,6 +48,7 @@ #include #include #include +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index b5348405046b..c7a6b245c7ad 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -47,6 +47,7 @@ #include #include #include +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c8b621a120cd..850960e5d626 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -56,6 +56,7 @@ #include #include #include +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0fbd50cee1f6..db0fc55670b3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -57,6 +57,7 @@ #include #include #include +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_XDR diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 01965b2f3a76..d0d8a217a3ea 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -22,6 +22,7 @@ #include #include #include +#include "vfs.h" #include "auth.h" #define NFSDDBG_FACILITY NFSDDBG_FH diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index c5393d1b8955..6c967e1ba37b 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -24,6 +24,7 @@ #include #include #include +#include "vfs.h" typedef struct svc_rqst svc_rqst; typedef struct svc_buf svc_buf; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 67ea83eedd43..2944b31dcbe6 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -35,6 +35,7 @@ #include #include #include +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_SVC diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 638573968dcf..a7038ede671a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -56,6 +56,7 @@ #endif /* CONFIG_NFSD_V4 */ #include #include +#include "vfs.h" #include 
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h new file mode 100644 index 000000000000..b8011fd2fcab --- /dev/null +++ b/fs/nfsd/vfs.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 1995-1997 Olaf Kirch + */ + +#ifndef LINUX_NFSD_VFS_H +#define LINUX_NFSD_VFS_H + +/* + * Flags for nfsd_permission + */ +#define NFSD_MAY_NOP 0 +#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */ +#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */ +#define NFSD_MAY_READ 4 /* == MAY_READ */ +#define NFSD_MAY_SATTR 8 +#define NFSD_MAY_TRUNC 16 +#define NFSD_MAY_LOCK 32 +#define NFSD_MAY_OWNER_OVERRIDE 64 +#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ +#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 + +#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) +#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) + +/* + * Callback function for readdir + */ +typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); + +/* nfsd/vfs.c */ +int fh_lock_parent(struct svc_fh *, struct dentry *); +int nfsd_racache_init(int); +void nfsd_racache_shutdown(void); +int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, + struct svc_export **expp); +__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, + const char *, unsigned int, struct svc_fh *); +__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, + const char *, unsigned int, + struct svc_export **, struct dentry **); +__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, + struct iattr *, int, time_t); +#ifdef CONFIG_NFSD_V4 +__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, + struct nfs4_acl *); +int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); +#endif /* CONFIG_NFSD_V4 */ +__be32 nfsd_create(struct svc_rqst *, struct svc_fh *, + char *name, int len, struct iattr *attrs, + int type, dev_t rdev, struct svc_fh *res); +#ifdef CONFIG_NFSD_V3 +__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); +__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *, + char *name, int len, struct iattr *attrs, + struct svc_fh *res, int createmode, + u32 *verifier, int *truncp, int *created); +__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, + loff_t, unsigned long); +#endif /* CONFIG_NFSD_V3 */ +__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int, + int, struct file **); +void nfsd_close(struct file *); +__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *, + loff_t, struct kvec *, int, unsigned long *); +__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, + loff_t, struct kvec *,int, unsigned long *, int *); +__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, + char *, int *); +__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, + char *name, int len, char *path, int plen, + struct svc_fh *res, struct iattr *); +__be32 nfsd_link(struct svc_rqst *, struct svc_fh *, + char *, int, struct svc_fh *); +__be32 nfsd_rename(struct svc_rqst *, + struct svc_fh *, char *, int, + struct svc_fh *, char *, int); +__be32 nfsd_remove(struct svc_rqst *, + struct svc_fh *, char *, int); +__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, + char *name, int len); +int nfsd_truncate(struct svc_rqst *, struct svc_fh *, + unsigned long size); +__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, + loff_t *, struct readdir_cd *, filldir_t); +__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, + struct kstatfs *, int access); + +int nfsd_notify_change(struct inode *, struct iattr *); +__be32 nfsd_permission(struct svc_rqst *, 
struct svc_export *, + struct dentry *, int); +int nfsd_sync_dir(struct dentry *dp); + +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); +int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); +#endif + +#endif /* LINUX_NFSD_VFS_H */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 510ffdd5020e..e4518d090a8c 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -25,30 +25,10 @@ */ #define NFSD_SUPPORTED_MINOR_VERSION 1 -/* - * Flags for nfsd_permission - */ -#define NFSD_MAY_NOP 0 -#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */ -#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */ -#define NFSD_MAY_READ 4 /* == MAY_READ */ -#define NFSD_MAY_SATTR 8 -#define NFSD_MAY_TRUNC 16 -#define NFSD_MAY_LOCK 32 -#define NFSD_MAY_OWNER_OVERRIDE 64 -#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ -#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 - -#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) -#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) - -/* - * Callback function for readdir - */ struct readdir_cd { __be32 err; /* 0, nfserr, or nfserr_eof */ }; -typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); + extern struct svc_program nfsd_program; extern struct svc_version nfsd_version2, nfsd_version3, @@ -73,69 +53,6 @@ int nfsd_nrpools(void); int nfsd_get_nrthreads(int n, int *); int nfsd_set_nrthreads(int n, int *); -/* nfsd/vfs.c */ -int fh_lock_parent(struct svc_fh *, struct dentry *); -int nfsd_racache_init(int); -void nfsd_racache_shutdown(void); -int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, - struct svc_export **expp); -__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, - const char *, unsigned int, struct svc_fh *); -__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, - const char *, unsigned int, - struct svc_export **, struct dentry **); -__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, - struct iattr *, int, time_t); -#ifdef CONFIG_NFSD_V4 -__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, - struct nfs4_acl *); -int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); -#endif /* CONFIG_NFSD_V4 */ -__be32 nfsd_create(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, - int type, dev_t rdev, struct svc_fh *res); -#ifdef CONFIG_NFSD_V3 -__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); -__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, - struct svc_fh *res, int createmode, - u32 *verifier, int *truncp, int *created); -__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, - loff_t, unsigned long); -#endif /* CONFIG_NFSD_V3 */ -__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int, - int, struct file **); -void nfsd_close(struct file *); -__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *, - loff_t, struct kvec *, int, unsigned long *); -__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, - loff_t, struct kvec *,int, unsigned long *, int *); -__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, - char *, int *); -__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, - char *name, int len, char *path, int plen, - struct svc_fh *res, struct iattr *); -__be32 nfsd_link(struct svc_rqst *, struct svc_fh *, - char *, int, struct svc_fh *); -__be32 nfsd_rename(struct svc_rqst *, - struct svc_fh 
*, char *, int, - struct svc_fh *, char *, int); -__be32 nfsd_remove(struct svc_rqst *, - struct svc_fh *, char *, int); -__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, - char *name, int len); -int nfsd_truncate(struct svc_rqst *, struct svc_fh *, - unsigned long size); -__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, - loff_t *, struct readdir_cd *, filldir_t); -__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, - struct kstatfs *, int access); - -int nfsd_notify_change(struct inode *, struct iattr *); -__be32 nfsd_permission(struct svc_rqst *, struct svc_export *, - struct dentry *, int); -int nfsd_sync_dir(struct dentry *dp); - #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL extern struct svc_version nfsd_acl_version2; @@ -147,8 +64,6 @@ extern struct svc_version nfsd_acl_version3; #else #define nfsd_acl_version3 NULL #endif -struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); -int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); #endif enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; -- cgit v1.2.3 From a7ca1f00ed2921b804d7ebda0f6fca8c9078fa42 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 16 Nov 2009 09:30:33 -0800 Subject: RDMA/ucma: Add option to manually set IB path Export rdma_set_ib_paths to user space to allow applications to manually set the IB path used for connections. This allows alternative ways for a user space application or library to obtain path record information, including retrieving path information from cached data, avoiding direct interaction with the IB SA. The IB SA is a single, centralized entity that can limit scaling on large clusters running MPI applications. Future changes to the rdma cm can expand on this framework to support the full range of features allowed by the IB CM, such as separate forward and reverse paths and APM. Signed-off-by: Sean Hefty Reviewed-By: Jason Gunthorpe Signed-off-by: Roland Dreier --- drivers/infiniband/core/sa_query.c | 6 +++++ drivers/infiniband/core/ucma.c | 49 ++++++++++++++++++++++++++++++++++++++ include/rdma/ib_sa.h | 6 +++++ include/rdma/ib_user_sa.h | 16 +++++++++++++ include/rdma/rdma_user_cm.h | 6 +++-- 5 files changed, 81 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 82543716d59e..7e1ffd8ccd5c 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -604,6 +604,12 @@ retry: return ret ? 
ret : id; } +void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec) +{ + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec); +} +EXPORT_SYMBOL(ib_sa_unpack_path); + static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index bb96d3c4b0f4..f1cbd26a9de0 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -43,6 +43,7 @@ #include #include #include +#include MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); @@ -812,6 +813,51 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname, return ret; } +static int ucma_set_ib_path(struct ucma_context *ctx, + struct ib_path_rec_data *path_data, size_t optlen) +{ + struct ib_sa_path_rec sa_path; + struct rdma_cm_event event; + int ret; + + if (optlen % sizeof(*path_data)) + return -EINVAL; + + for (; optlen; optlen -= sizeof(*path_data), path_data++) { + if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | + IB_PATH_BIDIRECTIONAL)) + break; + } + + if (!optlen) + return -EINVAL; + + ib_sa_unpack_path(path_data->path_rec, &sa_path); + ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); + if (ret) + return ret; + + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + return ucma_event_handler(ctx->cm_id, &event); +} + +static int ucma_set_option_ib(struct ucma_context *ctx, int optname, + void *optval, size_t optlen) +{ + int ret; + + switch (optname) { + case RDMA_OPTION_IB_PATH: + ret = ucma_set_ib_path(ctx, optval, optlen); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + static int ucma_set_option_level(struct ucma_context *ctx, int level, int optname, void *optval, size_t optlen) { @@ -821,6 +867,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level, case RDMA_OPTION_ID: ret = ucma_set_option_id(ctx, optname, optval, optlen); break; + case RDMA_OPTION_IB: + ret = ucma_set_option_ib(ctx, optname, optval, optlen); + break; default: ret = -ENOSYS; } diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 3841c1aff692..1082afaed158 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -379,4 +379,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr); +/** + * ib_sa_unpack_path - Convert a path record from MAD format to struct + * ib_sa_path_rec. 
+ */ +void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec); + #endif /* IB_SA_H */ diff --git a/include/rdma/ib_user_sa.h b/include/rdma/ib_user_sa.h index 659120157e14..cfc7c9ba781e 100644 --- a/include/rdma/ib_user_sa.h +++ b/include/rdma/ib_user_sa.h @@ -35,6 +35,22 @@ #include +enum { + IB_PATH_GMP = 1, + IB_PATH_PRIMARY = (1<<1), + IB_PATH_ALTERNATE = (1<<2), + IB_PATH_OUTBOUND = (1<<3), + IB_PATH_INBOUND = (1<<4), + IB_PATH_INBOUND_REVERSE = (1<<5), + IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE +}; + +struct ib_path_rec_data { + __u32 flags; + __u32 reserved; + __u32 path_rec[16]; +}; + struct ib_user_path_rec { __u8 dgid[16]; __u8 sgid[16]; diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index c55705460b87..1d165022c02d 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -215,12 +215,14 @@ struct rdma_ucm_event_resp { /* Option levels */ enum { - RDMA_OPTION_ID = 0 + RDMA_OPTION_ID = 0, + RDMA_OPTION_IB = 1 }; /* Option details */ enum { - RDMA_OPTION_ID_TOS = 0 + RDMA_OPTION_ID_TOS = 0, + RDMA_OPTION_IB_PATH = 1 }; struct rdma_ucm_set_option { -- cgit v1.2.3 From 6266ed6e4164466177238b11ecb825a3a108a3e4 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:55:22 -0800 Subject: RDMA/cma: Replace net_device pointer with index Provide the device interface when resolving route information to ensure that the correct outbound device is used. This will also simplify processing of sin6_scope_id for IPv6 support. Based on work from: David Wilder Jason Gunthorpe Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 14 +++++++++++++- drivers/infiniband/core/cma.c | 2 +- include/rdma/ib_addr.h | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 373f1118d57b..788a02ef01dd 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -107,7 +107,7 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); - dev_addr->src_dev = dev; + dev_addr->bound_dev_if = dev->ifindex; return 0; } EXPORT_SYMBOL(rdma_copy_addr); @@ -117,6 +117,15 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) struct net_device *dev; int ret = -EADDRNOTAVAIL; + if (dev_addr->bound_dev_if) { + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (!dev) + return -ENODEV; + ret = rdma_copy_addr(dev_addr, dev, NULL); + dev_put(dev); + return ret; + } + switch (addr->sa_family) { case AF_INET: dev = ip_dev_find(&init_net, @@ -231,6 +240,8 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in, memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; fl.nl_u.ip4_u.saddr = src_ip; + fl.oif = addr->bound_dev_if; + ret = ip_route_output_key(&init_net, &rt, &fl); if (ret) goto out; @@ -279,6 +290,7 @@ static int addr6_resolve_remote(struct sockaddr_in6 *src_in, memset(&fl, 0, sizeof fl); fl.nl_u.ip6_u.daddr = dst_in->sin6_addr; fl.nl_u.ip6_u.saddr = src_in->sin6_addr; + fl.oif = addr->bound_dev_if; dst = ip6_route_output(&init_net, NULL, &fl); if (!dst) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 052b4c01745d..699ad12b3a2f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2820,7 +2820,7 @@ static 
int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id dev_addr = &id_priv->id.route.addr.dev_addr; - if ((dev_addr->src_dev == ndev) && + if ((dev_addr->bound_dev_if == ndev->ifindex) && memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", ndev->name, &id_priv->id); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 483057b2f4b4..27f17cc2c919 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -61,7 +61,7 @@ struct rdma_dev_addr { unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; enum rdma_node_type dev_type; - struct net_device *src_dev; + int bound_dev_if; }; /** -- cgit v1.2.3 From c4315d85f9b76834289fd503796c01b8311c4b84 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:57:18 -0800 Subject: IB/addr: Store net_device type instead of translating to RDMA transport The struct rdma_dev_addr stores net_device address information: the source device address, destination hardware address, and broadcast address. For consistency, store the net_device type rather than converting it to the rdma_node_type. The type indicates the format of the various hardware addresses, which is what we're concerned with, and not the RDMA node type that the address may map to. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 13 +------------ drivers/infiniband/core/cma.c | 6 +++--- include/rdma/ib_addr.h | 3 ++- 3 files changed, 6 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index b59ba7ccef0e..de5fe161a1b9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -92,17 +91,7 @@ EXPORT_SYMBOL(rdma_addr_unregister_client); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr) { - switch (dev->type) { - case ARPHRD_INFINIBAND: - dev_addr->dev_type = RDMA_NODE_IB_CA; - break; - case ARPHRD_ETHER: - dev_addr->dev_type = RDMA_NODE_RNIC; - break; - default: - return -EADDRNOTAVAIL; - } - + dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 699ad12b3a2f..b305b5c17f8d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -330,11 +330,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) union ib_gid gid; int ret = -ENODEV; - switch (rdma_node_get_transport(dev_addr->dev_type)) { - case RDMA_TRANSPORT_IB: + switch (dev_addr->dev_type) { + case ARPHRD_INFINIBAND: ib_addr_get_sgid(dev_addr, &gid); break; - case RDMA_TRANSPORT_IWARP: + case ARPHRD_ETHER: iw_addr_get_sgid(dev_addr, &gid); break; default: diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 27f17cc2c919..3a39c55d2b9a 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -60,7 +61,7 @@ struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; - enum rdma_node_type dev_type; + unsigned short dev_type; int bound_dev_if; }; -- cgit v1.2.3 From 
6f8372b69c3198e06cecb1df2cb9682d0c55e657 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 13:26:06 -0800 Subject: RDMA/cm: fix loopback address support The RDMA CM is intended to support the use of a loopback address when establishing a connection; however, the behavior of the CM when loopback addresses are used is confusing and does not always work, depending on whether loopback was specified by the server, the client, or both. The defined behavior of rdma_bind_addr is to associate an RDMA device with an rdma_cm_id, as long as the user specified a non- zero address. (ie they weren't just trying to reserve a port) Currently, if the loopback address is passed to rdam_bind_addr, no device is associated with the rdma_cm_id. Fix this. If a loopback address is specified by the client as the destination address for a connection, it will fail to establish a connection. This is true even if the server is listing across all addresses or on the loopback address itself. The issue is that the server tries to translate the IP address carried in the REQ message to a local net_device address, which fails. The translation is not needed in this case, since the REQ carries the actual HW address that should be used. Finally, cleanup loopback support to be more transport neutral. Replace separate calls to get/set the sgid and dgid from the device address to a single call that behaves correctly depending on the format of the device address. And support both IPv4 and IPv6 address formats. Signed-off-by: Sean Hefty [ Fixed RDS build by s/ib_addr_get/rdma_addr_get/ - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 77 +++++++++++++++++++++++------------------- drivers/infiniband/core/ucma.c | 8 ++--- include/rdma/ib_addr.h | 31 ++++++----------- net/rds/ib.c | 4 +-- net/rds/iw.c | 4 +-- 5 files changed, 61 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b305b5c17f8d..38867a46d39e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -330,17 +330,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) union ib_gid gid; int ret = -ENODEV; - switch (dev_addr->dev_type) { - case ARPHRD_INFINIBAND: - ib_addr_get_sgid(dev_addr, &gid); - break; - case ARPHRD_ETHER: - iw_addr_get_sgid(dev_addr, &gid); - break; - default: - return -ENODEV; - } - + rdma_addr_get_sgid(dev_addr, &gid); list_for_each_entry(cma_dev, &dev_list, list) { ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); @@ -1032,11 +1022,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; - ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); - ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); - if (ret) - goto destroy_id; + if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { + rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; + rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); + ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey); + } else { + ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, + &rt->addr.dev_addr); + if (ret) + goto destroy_id; + } + rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -1071,10 +1067,12 @@ static struct rdma_id_private 
*cma_new_udp_id(struct rdma_cm_id *listen_id, cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); - ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); - if (ret) - goto err; + if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { + ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, + &id->route.addr.dev_addr); + if (ret) + goto err; + } id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -1565,8 +1563,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct sockaddr_in6 *sin6; memset(&path_rec, 0, sizeof path_rec); - ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); - ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); + rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); + rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; @@ -1781,7 +1779,11 @@ port_found: if (ret) goto out; - ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); + id_priv->id.route.addr.dev_addr.dev_type = + (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ? + ARPHRD_INFINIBAND : ARPHRD_ETHER; + + rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); @@ -1839,7 +1841,7 @@ out: static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; - struct sockaddr_in *src_in, *dst_in; + struct sockaddr *src, *dst; union ib_gid gid; int ret; @@ -1853,14 +1855,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) goto err; } - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); - ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); - if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) { - src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr; - dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr; - src_in->sin_family = dst_in->sin_family; - src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr; + src = (struct sockaddr *) &id_priv->id.route.addr.src_addr; + if (cma_zero_addr(src)) { + dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; + if ((src->sa_family = dst->sa_family) == AF_INET) { + ((struct sockaddr_in *) src)->sin_addr.s_addr = + ((struct sockaddr_in *) dst)->sin_addr.s_addr; + } else { + ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr, + &((struct sockaddr_in6 *) dst)->sin6_addr); + } } work->id = id_priv; @@ -2089,7 +2096,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) return -EINVAL; - if (!cma_any_addr(addr)) { + if (cma_loopback_addr(addr)) { + ret = cma_bind_loopback(id_priv); + } else if (!cma_zero_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); if (ret) goto err1; @@ -2108,7 +2117,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) return 0; err2: - if (!cma_any_addr(addr)) { + if (id_priv->cma_dev) { mutex_lock(&lock); cma_detach_from_dev(id_priv); mutex_unlock(&lock); @@ -2721,7 +2730,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, 
&rec.mgid); if (id_priv->id.ps == RDMA_PS_UDP) rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); - ib_addr_get_sgid(dev_addr, &rec.port_gid); + rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index f1cbd26a9de0..b2e16c332d5b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -563,10 +563,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, switch (route->num_paths) { case 0: dev_addr = &route->addr.dev_addr; - ib_addr_get_dgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].dgid); - ib_addr_get_sgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].sgid); + rdma_addr_get_dgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].dgid); + rdma_addr_get_sgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); break; case 2: diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 3a39c55d2b9a..fa0d52b8e622 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -122,40 +122,29 @@ static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } -static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { - memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid); + return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } -static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid); + memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } -static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid); + memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } -static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid); + memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } -static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) -{ - memcpy(gid, dev_addr->src_dev_addr, sizeof *gid); -} - -static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid); + memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } #endif /* IB_ADDR_H */ diff --git a/net/rds/ib.c b/net/rds/ib.c index 536ebe5d3f6b..3b8992361042 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -182,8 +182,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, ic = conn->c_transport_data; dev_addr = &ic->i_cm_id->route.addr.dev_addr; - ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); - ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + rdma_addr_get_dgid(dev_addr, (union ib_gid 
*) &iinfo->dst_gid); rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); iinfo->max_send_wr = ic->i_send_ring.w_nr; diff --git a/net/rds/iw.c b/net/rds/iw.c index db224f7c2937..b28fa8525b24 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -184,8 +184,8 @@ static int rds_iw_conn_info_visitor(struct rds_connection *conn, ic = conn->c_transport_data; dev_addr = &ic->i_cm_id->route.addr.dev_addr; - ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); - ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); iinfo->max_send_wr = ic->i_send_ring.w_nr; -- cgit v1.2.3 From 78c210efdefe07131f91ed512a3308b15bb14e2f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Aug 2009 15:41:34 -0400 Subject: Revert "knfsd: avoid overloading the CPU scheduler with enormous load averages" This reverts commit 59a252ff8c0f2fa32c896f69d56ae33e641ce7ad. This helps in an entirely cached workload but not necessarily in workloads that require waiting on disk. Conflicts: include/linux/sunrpc/svc.h net/sunrpc/svc_xprt.c Reported-by: Simon Kirby Tested-by: Jesper Krogh Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 3 --- net/sunrpc/svc_xprt.c | 31 +++++++++---------------------- 2 files changed, 9 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 52e8cb0a7569..d1567d627557 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -29,7 +29,6 @@ struct svc_pool_stats { unsigned long packets; unsigned long sockets_queued; unsigned long threads_woken; - unsigned long overloads_avoided; unsigned long threads_timedout; }; @@ -50,7 +49,6 @@ struct svc_pool { struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ - int sp_nwaking; /* number of threads woken but not yet active */ struct svc_pool_stats sp_stats; /* statistics on pool operation */ } ____cacheline_aligned_in_smp; @@ -284,7 +282,6 @@ struct svc_rqst { * cache pages */ wait_queue_head_t rq_wait; /* synchronization */ struct task_struct *rq_task; /* service thread */ - int rq_waking; /* 1 if thread is being woken */ }; /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index df124f78ee48..2c58b75a236f 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -16,8 +16,6 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT -#define SVC_MAX_WAKING 5 - static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); @@ -306,7 +304,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; - int thread_avail; if (!(xprt->xpt_flags & ((1<sp_lock); + if (!list_empty(&pool->sp_threads) && + !list_empty(&pool->sp_sockets)) + printk(KERN_ERR + "svc_xprt_enqueue: " + "threads and transports both waiting??\n"); + if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { /* Don't enqueue dead transports */ dprintk("svc: transport %p is dead, not enqueued\n", xprt); @@ -358,15 +361,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) } process: - /* Work out whether threads are available */ - thread_avail = !list_empty(&pool->sp_threads); /* threads are 
asleep */ - if (pool->sp_nwaking >= SVC_MAX_WAKING) { - /* too many threads are runnable and trying to wake up */ - thread_avail = 0; - pool->sp_stats.overloads_avoided++; - } - - if (thread_avail) { + if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, rq_list); @@ -381,8 +376,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); - rqstp->rq_waking = 1; - pool->sp_nwaking++; pool->sp_stats.threads_woken++; BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); @@ -651,11 +644,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) return -EINTR; spin_lock_bh(&pool->sp_lock); - if (rqstp->rq_waking) { - rqstp->rq_waking = 0; - pool->sp_nwaking--; - BUG_ON(pool->sp_nwaking < 0); - } xprt = svc_xprt_dequeue(pool); if (xprt) { rqstp->rq_xprt = xprt; @@ -1204,16 +1192,15 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) struct svc_pool *pool = p; if (p == SEQ_START_TOKEN) { - seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n"); + seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n"); return 0; } - seq_printf(m, "%u %lu %lu %lu %lu %lu\n", + seq_printf(m, "%u %lu %lu %lu %lu\n", pool->sp_id, pool->sp_stats.packets, pool->sp_stats.sockets_queued, pool->sp_stats.threads_woken, - pool->sp_stats.overloads_avoided, pool->sp_stats.threads_timedout); return 0; -- cgit v1.2.3 From ef61aae4ddf1dbd0e9b6ad21e2e57632a8fe76f6 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 7 Dec 2009 14:20:06 +0000 Subject: sh: add a start_transfer() callback to the LCDC driver This patch adds a ->start_transfer() callback to the driver sh_mobile_lcdcfb.c. The callback is used to program the LCDC panel in the case of one-shot mode. Needed by the LCD controller used on the KFR2R09 board. 
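For illustration, a board file might wire the new hook up roughly as follows. This is a minimal sketch, not the actual KFR2R09 code: the callback name, the 0x2c command value and the use of write_index() through the SYS-bus ops are assumptions made only to show the shape of a one-shot panel kick.

	static void example_start_transfer(void *board_data, void *sys_ops_handle,
					   struct sh_mobile_lcdc_sys_bus_ops *sys_ops)
	{
		/* hypothetical: tell the SYS-bus panel to start accepting
		 * frame data before the LCDC one-shot transfer is triggered */
		sys_ops->write_index(sys_ops_handle, 0x2c);
	}

The function is then assigned to .start_transfer in the board's sh_mobile_lcdc_board_cfg, next to the existing setup_sys/display_on/display_off hooks.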
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/video/sh_mobile_lcdcfb.c | 10 +++++++++- include/video/sh_mobile_lcdc.h | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index b4b5de930cf5..d346bbab6cad 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -281,6 +281,7 @@ static void sh_mobile_lcdc_deferred_io(struct fb_info *info, struct list_head *pagelist) { struct sh_mobile_lcdc_chan *ch = info->par; + struct sh_mobile_lcdc_board_cfg *bcfg = &ch->cfg.board_cfg; /* enable clocks before accessing hardware */ sh_mobile_lcdc_clk_on(ch->lcdc); @@ -305,10 +306,17 @@ static void sh_mobile_lcdc_deferred_io(struct fb_info *info, /* trigger panel update */ dma_map_sg(info->dev, ch->sglist, nr_pages, DMA_TO_DEVICE); + if (bcfg->start_transfer) + bcfg->start_transfer(bcfg->board_data, ch, + &sh_mobile_lcdc_sys_bus_ops); lcdc_write_chan(ch, LDSM2R, 1); dma_unmap_sg(info->dev, ch->sglist, nr_pages, DMA_TO_DEVICE); - } else + } else { + if (bcfg->start_transfer) + bcfg->start_transfer(bcfg->board_data, ch, + &sh_mobile_lcdc_sys_bus_ops); lcdc_write_chan(ch, LDSM2R, 1); + } } static void sh_mobile_lcdc_deferred_io_touch(struct fb_info *info) diff --git a/include/video/sh_mobile_lcdc.h b/include/video/sh_mobile_lcdc.h index 25144ab22b95..288205457713 100644 --- a/include/video/sh_mobile_lcdc.h +++ b/include/video/sh_mobile_lcdc.h @@ -50,6 +50,8 @@ struct sh_mobile_lcdc_board_cfg { void *board_data; int (*setup_sys)(void *board_data, void *sys_ops_handle, struct sh_mobile_lcdc_sys_bus_ops *sys_ops); + void (*start_transfer)(void *board_data, void *sys_ops_handle, + struct sh_mobile_lcdc_sys_bus_ops *sys_ops); void (*display_on)(void *board_data); void (*display_off)(void *board_data); }; -- cgit v1.2.3 From 876fba43cc810e3c37ce26995933f9547b83cb0e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 11 Nov 2009 15:22:15 +0100 Subject: ACPI: add const to acpi_check_resource_conflict() acpi_check_resource_conflict() doesn't change the resource it operates on, so the res parameter can be marked const. 
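As a usage sketch (hypothetical driver code, not part of this patch), the const qualifier now matches callers that only hold a read-only view of their resource; example_probe() and the choice of an I/O resource are assumptions:

	static int example_probe(struct platform_device *pdev)
	{
		const struct resource *res;

		res = platform_get_resource(pdev, IORESOURCE_IO, 0);
		if (!res)
			return -ENODEV;

		/* refuse to bind if an ACPI OperationRegion claims the range */
		if (acpi_check_resource_conflict(res))
			return -EBUSY;

		return 0;
	}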
Signed-off-by: Jean Delvare Signed-off-by: Len Brown --- drivers/acpi/osl.c | 2 +- include/linux/acpi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 7c1c59ea9ec6..02e8464e480f 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -1118,7 +1118,7 @@ __setup("acpi_enforce_resources=", acpi_enforce_resources_setup); /* Check for resource conflicts between ACPI OperationRegions and native * drivers */ -int acpi_check_resource_conflict(struct resource *res) +int acpi_check_resource_conflict(const struct resource *res) { struct acpi_res_list *res_list_elem; int ioport; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index dfcd920c3e54..c920d2def4d3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -240,7 +240,7 @@ extern int pnpacpi_disabled; #define PXM_INVAL (-1) #define NID_INVAL (-1) -int acpi_check_resource_conflict(struct resource *res); +int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, const char *name); -- cgit v1.2.3 From 55464d461bdcffc4422aebfb750eacf99e3c0f27 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Dec 2009 14:20:04 -0800 Subject: IB: Clarify the documentation of ib_post_send() Clarify the behavior of ib_post_send() when a list of work requests is passed in and an immediate error is returned. Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c179318edd92..09509edb1c5f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1425,6 +1425,11 @@ int ib_destroy_qp(struct ib_qp *qp); * @send_wr: A list of work requests to post on the send queue. * @bad_send_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. + * + * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate + * error is returned, the QP state shall not be affected, + * ib_post_send() will return an immediate error after queueing any + * earlier work requests in the list. */ static inline int ib_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, -- cgit v1.2.3 From cc9b2e9f6603190c009e5d2629ce8e3f99571346 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 26 Nov 2009 09:50:20 -0600 Subject: [SCSI] enclosure: fix oops while iterating enclosure_status array Based on patch originally by Jeff Mahoney enclosure_status is expected to be a NULL terminated array of strings but isn't actually NULL terminated. When writing an invalid value to /sys/class/enclosure/.../.../status, it goes off the end of the array and Oopses. Fix by making the assumption true and adding NULL at the end. 
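The terminator matters because the sysfs store path walks the array until it finds a match or hits the end; a sketch of that lookup (not the exact driver code) shows why the NULL entry stops the scan:

	int i;

	for (i = 0; enclosure_status[i]; i++)
		if (sysfs_streq(buf, enclosure_status[i]))
			break;
	if (!enclosure_status[i])
		return -EINVAL;	/* unknown value; previously this walked off the array */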
Reported-by: Artur Wojcik Signed-off-by: James Bottomley --- drivers/misc/enclosure.c | 1 + include/linux/enclosure.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index e9eae4a78402..1eac626e710a 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -391,6 +391,7 @@ static const char *const enclosure_status [] = { [ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed", [ENCLOSURE_STATUS_UNKNOWN] = "unknown", [ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable", + [ENCLOSURE_STATUS_MAX] = NULL, }; static const char *const enclosure_type [] = { diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index 90d1c2184112..9a33c5f7e126 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -42,6 +42,8 @@ enum enclosure_status { ENCLOSURE_STATUS_NOT_INSTALLED, ENCLOSURE_STATUS_UNKNOWN, ENCLOSURE_STATUS_UNAVAILABLE, + /* last element for counting purposes */ + ENCLOSURE_STATUS_MAX }; /* SFF-8485 activity light settings */ -- cgit v1.2.3 From 5d0961fd1f25e117f907f3af3aaa870637049252 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 1 Dec 2009 17:36:21 +0200 Subject: [SCSI] libosd: Fix blk_put_request locking again So libosd has decided to sacrifice some code simplicity for the sake of a clean API. One of these things is the possibility for users to call osd_end_request, in any condition at any state. This opens up some problems with calling blk_put_request when out-side of the completion callback but calling __blk_put_request when detecting a from-completion state. The current hack was working just fine until exofs decided to operate on all devices in parallel and wait for the sum of the requests, before deallocating all osd-requests at once. There are two new possible cases 1. All request in a group are deallocated as part of the last request's async-done, request_queue is locked. 2. All request in a group where executed asynchronously, but de-allocation was delayed to after the async-done, in the context of another thread. Async execution but request_queue is not locked. The solution I chose was to separate the deallocation of the osd_request which has the information users need, from the deallocation of the internal(2) requests which impose the locking problem. The internal block-requests are freed unconditionally inside the async-done-callback, when we know the queue is always locked. If at osd_end_request time we still have a bock-request, then we know it did not come from within an async-done-callback and we can call the regular blk_put_request. The internal requests were used for carrying error information after execution. This information is now copied to osd_request members for later analysis by user code. The external API and behaviour was unchanged, except now it really supports what was previously advertised. 
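In user terms, the change makes the following pattern safe; this is a rough sketch with error handling trimmed, and example_done() plus the completion variable are illustrative names, not code from the patch:

	static void example_done(struct osd_request *or, void *private)
	{
		/* by the time this runs, libosd has already dropped the
		 * internal block requests in the (queue-locked) completion
		 * context */
		complete(private);
	}

	/* submission side, process context */
	ret = osd_execute_request_async(or, example_done, &wait);
	/* ... possibly submit more requests to other devices ... */
	wait_for_completion(&wait);
	/* plain process context: osd_end_request() now only has the
	 * osd_request itself left to free, so no queue lock is needed */
	osd_end_request(or);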
Reported-by: Vineet Agarwal Signed-off-by: Boaz Harrosh Cc: Stable Tree Signed-off-by: James Bottomley --- drivers/scsi/osd/osd_initiator.c | 88 +++++++++++++++++++++------------------- include/scsi/osd_initiator.h | 5 ++- 2 files changed, 50 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 950202a70bcf..24223473f573 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -432,30 +432,23 @@ static void _osd_free_seg(struct osd_request *or __unused, seg->alloc_size = 0; } -static void _put_request(struct request *rq , bool is_async) +static void _put_request(struct request *rq) { - if (is_async) { - WARN_ON(rq->bio); - __blk_put_request(rq->q, rq); - } else { - /* - * If osd_finalize_request() was called but the request was not - * executed through the block layer, then we must release BIOs. - * TODO: Keep error code in or->async_error. Need to audit all - * code paths. - */ - if (unlikely(rq->bio)) - blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq)); - else - blk_put_request(rq); - } + /* + * If osd_finalize_request() was called but the request was not + * executed through the block layer, then we must release BIOs. + * TODO: Keep error code in or->async_error. Need to audit all + * code paths. + */ + if (unlikely(rq->bio)) + blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq)); + else + blk_put_request(rq); } void osd_end_request(struct osd_request *or) { struct request *rq = or->request; - /* IMPORTANT: make sure this agrees with osd_execute_request_async */ - bool is_async = (or->request->end_io_data == or); _osd_free_seg(or, &or->set_attr); _osd_free_seg(or, &or->enc_get_attr); @@ -463,20 +456,34 @@ void osd_end_request(struct osd_request *or) if (rq) { if (rq->next_rq) { - _put_request(rq->next_rq, is_async); + _put_request(rq->next_rq); rq->next_rq = NULL; } - _put_request(rq, is_async); + _put_request(rq); } _osd_request_free(or); } EXPORT_SYMBOL(osd_end_request); +static void _set_error_resid(struct osd_request *or, struct request *req, + int error) +{ + or->async_error = error; + or->req_errors = req->errors ? : error; + or->sense_len = req->sense_len; + if (or->out.req) + or->out.residual = or->out.req->resid_len; + if (or->in.req) + or->in.residual = or->in.req->resid_len; +} + int osd_execute_request(struct osd_request *or) { - return or->async_error = - blk_execute_rq(or->request->q, NULL, or->request, 0); + int error = blk_execute_rq(or->request->q, NULL, or->request, 0); + + _set_error_resid(or, or->request, error); + return error; } EXPORT_SYMBOL(osd_execute_request); @@ -484,15 +491,17 @@ static void osd_request_async_done(struct request *req, int error) { struct osd_request *or = req->end_io_data; - or->async_error = error; - - if (unlikely(error)) { - OSD_DEBUG("osd_request_async_done error recieved %d " - "errors 0x%x\n", error, req->errors); - if (!req->errors) /* don't miss out on this one */ - req->errors = error; + _set_error_resid(or, req, error); + if (req->next_rq) { + __blk_put_request(req->q, req->next_rq); + req->next_rq = NULL; } + __blk_put_request(req->q, req); + or->request = NULL; + or->in.req = NULL; + or->out.req = NULL; + if (or->async_done) or->async_done(or, or->async_private); else @@ -1489,21 +1498,18 @@ int osd_req_decode_sense_full(struct osd_request *or, #endif int ret; - if (likely(!or->request->errors)) { - osi->out_resid = 0; - osi->in_resid = 0; + if (likely(!or->req_errors)) return 0; - } osi = osi ? 
: &local_osi; memset(osi, 0, sizeof(*osi)); - ssdb = or->request->sense; - sense_len = or->request->sense_len; + ssdb = (typeof(ssdb))or->sense; + sense_len = or->sense_len; if ((sense_len < (int)sizeof(*ssdb) || !ssdb->sense_key)) { OSD_ERR("Block-layer returned error(0x%x) but " "sense_len(%u) || key(%d) is empty\n", - or->request->errors, sense_len, ssdb->sense_key); + or->req_errors, sense_len, ssdb->sense_key); goto analyze; } @@ -1525,7 +1531,7 @@ int osd_req_decode_sense_full(struct osd_request *or, "additional_code=0x%x async_error=%d errors=0x%x\n", osi->key, original_sense_len, sense_len, osi->additional_code, or->async_error, - or->request->errors); + or->req_errors); if (original_sense_len < sense_len) sense_len = original_sense_len; @@ -1695,10 +1701,10 @@ analyze: ret = -EIO; } - if (or->out.req) - osi->out_resid = or->out.req->resid_len ?: or->out.total_bytes; - if (or->in.req) - osi->in_resid = or->in.req->resid_len ?: or->in.total_bytes; + if (!or->out.residual) + or->out.residual = or->out.total_bytes; + if (!or->in.residual) + or->in.residual = or->in.total_bytes; return ret; } diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 39d6d1097153..a8f370126632 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -142,6 +142,7 @@ struct osd_request { struct _osd_io_info { struct bio *bio; u64 total_bytes; + u64 residual; struct request *req; struct _osd_req_data_segment *last_seg; u8 *pad_buff; @@ -150,12 +151,14 @@ struct osd_request { gfp_t alloc_flags; unsigned timeout; unsigned retries; + unsigned sense_len; u8 sense[OSD_MAX_SENSE_LEN]; enum osd_attributes_mode attributes_mode; osd_req_done_fn *async_done; void *async_private; int async_error; + int req_errors; }; static inline bool osd_req_is_ver1(struct osd_request *or) @@ -297,8 +300,6 @@ enum osd_err_priority { }; struct osd_sense_info { - u64 out_resid; /* Zero on success otherwise out residual */ - u64 in_resid; /* Zero on success otherwise in residual */ enum osd_err_priority osd_err_pri; int key; /* one of enum scsi_sense_keys */ -- cgit v1.2.3 From a88f6667078412e5eff37ead68a043ee0ec9f1da Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 10 Dec 2009 18:35:15 +0100 Subject: dmaengine: clarify the meaning of the DMA_CTRL_ACK flag DMA_CTRL_ACK's description applies to its clear state, not to its set state. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2b9f2ac7ed60..78784982b33e 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -74,7 +74,7 @@ enum dma_transaction_type { * control completion, and communicate status. * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of * this transaction - * @DMA_CTRL_ACK - the descriptor cannot be reused until the client + * @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client * acknowledges receipt, i.e. has has a chance to establish any dependency * chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) -- cgit v1.2.3 From 03889384cee7a198a79447c1ea6aca2c8e54d155 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Dec 2009 09:48:22 -0500 Subject: tracing: Add trace_dump_stack() I've been asked a few times about how to find out what is calling some location in the kernel. 
One way is to use dynamic function tracing and implement the func_stack_trace. But this only finds out who is calling a particular function. It does not tell you who is calling that function and entering a specific if conditional. I have myself implemented a quick version of trace_dump_stack() for this purpose a few times, and just needed it now. This is when I realized that this would be a good tool to have in the kernel like trace_printk(). Using trace_dump_stack() is similar to dump_stack() except that it writes to the trace buffer instead and can be used in critical locations. For example: @@ -5485,8 +5485,12 @@ need_resched_nonpreemptible: if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely(signal_pending_state(prev->state, prev))) prev->state = TASK_RUNNING; - else + else { deactivate_task(rq, prev, 1); + trace_printk("Deactivating task %s:%d\n", + prev->comm, prev->pid); + trace_dump_stack(); + } switch_count = &prev->nvcsw; } Produces: <...>-3249 [001] 296.105269: schedule: Deactivating task ntpd:3249 <...>-3249 [001] 296.105270: => schedule => schedule_hrtimeout_range => poll_schedule_timeout => do_select => core_sys_select => sys_select => system_call_fastpath Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 2 ++ kernel/trace/trace.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3fa4c590cf12..5ad4199fb073 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -492,6 +492,8 @@ extern int __trace_printk(unsigned long ip, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); +extern void trace_dump_stack(void); + /* * The double __builtin_constant_p is because gcc will give us an error * if we try to allocate the static variable to fmt if it is not a diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 88bd9ae2a9ed..f531301b7a3b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1151,6 +1151,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, __ftrace_trace_stack(tr->buffer, flags, skip, pc); } +/** + * trace_dump_stack - record a stack back trace in the trace buffer + */ +void trace_dump_stack(void) +{ + unsigned long flags; + + if (tracing_disabled || tracing_selftest_running) + return 0; + + local_save_flags(flags); + + /* skipping 3 traces, seems to get us at the caller of this function */ + __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); +} + void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { -- cgit v1.2.3 From 967c9ef9b8c3bdec1bd3a380edac19e0b9fbeadc Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 11 Dec 2009 22:00:57 -0800 Subject: Input: i8042 - allow installing platform filters for incoming data Some hardware (such as Dell laptops) signal a variety of events through the i8042 controller, even if these don't map to keyboard events. Add support for drivers to filter the i8042 event stream in order to respond to these events and (if appropriate) block them from entering the input stream. 
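A platform driver would then plug into the new hooks roughly like this; the 0x65 scancode and the function name are purely hypothetical, while the filter and (un)install signatures are the ones added by this patch:

	static bool example_i8042_filter(unsigned char data, unsigned char str,
					 struct serio *serio)
	{
		/* consume a hypothetical vendor hotkey byte, let
		 * everything else reach the input layer */
		if (data == 0x65)
			return true;

		return false;
	}

	/* driver init */
	err = i8042_install_filter(example_i8042_filter);

	/* driver exit */
	i8042_remove_filter(example_i8042_filter);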
Signed-off-by: Matthew Garrett Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042.c | 58 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/i8042.h | 18 +++++++++++++- 2 files changed, 72 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 634da68f7f35..d84a36e545f6 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -126,6 +126,8 @@ static unsigned char i8042_suppress_kbd_ack; static struct platform_device *i8042_platform_device; static irqreturn_t i8042_interrupt(int irq, void *dev_id); +static bool (*i8042_platform_filter)(unsigned char data, unsigned char str, + struct serio *serio); void i8042_lock_chip(void) { @@ -139,6 +141,48 @@ void i8042_unlock_chip(void) } EXPORT_SYMBOL(i8042_unlock_chip); +int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, + struct serio *serio)) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&i8042_lock, flags); + + if (i8042_platform_filter) { + ret = -EBUSY; + goto out; + } + + i8042_platform_filter = filter; + +out: + spin_unlock_irqrestore(&i8042_lock, flags); + return ret; +} +EXPORT_SYMBOL(i8042_install_filter); + +int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, + struct serio *port)) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&i8042_lock, flags); + + if (i8042_platform_filter != filter) { + ret = -EINVAL; + goto out; + } + + i8042_platform_filter = NULL; + +out: + spin_unlock_irqrestore(&i8042_lock, flags); + return ret; +} +EXPORT_SYMBOL(i8042_remove_filter); + /* * The i8042_wait_read() and i8042_wait_write functions wait for the i8042 to * be ready for reading values from it / writing values to it. @@ -373,7 +417,8 @@ static void i8042_stop(struct serio *serio) * It is called from i8042_interrupt and thus is running with interrupts * off and i8042_lock held. */ -static bool i8042_filter(unsigned char data, unsigned char str) +static bool i8042_filter(unsigned char data, unsigned char str, + struct serio *serio) { if (unlikely(i8042_suppress_kbd_ack)) { if ((~str & I8042_STR_AUXDATA) && @@ -384,6 +429,11 @@ static bool i8042_filter(unsigned char data, unsigned char str) } } + if (i8042_platform_filter && i8042_platform_filter(data, str, serio)) { + dbg("Filtered out by platfrom filter\n"); + return true; + } + return false; } @@ -396,6 +446,7 @@ static bool i8042_filter(unsigned char data, unsigned char str) static irqreturn_t i8042_interrupt(int irq, void *dev_id) { struct i8042_port *port; + struct serio *serio; unsigned long flags; unsigned char str, data; unsigned int dfl; @@ -462,18 +513,19 @@ static irqreturn_t i8042_interrupt(int irq, void *dev_id) } port = &i8042_ports[port_no]; + serio = port->exists ? port->serio : NULL; dbg("%02x <- i8042 (interrupt, %d, %d%s%s)", data, port_no, irq, dfl & SERIO_PARITY ? ", bad parity" : "", dfl & SERIO_TIMEOUT ? 
", timeout" : ""); - filtered = i8042_filter(data, str); + filtered = i8042_filter(data, str, serio); spin_unlock_irqrestore(&i8042_lock, flags); if (likely(port->exists && !filtered)) - serio_interrupt(port->serio, data, dfl); + serio_interrupt(serio, data, dfl); out: return IRQ_RETVAL(ret); diff --git a/include/linux/i8042.h b/include/linux/i8042.h index 60c3360ef6ad..9bf6870ee5f4 100644 --- a/include/linux/i8042.h +++ b/include/linux/i8042.h @@ -39,6 +39,10 @@ void i8042_lock_chip(void); void i8042_unlock_chip(void); int i8042_command(unsigned char *param, int command); bool i8042_check_port_owner(const struct serio *); +int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, + struct serio *serio)); +int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, + struct serio *serio)); #else @@ -52,7 +56,7 @@ void i8042_unlock_chip(void) int i8042_command(unsigned char *param, int command) { - return -ENOSYS; + return -ENODEV; } bool i8042_check_port_owner(const struct serio *serio) @@ -60,6 +64,18 @@ bool i8042_check_port_owner(const struct serio *serio) return false; } +int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, + struct serio *serio)) +{ + return -ENODEV; +} + +int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, + struct serio *serio)) +{ + return -ENODEV; +} + #endif #endif -- cgit v1.2.3 From 01fc0ac198eabcbf460e1ed058860a935b6c2c9a Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 19 Apr 2009 21:57:19 +0200 Subject: kbuild: move bounds.h to include/generated Signed-off-by: Sam Ravnborg Cc: Al Viro Signed-off-by: Michal Marek --- .gitignore | 1 - Kbuild | 2 +- Makefile | 2 +- include/linux/mmzone.h | 2 +- include/linux/page-flags.h | 2 +- kernel/bounds.c | 2 +- 6 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/.gitignore b/.gitignore index 946c7ec5c922..36d9cd6d4281 100644 --- a/.gitignore +++ b/.gitignore @@ -52,7 +52,6 @@ include/linux/autoconf.h include/linux/compile.h include/linux/version.h include/linux/utsrelease.h -include/linux/bounds.h include/generated # stgit generated dirs diff --git a/Kbuild b/Kbuild index f056b4feee51..1165d7a5ca4a 100644 --- a/Kbuild +++ b/Kbuild @@ -8,7 +8,7 @@ ##### # 1) Generate bounds.h -bounds-file := include/linux/bounds.h +bounds-file := include/generated/bounds.h always := $(bounds-file) targets := $(bounds-file) kernel/bounds.s diff --git a/Makefile b/Makefile index 07711786dc95..b58e9312ce30 100644 --- a/Makefile +++ b/Makefile @@ -1197,7 +1197,7 @@ MRPROPER_DIRS += include/config include2 usr/include include/generated MRPROPER_FILES += .config .config.old include/asm .version .old_version \ include/linux/autoconf.h include/linux/version.h \ include/linux/utsrelease.h \ - include/linux/bounds.h include/asm*/asm-offsets.h \ + include/asm*/asm-offsets.h \ Module.symvers Module.markers tags TAGS cscope* # clean - Delete most, but leave enough to build external modules diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6f7561730d88..30fe668c2542 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6b202b173955..ef36725aa515 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -8,7 +8,7 @@ #include #ifndef __GENERATING_BOUNDS_H #include -#include +#include #endif /* !__GENERATING_BOUNDS_H */ /* diff 
--git a/kernel/bounds.c b/kernel/bounds.c index 3c5301381837..98a51f26c136 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -12,7 +12,7 @@ void foo(void) { - /* The enum constants to put into include/linux/bounds.h */ + /* The enum constants to put into include/generated/bounds.h */ DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); /* End of constants */ -- cgit v1.2.3 From 98b8788ae91694499d1995035625bea16a4db0c4 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 18 Oct 2009 00:39:40 +0200 Subject: drop explicit include of autoconf.h kbuild.h forces include of autoconf.h on the commandline using -include - so we do not need to include the file explicit. Signed-off-by: Sam Ravnborg Signed-off-by: Michal Marek --- arch/cris/arch-v32/kernel/head.S | 1 - arch/cris/kernel/asm-offsets.c | 1 - arch/cris/kernel/vmlinux.lds.S | 1 - arch/ia64/kvm/asm-offsets.c | 1 - drivers/accessibility/braille/braille_console.c | 1 - drivers/hid/hid-lg.h | 2 -- drivers/platform/x86/compal-laptop.c | 1 - drivers/staging/iio/ring_sw.h | 1 - include/linux/mmdebug.h | 2 -- 9 files changed, 11 deletions(-) (limited to 'include') diff --git a/arch/cris/arch-v32/kernel/head.S b/arch/cris/arch-v32/kernel/head.S index 3db478eb5155..76266f80a5f1 100644 --- a/arch/cris/arch-v32/kernel/head.S +++ b/arch/cris/arch-v32/kernel/head.S @@ -10,7 +10,6 @@ * The macros found in mmu_defs_asm.h uses the ## concatenation operator, so * -traditional must not be used when assembling this file. */ -#include #include #include #include diff --git a/arch/cris/kernel/asm-offsets.c b/arch/cris/kernel/asm-offsets.c index ddd6fbbe75de..dd7b8e983221 100644 --- a/arch/cris/kernel/asm-offsets.c +++ b/arch/cris/kernel/asm-offsets.c @@ -1,6 +1,5 @@ #include #include -#include /* * Generate definitions needed by assembly language modules. diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index bbfda67d2907..d49d17d2a14f 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -8,7 +8,6 @@ * the kernel has booted. */ -#include #include #include diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c index 0c3564a7a033..9324c875caf5 100644 --- a/arch/ia64/kvm/asm-offsets.c +++ b/arch/ia64/kvm/asm-offsets.c @@ -22,7 +22,6 @@ * */ -#include #include #include diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c index d672cfe7ca59..cb423f5aef24 100644 --- a/drivers/accessibility/braille/braille_console.c +++ b/drivers/accessibility/braille/braille_console.c @@ -21,7 +21,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include #include #include #include diff --git a/drivers/hid/hid-lg.h b/drivers/hid/hid-lg.h index 27ae750ca878..bf31592eaf79 100644 --- a/drivers/hid/hid-lg.h +++ b/drivers/hid/hid-lg.h @@ -1,8 +1,6 @@ #ifndef __HID_LG_H #define __HID_LG_H -#include - #ifdef CONFIG_LOGITECH_FF int lgff_init(struct hid_device *hdev); #else diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index 11003bba10d3..1a387e79f719 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -51,7 +51,6 @@ #include #include #include -#include #define COMPAL_DRIVER_VERSION "0.2.6" diff --git a/drivers/staging/iio/ring_sw.h b/drivers/staging/iio/ring_sw.h index f0b86f02cd80..fd677f008365 100644 --- a/drivers/staging/iio/ring_sw.h +++ b/drivers/staging/iio/ring_sw.h @@ -29,7 +29,6 @@ * driver requests - some may support multiple options */ -#include #include "iio.h" #include "ring_generic.h" diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 8a5509877192..ee24ef8ab616 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -1,8 +1,6 @@ #ifndef LINUX_MM_DEBUG_H #define LINUX_MM_DEBUG_H 1 -#include - #ifdef CONFIG_DEBUG_VM #define VM_BUG_ON(cond) BUG_ON(cond) #else -- cgit v1.2.3 From 273b281fa22c293963ee3e6eec418f5dda2dbc83 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 18 Oct 2009 00:52:28 +0200 Subject: kbuild: move utsrelease.h to include/generated Fix up all users of utsrelease.h Signed-off-by: Sam Ravnborg Signed-off-by: Michal Marek --- .gitignore | 1 - Makefile | 5 ++--- arch/alpha/boot/bootp.c | 2 +- arch/alpha/boot/bootpz.c | 2 +- arch/alpha/boot/main.c | 2 +- arch/frv/kernel/setup.c | 2 +- arch/powerpc/platforms/52xx/efika.c | 2 +- arch/powerpc/platforms/amigaone/setup.c | 2 +- arch/powerpc/platforms/chrp/setup.c | 2 +- arch/powerpc/platforms/powermac/bootx_init.c | 2 +- arch/x86/boot/header.S | 2 +- arch/x86/boot/version.c | 2 +- drivers/staging/panel/panel.c | 2 +- include/linux/vermagic.h | 2 +- init/version.c | 2 +- kernel/kexec.c | 2 +- kernel/trace/trace.c | 2 +- 17 files changed, 17 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/.gitignore b/.gitignore index c6c19ea6ea96..002d5304968b 100644 --- a/.gitignore +++ b/.gitignore @@ -47,7 +47,6 @@ Module.symvers # include/config include/linux/version.h -include/linux/utsrelease.h include/generated # stgit generated dirs diff --git a/Makefile b/Makefile index 3bdd932e3d88..860224d7cbcf 100644 --- a/Makefile +++ b/Makefile @@ -968,7 +968,7 @@ endif # prepare2 creates a makefile if using a separate output directory prepare2: prepare3 outputmakefile -prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \ +prepare1: prepare2 include/linux/version.h include/generated/utsrelease.h \ include/config/auto.conf $(cmd_crmodverdir) @@ -1005,7 +1005,7 @@ endef include/linux/version.h: $(srctree)/Makefile FORCE $(call filechk,version.h) -include/linux/utsrelease.h: include/config/kernel.release FORCE +include/generated/utsrelease.h: include/config/kernel.release FORCE $(call filechk,utsrelease.h) PHONY += headerdep @@ -1151,7 +1151,6 @@ CLEAN_FILES += vmlinux System.map \ MRPROPER_DIRS += include/config usr/include include/generated MRPROPER_FILES += .config .config.old .version .old_version \ include/linux/version.h \ - include/linux/utsrelease.h \ Module.symvers Module.markers tags TAGS cscope* # clean - Delete most, but leave enough to build external modules diff --git a/arch/alpha/boot/bootp.c 
b/arch/alpha/boot/bootp.c index 3af21c789339..3c8d1b25c661 100644 --- a/arch/alpha/boot/bootp.c +++ b/arch/alpha/boot/bootp.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c index 1036b515e20c..ade3f129dc27 100644 --- a/arch/alpha/boot/bootpz.c +++ b/arch/alpha/boot/bootpz.c @@ -11,7 +11,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c index 89f3be071ae5..644b7db55438 100644 --- a/arch/alpha/boot/main.c +++ b/arch/alpha/boot/main.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index 55e4fab7c0bc..75cf7f4b2fa8 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -10,7 +10,7 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index bcc69e1f77c1..45c0cb9b67e6 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 9290a7a442d0..fb4eb0df054c 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index cd4ad9aea760..0a6f5ab8aab3 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index cf660916ae0b..9dd789a7370d 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b31cc54b4641..93e689f4bd86 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index 4d88763e39cb..2b15aa488ffb 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -13,7 +13,7 @@ */ #include "boot.h" -#include +#include #include const char kernel_version[] = diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c index 4ce399b6d237..f98a52448eae 100644 --- a/drivers/staging/panel/panel.c +++ b/drivers/staging/panel/panel.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 79b9837d9ca0..cf97b5b9d1fe 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -1,4 +1,4 @@ -#include +#include #include /* Simply sanity version stamp for modules. 
*/ diff --git a/init/version.c b/init/version.c index 82328aaca1ef..adff586401a5 100644 --- a/init/version.c +++ b/init/version.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #ifndef CONFIG_KALLSYMS diff --git a/kernel/kexec.c b/kernel/kexec.c index f336e2107f98..83f54e2a6eed 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 88bd9ae2a9ed..bfb1b64bfa9d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -12,7 +12,7 @@ * Copyright (C) 2004 William Lee Irwin III */ #include -#include +#include #include #include #include -- cgit v1.2.3 From 7a77080dbec28ab2bceb422398601dcc53c142ad Mon Sep 17 00:00:00 2001 From: Jie Zhang Date: Thu, 12 Nov 2009 17:59:56 +0800 Subject: net: add net_tstamp.h to headers_install include/linux/net_tstamp.h is userspace API for hardware time stamping of network packets. It should be exported to userspace. Signed-off-by: Jie Zhang Signed-off-by: Barry Song Signed-off-by: Patrick Ohly Signed-off-by: Michal Marek --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index f72914db2a11..756f831cbdd5 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -118,6 +118,7 @@ header-y += mtio.h header-y += ncp_no.h header-y += neighbour.h header-y += net_dropmon.h +header-y += net_tstamp.h header-y += netfilter_arp.h header-y += netrom.h header-y += nfs2.h -- cgit v1.2.3 From c5df7f775148723de39274537a886e9502eef336 Mon Sep 17 00:00:00 2001 From: Albert Herranz Date: Sat, 12 Dec 2009 06:31:54 +0000 Subject: powerpc: allow ioremap within reserved memory regions Add a flag to let a platform ioremap memory regions marked as reserved. This flag will be used later by the Nintendo Wii support code to allow ioremapping the I/O region sitting between MEM1 and MEM2 and marked as reserved RAM in the patch "wii: use both mem1 and mem2 as ram". This will no longer be needed when proper discontig memory support for 32-bit PowerPC is added to the kernel. Signed-off-by: Albert Herranz Acked-by: Benjamin Herrenschmidt Signed-off-by: Grant Likely --- arch/powerpc/mm/init_32.c | 5 +++++ arch/powerpc/mm/mmu_decl.h | 1 + arch/powerpc/mm/pgtable_32.c | 4 +++- include/linux/lmb.h | 1 + lib/lmb.c | 7 ++++++- 5 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 703c7c2e0d9f..4ec900af332f 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -82,6 +82,11 @@ extern struct task_struct *current_set[NR_CPUS]; int __map_without_bats; int __map_without_ltlbs; +/* + * This tells the system to allow ioremapping memory marked as reserved. 
+ */ +int __allow_ioremap_reserved; + /* max amount of low RAM to map in */ unsigned long __max_low_memory = MAX_LOW_MEM; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 9aa39fe74f8a..34dacc32250d 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -115,6 +115,7 @@ extern void settlbcam(int index, unsigned long virt, phys_addr_t phys, extern void invalidate_tlbcam_entry(int index); extern int __map_without_bats; +extern int __allow_ioremap_reserved; extern unsigned long ioremap_base; extern unsigned int rtas_data, rtas_size; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index b55bbe87acb8..177e4038b43c 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -191,7 +192,8 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, * Don't allow anybody to remap normal RAM that we're using. * mem_init() sets high_memory so only do the check after that. */ - if (mem_init_done && (p < virt_to_phys(high_memory))) { + if (mem_init_done && (p < virt_to_phys(high_memory)) && + !(__allow_ioremap_reserved && lmb_is_region_reserved(p, size))) { printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n", (unsigned long long)p, __builtin_return_address(0)); return NULL; diff --git a/include/linux/lmb.h b/include/linux/lmb.h index 2442e3f3d033..ef82b8fcbddb 100644 --- a/include/linux/lmb.h +++ b/include/linux/lmb.h @@ -54,6 +54,7 @@ extern u64 __init lmb_phys_mem_size(void); extern u64 lmb_end_of_DRAM(void); extern void __init lmb_enforce_memory_limit(u64 memory_limit); extern int __init lmb_is_reserved(u64 addr); +extern int lmb_is_region_reserved(u64 base, u64 size); extern int lmb_find(struct lmb_property *res); extern void lmb_dump_all(void); diff --git a/lib/lmb.c b/lib/lmb.c index 0343c05609f0..9cee17142b2c 100644 --- a/lib/lmb.c +++ b/lib/lmb.c @@ -263,7 +263,7 @@ long __init lmb_reserve(u64 base, u64 size) return lmb_add_region(_rgn, base, size); } -long __init lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size) +long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size) { unsigned long i; @@ -493,6 +493,11 @@ int __init lmb_is_reserved(u64 addr) return 0; } +int lmb_is_region_reserved(u64 base, u64 size) +{ + return lmb_overlaps_region(&lmb.reserved, base, size); +} + /* * Given a , find which memory regions belong to this range. * Adjust the request and return a contiguous chunk. -- cgit v1.2.3 From 4819568f23a8bef0ca99b740ca60fe2450ab0aac Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 12 Dec 2009 13:06:13 -0800 Subject: ftrace.h: Use common pr_info fmt string Reduces fmt string space a bit. 
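The saving comes from the macro being expanded once per trace event: with #call pasted into the literal, every event generated its own full-length string, whereas with "%s" all expansions share the one long literal and only the short event-name string differs. For a hypothetical sched_switch expansion:

	/* before: one distinct long literal per event */
	pr_info("event trace: Could not activate trace point probe to sched_switch\n");
	/* after: all events share this one literal */
	pr_info("event trace: Could not activate trace point probe to %s\n", "sched_switch");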
Signed-off-by: Joe Perches Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1260651974.2637.4.camel@Joe-Laptop.home> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index d1b3de9c1a71..c4eca380204e 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -733,7 +733,7 @@ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ ret = register_trace_##call(ftrace_raw_event_##call); \ if (ret) \ pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ + "probe to %s\n", #call); \ return ret; \ } \ \ -- cgit v1.2.3 From 87d9b4e1c52867a45331a9a5495f6448e0c68b23 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:14:20 +0800 Subject: tracing: Extract duplicate ftrace_raw_init_event_foo() Use a generic trace_event_raw_init() function for all event's raw_init callbacks (but kprobes) instead of defining the same version for each of these. This shrinks the kernel code: text data bss dec hex filename 5355293 1961928 7103260 14420481 dc0a01 vmlinux.o.old 5346802 1961864 7103260 14411926 dbe896 vmlinux.o raw_init can't be removed, because ftrace events and kprobe events use different raw_init callbacks. Though it's possible to totally remove raw_init, I choose to leave it as it is for now. Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Jason Baron Cc: Ingo Molnar LKML-Reference: <4B1DC48C.7080603@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 1 + include/linux/syscalls.h | 4 ++-- include/trace/ftrace.h | 35 ++++------------------------------- kernel/trace/trace_events.c | 14 ++++++++++++++ 4 files changed, 21 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 38f8d6553831..ea44b8911094 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -158,6 +158,7 @@ enum { FILTER_PTR_STRING, }; +extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_common_fields(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index bc70c5810fec..94ac28437bef 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -145,7 +145,7 @@ struct perf_event_attr; .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = init_syscall_trace, \ + .raw_init = trace_event_raw_init, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -167,7 +167,7 @@ struct perf_event_attr; .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = init_syscall_trace, \ + .raw_init = trace_event_raw_init, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c4eca380204e..6055b0604c86 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -623,23 +623,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * .trace = ftrace_raw_output_, <-- stage 2 * }; * - * static int ftrace_raw_init_event_(struct ftrace_event_call *unused) - * 
{ - * int id; - * - * id = register_ftrace_event(&ftrace_event_type_); - * if (!id) - * return -ENODEV; - * event_.id = id; - * return 0; - * } - * * static struct ftrace_event_call __used * __attribute__((__aligned__(4))) * __attribute__((section("_ftrace_events"))) event_ = { * .name = "", * .system = "", - * .raw_init = ftrace_raw_init_event_, + * .raw_init = trace_event_raw_init, * .regfunc = ftrace_reg_event_, * .unregfunc = ftrace_unreg_event_, * .show_format = ftrace_format_, @@ -647,9 +636,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * */ -#undef TP_FMT -#define TP_FMT(fmt, args...) fmt "\n", ##args - #ifdef CONFIG_EVENT_PROFILE #define _TRACE_PROFILE_INIT(call) \ @@ -744,19 +730,7 @@ static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ \ static struct trace_event ftrace_event_type_##call = { \ .trace = ftrace_raw_output_##call, \ -}; \ - \ -static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ -{ \ - int id; \ - \ - id = register_ftrace_event(&ftrace_event_type_##call); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - INIT_LIST_HEAD(&event_##call.fields); \ - return 0; \ -} +}; #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ @@ -776,7 +750,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ .event = &ftrace_event_type_##call, \ - .raw_init = ftrace_raw_init_event_##call, \ + .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##template, \ @@ -793,7 +767,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ .event = &ftrace_event_type_##call, \ - .raw_init = ftrace_raw_init_event_##call, \ + .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ @@ -953,7 +927,6 @@ end: \ perf_swevent_put_recursion_context(rctx); \ end_recursion: \ local_irq_restore(irq_flags); \ - \ } #undef DEFINE_EVENT diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1d18315dc836..8ed66e0d476b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -105,6 +105,20 @@ void trace_destroy_fields(struct ftrace_event_call *call) } } +int trace_event_raw_init(struct ftrace_event_call *call) +{ + int id; + + id = register_ftrace_event(call->event); + if (!id) + return -ENODEV; + call->id = id; + INIT_LIST_HEAD(&call->fields); + + return 0; +} +EXPORT_SYMBOL_GPL(trace_event_raw_init); + static void ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { -- cgit v1.2.3 From 614a71a26ba3d97e9fa85649db69a682b78e407d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:14:36 +0800 Subject: tracing: Pull up calls to trace_define_common_fields() Call trace_define_common_fields() in event_create_dir() only. 
This avoids trace events to handle it from their define_fields callbacks and shrinks the kernel code size: text data bss dec hex filename 5346802 1961864 7103260 14411926 dbe896 vmlinux.o.old 5345151 1961864 7103260 14410275 dbe223 vmlinux.o Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Ingo Molnar Cc: Jason Baron Cc: Masami Hiramatsu LKML-Reference: <4B1DC49C.8000107@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 1 - include/trace/ftrace.h | 4 ---- kernel/trace/trace_events.c | 7 ++++--- kernel/trace/trace_export.c | 4 ---- kernel/trace/trace_kprobe.c | 8 -------- kernel/trace/trace_syscalls.c | 8 -------- 6 files changed, 4 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ea44b8911094..db97c64ce0e9 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -159,7 +159,6 @@ enum { }; extern int trace_event_raw_init(struct ftrace_event_call *call); -extern int trace_define_common_fields(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 6055b0604c86..2af2f7a2c1bd 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -436,10 +436,6 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ struct ftrace_raw_##call field; \ int ret; \ \ - ret = trace_define_common_fields(event_call); \ - if (ret) \ - return ret; \ - \ tstruct; \ \ return ret; \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8ed66e0d476b..97b0b3aa166d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field); if (ret) \ return ret; -int trace_define_common_fields(struct ftrace_event_call *call) +static int trace_define_common_fields(struct ftrace_event_call *call) { int ret; struct trace_entry ent; @@ -91,7 +91,6 @@ int trace_define_common_fields(struct ftrace_event_call *call) return ret; } -EXPORT_SYMBOL_GPL(trace_define_common_fields); void trace_destroy_fields(struct ftrace_event_call *call) { @@ -927,7 +926,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, id); if (call->define_fields) { - ret = call->define_fields(call); + ret = trace_define_common_fields(call); + if (!ret) + ret = call->define_fields(call); if (ret < 0) { pr_warning("Could not initialize trace point" " events/%s\n", call->name); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index dff8c84ddf17..458e5bfe26d0 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -184,10 +184,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ struct struct_name field; \ int ret; \ \ - ret = trace_define_common_fields(event_call); \ - if (ret) \ - return ret; \ - \ tstruct; \ \ return ret; \ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index aff5f80b59b8..e3c80e925896 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1113,10 +1113,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) struct kprobe_trace_entry field; struct trace_probe *tp = (struct trace_probe *)event_call->data; - ret = trace_define_common_fields(event_call); - if (!ret) - return ret; - DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 
DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); /* Set argument names as fields */ @@ -1131,10 +1127,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) struct kretprobe_trace_entry field; struct trace_probe *tp = (struct trace_probe *)event_call->data; - ret = trace_define_common_fields(event_call); - if (!ret) - return ret; - DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 57501d90096a..b957edd0ca3b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -217,10 +217,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) int i; int offset = offsetof(typeof(trace), args); - ret = trace_define_common_fields(call); - if (ret) - return ret; - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); if (ret) return ret; @@ -241,10 +237,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) struct syscall_trace_exit trace; int ret; - ret = trace_define_common_fields(call); - if (ret) - return ret; - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); if (ret) return ret; -- cgit v1.2.3 From 3b8e4273814a7f9e9a74ece517d9206fea919aaa Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:14:52 +0800 Subject: tracing: Move a printk out of ftrace_raw_reg_event_foo() Move the printk from each ftrace_raw_reg_event_foo() to its caller ftrace_event_enable_disable(). This avoids each regfunc trace event callbacks to handle a same error report that can be carried from the caller. See how much space this saves: text data bss dec hex filename 5345151 1961864 7103260 14410275 dbe223 vmlinux.o.old 5331487 1961864 7103260 14396611 dbacc3 vmlinux.o Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Jason Baron LKML-Reference: <4B1DC4AC.802@cn.fujitsu.com> [start cmdline record before calling regfunc to avoid lost window of pid to comm resolution] Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 16 ++-------------- kernel/trace/trace_events.c | 20 +++++++++++++++----- kernel/trace/trace_syscalls.c | 10 ++-------- 3 files changed, 19 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 2af2f7a2c1bd..0c21af85211c 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -555,13 +555,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * * static int ftrace_reg_event_(struct ftrace_event_call *unused) * { - * int ret; - * - * ret = register_trace_(ftrace_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; + * return register_trace_(ftrace_event_); * } * * static void ftrace_unreg_event_(struct ftrace_event_call *unused) @@ -710,13 +704,7 @@ static void ftrace_raw_event_##call(proto) \ \ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ { \ - int ret; \ - \ - ret = register_trace_##call(ftrace_raw_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to %s\n", #call); \ - return ret; \ + return register_trace_##call(ftrace_raw_event_##call); \ } \ \ static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 97b0b3aa166d..189b09baf4fb 100644 --- 
a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -118,9 +118,11 @@ int trace_event_raw_init(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_event_raw_init); -static void ftrace_event_enable_disable(struct ftrace_event_call *call, +static int ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { + int ret = 0; + switch (enable) { case 0: if (call->enabled) { @@ -131,12 +133,20 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, break; case 1: if (!call->enabled) { - call->enabled = 1; tracing_start_cmdline_record(); - call->regfunc(call); + ret = call->regfunc(call); + if (ret) { + tracing_stop_cmdline_record(); + pr_info("event trace: Could not enable event " + "%s\n", call->name); + break; + } + call->enabled = 1; } break; } + + return ret; } static void ftrace_clear_events(void) @@ -415,7 +425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, case 0: case 1: mutex_lock(&event_mutex); - ftrace_event_enable_disable(call, val); + ret = ftrace_event_enable_disable(call, val); mutex_unlock(&event_mutex); break; @@ -425,7 +435,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, *ppos += cnt; - return cnt; + return ret ? ret : cnt; } static ssize_t diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b957edd0ca3b..75289f372dd2 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -325,10 +325,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) mutex_lock(&syscall_trace_lock); if (!sys_refcount_enter) ret = register_trace_sys_enter(ftrace_syscall_enter); - if (ret) { - pr_info("event trace: Could not activate" - "syscall entry trace point"); - } else { + if (!ret) { set_bit(num, enabled_enter_syscalls); sys_refcount_enter++; } @@ -362,10 +359,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) mutex_lock(&syscall_trace_lock); if (!sys_refcount_exit) ret = register_trace_sys_exit(ftrace_syscall_exit); - if (ret) { - pr_info("event trace: Could not activate" - "syscall exit trace point"); - } else { + if (!ret) { set_bit(num, enabled_exit_syscalls); sys_refcount_exit++; } -- cgit v1.2.3 From e00bf2ec60605eb95687b7a0c3b83c87c48541dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:17:29 +0800 Subject: tracing: Change event->profile_count to be int type Like total_profile_count, struct ftrace_event_call::profile_count is protected by event_mutex, so it doesn't need to be atomic_t. 
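For illustration only, the locking pattern this relies on can be sketched as a plain counter whose every update is serialized by one mutex; all names below (example_mutex, example_count, the enable/disable callbacks) are hypothetical and not taken from the patch itself:

#include <linux/mutex.h>

/* Illustrative sketch: a plain int refcount is safe when every update
 * happens under one mutex (event_mutex plays that role in the kernel). */
static DEFINE_MUTEX(example_mutex);
static int example_count;	/* only touched under example_mutex */

static int example_ref(int (*enable)(void))
{
	int ret = 0;

	mutex_lock(&example_mutex);
	if (example_count++ == 0) {
		ret = enable();			/* first reference enables */
		if (ret)
			example_count--;	/* roll back on failure */
	}
	mutex_unlock(&example_mutex);
	return ret;
}

static void example_unref(void (*disable)(void))
{
	mutex_lock(&example_mutex);
	if (--example_count == 0)
		disable();			/* last reference disables */
	mutex_unlock(&example_mutex);
}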
Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Jason Baron Cc: Masami Hiramatsu Cc: Peter Zijlstra LKML-Reference: <4B1DC549.5010705@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 2 +- include/linux/syscalls.h | 2 -- include/trace/ftrace.h | 1 - kernel/trace/trace_event_profile.c | 6 +++--- kernel/trace/trace_kprobe.c | 1 - 5 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index db97c64ce0e9..2233c98d80df 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -131,7 +131,7 @@ struct ftrace_event_call { void *mod; void *data; - atomic_t profile_count; + int profile_count; int (*profile_enable)(struct ftrace_event_call *); void (*profile_disable)(struct ftrace_event_call *); }; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 94ac28437bef..72d69860d901 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -102,12 +102,10 @@ struct perf_event_attr; #ifdef CONFIG_EVENT_PROFILE #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ - .profile_count = ATOMIC_INIT(-1), \ .profile_enable = prof_sysenter_enable, \ .profile_disable = prof_sysenter_disable, #define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ - .profile_count = ATOMIC_INIT(-1), \ .profile_enable = prof_sysexit_enable, \ .profile_disable = prof_sysexit_disable, #else diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0c21af85211c..73523151a731 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -629,7 +629,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #ifdef CONFIG_EVENT_PROFILE #define _TRACE_PROFILE_INIT(call) \ - .profile_count = ATOMIC_INIT(-1), \ .profile_enable = ftrace_profile_enable_##call, \ .profile_disable = ftrace_profile_disable_##call, diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index d9c60f80aa0d..9e25573242cf 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -25,7 +25,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) char *buf; int ret = -ENOMEM; - if (atomic_inc_return(&event->profile_count)) + if (event->profile_count++ > 0) return 0; if (!total_profile_count) { @@ -56,7 +56,7 @@ fail_buf_nmi: perf_trace_buf = NULL; } fail_buf: - atomic_dec(&event->profile_count); + event->profile_count--; return ret; } @@ -83,7 +83,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) { char *buf, *nmi_buf; - if (!atomic_add_negative(-1, &event->profile_count)) + if (--event->profile_count > 0) return; event->profile_disable(event); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index e3c80e925896..6ed223447a3f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1426,7 +1426,6 @@ static int register_probe_event(struct trace_probe *tp) call->unregfunc = probe_event_disable; #ifdef CONFIG_EVENT_PROFILE - atomic_set(&call->profile_count, -1); call->profile_enable = probe_profile_enable; call->profile_disable = probe_profile_disable; #endif -- cgit v1.2.3 From c7cd606f60e7679c7f9eee7010f02a6f000209c1 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 12 Dec 2009 04:13:21 +0000 Subject: can: Fix data length code handling in rx path A valid CAN dataframe can have a data length code (DLC) of 0 .. 8 data bytes. When reading the CAN controllers register the 4-bit value may contain values from 0 .. 
15 which may exceed the reserved space in the socket buffer! The ISO 11898-1 Chapter 8.4.2.3 (DLC field) says that register values > 8 should be reduced to 8 without any error reporting or frame drop. This patch introduces a new helper macro to cast a given 4-bit data length code (dlc) to __u8 and ensure the DLC value to be max. 8 bytes. The different handlings in the rx path of the CAN netdevice drivers are fixed. Signed-off-by: Oliver Hartkopp Signed-off-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- drivers/net/can/at91_can.c | 2 +- drivers/net/can/bfin_can.c | 2 +- drivers/net/can/mcp251x.c | 13 +++---------- drivers/net/can/mscan/mscan.c | 3 ++- drivers/net/can/sja1000/sja1000.c | 18 ++++++++---------- drivers/net/can/ti_hecc.c | 2 +- drivers/net/can/usb/ems_usb.c | 2 +- include/linux/can/dev.h | 9 +++++++++ 8 files changed, 26 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index cbe3fce53e3b..d0ec17878ffc 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -474,7 +474,7 @@ static void at91_read_mb(struct net_device *dev, unsigned int mb, reg_msr = at91_read(priv, AT91_MSR(mb)); if (reg_msr & AT91_MSR_MRTR) cf->can_id |= CAN_RTR_FLAG; - cf->can_dlc = min_t(__u8, (reg_msr >> 16) & 0xf, 8); + cf->can_dlc = get_can_dlc((reg_msr >> 16) & 0xf); *(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb)); *(u32 *)(cf->data + 4) = at91_read(priv, AT91_MDH(mb)); diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c index c7fc1de28173..0ec1524523cc 100644 --- a/drivers/net/can/bfin_can.c +++ b/drivers/net/can/bfin_can.c @@ -392,7 +392,7 @@ static void bfin_can_rx(struct net_device *dev, u16 isrc) cf->can_id |= CAN_RTR_FLAG; /* get data length code */ - cf->can_dlc = bfin_read16(®->chl[obj].dlc); + cf->can_dlc = get_can_dlc(bfin_read16(®->chl[obj].dlc) & 0xF); /* get payload */ for (i = 0; i < 8; i += 2) { diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index 78b1b69b2921..9c5a1537939c 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -403,9 +403,8 @@ static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf, for (i = 1; i < RXBDAT_OFF; i++) buf[i] = mcp251x_read_reg(spi, RXBCTRL(buf_idx) + i); - len = buf[RXBDLC_OFF] & RXBDLC_LEN_MASK; - if (len > 8) - len = 8; + + len = get_can_dlc(buf[RXBDLC_OFF] & RXBDLC_LEN_MASK); for (; i < (RXBDAT_OFF + len); i++) buf[i] = mcp251x_read_reg(spi, RXBCTRL(buf_idx) + i); } else { @@ -455,13 +454,7 @@ static void mcp251x_hw_rx(struct spi_device *spi, int buf_idx) (buf[RXBSIDL_OFF] >> RXBSIDL_SHIFT); } /* Data length */ - frame->can_dlc = buf[RXBDLC_OFF] & RXBDLC_LEN_MASK; - if (frame->can_dlc > 8) { - dev_warn(&spi->dev, "invalid frame recevied\n"); - priv->net->stats.rx_errors++; - dev_kfree_skb(skb); - return; - } + frame->can_dlc = get_can_dlc(buf[RXBDLC_OFF] & RXBDLC_LEN_MASK); memcpy(frame->data, buf + RXBDAT_OFF, frame->can_dlc); priv->net->stats.rx_packets++; diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index bb06dfb58f25..07346f880ca6 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -297,7 +297,8 @@ static void mscan_get_rx_frame(struct net_device *dev, struct can_frame *frame) frame->can_id |= can_id >> 1; if (can_id & 1) frame->can_id |= CAN_RTR_FLAG; - frame->can_dlc = in_8(®s->rx.dlr) & 0xf; + + frame->can_dlc = get_can_dlc(in_8(®s->rx.dlr) & 0xf); if (!(frame->can_id & CAN_RTR_FLAG)) { void __iomem *data = 
®s->rx.dsr1_0; diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index b4ba88a31075..542a4f7255b4 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -293,15 +293,14 @@ static void sja1000_rx(struct net_device *dev) uint8_t fi; uint8_t dreg; canid_t id; - uint8_t dlc; int i; + /* create zero'ed CAN frame buffer */ skb = alloc_can_skb(dev, &cf); if (skb == NULL) return; fi = priv->read_reg(priv, REG_FI); - dlc = fi & 0x0F; if (fi & FI_FF) { /* extended frame format (EFF) */ @@ -318,16 +317,15 @@ static void sja1000_rx(struct net_device *dev) | (priv->read_reg(priv, REG_ID2) >> 5); } - if (fi & FI_RTR) + if (fi & FI_RTR) { id |= CAN_RTR_FLAG; + } else { + cf->can_dlc = get_can_dlc(fi & 0x0F); + for (i = 0; i < cf->can_dlc; i++) + cf->data[i] = priv->read_reg(priv, dreg++); + } cf->can_id = id; - cf->can_dlc = dlc; - for (i = 0; i < dlc; i++) - cf->data[i] = priv->read_reg(priv, dreg++); - - while (i < 8) - cf->data[i++] = 0; /* release receive buffer */ priv->write_reg(priv, REG_CMR, CMD_RRB); @@ -335,7 +333,7 @@ static void sja1000_rx(struct net_device *dev) netif_rx(skb); stats->rx_packets++; - stats->rx_bytes += dlc; + stats->rx_bytes += cf->can_dlc; } static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 07e8016b17ec..5c993c2da528 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -552,7 +552,7 @@ static int ti_hecc_rx_pkt(struct ti_hecc_priv *priv, int mbxno) data = hecc_read_mbx(priv, mbxno, HECC_CANMCF); if (data & HECC_CANMCF_RTR) cf->can_id |= CAN_RTR_FLAG; - cf->can_dlc = data & 0xF; + cf->can_dlc = get_can_dlc(data & 0xF); data = hecc_read_mbx(priv, mbxno, HECC_CANMDL); *(u32 *)(cf->data) = cpu_to_be32(data); if (cf->can_dlc > 4) { diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 591eb0eb1c2b..efbb05c71bf4 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -316,7 +316,7 @@ static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg) return; cf->can_id = le32_to_cpu(msg->msg.can_msg.id); - cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8); + cf->can_dlc = get_can_dlc(msg->msg.can_msg.length & 0xF); if (msg->type == CPC_MSG_TYPE_EXT_CAN_FRAME || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME) diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 1ed2a5cc03f5..3db7767d2a17 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -51,6 +51,15 @@ struct can_priv { struct sk_buff **echo_skb; }; +/* + * get_can_dlc(value) - helper macro to cast a given data length code (dlc) + * to __u8 and ensure the dlc value to be max. 8 bytes. + * + * To be used in the CAN netdriver receive path to ensure conformance with + * ISO 11898-1 Chapter 8.4.2.3 (DLC field) + */ +#define get_can_dlc(i) (min_t(__u8, (i), 8)) + struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max); void free_candev(struct net_device *dev); -- cgit v1.2.3 From d24deb2580823ab0b8425790c6f5d18e2ff749d8 Mon Sep 17 00:00:00 2001 From: Gertjan van Wingerde Date: Fri, 4 Dec 2009 23:46:54 +0100 Subject: mac80211: Add define for TX headroom reserved by mac80211 itself. Add a definition of the amount of TX headroom reserved by mac80211 itself for its own purposes. Also add BUILD_BUG_ON to validate the value. This define can then be used by drivers to request additional TX headroom in the most efficient manner. 
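As a hedged illustration (not part of this patch; the driver name, callback and descriptor length are made up), a driver could take the new constant into account when announcing its headroom needs, knowing that mac80211 reserves at least this much for its own TX status path:

#include <linux/kernel.h>
#include <net/mac80211.h>

#define MYDRV_TXDESC_LEN 24	/* hypothetical device-specific TX descriptor */

/* Illustrative sketch: request enough headroom for the device descriptor
 * while never asking for less than mac80211 reserves for TX status. */
static void mydrv_set_tx_headroom(struct ieee80211_hw *hw)
{
	hw->extra_tx_headroom = max_t(unsigned int, MYDRV_TXDESC_LEN,
				      IEEE80211_TX_STATUS_HEADROOM);
}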
Signed-off-by: Gertjan van Wingerde Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ net/mac80211/main.c | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2aff4906b2ae..538d6b761887 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1737,6 +1737,12 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw, local_bh_enable(); } +/* + * The TX headroom reserved by mac80211 for its own tx_status functions. + * This is enough for the radiotap header. + */ +#define IEEE80211_TX_STATUS_HEADROOM 13 + /** * ieee80211_tx_status - transmit status callback * diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8116d1a96a4a..0d2d94881f1f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -515,6 +515,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) * and we need some headroom for passing the frame to monitor * interfaces, but never both at the same time. */ + BUILD_BUG_ON(IEEE80211_TX_STATUS_HEADROOM != + sizeof(struct ieee80211_tx_status_rtap_hdr)); local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom, sizeof(struct ieee80211_tx_status_rtap_hdr)); -- cgit v1.2.3 From be1cb8689c480228ffd2e4bfccc0dab7156cd9ea Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 14 Dec 2009 22:07:45 +0000 Subject: drm/ttm: Add more driver type enums Signed-off-by: Jakob Bornecrantz Signed-off-by: Dave Airlie --- include/drm/ttm/ttm_object.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h index 703ca4db0a29..0d9db099978b 100644 --- a/include/drm/ttm/ttm_object.h +++ b/include/drm/ttm/ttm_object.h @@ -77,7 +77,11 @@ enum ttm_object_type { ttm_buffer_type, ttm_lock_type, ttm_driver_type0 = 256, - ttm_driver_type1 + ttm_driver_type1, + ttm_driver_type2, + ttm_driver_type3, + ttm_driver_type4, + ttm_driver_type5 }; struct ttm_object_file; -- cgit v1.2.3 From fb1d9738ca053ea8afa5e86af6463155f983b01c Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 10 Dec 2009 00:19:58 +0000 Subject: drm/vmwgfx: Add DRM driver for VMware Virtual GPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the vmwgfx driver for the VWware Virtual GPU aka SVGA. The driver is under staging the same as Nouveau and Radeon KMS. Hopefully the 2D ioctls are bug free and don't need changing, so that part of the API should be stable. But there there is a pretty big chance that the 3D API will change in the future. 
Signed-off-by: Thomas Hellström Signed-off-by: Jakob Bornecrantz Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/vmwgfx/Kconfig | 13 + drivers/gpu/drm/vmwgfx/Makefile | 9 + drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 229 ++++++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 735 ++++++++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 511 +++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 516 +++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 742 +++++++++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 521 +++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c | 213 ++++++ drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 81 ++ drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 295 ++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 872 ++++++++++++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 102 +++ drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 516 +++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c | 634 ++++++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_reg.h | 57 ++ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 1192 ++++++++++++++++++++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c | 99 +++ drivers/staging/Kconfig | 2 + include/drm/Kbuild | 1 + include/drm/vmwgfx_drm.h | 574 ++++++++++++++ 22 files changed, 7915 insertions(+) create mode 100644 drivers/gpu/drm/vmwgfx/Kconfig create mode 100644 drivers/gpu/drm/vmwgfx/Makefile create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_irq.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_reg.h create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c create mode 100644 include/drm/vmwgfx_drm.h (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 91567ac806f1..bc14ba7c3b6f 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -30,4 +30,5 @@ obj-$(CONFIG_DRM_I830) += i830/ obj-$(CONFIG_DRM_I915) += i915/ obj-$(CONFIG_DRM_SIS) += sis/ obj-$(CONFIG_DRM_SAVAGE)+= savage/ +obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/ obj-$(CONFIG_DRM_VIA) +=via/ diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig new file mode 100644 index 000000000000..f20b8bcbef39 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/Kconfig @@ -0,0 +1,13 @@ +config DRM_VMWGFX + tristate "DRM driver for VMware Virtual GPU" + depends on DRM && PCI + select FB_DEFERRED_IO + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select DRM_TTM + help + KMS enabled DRM driver for SVGA2 virtual hardware. + + If unsure say n. 
The compiled module will be + called vmwgfx.ko diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile new file mode 100644 index 000000000000..1a3cb6816d1c --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -0,0 +1,9 @@ + +ccflags-y := -Iinclude/drm + +vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ + vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \ + vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \ + vmwgfx_overlay.o + +obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c new file mode 100644 index 000000000000..d6f2d2b882e9 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c @@ -0,0 +1,229 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "vmwgfx_drv.h" +#include "ttm/ttm_bo_driver.h" +#include "ttm/ttm_placement.h" + +static uint32_t vram_placement_flags = TTM_PL_FLAG_VRAM | + TTM_PL_FLAG_CACHED; + +static uint32_t vram_ne_placement_flags = TTM_PL_FLAG_VRAM | + TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_NO_EVICT; + +static uint32_t sys_placement_flags = TTM_PL_FLAG_SYSTEM | + TTM_PL_FLAG_CACHED; + +struct ttm_placement vmw_vram_placement = { + .fpfn = 0, + .lpfn = 0, + .num_placement = 1, + .placement = &vram_placement_flags, + .num_busy_placement = 1, + .busy_placement = &vram_placement_flags +}; + +struct ttm_placement vmw_vram_ne_placement = { + .fpfn = 0, + .lpfn = 0, + .num_placement = 1, + .placement = &vram_ne_placement_flags, + .num_busy_placement = 1, + .busy_placement = &vram_ne_placement_flags +}; + +struct ttm_placement vmw_sys_placement = { + .fpfn = 0, + .lpfn = 0, + .num_placement = 1, + .placement = &sys_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags +}; + +struct vmw_ttm_backend { + struct ttm_backend backend; +}; + +static int vmw_ttm_populate(struct ttm_backend *backend, + unsigned long num_pages, struct page **pages, + struct page *dummy_read_page) +{ + return 0; +} + +static int vmw_ttm_bind(struct ttm_backend *backend, struct ttm_mem_reg *bo_mem) +{ + return 0; +} + +static int vmw_ttm_unbind(struct ttm_backend *backend) +{ + return 0; +} + +static void vmw_ttm_clear(struct ttm_backend *backend) +{ +} + +static void vmw_ttm_destroy(struct ttm_backend *backend) +{ + struct vmw_ttm_backend *vmw_be = + container_of(backend, struct vmw_ttm_backend, backend); + + kfree(vmw_be); +} + +static struct ttm_backend_func vmw_ttm_func = { + .populate = vmw_ttm_populate, + .clear = vmw_ttm_clear, + .bind = vmw_ttm_bind, + .unbind = vmw_ttm_unbind, + .destroy = vmw_ttm_destroy, +}; + +struct ttm_backend *vmw_ttm_backend_init(struct ttm_bo_device *bdev) +{ + struct vmw_ttm_backend *vmw_be; + + vmw_be = kmalloc(sizeof(*vmw_be), GFP_KERNEL); + if (!vmw_be) + return NULL; + + vmw_be->backend.func = &vmw_ttm_func; + + return &vmw_be->backend; +} + +int vmw_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) +{ + return 0; +} + +int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, + struct ttm_mem_type_manager *man) +{ + struct vmw_private *dev_priv = + container_of(bdev, struct vmw_private, bdev); + + switch (type) { + case TTM_PL_SYSTEM: + /* System memory */ + + man->flags = TTM_MEMTYPE_FLAG_MAPPABLE; + man->available_caching = TTM_PL_MASK_CACHING; + man->default_caching = TTM_PL_FLAG_CACHED; + break; + case TTM_PL_VRAM: + /* "On-card" video ram */ + man->gpu_offset = 0; + man->io_offset = dev_priv->vram_start; + man->io_size = dev_priv->vram_size; + man->flags = TTM_MEMTYPE_FLAG_FIXED | + TTM_MEMTYPE_FLAG_NEEDS_IOREMAP | TTM_MEMTYPE_FLAG_MAPPABLE; + man->io_addr = NULL; + man->available_caching = TTM_PL_MASK_CACHING; + man->default_caching = TTM_PL_FLAG_WC; + break; + default: + DRM_ERROR("Unsupported memory type %u\n", (unsigned)type); + return -EINVAL; + } + return 0; +} + +void vmw_evict_flags(struct ttm_buffer_object *bo, + struct ttm_placement *placement) +{ + *placement = vmw_sys_placement; +} + +/** + * FIXME: Proper access checks on buffers. + */ + +static int vmw_verify_access(struct ttm_buffer_object *bo, struct file *filp) +{ + return 0; +} + +/** + * FIXME: We're using the old vmware polling method to sync. + * Do this with fences instead. 
+ */ + +static void *vmw_sync_obj_ref(void *sync_obj) +{ + return sync_obj; +} + +static void vmw_sync_obj_unref(void **sync_obj) +{ + *sync_obj = NULL; +} + +static int vmw_sync_obj_flush(void *sync_obj, void *sync_arg) +{ + struct vmw_private *dev_priv = (struct vmw_private *)sync_arg; + + mutex_lock(&dev_priv->hw_mutex); + vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC); + mutex_unlock(&dev_priv->hw_mutex); + return 0; +} + +static bool vmw_sync_obj_signaled(void *sync_obj, void *sync_arg) +{ + struct vmw_private *dev_priv = (struct vmw_private *)sync_arg; + uint32_t sequence = (unsigned long) sync_obj; + + return vmw_fence_signaled(dev_priv, sequence); +} + +static int vmw_sync_obj_wait(void *sync_obj, void *sync_arg, + bool lazy, bool interruptible) +{ + struct vmw_private *dev_priv = (struct vmw_private *)sync_arg; + uint32_t sequence = (unsigned long) sync_obj; + + return vmw_wait_fence(dev_priv, false, sequence, false, 3*HZ); +} + +struct ttm_bo_driver vmw_bo_driver = { + .create_ttm_backend_entry = vmw_ttm_backend_init, + .invalidate_caches = vmw_invalidate_caches, + .init_mem_type = vmw_init_mem_type, + .evict_flags = vmw_evict_flags, + .move = NULL, + .verify_access = vmw_verify_access, + .sync_obj_signaled = vmw_sync_obj_signaled, + .sync_obj_wait = vmw_sync_obj_wait, + .sync_obj_flush = vmw_sync_obj_flush, + .sync_obj_unref = vmw_sync_obj_unref, + .sync_obj_ref = vmw_sync_obj_ref +}; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c new file mode 100644 index 000000000000..7b48bb3b63b2 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -0,0 +1,735 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "drmP.h" +#include "vmwgfx_drv.h" +#include "ttm/ttm_placement.h" +#include "ttm/ttm_bo_driver.h" +#include "ttm/ttm_object.h" +#include "ttm/ttm_module.h" + +#define VMWGFX_DRIVER_NAME "vmwgfx" +#define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" +#define VMWGFX_CHIP_SVGAII 0 +#define VMW_FB_RESERVATION 0 + +/** + * Fully encoded drm commands. 
Might move to vmw_drm.h + */ + +#define DRM_IOCTL_VMW_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_GET_PARAM, \ + struct drm_vmw_getparam_arg) +#define DRM_IOCTL_VMW_ALLOC_DMABUF \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_ALLOC_DMABUF, \ + union drm_vmw_alloc_dmabuf_arg) +#define DRM_IOCTL_VMW_UNREF_DMABUF \ + DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_UNREF_DMABUF, \ + struct drm_vmw_unref_dmabuf_arg) +#define DRM_IOCTL_VMW_CURSOR_BYPASS \ + DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_CURSOR_BYPASS, \ + struct drm_vmw_cursor_bypass_arg) + +#define DRM_IOCTL_VMW_CONTROL_STREAM \ + DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_CONTROL_STREAM, \ + struct drm_vmw_control_stream_arg) +#define DRM_IOCTL_VMW_CLAIM_STREAM \ + DRM_IOR(DRM_COMMAND_BASE + DRM_VMW_CLAIM_STREAM, \ + struct drm_vmw_stream_arg) +#define DRM_IOCTL_VMW_UNREF_STREAM \ + DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_UNREF_STREAM, \ + struct drm_vmw_stream_arg) + +#define DRM_IOCTL_VMW_CREATE_CONTEXT \ + DRM_IOR(DRM_COMMAND_BASE + DRM_VMW_CREATE_CONTEXT, \ + struct drm_vmw_context_arg) +#define DRM_IOCTL_VMW_UNREF_CONTEXT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_UNREF_CONTEXT, \ + struct drm_vmw_context_arg) +#define DRM_IOCTL_VMW_CREATE_SURFACE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_CREATE_SURFACE, \ + union drm_vmw_surface_create_arg) +#define DRM_IOCTL_VMW_UNREF_SURFACE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_UNREF_SURFACE, \ + struct drm_vmw_surface_arg) +#define DRM_IOCTL_VMW_REF_SURFACE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_REF_SURFACE, \ + union drm_vmw_surface_reference_arg) +#define DRM_IOCTL_VMW_EXECBUF \ + DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_EXECBUF, \ + struct drm_vmw_execbuf_arg) +#define DRM_IOCTL_VMW_FIFO_DEBUG \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_FIFO_DEBUG, \ + struct drm_vmw_fifo_debug_arg) +#define DRM_IOCTL_VMW_FENCE_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_FENCE_WAIT, \ + struct drm_vmw_fence_wait_arg) + + +/** + * The core DRM version of this macro doesn't account for + * DRM_COMMAND_BASE. + */ + +#define VMW_IOCTL_DEF(ioctl, func, flags) \ + [DRM_IOCTL_NR(ioctl) - DRM_COMMAND_BASE] = {ioctl, flags, func} + +/** + * Ioctl definitions. 
+ */ + +static struct drm_ioctl_desc vmw_ioctls[] = { + VMW_IOCTL_DEF(DRM_IOCTL_VMW_GET_PARAM, vmw_getparam_ioctl, 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_DMABUF, vmw_dmabuf_unref_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_CURSOR_BYPASS, + vmw_kms_cursor_bypass_ioctl, 0), + + VMW_IOCTL_DEF(DRM_IOCTL_VMW_CONTROL_STREAM, vmw_overlay_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_STREAM, vmw_stream_unref_ioctl, + 0), + + VMW_IOCTL_DEF(DRM_IOCTL_VMW_CREATE_CONTEXT, vmw_context_define_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_CREATE_SURFACE, vmw_surface_define_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_REF_SURFACE, vmw_surface_reference_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_EXECBUF, vmw_execbuf_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl, + 0), + VMW_IOCTL_DEF(DRM_IOCTL_VMW_FENCE_WAIT, vmw_fence_wait_ioctl, + 0) +}; + +static struct pci_device_id vmw_pci_id_list[] = { + {0x15ad, 0x0405, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VMWGFX_CHIP_SVGAII}, + {0, 0, 0} +}; + +static char *vmw_devname = "vmwgfx"; + +static int vmw_probe(struct pci_dev *, const struct pci_device_id *); +static void vmw_master_init(struct vmw_master *); + +static void vmw_print_capabilities(uint32_t capabilities) +{ + DRM_INFO("Capabilities:\n"); + if (capabilities & SVGA_CAP_RECT_COPY) + DRM_INFO(" Rect copy.\n"); + if (capabilities & SVGA_CAP_CURSOR) + DRM_INFO(" Cursor.\n"); + if (capabilities & SVGA_CAP_CURSOR_BYPASS) + DRM_INFO(" Cursor bypass.\n"); + if (capabilities & SVGA_CAP_CURSOR_BYPASS_2) + DRM_INFO(" Cursor bypass 2.\n"); + if (capabilities & SVGA_CAP_8BIT_EMULATION) + DRM_INFO(" 8bit emulation.\n"); + if (capabilities & SVGA_CAP_ALPHA_CURSOR) + DRM_INFO(" Alpha cursor.\n"); + if (capabilities & SVGA_CAP_3D) + DRM_INFO(" 3D.\n"); + if (capabilities & SVGA_CAP_EXTENDED_FIFO) + DRM_INFO(" Extended Fifo.\n"); + if (capabilities & SVGA_CAP_MULTIMON) + DRM_INFO(" Multimon.\n"); + if (capabilities & SVGA_CAP_PITCHLOCK) + DRM_INFO(" Pitchlock.\n"); + if (capabilities & SVGA_CAP_IRQMASK) + DRM_INFO(" Irq mask.\n"); + if (capabilities & SVGA_CAP_DISPLAY_TOPOLOGY) + DRM_INFO(" Display Topology.\n"); + if (capabilities & SVGA_CAP_GMR) + DRM_INFO(" GMR.\n"); + if (capabilities & SVGA_CAP_TRACES) + DRM_INFO(" Traces.\n"); +} + +static int vmw_request_device(struct vmw_private *dev_priv) +{ + int ret; + + vmw_kms_save_vga(dev_priv); + + ret = vmw_fifo_init(dev_priv, &dev_priv->fifo); + if (unlikely(ret != 0)) { + DRM_ERROR("Unable to initialize FIFO.\n"); + return ret; + } + + return 0; +} + +static void vmw_release_device(struct vmw_private *dev_priv) +{ + vmw_fifo_release(dev_priv, &dev_priv->fifo); + vmw_kms_restore_vga(dev_priv); +} + + +static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) +{ + struct vmw_private *dev_priv; + int ret; + + dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); + if (unlikely(dev_priv == NULL)) { + DRM_ERROR("Failed allocating a device private struct.\n"); + return -ENOMEM; + } + memset(dev_priv, 0, sizeof(*dev_priv)); + + dev_priv->dev = dev; + dev_priv->vmw_chipset = chipset; + mutex_init(&dev_priv->hw_mutex); + mutex_init(&dev_priv->cmdbuf_mutex); + rwlock_init(&dev_priv->resource_lock); + idr_init(&dev_priv->context_idr); + 
idr_init(&dev_priv->surface_idr); + idr_init(&dev_priv->stream_idr); + ida_init(&dev_priv->gmr_ida); + mutex_init(&dev_priv->init_mutex); + init_waitqueue_head(&dev_priv->fence_queue); + init_waitqueue_head(&dev_priv->fifo_queue); + atomic_set(&dev_priv->fence_queue_waiters, 0); + atomic_set(&dev_priv->fifo_queue_waiters, 0); + INIT_LIST_HEAD(&dev_priv->gmr_lru); + + dev_priv->io_start = pci_resource_start(dev->pdev, 0); + dev_priv->vram_start = pci_resource_start(dev->pdev, 1); + dev_priv->mmio_start = pci_resource_start(dev->pdev, 2); + + mutex_lock(&dev_priv->hw_mutex); + dev_priv->capabilities = vmw_read(dev_priv, SVGA_REG_CAPABILITIES); + + if (dev_priv->capabilities & SVGA_CAP_GMR) { + dev_priv->max_gmr_descriptors = + vmw_read(dev_priv, + SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH); + dev_priv->max_gmr_ids = + vmw_read(dev_priv, SVGA_REG_GMR_MAX_IDS); + } + + dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE); + dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE); + dev_priv->fb_max_width = vmw_read(dev_priv, SVGA_REG_MAX_WIDTH); + dev_priv->fb_max_height = vmw_read(dev_priv, SVGA_REG_MAX_HEIGHT); + + mutex_unlock(&dev_priv->hw_mutex); + + vmw_print_capabilities(dev_priv->capabilities); + + if (dev_priv->capabilities & SVGA_CAP_GMR) { + DRM_INFO("Max GMR ids is %u\n", + (unsigned)dev_priv->max_gmr_ids); + DRM_INFO("Max GMR descriptors is %u\n", + (unsigned)dev_priv->max_gmr_descriptors); + } + DRM_INFO("VRAM at 0x%08x size is %u kiB\n", + dev_priv->vram_start, dev_priv->vram_size / 1024); + DRM_INFO("MMIO at 0x%08x size is %u kiB\n", + dev_priv->mmio_start, dev_priv->mmio_size / 1024); + + ret = vmw_ttm_global_init(dev_priv); + if (unlikely(ret != 0)) + goto out_err0; + + + vmw_master_init(&dev_priv->fbdev_master); + ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); + dev_priv->active_master = &dev_priv->fbdev_master; + + + ret = ttm_bo_device_init(&dev_priv->bdev, + dev_priv->bo_global_ref.ref.object, + &vmw_bo_driver, VMWGFX_FILE_PAGE_OFFSET, + false); + if (unlikely(ret != 0)) { + DRM_ERROR("Failed initializing TTM buffer object driver.\n"); + goto out_err1; + } + + ret = ttm_bo_init_mm(&dev_priv->bdev, TTM_PL_VRAM, + (dev_priv->vram_size >> PAGE_SHIFT)); + if (unlikely(ret != 0)) { + DRM_ERROR("Failed initializing memory manager for VRAM.\n"); + goto out_err2; + } + + dev_priv->mmio_mtrr = drm_mtrr_add(dev_priv->mmio_start, + dev_priv->mmio_size, DRM_MTRR_WC); + + dev_priv->mmio_virt = ioremap_wc(dev_priv->mmio_start, + dev_priv->mmio_size); + + if (unlikely(dev_priv->mmio_virt == NULL)) { + ret = -ENOMEM; + DRM_ERROR("Failed mapping MMIO.\n"); + goto out_err3; + } + + dev_priv->tdev = ttm_object_device_init + (dev_priv->mem_global_ref.object, 12); + + if (unlikely(dev_priv->tdev == NULL)) { + DRM_ERROR("Unable to initialize TTM object management.\n"); + ret = -ENOMEM; + goto out_err4; + } + + dev->dev_private = dev_priv; + + if (!dev->devname) + dev->devname = vmw_devname; + + if (dev_priv->capabilities & SVGA_CAP_IRQMASK) { + ret = drm_irq_install(dev); + if (unlikely(ret != 0)) { + DRM_ERROR("Failed installing irq: %d\n", ret); + goto out_no_irq; + } + } + + ret = pci_request_regions(dev->pdev, "vmwgfx probe"); + dev_priv->stealth = (ret != 0); + if (dev_priv->stealth) { + /** + * Request at least the mmio PCI resource. + */ + + DRM_INFO("It appears like vesafb is loaded. " + "Ignore above error if any. 
Entering stealth mode.\n"); + ret = pci_request_region(dev->pdev, 2, "vmwgfx stealth probe"); + if (unlikely(ret != 0)) { + DRM_ERROR("Failed reserving the SVGA MMIO resource.\n"); + goto out_no_device; + } + vmw_kms_init(dev_priv); + vmw_overlay_init(dev_priv); + } else { + ret = vmw_request_device(dev_priv); + if (unlikely(ret != 0)) + goto out_no_device; + vmw_kms_init(dev_priv); + vmw_overlay_init(dev_priv); + vmw_fb_init(dev_priv); + } + + return 0; + +out_no_device: + if (dev_priv->capabilities & SVGA_CAP_IRQMASK) + drm_irq_uninstall(dev_priv->dev); + if (dev->devname == vmw_devname) + dev->devname = NULL; +out_no_irq: + ttm_object_device_release(&dev_priv->tdev); +out_err4: + iounmap(dev_priv->mmio_virt); +out_err3: + drm_mtrr_del(dev_priv->mmio_mtrr, dev_priv->mmio_start, + dev_priv->mmio_size, DRM_MTRR_WC); + (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM); +out_err2: + (void)ttm_bo_device_release(&dev_priv->bdev); +out_err1: + vmw_ttm_global_release(dev_priv); +out_err0: + ida_destroy(&dev_priv->gmr_ida); + idr_destroy(&dev_priv->surface_idr); + idr_destroy(&dev_priv->context_idr); + idr_destroy(&dev_priv->stream_idr); + kfree(dev_priv); + return ret; +} + +static int vmw_driver_unload(struct drm_device *dev) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + + DRM_INFO(VMWGFX_DRIVER_NAME " unload.\n"); + + if (!dev_priv->stealth) { + vmw_fb_close(dev_priv); + vmw_kms_close(dev_priv); + vmw_overlay_close(dev_priv); + vmw_release_device(dev_priv); + pci_release_regions(dev->pdev); + } else { + vmw_kms_close(dev_priv); + vmw_overlay_close(dev_priv); + pci_release_region(dev->pdev, 2); + } + if (dev_priv->capabilities & SVGA_CAP_IRQMASK) + drm_irq_uninstall(dev_priv->dev); + if (dev->devname == vmw_devname) + dev->devname = NULL; + ttm_object_device_release(&dev_priv->tdev); + iounmap(dev_priv->mmio_virt); + drm_mtrr_del(dev_priv->mmio_mtrr, dev_priv->mmio_start, + dev_priv->mmio_size, DRM_MTRR_WC); + (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM); + (void)ttm_bo_device_release(&dev_priv->bdev); + vmw_ttm_global_release(dev_priv); + ida_destroy(&dev_priv->gmr_ida); + idr_destroy(&dev_priv->surface_idr); + idr_destroy(&dev_priv->context_idr); + idr_destroy(&dev_priv->stream_idr); + + kfree(dev_priv); + + return 0; +} + +static void vmw_postclose(struct drm_device *dev, + struct drm_file *file_priv) +{ + struct vmw_fpriv *vmw_fp; + + vmw_fp = vmw_fpriv(file_priv); + ttm_object_file_release(&vmw_fp->tfile); + if (vmw_fp->locked_master) + drm_master_put(&vmw_fp->locked_master); + kfree(vmw_fp); +} + +static int vmw_driver_open(struct drm_device *dev, struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_fpriv *vmw_fp; + int ret = -ENOMEM; + + vmw_fp = kzalloc(sizeof(*vmw_fp), GFP_KERNEL); + if (unlikely(vmw_fp == NULL)) + return ret; + + vmw_fp->tfile = ttm_object_file_init(dev_priv->tdev, 10); + if (unlikely(vmw_fp->tfile == NULL)) + goto out_no_tfile; + + file_priv->driver_priv = vmw_fp; + + if (unlikely(dev_priv->bdev.dev_mapping == NULL)) + dev_priv->bdev.dev_mapping = + file_priv->filp->f_path.dentry->d_inode->i_mapping; + + return 0; + +out_no_tfile: + kfree(vmw_fp); + return ret; +} + +static long vmw_unlocked_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct drm_file *file_priv = filp->private_data; + struct drm_device *dev = file_priv->minor->dev; + unsigned int nr = DRM_IOCTL_NR(cmd); + long ret; + + /* + * The driver private ioctls and TTM ioctls should be + * thread-safe. 
+ */ + + if ((nr >= DRM_COMMAND_BASE) && (nr < DRM_COMMAND_END) + && (nr < DRM_COMMAND_BASE + dev->driver->num_ioctls)) { + struct drm_ioctl_desc *ioctl = + &vmw_ioctls[nr - DRM_COMMAND_BASE]; + + if (unlikely(ioctl->cmd != cmd)) { + DRM_ERROR("Invalid command format, ioctl %d\n", + nr - DRM_COMMAND_BASE); + return -EINVAL; + } + return drm_ioctl(filp->f_path.dentry->d_inode, + filp, cmd, arg); + } + + /* + * Not all old drm ioctls are thread-safe. + */ + + lock_kernel(); + ret = drm_ioctl(filp->f_path.dentry->d_inode, filp, cmd, arg); + unlock_kernel(); + return ret; +} + +static int vmw_firstopen(struct drm_device *dev) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + dev_priv->is_opened = true; + + return 0; +} + +static void vmw_lastclose(struct drm_device *dev) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct drm_crtc *crtc; + struct drm_mode_set set; + int ret; + + /** + * Do nothing on the lastclose call from drm_unload. + */ + + if (!dev_priv->is_opened) + return; + + dev_priv->is_opened = false; + set.x = 0; + set.y = 0; + set.fb = NULL; + set.mode = NULL; + set.connectors = NULL; + set.num_connectors = 0; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + set.crtc = crtc; + ret = crtc->funcs->set_config(&set); + WARN_ON(ret != 0); + } + +} + +static void vmw_master_init(struct vmw_master *vmaster) +{ + ttm_lock_init(&vmaster->lock); +} + +static int vmw_master_create(struct drm_device *dev, + struct drm_master *master) +{ + struct vmw_master *vmaster; + + DRM_INFO("Master create.\n"); + vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL); + if (unlikely(vmaster == NULL)) + return -ENOMEM; + + ttm_lock_init(&vmaster->lock); + ttm_lock_set_kill(&vmaster->lock, true, SIGTERM); + master->driver_priv = vmaster; + + return 0; +} + +static void vmw_master_destroy(struct drm_device *dev, + struct drm_master *master) +{ + struct vmw_master *vmaster = vmw_master(master); + + DRM_INFO("Master destroy.\n"); + master->driver_priv = NULL; + kfree(vmaster); +} + + +static int vmw_master_set(struct drm_device *dev, + struct drm_file *file_priv, + bool from_open) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); + struct vmw_master *active = dev_priv->active_master; + struct vmw_master *vmaster = vmw_master(file_priv->master); + int ret = 0; + + DRM_INFO("Master set.\n"); + if (dev_priv->stealth) { + ret = vmw_request_device(dev_priv); + if (unlikely(ret != 0)) + return ret; + } + + if (active) { + BUG_ON(active != &dev_priv->fbdev_master); + ret = ttm_vt_lock(&active->lock, false, vmw_fp->tfile); + if (unlikely(ret != 0)) + goto out_no_active_lock; + + ttm_lock_set_kill(&active->lock, true, SIGTERM); + ret = ttm_bo_evict_mm(&dev_priv->bdev, TTM_PL_VRAM); + if (unlikely(ret != 0)) { + DRM_ERROR("Unable to clean VRAM on " + "master drop.\n"); + } + + dev_priv->active_master = NULL; + } + + ttm_lock_set_kill(&vmaster->lock, false, SIGTERM); + if (!from_open) { + ttm_vt_unlock(&vmaster->lock); + BUG_ON(vmw_fp->locked_master != file_priv->master); + drm_master_put(&vmw_fp->locked_master); + } + + dev_priv->active_master = vmaster; + + return 0; + +out_no_active_lock: + vmw_release_device(dev_priv); + return ret; +} + +static void vmw_master_drop(struct drm_device *dev, + struct drm_file *file_priv, + bool from_release) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); + struct vmw_master *vmaster = vmw_master(file_priv->master); + int ret; + + DRM_INFO("Master 
drop.\n"); + + /** + * Make sure the master doesn't disappear while we have + * it locked. + */ + + vmw_fp->locked_master = drm_master_get(file_priv->master); + ret = ttm_vt_lock(&vmaster->lock, false, vmw_fp->tfile); + + if (unlikely((ret != 0))) { + DRM_ERROR("Unable to lock TTM at VT switch.\n"); + drm_master_put(&vmw_fp->locked_master); + } + + ttm_lock_set_kill(&vmaster->lock, true, SIGTERM); + + if (dev_priv->stealth) { + ret = ttm_bo_evict_mm(&dev_priv->bdev, TTM_PL_VRAM); + if (unlikely(ret != 0)) + DRM_ERROR("Unable to clean VRAM on master drop.\n"); + vmw_release_device(dev_priv); + } + dev_priv->active_master = &dev_priv->fbdev_master; + ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); + ttm_vt_unlock(&dev_priv->fbdev_master.lock); + + if (!dev_priv->stealth) + vmw_fb_on(dev_priv); +} + + +static void vmw_remove(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + + drm_put_dev(dev); +} + +static struct drm_driver driver = { + .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | + DRIVER_MODESET, + .load = vmw_driver_load, + .unload = vmw_driver_unload, + .firstopen = vmw_firstopen, + .lastclose = vmw_lastclose, + .irq_preinstall = vmw_irq_preinstall, + .irq_postinstall = vmw_irq_postinstall, + .irq_uninstall = vmw_irq_uninstall, + .irq_handler = vmw_irq_handler, + .reclaim_buffers_locked = NULL, + .get_map_ofs = drm_core_get_map_ofs, + .get_reg_ofs = drm_core_get_reg_ofs, + .ioctls = vmw_ioctls, + .num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls), + .dma_quiescent = NULL, /*vmw_dma_quiescent, */ + .master_create = vmw_master_create, + .master_destroy = vmw_master_destroy, + .master_set = vmw_master_set, + .master_drop = vmw_master_drop, + .open = vmw_driver_open, + .postclose = vmw_postclose, + .fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = vmw_unlocked_ioctl, + .mmap = vmw_mmap, + .poll = drm_poll, + .fasync = drm_fasync, +#if defined(CONFIG_COMPAT) + .compat_ioctl = drm_compat_ioctl, +#endif + }, + .pci_driver = { + .name = VMWGFX_DRIVER_NAME, + .id_table = vmw_pci_id_list, + .probe = vmw_probe, + .remove = vmw_remove + }, + .name = VMWGFX_DRIVER_NAME, + .desc = VMWGFX_DRIVER_DESC, + .date = VMWGFX_DRIVER_DATE, + .major = VMWGFX_DRIVER_MAJOR, + .minor = VMWGFX_DRIVER_MINOR, + .patchlevel = VMWGFX_DRIVER_PATCHLEVEL +}; + +static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + return drm_get_dev(pdev, ent, &driver); +} + +static int __init vmwgfx_init(void) +{ + int ret; + ret = drm_init(&driver); + if (ret) + DRM_ERROR("Failed initializing DRM.\n"); + return ret; +} + +static void __exit vmwgfx_exit(void) +{ + drm_exit(&driver); +} + +module_init(vmwgfx_init); +module_exit(vmwgfx_exit); + +MODULE_AUTHOR("VMware Inc. and others"); +MODULE_DESCRIPTION("Standalone drm driver for the VMware SVGA device"); +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h new file mode 100644 index 000000000000..43546d09d1b0 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -0,0 +1,511 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _VMWGFX_DRV_H_ +#define _VMWGFX_DRV_H_ + +#include "vmwgfx_reg.h" +#include "drmP.h" +#include "vmwgfx_drm.h" +#include "drm_hashtab.h" +#include "ttm/ttm_bo_driver.h" +#include "ttm/ttm_object.h" +#include "ttm/ttm_lock.h" +#include "ttm/ttm_execbuf_util.h" +#include "ttm/ttm_module.h" + +#define VMWGFX_DRIVER_DATE "20090724" +#define VMWGFX_DRIVER_MAJOR 0 +#define VMWGFX_DRIVER_MINOR 1 +#define VMWGFX_DRIVER_PATCHLEVEL 2 +#define VMWGFX_FILE_PAGE_OFFSET 0x00100000 +#define VMWGFX_FIFO_STATIC_SIZE (1024*1024) +#define VMWGFX_MAX_RELOCATIONS 2048 +#define VMWGFX_MAX_GMRS 2048 + +struct vmw_fpriv { + struct drm_master *locked_master; + struct ttm_object_file *tfile; +}; + +struct vmw_dma_buffer { + struct ttm_buffer_object base; + struct list_head validate_list; + struct list_head gmr_lru; + uint32_t gmr_id; + bool gmr_bound; + uint32_t cur_validate_node; + bool on_validate_list; +}; + +struct vmw_resource { + struct kref kref; + struct vmw_private *dev_priv; + struct idr *idr; + int id; + enum ttm_object_type res_type; + bool avail; + void (*hw_destroy) (struct vmw_resource *res); + void (*res_free) (struct vmw_resource *res); + + /* TODO is a generic snooper needed? 
*/ +#if 0 + void (*snoop)(struct vmw_resource *res, + struct ttm_object_file *tfile, + SVGA3dCmdHeader *header); + void *snoop_priv; +#endif +}; + +struct vmw_cursor_snooper { + struct drm_crtc *crtc; + size_t age; + uint32_t *image; +}; + +struct vmw_surface { + struct vmw_resource res; + uint32_t flags; + uint32_t format; + uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES]; + struct drm_vmw_size *sizes; + uint32_t num_sizes; + + /* TODO so far just a extra pointer */ + struct vmw_cursor_snooper snooper; +}; + +struct vmw_fifo_state { + unsigned long reserved_size; + __le32 *dynamic_buffer; + __le32 *static_buffer; + __le32 *last_buffer; + uint32_t last_data_size; + uint32_t last_buffer_size; + bool last_buffer_add; + unsigned long static_buffer_size; + bool using_bounce_buffer; + uint32_t capabilities; + struct rw_semaphore rwsem; +}; + +struct vmw_relocation { + SVGAGuestPtr *location; + uint32_t index; +}; + +struct vmw_sw_context{ + struct ida bo_list; + uint32_t last_cid; + bool cid_valid; + uint32_t last_sid; + bool sid_valid; + struct ttm_object_file *tfile; + struct list_head validate_nodes; + struct vmw_relocation relocs[VMWGFX_MAX_RELOCATIONS]; + uint32_t cur_reloc; + struct ttm_validate_buffer val_bufs[VMWGFX_MAX_GMRS]; + uint32_t cur_val_buf; +}; + +struct vmw_legacy_display; +struct vmw_overlay; + +struct vmw_master { + struct ttm_lock lock; +}; + +struct vmw_private { + struct ttm_bo_device bdev; + struct ttm_bo_global_ref bo_global_ref; + struct ttm_global_reference mem_global_ref; + + struct vmw_fifo_state fifo; + + struct drm_device *dev; + unsigned long vmw_chipset; + unsigned int io_start; + uint32_t vram_start; + uint32_t vram_size; + uint32_t mmio_start; + uint32_t mmio_size; + uint32_t fb_max_width; + uint32_t fb_max_height; + __le32 __iomem *mmio_virt; + int mmio_mtrr; + uint32_t capabilities; + uint32_t max_gmr_descriptors; + uint32_t max_gmr_ids; + struct mutex hw_mutex; + + /* + * VGA registers. + */ + + uint32_t vga_width; + uint32_t vga_height; + uint32_t vga_depth; + uint32_t vga_bpp; + uint32_t vga_pseudo; + uint32_t vga_red_mask; + uint32_t vga_blue_mask; + uint32_t vga_green_mask; + + /* + * Framebuffer info. + */ + + void *fb_info; + struct vmw_legacy_display *ldu_priv; + struct vmw_overlay *overlay_priv; + + /* + * Context and surface management. + */ + + rwlock_t resource_lock; + struct idr context_idr; + struct idr surface_idr; + struct idr stream_idr; + + /* + * Block lastclose from racing with firstopen. + */ + + struct mutex init_mutex; + + /* + * A resource manager for kernel-only surfaces and + * contexts. + */ + + struct ttm_object_device *tdev; + + /* + * Fencing and IRQs. + */ + + uint32_t fence_seq; + wait_queue_head_t fence_queue; + wait_queue_head_t fifo_queue; + atomic_t fence_queue_waiters; + atomic_t fifo_queue_waiters; + uint32_t last_read_sequence; + spinlock_t irq_lock; + + /* + * Device state + */ + + uint32_t traces_state; + uint32_t enable_state; + uint32_t config_done_state; + + /** + * Execbuf + */ + /** + * Protected by the cmdbuf mutex. + */ + + struct vmw_sw_context ctx; + uint32_t val_seq; + struct mutex cmdbuf_mutex; + + /** + * GMR management. Protected by the lru spinlock. + */ + + struct ida gmr_ida; + struct list_head gmr_lru; + + + /** + * Operating mode. + */ + + bool stealth; + bool is_opened; + + /** + * Master management. 
+ */ + + struct vmw_master *active_master; + struct vmw_master fbdev_master; +}; + +static inline struct vmw_private *vmw_priv(struct drm_device *dev) +{ + return (struct vmw_private *)dev->dev_private; +} + +static inline struct vmw_fpriv *vmw_fpriv(struct drm_file *file_priv) +{ + return (struct vmw_fpriv *)file_priv->driver_priv; +} + +static inline struct vmw_master *vmw_master(struct drm_master *master) +{ + return (struct vmw_master *) master->driver_priv; +} + +static inline void vmw_write(struct vmw_private *dev_priv, + unsigned int offset, uint32_t value) +{ + outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT); + outl(value, dev_priv->io_start + VMWGFX_VALUE_PORT); +} + +static inline uint32_t vmw_read(struct vmw_private *dev_priv, + unsigned int offset) +{ + uint32_t val; + + outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT); + val = inl(dev_priv->io_start + VMWGFX_VALUE_PORT); + return val; +} + +/** + * GMR utilities - vmwgfx_gmr.c + */ + +extern int vmw_gmr_bind(struct vmw_private *dev_priv, + struct ttm_buffer_object *bo); +extern void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id); + +/** + * Resource utilities - vmwgfx_resource.c + */ + +extern struct vmw_resource *vmw_context_alloc(struct vmw_private *dev_priv); +extern void vmw_resource_unreference(struct vmw_resource **p_res); +extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); +extern int vmw_context_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern int vmw_context_define_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern int vmw_context_check(struct vmw_private *dev_priv, + struct ttm_object_file *tfile, + int id); +extern void vmw_surface_res_free(struct vmw_resource *res); +extern int vmw_surface_init(struct vmw_private *dev_priv, + struct vmw_surface *srf, + void (*res_free) (struct vmw_resource *res)); +extern int vmw_user_surface_lookup(struct vmw_private *dev_priv, + struct ttm_object_file *tfile, + int sid, struct vmw_surface **out); +extern int vmw_surface_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern int vmw_surface_define_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern int vmw_surface_reference_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern int vmw_surface_check(struct vmw_private *dev_priv, + struct ttm_object_file *tfile, + int id); +extern void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo); +extern int vmw_dmabuf_init(struct vmw_private *dev_priv, + struct vmw_dma_buffer *vmw_bo, + size_t size, struct ttm_placement *placement, + bool interuptable, + void (*bo_free) (struct ttm_buffer_object *bo)); +extern int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern int vmw_dmabuf_unref_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo, + uint32_t cur_validate_node); +extern void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo); +extern int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, + uint32_t id, struct vmw_dma_buffer **out); +extern uint32_t vmw_dmabuf_gmr(struct ttm_buffer_object *bo); +extern void vmw_dmabuf_set_gmr(struct ttm_buffer_object *bo, uint32_t id); +extern int vmw_gmr_id_alloc(struct vmw_private *dev_priv, uint32_t *p_id); +extern int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv, + struct vmw_dma_buffer *bo); 
+extern int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv, + struct vmw_dma_buffer *bo); +extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern int vmw_user_stream_lookup(struct vmw_private *dev_priv, + struct ttm_object_file *tfile, + uint32_t *inout_id, + struct vmw_resource **out); + + +/** + * Misc Ioctl functionality - vmwgfx_ioctl.c + */ + +extern int vmw_getparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern int vmw_fifo_debug_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +/** + * Fifo utilities - vmwgfx_fifo.c + */ + +extern int vmw_fifo_init(struct vmw_private *dev_priv, + struct vmw_fifo_state *fifo); +extern void vmw_fifo_release(struct vmw_private *dev_priv, + struct vmw_fifo_state *fifo); +extern void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes); +extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes); +extern int vmw_fifo_send_fence(struct vmw_private *dev_priv, + uint32_t *sequence); +extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason); +extern int vmw_fifo_mmap(struct file *filp, struct vm_area_struct *vma); + +/** + * TTM glue - vmwgfx_ttm_glue.c + */ + +extern int vmw_ttm_global_init(struct vmw_private *dev_priv); +extern void vmw_ttm_global_release(struct vmw_private *dev_priv); +extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma); + +/** + * TTM buffer object driver - vmwgfx_buffer.c + */ + +extern struct ttm_placement vmw_vram_placement; +extern struct ttm_placement vmw_vram_ne_placement; +extern struct ttm_placement vmw_sys_placement; +extern struct ttm_bo_driver vmw_bo_driver; +extern int vmw_dma_quiescent(struct drm_device *dev); + +/** + * Command submission - vmwgfx_execbuf.c + */ + +extern int vmw_execbuf_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +/** + * IRQs and wating - vmwgfx_irq.c + */ + +extern irqreturn_t vmw_irq_handler(DRM_IRQ_ARGS); +extern int vmw_wait_fence(struct vmw_private *dev_priv, bool lazy, + uint32_t sequence, bool interruptible, + unsigned long timeout); +extern void vmw_irq_preinstall(struct drm_device *dev); +extern int vmw_irq_postinstall(struct drm_device *dev); +extern void vmw_irq_uninstall(struct drm_device *dev); +extern bool vmw_fence_signaled(struct vmw_private *dev_priv, + uint32_t sequence); +extern int vmw_fence_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +extern int vmw_fallback_wait(struct vmw_private *dev_priv, + bool lazy, + bool fifo_idle, + uint32_t sequence, + bool interruptible, + unsigned long timeout); + +/** + * Kernel framebuffer - vmwgfx_fb.c + */ + +int vmw_fb_init(struct vmw_private *vmw_priv); +int vmw_fb_close(struct vmw_private *dev_priv); +int vmw_fb_off(struct vmw_private *vmw_priv); +int vmw_fb_on(struct vmw_private *vmw_priv); + +/** + * Kernel modesetting - vmwgfx_kms.c + */ + +int vmw_kms_init(struct vmw_private *dev_priv); +int vmw_kms_close(struct vmw_private *dev_priv); +int vmw_kms_save_vga(struct vmw_private *vmw_priv); +int vmw_kms_restore_vga(struct vmw_private *vmw_priv); +int vmw_kms_cursor_bypass_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +void vmw_kms_cursor_post_execbuf(struct vmw_private *dev_priv); +void vmw_kms_cursor_snoop(struct vmw_surface *srf, + struct ttm_object_file *tfile, + struct 
ttm_buffer_object *bo, + SVGA3dCmdHeader *header); + +/** + * Overlay control - vmwgfx_overlay.c + */ + +int vmw_overlay_init(struct vmw_private *dev_priv); +int vmw_overlay_close(struct vmw_private *dev_priv); +int vmw_overlay_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vmw_overlay_stop_all(struct vmw_private *dev_priv); +int vmw_overlay_resume_all(struct vmw_private *dev_priv); +int vmw_overlay_pause_all(struct vmw_private *dev_priv); +int vmw_overlay_claim(struct vmw_private *dev_priv, uint32_t *out); +int vmw_overlay_unref(struct vmw_private *dev_priv, uint32_t stream_id); +int vmw_overlay_num_overlays(struct vmw_private *dev_priv); +int vmw_overlay_num_free_overlays(struct vmw_private *dev_priv); + +/** + * Inline helper functions + */ + +static inline void vmw_surface_unreference(struct vmw_surface **srf) +{ + struct vmw_surface *tmp_srf = *srf; + struct vmw_resource *res = &tmp_srf->res; + *srf = NULL; + + vmw_resource_unreference(&res); +} + +static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf) +{ + (void) vmw_resource_reference(&srf->res); + return srf; +} + +static inline void vmw_dmabuf_unreference(struct vmw_dma_buffer **buf) +{ + struct vmw_dma_buffer *tmp_buf = *buf; + struct ttm_buffer_object *bo = &tmp_buf->base; + *buf = NULL; + + ttm_bo_unref(&bo); +} + +static inline struct vmw_dma_buffer *vmw_dmabuf_reference(struct vmw_dma_buffer *buf) +{ + if (ttm_bo_reference(&buf->base)) + return buf; + return NULL; +} + +#endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c new file mode 100644 index 000000000000..7a39f3e6dc2c --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -0,0 +1,516 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "vmwgfx_drv.h" +#include "vmwgfx_reg.h" +#include "ttm/ttm_bo_api.h" +#include "ttm/ttm_placement.h" + +static int vmw_cmd_invalid(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + return capable(CAP_SYS_ADMIN) ? 
: -EINVAL; +} + +static int vmw_cmd_ok(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + return 0; +} + +static int vmw_cmd_cid_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_cid_cmd { + SVGA3dCmdHeader header; + __le32 cid; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_cid_cmd, header); + if (likely(sw_context->cid_valid && cmd->cid == sw_context->last_cid)) + return 0; + + ret = vmw_context_check(dev_priv, sw_context->tfile, cmd->cid); + if (unlikely(ret != 0)) { + DRM_ERROR("Could not find or use context %u\n", + (unsigned) cmd->cid); + return ret; + } + + sw_context->last_cid = cmd->cid; + sw_context->cid_valid = true; + + return 0; +} + +static int vmw_cmd_sid_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + uint32_t sid) +{ + if (unlikely((!sw_context->sid_valid || sid != sw_context->last_sid) && + sid != SVGA3D_INVALID_ID)) { + int ret = vmw_surface_check(dev_priv, sw_context->tfile, sid); + + if (unlikely(ret != 0)) { + DRM_ERROR("Could ot find or use surface %u\n", + (unsigned) sid); + return ret; + } + + sw_context->last_sid = sid; + sw_context->sid_valid = true; + } + return 0; +} + + +static int vmw_cmd_set_render_target_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_sid_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdSetRenderTarget body; + } *cmd; + int ret; + + ret = vmw_cmd_cid_check(dev_priv, sw_context, header); + if (unlikely(ret != 0)) + return ret; + + cmd = container_of(header, struct vmw_sid_cmd, header); + return vmw_cmd_sid_check(dev_priv, sw_context, cmd->body.target.sid); +} + +static int vmw_cmd_surface_copy_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_sid_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdSurfaceCopy body; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_sid_cmd, header); + ret = vmw_cmd_sid_check(dev_priv, sw_context, cmd->body.src.sid); + if (unlikely(ret != 0)) + return ret; + return vmw_cmd_sid_check(dev_priv, sw_context, cmd->body.dest.sid); +} + +static int vmw_cmd_stretch_blt_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_sid_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdSurfaceStretchBlt body; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_sid_cmd, header); + ret = vmw_cmd_sid_check(dev_priv, sw_context, cmd->body.src.sid); + if (unlikely(ret != 0)) + return ret; + return vmw_cmd_sid_check(dev_priv, sw_context, cmd->body.dest.sid); +} + +static int vmw_cmd_blt_surf_screen_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_sid_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdBlitSurfaceToScreen body; + } *cmd; + + cmd = container_of(header, struct vmw_sid_cmd, header); + return vmw_cmd_sid_check(dev_priv, sw_context, cmd->body.srcImage.sid); +} + +static int vmw_cmd_present_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_sid_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdPresent body; + } *cmd; + + cmd = container_of(header, struct vmw_sid_cmd, header); + return vmw_cmd_sid_check(dev_priv, sw_context, cmd->body.sid); +} + +static int vmw_cmd_dma(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ 
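+ /*
+  * SVGA_3D_CMD_SURFACE_DMA: check the surface id, look up the buffer
+  * object backing the guest memory region, record a relocation for the
+  * guest pointer and add the buffer to the validation list. The surface
+  * is also looked up so that cursor contents can be snooped.
+  */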
+ uint32_t handle; + struct vmw_dma_buffer *vmw_bo = NULL; + struct ttm_buffer_object *bo; + struct vmw_surface *srf = NULL; + struct vmw_dma_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdSurfaceDMA dma; + } *cmd; + struct vmw_relocation *reloc; + int ret; + uint32_t cur_validate_node; + struct ttm_validate_buffer *val_buf; + + + cmd = container_of(header, struct vmw_dma_cmd, header); + ret = vmw_cmd_sid_check(dev_priv, sw_context, cmd->dma.host.sid); + if (unlikely(ret != 0)) + return ret; + + handle = cmd->dma.guest.ptr.gmrId; + ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo); + if (unlikely(ret != 0)) { + DRM_ERROR("Could not find or use GMR region.\n"); + return -EINVAL; + } + bo = &vmw_bo->base; + + if (unlikely(sw_context->cur_reloc >= VMWGFX_MAX_RELOCATIONS)) { + DRM_ERROR("Max number of DMA commands per submission" + " exceeded\n"); + ret = -EINVAL; + goto out_no_reloc; + } + + reloc = &sw_context->relocs[sw_context->cur_reloc++]; + reloc->location = &cmd->dma.guest.ptr; + + cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf); + if (unlikely(cur_validate_node >= VMWGFX_MAX_GMRS)) { + DRM_ERROR("Max number of DMA buffers per submission" + " exceeded.\n"); + ret = -EINVAL; + goto out_no_reloc; + } + + reloc->index = cur_validate_node; + if (unlikely(cur_validate_node == sw_context->cur_val_buf)) { + val_buf = &sw_context->val_bufs[cur_validate_node]; + val_buf->bo = ttm_bo_reference(bo); + val_buf->new_sync_obj_arg = (void *) dev_priv; + list_add_tail(&val_buf->head, &sw_context->validate_nodes); + ++sw_context->cur_val_buf; + } + + ret = vmw_user_surface_lookup(dev_priv, sw_context->tfile, + cmd->dma.host.sid, &srf); + if (ret) { + DRM_ERROR("could not find surface\n"); + goto out_no_reloc; + } + + vmw_kms_cursor_snoop(srf, sw_context->tfile, bo, header); + vmw_surface_unreference(&srf); + +out_no_reloc: + vmw_dmabuf_unreference(&vmw_bo); + return ret; +} + + +typedef int (*vmw_cmd_func) (struct vmw_private *, + struct vmw_sw_context *, + SVGA3dCmdHeader *); + +#define VMW_CMD_DEF(cmd, func) \ + [cmd - SVGA_3D_CMD_BASE] = func + +static vmw_cmd_func vmw_cmd_funcs[SVGA_3D_CMD_MAX] = { + VMW_CMD_DEF(SVGA_3D_CMD_SURFACE_DEFINE, &vmw_cmd_invalid), + VMW_CMD_DEF(SVGA_3D_CMD_SURFACE_DESTROY, &vmw_cmd_invalid), + VMW_CMD_DEF(SVGA_3D_CMD_SURFACE_COPY, &vmw_cmd_surface_copy_check), + VMW_CMD_DEF(SVGA_3D_CMD_SURFACE_STRETCHBLT, &vmw_cmd_stretch_blt_check), + VMW_CMD_DEF(SVGA_3D_CMD_SURFACE_DMA, &vmw_cmd_dma), + VMW_CMD_DEF(SVGA_3D_CMD_CONTEXT_DEFINE, &vmw_cmd_invalid), + VMW_CMD_DEF(SVGA_3D_CMD_CONTEXT_DESTROY, &vmw_cmd_invalid), + VMW_CMD_DEF(SVGA_3D_CMD_SETTRANSFORM, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SETZRANGE, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SETRENDERSTATE, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SETRENDERTARGET, + &vmw_cmd_set_render_target_check), + VMW_CMD_DEF(SVGA_3D_CMD_SETTEXTURESTATE, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SETMATERIAL, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SETLIGHTDATA, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SETLIGHTENABLED, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SETVIEWPORT, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SETCLIPPLANE, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_CLEAR, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_PRESENT, &vmw_cmd_present_check), + VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DEFINE, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DESTROY, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER, &vmw_cmd_cid_check), + 
VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_DRAW_PRIMITIVES, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_SETSCISSORRECT, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_BEGIN_QUERY, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_END_QUERY, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_WAIT_FOR_QUERY, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_PRESENT_READBACK, &vmw_cmd_ok), + VMW_CMD_DEF(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN, + &vmw_cmd_blt_surf_screen_check) +}; + +static int vmw_cmd_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + void *buf, uint32_t *size) +{ + uint32_t cmd_id; + SVGA3dCmdHeader *header = (SVGA3dCmdHeader *) buf; + int ret; + + cmd_id = ((uint32_t *)buf)[0]; + if (cmd_id == SVGA_CMD_UPDATE) { + *size = 5 << 2; + return 0; + } + + cmd_id = le32_to_cpu(header->id); + *size = le32_to_cpu(header->size) + sizeof(SVGA3dCmdHeader); + + cmd_id -= SVGA_3D_CMD_BASE; + if (unlikely(cmd_id >= SVGA_3D_CMD_MAX - SVGA_3D_CMD_BASE)) + goto out_err; + + ret = vmw_cmd_funcs[cmd_id](dev_priv, sw_context, header); + if (unlikely(ret != 0)) + goto out_err; + + return 0; +out_err: + DRM_ERROR("Illegal / Invalid SVGA3D command: %d\n", + cmd_id + SVGA_3D_CMD_BASE); + return -EINVAL; +} + +static int vmw_cmd_check_all(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + void *buf, uint32_t size) +{ + int32_t cur_size = size; + int ret; + + while (cur_size > 0) { + ret = vmw_cmd_check(dev_priv, sw_context, buf, &size); + if (unlikely(ret != 0)) + return ret; + buf = (void *)((unsigned long) buf + size); + cur_size -= size; + } + + if (unlikely(cur_size != 0)) { + DRM_ERROR("Command verifier out of sync.\n"); + return -EINVAL; + } + + return 0; +} + +static void vmw_free_relocations(struct vmw_sw_context *sw_context) +{ + sw_context->cur_reloc = 0; +} + +static void vmw_apply_relocations(struct vmw_sw_context *sw_context) +{ + uint32_t i; + struct vmw_relocation *reloc; + struct ttm_validate_buffer *validate; + struct ttm_buffer_object *bo; + + for (i = 0; i < sw_context->cur_reloc; ++i) { + reloc = &sw_context->relocs[i]; + validate = &sw_context->val_bufs[reloc->index]; + bo = validate->bo; + reloc->location->offset += bo->offset; + reloc->location->gmrId = vmw_dmabuf_gmr(bo); + } + vmw_free_relocations(sw_context); +} + +static void vmw_clear_validations(struct vmw_sw_context *sw_context) +{ + struct ttm_validate_buffer *entry, *next; + + list_for_each_entry_safe(entry, next, &sw_context->validate_nodes, + head) { + list_del(&entry->head); + vmw_dmabuf_validate_clear(entry->bo); + ttm_bo_unref(&entry->bo); + sw_context->cur_val_buf--; + } + BUG_ON(sw_context->cur_val_buf != 0); +} + +static int vmw_validate_single_buffer(struct vmw_private *dev_priv, + struct ttm_buffer_object *bo) +{ + int ret; + + if (vmw_dmabuf_gmr(bo) != SVGA_GMR_NULL) + return 0; + + ret = vmw_gmr_bind(dev_priv, bo); + if (likely(ret == 0 || ret == -ERESTART)) + return ret; + + + ret = ttm_bo_validate(bo, &vmw_vram_placement, true, false); + return ret; +} + + +static int vmw_validate_buffers(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context) +{ + struct ttm_validate_buffer *entry; + int ret; + + list_for_each_entry(entry, &sw_context->validate_nodes, head) { + ret = vmw_validate_single_buffer(dev_priv, entry->bo); + if (unlikely(ret != 0)) + return ret; + } + return 0; +} + +int vmw_execbuf_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + 
struct drm_vmw_execbuf_arg *arg = (struct drm_vmw_execbuf_arg *)data; + struct drm_vmw_fence_rep fence_rep; + struct drm_vmw_fence_rep __user *user_fence_rep; + int ret; + void *user_cmd; + void *cmd; + uint32_t sequence; + struct vmw_sw_context *sw_context = &dev_priv->ctx; + struct vmw_master *vmaster = vmw_master(file_priv->master); + + ret = ttm_read_lock(&vmaster->lock, true); + if (unlikely(ret != 0)) + return ret; + + ret = mutex_lock_interruptible(&dev_priv->cmdbuf_mutex); + if (unlikely(ret != 0)) { + ret = -ERESTART; + goto out_no_cmd_mutex; + } + + cmd = vmw_fifo_reserve(dev_priv, arg->command_size); + if (unlikely(cmd == NULL)) { + DRM_ERROR("Failed reserving fifo space for commands.\n"); + ret = -ENOMEM; + goto out_unlock; + } + + user_cmd = (void __user *)(unsigned long)arg->commands; + ret = copy_from_user(cmd, user_cmd, arg->command_size); + + if (unlikely(ret != 0)) { + DRM_ERROR("Failed copying commands.\n"); + goto out_commit; + } + + sw_context->tfile = vmw_fpriv(file_priv)->tfile; + sw_context->cid_valid = false; + sw_context->sid_valid = false; + sw_context->cur_reloc = 0; + sw_context->cur_val_buf = 0; + + INIT_LIST_HEAD(&sw_context->validate_nodes); + + ret = vmw_cmd_check_all(dev_priv, sw_context, cmd, arg->command_size); + if (unlikely(ret != 0)) + goto out_err; + ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes, + dev_priv->val_seq++); + if (unlikely(ret != 0)) + goto out_err; + + ret = vmw_validate_buffers(dev_priv, sw_context); + if (unlikely(ret != 0)) + goto out_err; + + vmw_apply_relocations(sw_context); + vmw_fifo_commit(dev_priv, arg->command_size); + + ret = vmw_fifo_send_fence(dev_priv, &sequence); + + ttm_eu_fence_buffer_objects(&sw_context->validate_nodes, + (void *)(unsigned long) sequence); + vmw_clear_validations(sw_context); + mutex_unlock(&dev_priv->cmdbuf_mutex); + + /* + * This error is harmless, because if fence submission fails, + * vmw_fifo_send_fence will sync. + */ + + if (ret != 0) + DRM_ERROR("Fence submission error. Syncing.\n"); + + fence_rep.error = ret; + fence_rep.fence_seq = (uint64_t) sequence; + + user_fence_rep = (struct drm_vmw_fence_rep __user *) + (unsigned long)arg->fence_rep; + + /* + * copy_to_user errors will be detected by user space not + * seeing fence_rep::error filled in. + */ + + ret = copy_to_user(user_fence_rep, &fence_rep, sizeof(fence_rep)); + + vmw_kms_cursor_post_execbuf(dev_priv); + ttm_read_unlock(&vmaster->lock); + return 0; +out_err: + vmw_free_relocations(sw_context); + ttm_eu_backoff_reservation(&sw_context->validate_nodes); + vmw_clear_validations(sw_context); +out_commit: + vmw_fifo_commit(dev_priv, 0); +out_unlock: + mutex_unlock(&dev_priv->cmdbuf_mutex); +out_no_cmd_mutex: + ttm_read_unlock(&vmaster->lock); + return ret; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c new file mode 100644 index 000000000000..641dde76ada1 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -0,0 +1,742 @@ +/************************************************************************** + * + * Copyright © 2007 David Airlie + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "drmP.h" +#include "vmwgfx_drv.h" + +#include "ttm/ttm_placement.h" + +#define VMW_DIRTY_DELAY (HZ / 30) + +struct vmw_fb_par { + struct vmw_private *vmw_priv; + + void *vmalloc; + + struct vmw_dma_buffer *vmw_bo; + struct ttm_bo_kmap_obj map; + + u32 pseudo_palette[17]; + + unsigned depth; + unsigned bpp; + + unsigned max_width; + unsigned max_height; + + void *bo_ptr; + unsigned bo_size; + bool bo_iowrite; + + struct { + spinlock_t lock; + bool active; + unsigned x1; + unsigned y1; + unsigned x2; + unsigned y2; + } dirty; +}; + +static int vmw_fb_setcolreg(unsigned regno, unsigned red, unsigned green, + unsigned blue, unsigned transp, + struct fb_info *info) +{ + struct vmw_fb_par *par = info->par; + u32 *pal = par->pseudo_palette; + + if (regno > 15) { + DRM_ERROR("Bad regno %u.\n", regno); + return 1; + } + + switch (par->depth) { + case 24: + case 32: + pal[regno] = ((red & 0xff00) << 8) | + (green & 0xff00) | + ((blue & 0xff00) >> 8); + break; + default: + DRM_ERROR("Bad depth %u, bpp %u.\n", par->depth, par->bpp); + return 1; + } + + return 0; +} + +static int vmw_fb_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + int depth = var->bits_per_pixel; + struct vmw_fb_par *par = info->par; + struct vmw_private *vmw_priv = par->vmw_priv; + + switch (var->bits_per_pixel) { + case 32: + depth = (var->transp.length > 0) ? 
32 : 24; + break; + default: + DRM_ERROR("Bad bpp %u.\n", var->bits_per_pixel); + return -EINVAL; + } + + switch (depth) { + case 24: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 32: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 8; + var->transp.offset = 24; + break; + default: + DRM_ERROR("Bad depth %u.\n", depth); + return -EINVAL; + } + + /* without multimon its hard to resize */ + if (!(vmw_priv->capabilities & SVGA_CAP_MULTIMON) && + (var->xres != par->max_width || + var->yres != par->max_height)) { + DRM_ERROR("Tried to resize, but we don't have multimon\n"); + return -EINVAL; + } + + if (var->xres > par->max_width || + var->yres > par->max_height) { + DRM_ERROR("Requested geom can not fit in framebuffer\n"); + return -EINVAL; + } + + return 0; +} + +static int vmw_fb_set_par(struct fb_info *info) +{ + struct vmw_fb_par *par = info->par; + struct vmw_private *vmw_priv = par->vmw_priv; + + if (vmw_priv->capabilities & SVGA_CAP_MULTIMON) { + vmw_write(vmw_priv, SVGA_REG_NUM_GUEST_DISPLAYS, 1); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_ID, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_IS_PRIMARY, true); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_POSITION_X, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_POSITION_Y, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_WIDTH, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_HEIGHT, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_ID, SVGA_ID_INVALID); + + vmw_write(vmw_priv, SVGA_REG_ENABLE, 1); + vmw_write(vmw_priv, SVGA_REG_WIDTH, par->max_width); + vmw_write(vmw_priv, SVGA_REG_HEIGHT, par->max_height); + vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, par->bpp); + vmw_write(vmw_priv, SVGA_REG_DEPTH, par->depth); + vmw_write(vmw_priv, SVGA_REG_RED_MASK, 0x00ff0000); + vmw_write(vmw_priv, SVGA_REG_GREEN_MASK, 0x0000ff00); + vmw_write(vmw_priv, SVGA_REG_BLUE_MASK, 0x000000ff); + + /* TODO check if pitch and offset changes */ + + vmw_write(vmw_priv, SVGA_REG_NUM_GUEST_DISPLAYS, 1); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_ID, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_IS_PRIMARY, true); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_POSITION_X, info->var.xoffset); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_POSITION_Y, info->var.yoffset); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_WIDTH, info->var.xres); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_HEIGHT, info->var.yres); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_ID, SVGA_ID_INVALID); + } else { + vmw_write(vmw_priv, SVGA_REG_WIDTH, info->var.xres); + vmw_write(vmw_priv, SVGA_REG_HEIGHT, info->var.yres); + + /* TODO check if pitch and offset changes */ + } + + return 0; +} + +static int vmw_fb_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + return 0; +} + +static int vmw_fb_blank(int blank, struct fb_info *info) +{ + return 0; +} + +/* + * Dirty code + */ + +static void vmw_fb_dirty_flush(struct vmw_fb_par *par) +{ + struct vmw_private *vmw_priv = par->vmw_priv; + struct fb_info *info = vmw_priv->fb_info; + int stride = (info->fix.line_length / 4); + int *src = (int *)info->screen_base; + __le32 __iomem *vram_mem = par->bo_ptr; + unsigned long flags; + unsigned x, y, w, h; + int i, k; + struct { + uint32_t header; + SVGAFifoCmdUpdate body; + } *cmd; + + spin_lock_irqsave(&par->dirty.lock, flags); + if (!par->dirty.active) { + 
spin_unlock_irqrestore(&par->dirty.lock, flags); + return; + } + x = par->dirty.x1; + y = par->dirty.y1; + w = min(par->dirty.x2, info->var.xres) - x; + h = min(par->dirty.y2, info->var.yres) - y; + par->dirty.x1 = par->dirty.x2 = 0; + par->dirty.y1 = par->dirty.y2 = 0; + spin_unlock_irqrestore(&par->dirty.lock, flags); + + for (i = y * stride; i < info->fix.smem_len / 4; i += stride) { + for (k = i+x; k < i+x+w && k < info->fix.smem_len / 4; k++) + iowrite32(src[k], vram_mem + k); + } + +#if 0 + DRM_INFO("%s, (%u, %u) (%ux%u)\n", __func__, x, y, w, h); +#endif + + cmd = vmw_fifo_reserve(vmw_priv, sizeof(*cmd)); + if (unlikely(cmd == NULL)) { + DRM_ERROR("Fifo reserve failed.\n"); + return; + } + + cmd->header = cpu_to_le32(SVGA_CMD_UPDATE); + cmd->body.x = cpu_to_le32(x); + cmd->body.y = cpu_to_le32(y); + cmd->body.width = cpu_to_le32(w); + cmd->body.height = cpu_to_le32(h); + vmw_fifo_commit(vmw_priv, sizeof(*cmd)); +} + +static void vmw_fb_dirty_mark(struct vmw_fb_par *par, + unsigned x1, unsigned y1, + unsigned width, unsigned height) +{ + struct fb_info *info = par->vmw_priv->fb_info; + unsigned long flags; + unsigned x2 = x1 + width; + unsigned y2 = y1 + height; + + spin_lock_irqsave(&par->dirty.lock, flags); + if (par->dirty.x1 == par->dirty.x2) { + par->dirty.x1 = x1; + par->dirty.y1 = y1; + par->dirty.x2 = x2; + par->dirty.y2 = y2; + /* if we are active start the dirty work + * we share the work with the defio system */ + if (par->dirty.active) + schedule_delayed_work(&info->deferred_work, VMW_DIRTY_DELAY); + } else { + if (x1 < par->dirty.x1) + par->dirty.x1 = x1; + if (y1 < par->dirty.y1) + par->dirty.y1 = y1; + if (x2 > par->dirty.x2) + par->dirty.x2 = x2; + if (y2 > par->dirty.y2) + par->dirty.y2 = y2; + } + spin_unlock_irqrestore(&par->dirty.lock, flags); +} + +static void vmw_deferred_io(struct fb_info *info, + struct list_head *pagelist) +{ + struct vmw_fb_par *par = info->par; + unsigned long start, end, min, max; + unsigned long flags; + struct page *page; + int y1, y2; + + min = ULONG_MAX; + max = 0; + list_for_each_entry(page, pagelist, lru) { + start = page->index << PAGE_SHIFT; + end = start + PAGE_SIZE - 1; + min = min(min, start); + max = max(max, end); + } + + if (min < max) { + y1 = min / info->fix.line_length; + y2 = (max / info->fix.line_length) + 1; + + spin_lock_irqsave(&par->dirty.lock, flags); + par->dirty.x1 = 0; + par->dirty.y1 = y1; + par->dirty.x2 = info->var.xres; + par->dirty.y2 = y2; + spin_unlock_irqrestore(&par->dirty.lock, flags); + } + + vmw_fb_dirty_flush(par); +}; + +struct fb_deferred_io vmw_defio = { + .delay = VMW_DIRTY_DELAY, + .deferred_io = vmw_deferred_io, +}; + +/* + * Draw code + */ + +static void vmw_fb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) +{ + cfb_fillrect(info, rect); + vmw_fb_dirty_mark(info->par, rect->dx, rect->dy, + rect->width, rect->height); +} + +static void vmw_fb_copyarea(struct fb_info *info, const struct fb_copyarea *region) +{ + cfb_copyarea(info, region); + vmw_fb_dirty_mark(info->par, region->dx, region->dy, + region->width, region->height); +} + +static void vmw_fb_imageblit(struct fb_info *info, const struct fb_image *image) +{ + cfb_imageblit(info, image); + vmw_fb_dirty_mark(info->par, image->dx, image->dy, + image->width, image->height); +} + +/* + * Bring up code + */ + +static struct fb_ops vmw_fb_ops = { + .owner = THIS_MODULE, + .fb_check_var = vmw_fb_check_var, + .fb_set_par = vmw_fb_set_par, + .fb_setcolreg = vmw_fb_setcolreg, + .fb_fillrect = vmw_fb_fillrect, + .fb_copyarea = 
vmw_fb_copyarea, + .fb_imageblit = vmw_fb_imageblit, + .fb_pan_display = vmw_fb_pan_display, + .fb_blank = vmw_fb_blank, +}; + +static int vmw_fb_create_bo(struct vmw_private *vmw_priv, + size_t size, struct vmw_dma_buffer **out) +{ + struct vmw_dma_buffer *vmw_bo; + struct ttm_placement ne_placement = vmw_vram_ne_placement; + int ret; + + ne_placement.lpfn = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + + /* interuptable? */ + ret = ttm_write_lock(&vmw_priv->fbdev_master.lock, false); + if (unlikely(ret != 0)) + return ret; + + vmw_bo = kmalloc(sizeof(*vmw_bo), GFP_KERNEL); + if (!vmw_bo) + goto err_unlock; + + ret = vmw_dmabuf_init(vmw_priv, vmw_bo, size, + &ne_placement, + false, + &vmw_dmabuf_bo_free); + if (unlikely(ret != 0)) + goto err_unlock; /* init frees the buffer on failure */ + + *out = vmw_bo; + + ttm_write_unlock(&vmw_priv->fbdev_master.lock); + + return 0; + +err_unlock: + ttm_write_unlock(&vmw_priv->fbdev_master.lock); + return ret; +} + +int vmw_fb_init(struct vmw_private *vmw_priv) +{ + struct device *device = &vmw_priv->dev->pdev->dev; + struct vmw_fb_par *par; + struct fb_info *info; + unsigned initial_width, initial_height; + unsigned fb_width, fb_height; + unsigned fb_bbp, fb_depth, fb_offset, fb_pitch, fb_size; + int ret; + + initial_width = 800; + initial_height = 600; + + fb_bbp = 32; + fb_depth = 24; + + if (vmw_priv->capabilities & SVGA_CAP_MULTIMON) { + fb_width = min(vmw_priv->fb_max_width, (unsigned)2048); + fb_height = min(vmw_priv->fb_max_height, (unsigned)2048); + } else { + fb_width = min(vmw_priv->fb_max_width, initial_width); + fb_height = min(vmw_priv->fb_max_height, initial_height); + } + + initial_width = min(fb_width, initial_width); + initial_height = min(fb_height, initial_height); + + vmw_write(vmw_priv, SVGA_REG_WIDTH, fb_width); + vmw_write(vmw_priv, SVGA_REG_HEIGHT, fb_height); + vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, fb_bbp); + vmw_write(vmw_priv, SVGA_REG_DEPTH, fb_depth); + vmw_write(vmw_priv, SVGA_REG_RED_MASK, 0x00ff0000); + vmw_write(vmw_priv, SVGA_REG_GREEN_MASK, 0x0000ff00); + vmw_write(vmw_priv, SVGA_REG_BLUE_MASK, 0x000000ff); + + fb_size = vmw_read(vmw_priv, SVGA_REG_FB_SIZE); + fb_offset = vmw_read(vmw_priv, SVGA_REG_FB_OFFSET); + fb_pitch = vmw_read(vmw_priv, SVGA_REG_BYTES_PER_LINE); + + DRM_DEBUG("width %u\n", vmw_read(vmw_priv, SVGA_REG_MAX_WIDTH)); + DRM_DEBUG("height %u\n", vmw_read(vmw_priv, SVGA_REG_MAX_HEIGHT)); + DRM_DEBUG("width %u\n", vmw_read(vmw_priv, SVGA_REG_WIDTH)); + DRM_DEBUG("height %u\n", vmw_read(vmw_priv, SVGA_REG_HEIGHT)); + DRM_DEBUG("bpp %u\n", vmw_read(vmw_priv, SVGA_REG_BITS_PER_PIXEL)); + DRM_DEBUG("depth %u\n", vmw_read(vmw_priv, SVGA_REG_DEPTH)); + DRM_DEBUG("bpl %u\n", vmw_read(vmw_priv, SVGA_REG_BYTES_PER_LINE)); + DRM_DEBUG("r mask %08x\n", vmw_read(vmw_priv, SVGA_REG_RED_MASK)); + DRM_DEBUG("g mask %08x\n", vmw_read(vmw_priv, SVGA_REG_GREEN_MASK)); + DRM_DEBUG("b mask %08x\n", vmw_read(vmw_priv, SVGA_REG_BLUE_MASK)); + DRM_DEBUG("fb_offset 0x%08x\n", fb_offset); + DRM_DEBUG("fb_pitch %u\n", fb_pitch); + DRM_DEBUG("fb_size %u kiB\n", fb_size / 1024); + + info = framebuffer_alloc(sizeof(*par), device); + if (!info) + return -ENOMEM; + + /* + * Par + */ + vmw_priv->fb_info = info; + par = info->par; + par->vmw_priv = vmw_priv; + par->depth = fb_depth; + par->bpp = fb_bbp; + par->vmalloc = NULL; + par->max_width = fb_width; + par->max_height = fb_height; + + /* + * Create buffers and alloc memory + */ + par->vmalloc = vmalloc(fb_size); + if (unlikely(par->vmalloc == NULL)) { + ret = -ENOMEM; + goto 
err_free; + } + + ret = vmw_fb_create_bo(vmw_priv, fb_size, &par->vmw_bo); + if (unlikely(ret != 0)) + goto err_free; + + ret = ttm_bo_kmap(&par->vmw_bo->base, + 0, + par->vmw_bo->base.num_pages, + &par->map); + if (unlikely(ret != 0)) + goto err_unref; + par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite); + par->bo_size = fb_size; + + /* + * Fixed and var + */ + strcpy(info->fix.id, "svgadrmfb"); + info->fix.type = FB_TYPE_PACKED_PIXELS; + info->fix.visual = FB_VISUAL_TRUECOLOR; + info->fix.type_aux = 0; + info->fix.xpanstep = 1; /* doing it in hw */ + info->fix.ypanstep = 1; /* doing it in hw */ + info->fix.ywrapstep = 0; + info->fix.accel = FB_ACCEL_NONE; + info->fix.line_length = fb_pitch; + + info->fix.smem_start = 0; + info->fix.smem_len = fb_size; + + info->fix.mmio_start = 0; + info->fix.mmio_len = 0; + + info->pseudo_palette = par->pseudo_palette; + info->screen_base = par->vmalloc; + info->screen_size = fb_size; + + info->flags = FBINFO_DEFAULT; + info->fbops = &vmw_fb_ops; + + /* 24 depth per default */ + info->var.red.offset = 16; + info->var.green.offset = 8; + info->var.blue.offset = 0; + info->var.red.length = 8; + info->var.green.length = 8; + info->var.blue.length = 8; + info->var.transp.offset = 0; + info->var.transp.length = 0; + + info->var.xres_virtual = fb_width; + info->var.yres_virtual = fb_height; + info->var.bits_per_pixel = par->bpp; + info->var.xoffset = 0; + info->var.yoffset = 0; + info->var.activate = FB_ACTIVATE_NOW; + info->var.height = -1; + info->var.width = -1; + + info->var.xres = initial_width; + info->var.yres = initial_height; + +#if 0 + info->pixmap.size = 64*1024; + info->pixmap.buf_align = 8; + info->pixmap.access_align = 32; + info->pixmap.flags = FB_PIXMAP_SYSTEM; + info->pixmap.scan_align = 1; +#else + info->pixmap.size = 0; + info->pixmap.buf_align = 8; + info->pixmap.access_align = 32; + info->pixmap.flags = FB_PIXMAP_SYSTEM; + info->pixmap.scan_align = 1; +#endif + + /* + * Dirty & Deferred IO + */ + par->dirty.x1 = par->dirty.x2 = 0; + par->dirty.y1 = par->dirty.y1 = 0; + par->dirty.active = true; + spin_lock_init(&par->dirty.lock); + info->fbdefio = &vmw_defio; + fb_deferred_io_init(info); + + ret = register_framebuffer(info); + if (unlikely(ret != 0)) + goto err_defio; + + return 0; + +err_defio: + fb_deferred_io_cleanup(info); + ttm_bo_kunmap(&par->map); +err_unref: + ttm_bo_unref((struct ttm_buffer_object **)&par->vmw_bo); +err_free: + vfree(par->vmalloc); + framebuffer_release(info); + vmw_priv->fb_info = NULL; + + return ret; +} + +int vmw_fb_close(struct vmw_private *vmw_priv) +{ + struct fb_info *info; + struct vmw_fb_par *par; + struct ttm_buffer_object *bo; + + if (!vmw_priv->fb_info) + return 0; + + info = vmw_priv->fb_info; + par = info->par; + bo = &par->vmw_bo->base; + par->vmw_bo = NULL; + + /* ??? 
order */ + fb_deferred_io_cleanup(info); + unregister_framebuffer(info); + + ttm_bo_kunmap(&par->map); + ttm_bo_unref(&bo); + + vfree(par->vmalloc); + framebuffer_release(info); + + return 0; +} + +int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv, + struct vmw_dma_buffer *vmw_bo) +{ + struct ttm_buffer_object *bo = &vmw_bo->base; + int ret = 0; + + ret = ttm_bo_reserve(bo, false, false, false, 0); + if (unlikely(ret != 0)) + return ret; + + ret = ttm_bo_validate(bo, &vmw_sys_placement, false, false); + ttm_bo_unreserve(bo); + + return ret; +} + +int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv, + struct vmw_dma_buffer *vmw_bo) +{ + struct ttm_buffer_object *bo = &vmw_bo->base; + struct ttm_placement ne_placement = vmw_vram_ne_placement; + int ret = 0; + + ne_placement.lpfn = bo->num_pages; + + /* interuptable? */ + ret = ttm_write_lock(&vmw_priv->active_master->lock, false); + if (unlikely(ret != 0)) + return ret; + + ret = ttm_bo_reserve(bo, false, false, false, 0); + if (unlikely(ret != 0)) + goto err_unlock; + + if (vmw_bo->gmr_bound) { + vmw_gmr_unbind(vmw_priv, vmw_bo->gmr_id); + spin_lock(&bo->glob->lru_lock); + ida_remove(&vmw_priv->gmr_ida, vmw_bo->gmr_id); + spin_unlock(&bo->glob->lru_lock); + vmw_bo->gmr_bound = NULL; + } + + ret = ttm_bo_validate(bo, &ne_placement, false, false); + ttm_bo_unreserve(bo); +err_unlock: + ttm_write_unlock(&vmw_priv->active_master->lock); + + return ret; +} + +int vmw_fb_off(struct vmw_private *vmw_priv) +{ + struct fb_info *info; + struct vmw_fb_par *par; + unsigned long flags; + + if (!vmw_priv->fb_info) + return -EINVAL; + + info = vmw_priv->fb_info; + par = info->par; + + spin_lock_irqsave(&par->dirty.lock, flags); + par->dirty.active = false; + spin_unlock_irqrestore(&par->dirty.lock, flags); + + flush_scheduled_work(); + + par->bo_ptr = NULL; + ttm_bo_kunmap(&par->map); + + vmw_dmabuf_from_vram(vmw_priv, par->vmw_bo); + + return 0; +} + +int vmw_fb_on(struct vmw_private *vmw_priv) +{ + struct fb_info *info; + struct vmw_fb_par *par; + unsigned long flags; + bool dummy; + int ret; + + if (!vmw_priv->fb_info) + return -EINVAL; + + info = vmw_priv->fb_info; + par = info->par; + + /* we are already active */ + if (par->bo_ptr != NULL) + return 0; + + /* Make sure that all overlays are stoped when we take over */ + vmw_overlay_stop_all(vmw_priv); + + ret = vmw_dmabuf_to_start_of_vram(vmw_priv, par->vmw_bo); + if (unlikely(ret != 0)) { + DRM_ERROR("could not move buffer to start of VRAM\n"); + goto err_no_buffer; + } + + ret = ttm_bo_kmap(&par->vmw_bo->base, + 0, + par->vmw_bo->base.num_pages, + &par->map); + BUG_ON(ret != 0); + par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &dummy); + + spin_lock_irqsave(&par->dirty.lock, flags); + par->dirty.active = true; + spin_unlock_irqrestore(&par->dirty.lock, flags); + +err_no_buffer: + vmw_fb_set_par(info); + + vmw_fb_dirty_mark(par, 0, 0, info->var.xres, info->var.yres); + + /* If there already was stuff dirty we wont + * schedule a new work, so lets do it now */ + schedule_delayed_work(&info->deferred_work, 0); + + return 0; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c new file mode 100644 index 000000000000..76b0693e2458 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c @@ -0,0 +1,521 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "vmwgfx_drv.h" +#include "drmP.h" +#include "ttm/ttm_placement.h" + +int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) +{ + __le32 __iomem *fifo_mem = dev_priv->mmio_virt; + uint32_t max; + uint32_t min; + uint32_t dummy; + int ret; + + fifo->static_buffer_size = VMWGFX_FIFO_STATIC_SIZE; + fifo->static_buffer = vmalloc(fifo->static_buffer_size); + if (unlikely(fifo->static_buffer == NULL)) + return -ENOMEM; + + fifo->last_buffer_size = VMWGFX_FIFO_STATIC_SIZE; + fifo->last_data_size = 0; + fifo->last_buffer_add = false; + fifo->last_buffer = vmalloc(fifo->last_buffer_size); + if (unlikely(fifo->last_buffer == NULL)) { + ret = -ENOMEM; + goto out_err; + } + + fifo->dynamic_buffer = NULL; + fifo->reserved_size = 0; + fifo->using_bounce_buffer = false; + + init_rwsem(&fifo->rwsem); + + /* + * Allow mapping the first page read-only to user-space. 
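+ * (See vmw_fifo_mmap() and vmw_fifo_vm_fault() at the end of this file
+ * for how that mapping is set up.)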
+ */ + + DRM_INFO("width %d\n", vmw_read(dev_priv, SVGA_REG_WIDTH)); + DRM_INFO("height %d\n", vmw_read(dev_priv, SVGA_REG_HEIGHT)); + DRM_INFO("bpp %d\n", vmw_read(dev_priv, SVGA_REG_BITS_PER_PIXEL)); + + mutex_lock(&dev_priv->hw_mutex); + dev_priv->enable_state = vmw_read(dev_priv, SVGA_REG_ENABLE); + dev_priv->config_done_state = vmw_read(dev_priv, SVGA_REG_CONFIG_DONE); + vmw_write(dev_priv, SVGA_REG_ENABLE, 1); + + min = 4; + if (dev_priv->capabilities & SVGA_CAP_EXTENDED_FIFO) + min = vmw_read(dev_priv, SVGA_REG_MEM_REGS); + min <<= 2; + + if (min < PAGE_SIZE) + min = PAGE_SIZE; + + iowrite32(min, fifo_mem + SVGA_FIFO_MIN); + iowrite32(dev_priv->mmio_size, fifo_mem + SVGA_FIFO_MAX); + wmb(); + iowrite32(min, fifo_mem + SVGA_FIFO_NEXT_CMD); + iowrite32(min, fifo_mem + SVGA_FIFO_STOP); + iowrite32(0, fifo_mem + SVGA_FIFO_BUSY); + mb(); + + vmw_write(dev_priv, SVGA_REG_CONFIG_DONE, 1); + mutex_unlock(&dev_priv->hw_mutex); + + max = ioread32(fifo_mem + SVGA_FIFO_MAX); + min = ioread32(fifo_mem + SVGA_FIFO_MIN); + fifo->capabilities = ioread32(fifo_mem + SVGA_FIFO_CAPABILITIES); + + DRM_INFO("Fifo max 0x%08x min 0x%08x cap 0x%08x\n", + (unsigned int) max, + (unsigned int) min, + (unsigned int) fifo->capabilities); + + dev_priv->fence_seq = (uint32_t) -100; + dev_priv->last_read_sequence = (uint32_t) -100; + iowrite32(dev_priv->last_read_sequence, fifo_mem + SVGA_FIFO_FENCE); + + return vmw_fifo_send_fence(dev_priv, &dummy); +out_err: + vfree(fifo->static_buffer); + fifo->static_buffer = NULL; + return ret; +} + +void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason) +{ + __le32 __iomem *fifo_mem = dev_priv->mmio_virt; + + mutex_lock(&dev_priv->hw_mutex); + + if (unlikely(ioread32(fifo_mem + SVGA_FIFO_BUSY) == 0)) { + iowrite32(1, fifo_mem + SVGA_FIFO_BUSY); + vmw_write(dev_priv, SVGA_REG_SYNC, reason); + } + + mutex_unlock(&dev_priv->hw_mutex); +} + +void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) +{ + __le32 __iomem *fifo_mem = dev_priv->mmio_virt; + + mutex_lock(&dev_priv->hw_mutex); + + while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0) + vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC); + + dev_priv->last_read_sequence = ioread32(fifo_mem + SVGA_FIFO_FENCE); + + vmw_write(dev_priv, SVGA_REG_CONFIG_DONE, + dev_priv->config_done_state); + vmw_write(dev_priv, SVGA_REG_ENABLE, + dev_priv->enable_state); + + mutex_unlock(&dev_priv->hw_mutex); + + if (likely(fifo->last_buffer != NULL)) { + vfree(fifo->last_buffer); + fifo->last_buffer = NULL; + } + + if (likely(fifo->static_buffer != NULL)) { + vfree(fifo->static_buffer); + fifo->static_buffer = NULL; + } + + if (likely(fifo->dynamic_buffer != NULL)) { + vfree(fifo->dynamic_buffer); + fifo->dynamic_buffer = NULL; + } +} + +static bool vmw_fifo_is_full(struct vmw_private *dev_priv, uint32_t bytes) +{ + __le32 __iomem *fifo_mem = dev_priv->mmio_virt; + uint32_t max = ioread32(fifo_mem + SVGA_FIFO_MAX); + uint32_t next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD); + uint32_t min = ioread32(fifo_mem + SVGA_FIFO_MIN); + uint32_t stop = ioread32(fifo_mem + SVGA_FIFO_STOP); + + return ((max - next_cmd) + (stop - min) <= bytes); +} + +static int vmw_fifo_wait_noirq(struct vmw_private *dev_priv, + uint32_t bytes, bool interruptible, + unsigned long timeout) +{ + int ret = 0; + unsigned long end_jiffies = jiffies + timeout; + DEFINE_WAIT(__wait); + + DRM_INFO("Fifo wait noirq.\n"); + + for (;;) { + prepare_to_wait(&dev_priv->fifo_queue, &__wait, + (interruptible) ? 
+ TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); + if (!vmw_fifo_is_full(dev_priv, bytes)) + break; + if (time_after_eq(jiffies, end_jiffies)) { + ret = -EBUSY; + DRM_ERROR("SVGA device lockup.\n"); + break; + } + schedule_timeout(1); + if (interruptible && signal_pending(current)) { + ret = -ERESTART; + break; + } + } + finish_wait(&dev_priv->fifo_queue, &__wait); + wake_up_all(&dev_priv->fifo_queue); + DRM_INFO("Fifo noirq exit.\n"); + return ret; +} + +static int vmw_fifo_wait(struct vmw_private *dev_priv, + uint32_t bytes, bool interruptible, + unsigned long timeout) +{ + long ret = 1L; + unsigned long irq_flags; + + if (likely(!vmw_fifo_is_full(dev_priv, bytes))) + return 0; + + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL); + if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK)) + return vmw_fifo_wait_noirq(dev_priv, bytes, + interruptible, timeout); + + mutex_lock(&dev_priv->hw_mutex); + if (atomic_add_return(1, &dev_priv->fifo_queue_waiters) > 0) { + spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); + outl(SVGA_IRQFLAG_FIFO_PROGRESS, + dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); + vmw_write(dev_priv, SVGA_REG_IRQMASK, + vmw_read(dev_priv, SVGA_REG_IRQMASK) | + SVGA_IRQFLAG_FIFO_PROGRESS); + spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); + } + mutex_unlock(&dev_priv->hw_mutex); + + if (interruptible) + ret = wait_event_interruptible_timeout + (dev_priv->fifo_queue, + !vmw_fifo_is_full(dev_priv, bytes), timeout); + else + ret = wait_event_timeout + (dev_priv->fifo_queue, + !vmw_fifo_is_full(dev_priv, bytes), timeout); + + if (unlikely(ret == -ERESTARTSYS)) + ret = -ERESTART; + else if (unlikely(ret == 0)) + ret = -EBUSY; + else if (likely(ret > 0)) + ret = 0; + + mutex_lock(&dev_priv->hw_mutex); + if (atomic_dec_and_test(&dev_priv->fifo_queue_waiters)) { + spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); + vmw_write(dev_priv, SVGA_REG_IRQMASK, + vmw_read(dev_priv, SVGA_REG_IRQMASK) & + ~SVGA_IRQFLAG_FIFO_PROGRESS); + spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); + } + mutex_unlock(&dev_priv->hw_mutex); + + return ret; +} + +void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes) +{ + struct vmw_fifo_state *fifo_state = &dev_priv->fifo; + __le32 __iomem *fifo_mem = dev_priv->mmio_virt; + uint32_t max; + uint32_t min; + uint32_t next_cmd; + uint32_t reserveable = fifo_state->capabilities & SVGA_FIFO_CAP_RESERVE; + int ret; + + down_write(&fifo_state->rwsem); + max = ioread32(fifo_mem + SVGA_FIFO_MAX); + min = ioread32(fifo_mem + SVGA_FIFO_MIN); + next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD); + + if (unlikely(bytes >= (max - min))) + goto out_err; + + BUG_ON(fifo_state->reserved_size != 0); + BUG_ON(fifo_state->dynamic_buffer != NULL); + + fifo_state->reserved_size = bytes; + + while (1) { + uint32_t stop = ioread32(fifo_mem + SVGA_FIFO_STOP); + bool need_bounce = false; + bool reserve_in_place = false; + + if (next_cmd >= stop) { + if (likely((next_cmd + bytes < max || + (next_cmd + bytes == max && stop > min)))) + reserve_in_place = true; + + else if (vmw_fifo_is_full(dev_priv, bytes)) { + ret = vmw_fifo_wait(dev_priv, bytes, + false, 3 * HZ); + if (unlikely(ret != 0)) + goto out_err; + } else + need_bounce = true; + + } else { + + if (likely((next_cmd + bytes < stop))) + reserve_in_place = true; + else { + ret = vmw_fifo_wait(dev_priv, bytes, + false, 3 * HZ); + if (unlikely(ret != 0)) + goto out_err; + } + } + + if (reserve_in_place) { + if (reserveable || bytes <= sizeof(uint32_t)) { + fifo_state->using_bounce_buffer = false; + + if 
(reserveable) + iowrite32(bytes, fifo_mem + + SVGA_FIFO_RESERVED); + return fifo_mem + (next_cmd >> 2); + } else { + need_bounce = true; + } + } + + if (need_bounce) { + fifo_state->using_bounce_buffer = true; + if (bytes < fifo_state->static_buffer_size) + return fifo_state->static_buffer; + else { + fifo_state->dynamic_buffer = vmalloc(bytes); + return fifo_state->dynamic_buffer; + } + } + } +out_err: + fifo_state->reserved_size = 0; + up_write(&fifo_state->rwsem); + return NULL; +} + +static void vmw_fifo_res_copy(struct vmw_fifo_state *fifo_state, + __le32 __iomem *fifo_mem, + uint32_t next_cmd, + uint32_t max, uint32_t min, uint32_t bytes) +{ + uint32_t chunk_size = max - next_cmd; + uint32_t rest; + uint32_t *buffer = (fifo_state->dynamic_buffer != NULL) ? + fifo_state->dynamic_buffer : fifo_state->static_buffer; + + if (bytes < chunk_size) + chunk_size = bytes; + + iowrite32(bytes, fifo_mem + SVGA_FIFO_RESERVED); + mb(); + memcpy_toio(fifo_mem + (next_cmd >> 2), buffer, chunk_size); + rest = bytes - chunk_size; + if (rest) + memcpy_toio(fifo_mem + (min >> 2), buffer + (chunk_size >> 2), + rest); +} + +static void vmw_fifo_slow_copy(struct vmw_fifo_state *fifo_state, + __le32 __iomem *fifo_mem, + uint32_t next_cmd, + uint32_t max, uint32_t min, uint32_t bytes) +{ + uint32_t *buffer = (fifo_state->dynamic_buffer != NULL) ? + fifo_state->dynamic_buffer : fifo_state->static_buffer; + + while (bytes > 0) { + iowrite32(*buffer++, fifo_mem + (next_cmd >> 2)); + next_cmd += sizeof(uint32_t); + if (unlikely(next_cmd == max)) + next_cmd = min; + mb(); + iowrite32(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD); + mb(); + bytes -= sizeof(uint32_t); + } +} + +void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) +{ + struct vmw_fifo_state *fifo_state = &dev_priv->fifo; + __le32 __iomem *fifo_mem = dev_priv->mmio_virt; + uint32_t next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD); + uint32_t max = ioread32(fifo_mem + SVGA_FIFO_MAX); + uint32_t min = ioread32(fifo_mem + SVGA_FIFO_MIN); + bool reserveable = fifo_state->capabilities & SVGA_FIFO_CAP_RESERVE; + + BUG_ON((bytes & 3) != 0); + BUG_ON(bytes > fifo_state->reserved_size); + + fifo_state->reserved_size = 0; + + if (fifo_state->using_bounce_buffer) { + if (reserveable) + vmw_fifo_res_copy(fifo_state, fifo_mem, + next_cmd, max, min, bytes); + else + vmw_fifo_slow_copy(fifo_state, fifo_mem, + next_cmd, max, min, bytes); + + if (fifo_state->dynamic_buffer) { + vfree(fifo_state->dynamic_buffer); + fifo_state->dynamic_buffer = NULL; + } + + } + + if (fifo_state->using_bounce_buffer || reserveable) { + next_cmd += bytes; + if (next_cmd >= max) + next_cmd -= max - min; + mb(); + iowrite32(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD); + } + + if (reserveable) + iowrite32(0, fifo_mem + SVGA_FIFO_RESERVED); + mb(); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + up_write(&fifo_state->rwsem); +} + +int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence) +{ + struct vmw_fifo_state *fifo_state = &dev_priv->fifo; + struct svga_fifo_cmd_fence *cmd_fence; + void *fm; + int ret = 0; + uint32_t bytes = sizeof(__le32) + sizeof(*cmd_fence); + + fm = vmw_fifo_reserve(dev_priv, bytes); + if (unlikely(fm == NULL)) { + down_write(&fifo_state->rwsem); + *sequence = dev_priv->fence_seq; + up_write(&fifo_state->rwsem); + ret = -ENOMEM; + (void)vmw_fallback_wait(dev_priv, false, true, *sequence, + false, 3*HZ); + goto out_err; + } + + do { + *sequence = dev_priv->fence_seq++; + } while (*sequence == 0); + + if (!(fifo_state->capabilities & 
SVGA_FIFO_CAP_FENCE)) { + + /* + * Don't request hardware to send a fence. The + * waiting code in vmwgfx_irq.c will emulate this. + */ + + vmw_fifo_commit(dev_priv, 0); + return 0; + } + + *(__le32 *) fm = cpu_to_le32(SVGA_CMD_FENCE); + cmd_fence = (struct svga_fifo_cmd_fence *) + ((unsigned long)fm + sizeof(__le32)); + + iowrite32(*sequence, &cmd_fence->fence); + fifo_state->last_buffer_add = true; + vmw_fifo_commit(dev_priv, bytes); + fifo_state->last_buffer_add = false; + +out_err: + return ret; +} + +/** + * Map the first page of the FIFO read-only to user-space. + */ + +static int vmw_fifo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + int ret; + unsigned long address = (unsigned long)vmf->virtual_address; + + if (address != vma->vm_start) + return VM_FAULT_SIGBUS; + + ret = vm_insert_pfn(vma, address, vma->vm_pgoff); + if (likely(ret == -EBUSY || ret == 0)) + return VM_FAULT_NOPAGE; + else if (ret == -ENOMEM) + return VM_FAULT_OOM; + + return VM_FAULT_SIGBUS; +} + +static struct vm_operations_struct vmw_fifo_vm_ops = { + .fault = vmw_fifo_vm_fault, + .open = NULL, + .close = NULL +}; + +int vmw_fifo_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *file_priv; + struct vmw_private *dev_priv; + + file_priv = (struct drm_file *)filp->private_data; + dev_priv = vmw_priv(file_priv->minor->dev); + + if (vma->vm_pgoff != (dev_priv->mmio_start >> PAGE_SHIFT) || + (vma->vm_end - vma->vm_start) != PAGE_SIZE) + return -EINVAL; + + vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE); + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_SHARED; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + vma->vm_page_prot = ttm_io_prot(TTM_PL_FLAG_UNCACHED, + vma->vm_page_prot); + vma->vm_ops = &vmw_fifo_vm_ops; + return 0; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c new file mode 100644 index 000000000000..5f8908a5d7fd --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c @@ -0,0 +1,213 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "vmwgfx_drv.h" +#include "drmP.h" +#include "ttm/ttm_bo_driver.h" + +/** + * FIXME: Adjust to the ttm lowmem / highmem storage to minimize + * the number of used descriptors. + */ + +static int vmw_gmr_build_descriptors(struct list_head *desc_pages, + struct page *pages[], + unsigned long num_pages) +{ + struct page *page, *next; + struct svga_guest_mem_descriptor *page_virtual = NULL; + struct svga_guest_mem_descriptor *desc_virtual = NULL; + unsigned int desc_per_page; + unsigned long prev_pfn; + unsigned long pfn; + int ret; + + desc_per_page = PAGE_SIZE / + sizeof(struct svga_guest_mem_descriptor) - 1; + + while (likely(num_pages != 0)) { + page = alloc_page(__GFP_HIGHMEM); + if (unlikely(page == NULL)) { + ret = -ENOMEM; + goto out_err; + } + + list_add_tail(&page->lru, desc_pages); + + /* + * Point previous page terminating descriptor to this + * page before unmapping it. + */ + + if (likely(page_virtual != NULL)) { + desc_virtual->ppn = page_to_pfn(page); + kunmap_atomic(page_virtual, KM_USER0); + } + + page_virtual = kmap_atomic(page, KM_USER0); + desc_virtual = page_virtual - 1; + prev_pfn = ~(0UL); + + while (likely(num_pages != 0)) { + pfn = page_to_pfn(*pages); + + if (pfn != prev_pfn + 1) { + + if (desc_virtual - page_virtual == + desc_per_page - 1) + break; + + (++desc_virtual)->ppn = cpu_to_le32(pfn); + desc_virtual->num_pages = cpu_to_le32(1); + } else { + uint32_t tmp = + le32_to_cpu(desc_virtual->num_pages); + desc_virtual->num_pages = cpu_to_le32(tmp + 1); + } + prev_pfn = pfn; + --num_pages; + ++pages; + } + + (++desc_virtual)->ppn = cpu_to_le32(0); + desc_virtual->num_pages = cpu_to_le32(0); + } + + if (likely(page_virtual != NULL)) + kunmap_atomic(page_virtual, KM_USER0); + + return 0; +out_err: + list_for_each_entry_safe(page, next, desc_pages, lru) { + list_del_init(&page->lru); + __free_page(page); + } + return ret; +} + +static inline void vmw_gmr_free_descriptors(struct list_head *desc_pages) +{ + struct page *page, *next; + + list_for_each_entry_safe(page, next, desc_pages, lru) { + list_del_init(&page->lru); + __free_page(page); + } +} + +static void vmw_gmr_fire_descriptors(struct vmw_private *dev_priv, + int gmr_id, struct list_head *desc_pages) +{ + struct page *page; + + if (unlikely(list_empty(desc_pages))) + return; + + page = list_entry(desc_pages->next, struct page, lru); + + mutex_lock(&dev_priv->hw_mutex); + + vmw_write(dev_priv, SVGA_REG_GMR_ID, gmr_id); + wmb(); + vmw_write(dev_priv, SVGA_REG_GMR_DESCRIPTOR, page_to_pfn(page)); + mb(); + + mutex_unlock(&dev_priv->hw_mutex); + +} + +/** + * FIXME: Adjust to the ttm lowmem / highmem storage to minimize + * the number of used descriptors. 
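+ *
+ * A new descriptor is currently started whenever the next page is not
+ * physically contiguous with the previous one (prev_pfn + 1 != pfn).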
+ */ + +static unsigned long vmw_gmr_count_descriptors(struct page *pages[], + unsigned long num_pages) +{ + unsigned long prev_pfn = ~(0UL); + unsigned long pfn; + unsigned long descriptors = 0; + + while (num_pages--) { + pfn = page_to_pfn(*pages++); + if (prev_pfn + 1 != pfn) + ++descriptors; + prev_pfn = pfn; + } + + return descriptors; +} + +int vmw_gmr_bind(struct vmw_private *dev_priv, + struct ttm_buffer_object *bo) +{ + struct ttm_tt *ttm = bo->ttm; + unsigned long descriptors; + int ret; + uint32_t id; + struct list_head desc_pages; + + if (!(dev_priv->capabilities & SVGA_CAP_GMR)) + return -EINVAL; + + ret = ttm_tt_populate(ttm); + if (unlikely(ret != 0)) + return ret; + + descriptors = vmw_gmr_count_descriptors(ttm->pages, ttm->num_pages); + if (unlikely(descriptors > dev_priv->max_gmr_descriptors)) + return -EINVAL; + + INIT_LIST_HEAD(&desc_pages); + ret = vmw_gmr_build_descriptors(&desc_pages, ttm->pages, + ttm->num_pages); + if (unlikely(ret != 0)) + return ret; + + ret = vmw_gmr_id_alloc(dev_priv, &id); + if (unlikely(ret != 0)) + goto out_no_id; + + vmw_gmr_fire_descriptors(dev_priv, id, &desc_pages); + vmw_gmr_free_descriptors(&desc_pages); + vmw_dmabuf_set_gmr(bo, id); + return 0; + +out_no_id: + vmw_gmr_free_descriptors(&desc_pages); + return ret; +} + +void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id) +{ + mutex_lock(&dev_priv->hw_mutex); + vmw_write(dev_priv, SVGA_REG_GMR_ID, gmr_id); + wmb(); + vmw_write(dev_priv, SVGA_REG_GMR_DESCRIPTOR, 0); + mb(); + mutex_unlock(&dev_priv->hw_mutex); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c new file mode 100644 index 000000000000..5fa6a4ed238a --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -0,0 +1,81 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "vmwgfx_drv.h" +#include "vmwgfx_drm.h" + +int vmw_getparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct drm_vmw_getparam_arg *param = + (struct drm_vmw_getparam_arg *)data; + + switch (param->param) { + case DRM_VMW_PARAM_NUM_STREAMS: + param->value = vmw_overlay_num_overlays(dev_priv); + break; + case DRM_VMW_PARAM_NUM_FREE_STREAMS: + param->value = vmw_overlay_num_free_overlays(dev_priv); + break; + case DRM_VMW_PARAM_3D: + param->value = dev_priv->capabilities & SVGA_CAP_3D ? 1 : 0; + break; + case DRM_VMW_PARAM_FIFO_OFFSET: + param->value = dev_priv->mmio_start; + break; + default: + DRM_ERROR("Illegal vmwgfx get param request: %d\n", + param->param); + return -EINVAL; + } + + return 0; +} + +int vmw_fifo_debug_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_fifo_state *fifo_state = &dev_priv->fifo; + struct drm_vmw_fifo_debug_arg *arg = + (struct drm_vmw_fifo_debug_arg *)data; + __le32 __user *buffer = (__le32 __user *) + (unsigned long)arg->debug_buffer; + + if (unlikely(fifo_state->last_buffer == NULL)) + return -EINVAL; + + if (arg->debug_buffer_size < fifo_state->last_data_size) { + arg->used_size = arg->debug_buffer_size; + arg->did_not_fit = 1; + } else { + arg->used_size = fifo_state->last_data_size; + arg->did_not_fit = 0; + } + return copy_to_user(buffer, fifo_state->last_buffer, arg->used_size); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c new file mode 100644 index 000000000000..9e0f0306eedb --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -0,0 +1,295 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "drmP.h" +#include "vmwgfx_drv.h" + +#define VMW_FENCE_WRAP (1 << 24) + +irqreturn_t vmw_irq_handler(DRM_IRQ_ARGS) +{ + struct drm_device *dev = (struct drm_device *)arg; + struct vmw_private *dev_priv = vmw_priv(dev); + uint32_t status; + + spin_lock(&dev_priv->irq_lock); + status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); + spin_unlock(&dev_priv->irq_lock); + + if (status & SVGA_IRQFLAG_ANY_FENCE) + wake_up_all(&dev_priv->fence_queue); + if (status & SVGA_IRQFLAG_FIFO_PROGRESS) + wake_up_all(&dev_priv->fifo_queue); + + if (likely(status)) { + outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t sequence) +{ + uint32_t busy; + + mutex_lock(&dev_priv->hw_mutex); + busy = vmw_read(dev_priv, SVGA_REG_BUSY); + mutex_unlock(&dev_priv->hw_mutex); + + return (busy == 0); +} + + +bool vmw_fence_signaled(struct vmw_private *dev_priv, + uint32_t sequence) +{ + __le32 __iomem *fifo_mem = dev_priv->mmio_virt; + struct vmw_fifo_state *fifo_state; + bool ret; + + if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP)) + return true; + + dev_priv->last_read_sequence = ioread32(fifo_mem + SVGA_FIFO_FENCE); + if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP)) + return true; + + fifo_state = &dev_priv->fifo; + if (!(fifo_state->capabilities & SVGA_FIFO_CAP_FENCE) && + vmw_fifo_idle(dev_priv, sequence)) + return true; + + /** + * Below is to signal stale fences that have wrapped. + * First, block fence submission. + */ + + down_read(&fifo_state->rwsem); + + /** + * Then check if the sequence is higher than what we've actually + * emitted. Then the fence is stale and signaled. + */ + + ret = ((dev_priv->fence_seq - sequence) > VMW_FENCE_WRAP); + up_read(&fifo_state->rwsem); + + return ret; +} + +int vmw_fallback_wait(struct vmw_private *dev_priv, + bool lazy, + bool fifo_idle, + uint32_t sequence, + bool interruptible, + unsigned long timeout) +{ + struct vmw_fifo_state *fifo_state = &dev_priv->fifo; + + uint32_t count = 0; + uint32_t signal_seq; + int ret; + unsigned long end_jiffies = jiffies + timeout; + bool (*wait_condition)(struct vmw_private *, uint32_t); + DEFINE_WAIT(__wait); + + wait_condition = (fifo_idle) ? &vmw_fifo_idle : + &vmw_fence_signaled; + + /** + * Block command submission while waiting for idle. + */ + + if (fifo_idle) + down_read(&fifo_state->rwsem); + signal_seq = dev_priv->fence_seq; + ret = 0; + + for (;;) { + prepare_to_wait(&dev_priv->fence_queue, &__wait, + (interruptible) ? + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); + if (wait_condition(dev_priv, sequence)) + break; + if (time_after_eq(jiffies, end_jiffies)) { + DRM_ERROR("SVGA device lockup.\n"); + break; + } + if (lazy) + schedule_timeout(1); + else if ((++count & 0x0F) == 0) { + /** + * FIXME: Use schedule_hr_timeout here for + * newer kernels and lower CPU utilization. + */ + + __set_current_state(TASK_RUNNING); + schedule(); + __set_current_state((interruptible) ? 
+ TASK_INTERRUPTIBLE : + TASK_UNINTERRUPTIBLE); + } + if (interruptible && signal_pending(current)) { + ret = -ERESTART; + break; + } + } + finish_wait(&dev_priv->fence_queue, &__wait); + if (ret == 0 && fifo_idle) { + __le32 __iomem *fifo_mem = dev_priv->mmio_virt; + iowrite32(signal_seq, fifo_mem + SVGA_FIFO_FENCE); + } + wake_up_all(&dev_priv->fence_queue); + if (fifo_idle) + up_read(&fifo_state->rwsem); + + return ret; +} + +int vmw_wait_fence(struct vmw_private *dev_priv, + bool lazy, uint32_t sequence, + bool interruptible, unsigned long timeout) +{ + long ret; + unsigned long irq_flags; + struct vmw_fifo_state *fifo = &dev_priv->fifo; + + if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP)) + return 0; + + if (likely(vmw_fence_signaled(dev_priv, sequence))) + return 0; + + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + + if (!(fifo->capabilities & SVGA_FIFO_CAP_FENCE)) + return vmw_fallback_wait(dev_priv, lazy, true, sequence, + interruptible, timeout); + + if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK)) + return vmw_fallback_wait(dev_priv, lazy, false, sequence, + interruptible, timeout); + + mutex_lock(&dev_priv->hw_mutex); + if (atomic_add_return(1, &dev_priv->fence_queue_waiters) > 0) { + spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); + outl(SVGA_IRQFLAG_ANY_FENCE, + dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); + vmw_write(dev_priv, SVGA_REG_IRQMASK, + vmw_read(dev_priv, SVGA_REG_IRQMASK) | + SVGA_IRQFLAG_ANY_FENCE); + spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); + } + mutex_unlock(&dev_priv->hw_mutex); + + if (interruptible) + ret = wait_event_interruptible_timeout + (dev_priv->fence_queue, + vmw_fence_signaled(dev_priv, sequence), + timeout); + else + ret = wait_event_timeout + (dev_priv->fence_queue, + vmw_fence_signaled(dev_priv, sequence), + timeout); + + if (unlikely(ret == -ERESTARTSYS)) + ret = -ERESTART; + else if (unlikely(ret == 0)) + ret = -EBUSY; + else if (likely(ret > 0)) + ret = 0; + + mutex_lock(&dev_priv->hw_mutex); + if (atomic_dec_and_test(&dev_priv->fence_queue_waiters)) { + spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); + vmw_write(dev_priv, SVGA_REG_IRQMASK, + vmw_read(dev_priv, SVGA_REG_IRQMASK) & + ~SVGA_IRQFLAG_ANY_FENCE); + spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); + } + mutex_unlock(&dev_priv->hw_mutex); + + return ret; +} + +void vmw_irq_preinstall(struct drm_device *dev) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + uint32_t status; + + if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK)) + return; + + spin_lock_init(&dev_priv->irq_lock); + status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); + outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); +} + +int vmw_irq_postinstall(struct drm_device *dev) +{ + return 0; +} + +void vmw_irq_uninstall(struct drm_device *dev) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + uint32_t status; + + if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK)) + return; + + mutex_lock(&dev_priv->hw_mutex); + vmw_write(dev_priv, SVGA_REG_IRQMASK, 0); + mutex_unlock(&dev_priv->hw_mutex); + + status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); + outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); +} + +#define VMW_FENCE_WAIT_TIMEOUT 3*HZ; + +int vmw_fence_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vmw_fence_wait_arg *arg = + (struct drm_vmw_fence_wait_arg *)data; + unsigned long timeout; + + if (!arg->cookie_valid) { + arg->cookie_valid = 1; + arg->kernel_cookie = jiffies + 
VMW_FENCE_WAIT_TIMEOUT; + } + + timeout = jiffies; + if (time_after_eq(timeout, (unsigned long)arg->kernel_cookie)) + return -EBUSY; + + timeout = (unsigned long)arg->kernel_cookie - timeout; + return vmw_wait_fence(vmw_priv(dev), true, arg->sequence, true, timeout); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c new file mode 100644 index 000000000000..e9403be446fe --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -0,0 +1,872 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "vmwgfx_kms.h" + +/* Might need a hrtimer here? */ +#define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1) + + +void vmw_display_unit_cleanup(struct vmw_display_unit *du) +{ + if (du->cursor_surface) + vmw_surface_unreference(&du->cursor_surface); + if (du->cursor_dmabuf) + vmw_dmabuf_unreference(&du->cursor_dmabuf); + drm_crtc_cleanup(&du->crtc); + drm_encoder_cleanup(&du->encoder); + drm_connector_cleanup(&du->connector); +} + +/* + * Display Unit Cursor functions + */ + +int vmw_cursor_update_image(struct vmw_private *dev_priv, + u32 *image, u32 width, u32 height, + u32 hotspotX, u32 hotspotY) +{ + struct { + u32 cmd; + SVGAFifoCmdDefineAlphaCursor cursor; + } *cmd; + u32 image_size = width * height * 4; + u32 cmd_size = sizeof(*cmd) + image_size; + + if (!image) + return -EINVAL; + + cmd = vmw_fifo_reserve(dev_priv, cmd_size); + if (unlikely(cmd == NULL)) { + DRM_ERROR("Fifo reserve failed.\n"); + return -ENOMEM; + } + + memset(cmd, 0, sizeof(*cmd)); + + memcpy(&cmd[1], image, image_size); + + cmd->cmd = cpu_to_le32(SVGA_CMD_DEFINE_ALPHA_CURSOR); + cmd->cursor.id = cpu_to_le32(0); + cmd->cursor.width = cpu_to_le32(width); + cmd->cursor.height = cpu_to_le32(height); + cmd->cursor.hotspotX = cpu_to_le32(hotspotX); + cmd->cursor.hotspotY = cpu_to_le32(hotspotY); + + vmw_fifo_commit(dev_priv, cmd_size); + + return 0; +} + +void vmw_cursor_update_position(struct vmw_private *dev_priv, + bool show, int x, int y) +{ + __le32 __iomem *fifo_mem = dev_priv->mmio_virt; + uint32_t count; + + iowrite32(show ? 
1 : 0, fifo_mem + SVGA_FIFO_CURSOR_ON); + iowrite32(x, fifo_mem + SVGA_FIFO_CURSOR_X); + iowrite32(y, fifo_mem + SVGA_FIFO_CURSOR_Y); + count = ioread32(fifo_mem + SVGA_FIFO_CURSOR_COUNT); + iowrite32(++count, fifo_mem + SVGA_FIFO_CURSOR_COUNT); +} + +int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv, + uint32_t handle, uint32_t width, uint32_t height) +{ + struct vmw_private *dev_priv = vmw_priv(crtc->dev); + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_display_unit *du = vmw_crtc_to_du(crtc); + struct vmw_surface *surface = NULL; + struct vmw_dma_buffer *dmabuf = NULL; + int ret; + + if (handle) { + ret = vmw_user_surface_lookup(dev_priv, tfile, + handle, &surface); + if (!ret) { + if (!surface->snooper.image) { + DRM_ERROR("surface not suitable for cursor\n"); + return -EINVAL; + } + } else { + ret = vmw_user_dmabuf_lookup(tfile, + handle, &dmabuf); + if (ret) { + DRM_ERROR("failed to find surface or dmabuf: %i\n", ret); + return -EINVAL; + } + } + } + + /* takedown old cursor */ + if (du->cursor_surface) { + du->cursor_surface->snooper.crtc = NULL; + vmw_surface_unreference(&du->cursor_surface); + } + if (du->cursor_dmabuf) + vmw_dmabuf_unreference(&du->cursor_dmabuf); + + /* setup new image */ + if (surface) { + /* vmw_user_surface_lookup takes one reference */ + du->cursor_surface = surface; + + du->cursor_surface->snooper.crtc = crtc; + du->cursor_age = du->cursor_surface->snooper.age; + vmw_cursor_update_image(dev_priv, surface->snooper.image, + 64, 64, du->hotspot_x, du->hotspot_y); + } else if (dmabuf) { + struct ttm_bo_kmap_obj map; + unsigned long kmap_offset; + unsigned long kmap_num; + void *virtual; + bool dummy; + + /* vmw_user_surface_lookup takes one reference */ + du->cursor_dmabuf = dmabuf; + + kmap_offset = 0; + kmap_num = (64*64*4) >> PAGE_SHIFT; + + ret = ttm_bo_reserve(&dmabuf->base, true, false, false, 0); + if (unlikely(ret != 0)) { + DRM_ERROR("reserve failed\n"); + return -EINVAL; + } + + ret = ttm_bo_kmap(&dmabuf->base, kmap_offset, kmap_num, &map); + if (unlikely(ret != 0)) + goto err_unreserve; + + virtual = ttm_kmap_obj_virtual(&map, &dummy); + vmw_cursor_update_image(dev_priv, virtual, 64, 64, + du->hotspot_x, du->hotspot_y); + + ttm_bo_kunmap(&map); +err_unreserve: + ttm_bo_unreserve(&dmabuf->base); + + } else { + vmw_cursor_update_position(dev_priv, false, 0, 0); + return 0; + } + + vmw_cursor_update_position(dev_priv, true, du->cursor_x, du->cursor_y); + + return 0; +} + +int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) +{ + struct vmw_private *dev_priv = vmw_priv(crtc->dev); + struct vmw_display_unit *du = vmw_crtc_to_du(crtc); + bool shown = du->cursor_surface || du->cursor_dmabuf ? 
true : false;
+
+	du->cursor_x = x + crtc->x;
+	du->cursor_y = y + crtc->y;
+
+	vmw_cursor_update_position(dev_priv, shown,
+				   du->cursor_x, du->cursor_y);
+
+	return 0;
+}
+
+void vmw_kms_cursor_snoop(struct vmw_surface *srf,
+			  struct ttm_object_file *tfile,
+			  struct ttm_buffer_object *bo,
+			  SVGA3dCmdHeader *header)
+{
+	struct ttm_bo_kmap_obj map;
+	unsigned long kmap_offset;
+	unsigned long kmap_num;
+	SVGA3dCopyBox *box;
+	unsigned box_count;
+	void *virtual;
+	bool dummy;
+	struct vmw_dma_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdSurfaceDMA dma;
+	} *cmd;
+	int ret;
+
+	cmd = container_of(header, struct vmw_dma_cmd, header);
+
+	/* No snooper installed */
+	if (!srf->snooper.image)
+		return;
+
+	if (cmd->dma.host.face != 0 || cmd->dma.host.mipmap != 0) {
+		DRM_ERROR("face and mipmap for cursors should never != 0\n");
+		return;
+	}
+
+	if (cmd->header.size < 64) {
+		DRM_ERROR("at least one full copy box must be given\n");
+		return;
+	}
+
+	box = (SVGA3dCopyBox *)&cmd[1];
+	box_count = (cmd->header.size - sizeof(SVGA3dCmdSurfaceDMA)) /
+			sizeof(SVGA3dCopyBox);
+
+	if (cmd->dma.guest.pitch != (64 * 4) ||
+	    cmd->dma.guest.ptr.offset % PAGE_SIZE ||
+	    box->x != 0    || box->y != 0    || box->z != 0    ||
+	    box->srcx != 0 || box->srcy != 0 || box->srcz != 0 ||
+	    box->w != 64   || box->h != 64   || box->d != 1    ||
+	    box_count != 1) {
+		/* TODO handle non page aligned offsets */
+		/* TODO handle partial uploads and pitch != 256 */
+		/* TODO handle more than one copy (size != 64) */
+		DRM_ERROR("lazy programmer, can't handle weird stuff\n");
+		return;
+	}
+
+	kmap_offset = cmd->dma.guest.ptr.offset >> PAGE_SHIFT;
+	kmap_num = (64*64*4) >> PAGE_SHIFT;
+
+	ret = ttm_bo_reserve(bo, true, false, false, 0);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("reserve failed\n");
+		return;
+	}
+
+	ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map);
+	if (unlikely(ret != 0))
+		goto err_unreserve;
+
+	virtual = ttm_kmap_obj_virtual(&map, &dummy);
+
+	memcpy(srf->snooper.image, virtual, 64*64*4);
+	srf->snooper.age++;
+
+	/* we can't call this function from this function since execbuf has
+	 * reserved fifo space.
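+	 * vmw_cursor_update_image() reserves fifo space of its own, so the
+	 * actual cursor refresh is deferred to vmw_kms_cursor_post_execbuf(),
+	 * which runs once the execbuf reservation has been committed and
+	 * compares snooper.age against the display unit's cursor_age.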
+ * + * if (srf->snooper.crtc) + * vmw_ldu_crtc_cursor_update_image(dev_priv, + * srf->snooper.image, 64, 64, + * du->hotspot_x, du->hotspot_y); + */ + + ttm_bo_kunmap(&map); +err_unreserve: + ttm_bo_unreserve(bo); +} + +void vmw_kms_cursor_post_execbuf(struct vmw_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + struct vmw_display_unit *du; + struct drm_crtc *crtc; + + mutex_lock(&dev->mode_config.mutex); + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + du = vmw_crtc_to_du(crtc); + if (!du->cursor_surface || + du->cursor_age == du->cursor_surface->snooper.age) + continue; + + du->cursor_age = du->cursor_surface->snooper.age; + vmw_cursor_update_image(dev_priv, + du->cursor_surface->snooper.image, + 64, 64, du->hotspot_x, du->hotspot_y); + } + + mutex_unlock(&dev->mode_config.mutex); +} + +/* + * Generic framebuffer code + */ + +int vmw_framebuffer_create_handle(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int *handle) +{ + if (handle) + handle = 0; + + return 0; +} + +/* + * Surface framebuffer code + */ + +#define vmw_framebuffer_to_vfbs(x) \ + container_of(x, struct vmw_framebuffer_surface, base.base) + +struct vmw_framebuffer_surface { + struct vmw_framebuffer base; + struct vmw_surface *surface; + struct delayed_work d_work; + struct mutex work_lock; + bool present_fs; +}; + +void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer) +{ + struct vmw_framebuffer_surface *vfb = + vmw_framebuffer_to_vfbs(framebuffer); + + cancel_delayed_work_sync(&vfb->d_work); + drm_framebuffer_cleanup(framebuffer); + vmw_surface_unreference(&vfb->surface); + + kfree(framebuffer); +} + +static void vmw_framebuffer_present_fs_callback(struct work_struct *work) +{ + struct delayed_work *d_work = + container_of(work, struct delayed_work, work); + struct vmw_framebuffer_surface *vfbs = + container_of(d_work, struct vmw_framebuffer_surface, d_work); + struct vmw_surface *surf = vfbs->surface; + struct drm_framebuffer *framebuffer = &vfbs->base.base; + struct vmw_private *dev_priv = vmw_priv(framebuffer->dev); + + struct { + SVGA3dCmdHeader header; + SVGA3dCmdPresent body; + SVGA3dCopyRect cr; + } *cmd; + + mutex_lock(&vfbs->work_lock); + if (!vfbs->present_fs) + goto out_unlock; + + cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); + if (unlikely(cmd == NULL)) + goto out_resched; + + cmd->header.id = cpu_to_le32(SVGA_3D_CMD_PRESENT); + cmd->header.size = cpu_to_le32(sizeof(cmd->body) + sizeof(cmd->cr)); + cmd->body.sid = cpu_to_le32(surf->res.id); + cmd->cr.x = cpu_to_le32(0); + cmd->cr.y = cpu_to_le32(0); + cmd->cr.srcx = cmd->cr.x; + cmd->cr.srcy = cmd->cr.y; + cmd->cr.w = cpu_to_le32(framebuffer->width); + cmd->cr.h = cpu_to_le32(framebuffer->height); + vfbs->present_fs = false; + vmw_fifo_commit(dev_priv, sizeof(*cmd)); +out_resched: + /** + * Will not re-add if already pending. 
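+	 * schedule_delayed_work() does nothing when the work is already
+	 * queued, so this merely keeps the present tick (VMWGFX_PRESENT_RATE,
+	 * about one tick per frame at 60Hz) armed; once present_fs is found
+	 * clear the callback returns without rescheduling.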
+ */ + schedule_delayed_work(&vfbs->d_work, VMWGFX_PRESENT_RATE); +out_unlock: + mutex_unlock(&vfbs->work_lock); +} + + +int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer, + unsigned flags, unsigned color, + struct drm_clip_rect *clips, + unsigned num_clips) +{ + struct vmw_private *dev_priv = vmw_priv(framebuffer->dev); + struct vmw_framebuffer_surface *vfbs = + vmw_framebuffer_to_vfbs(framebuffer); + struct vmw_surface *surf = vfbs->surface; + struct drm_clip_rect norect; + SVGA3dCopyRect *cr; + int i, inc = 1; + + struct { + SVGA3dCmdHeader header; + SVGA3dCmdPresent body; + SVGA3dCopyRect cr; + } *cmd; + + if (!num_clips || + !(dev_priv->fifo.capabilities & + SVGA_FIFO_CAP_SCREEN_OBJECT)) { + int ret; + + mutex_lock(&vfbs->work_lock); + vfbs->present_fs = true; + ret = schedule_delayed_work(&vfbs->d_work, VMWGFX_PRESENT_RATE); + mutex_unlock(&vfbs->work_lock); + if (ret) { + /** + * No work pending, Force immediate present. + */ + vmw_framebuffer_present_fs_callback(&vfbs->d_work.work); + } + return 0; + } + + if (!num_clips) { + num_clips = 1; + clips = &norect; + norect.x1 = norect.y1 = 0; + norect.x2 = framebuffer->width; + norect.y2 = framebuffer->height; + } else if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) { + num_clips /= 2; + inc = 2; /* skip source rects */ + } + + cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd) + (num_clips - 1) * sizeof(cmd->cr)); + if (unlikely(cmd == NULL)) { + DRM_ERROR("Fifo reserve failed.\n"); + return -ENOMEM; + } + + memset(cmd, 0, sizeof(*cmd)); + + cmd->header.id = cpu_to_le32(SVGA_3D_CMD_PRESENT); + cmd->header.size = cpu_to_le32(sizeof(cmd->body) + num_clips * sizeof(cmd->cr)); + cmd->body.sid = cpu_to_le32(surf->res.id); + + for (i = 0, cr = &cmd->cr; i < num_clips; i++, cr++, clips += inc) { + cr->x = cpu_to_le16(clips->x1); + cr->y = cpu_to_le16(clips->y1); + cr->srcx = cr->x; + cr->srcy = cr->y; + cr->w = cpu_to_le16(clips->x2 - clips->x1); + cr->h = cpu_to_le16(clips->y2 - clips->y1); + } + + vmw_fifo_commit(dev_priv, sizeof(*cmd) + (num_clips - 1) * sizeof(cmd->cr)); + + return 0; +} + +static struct drm_framebuffer_funcs vmw_framebuffer_surface_funcs = { + .destroy = vmw_framebuffer_surface_destroy, + .dirty = vmw_framebuffer_surface_dirty, + .create_handle = vmw_framebuffer_create_handle, +}; + +int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, + struct vmw_surface *surface, + struct vmw_framebuffer **out, + unsigned width, unsigned height) + +{ + struct drm_device *dev = dev_priv->dev; + struct vmw_framebuffer_surface *vfbs; + int ret; + + vfbs = kzalloc(sizeof(*vfbs), GFP_KERNEL); + if (!vfbs) { + ret = -ENOMEM; + goto out_err1; + } + + ret = drm_framebuffer_init(dev, &vfbs->base.base, + &vmw_framebuffer_surface_funcs); + if (ret) + goto out_err2; + + if (!vmw_surface_reference(surface)) { + DRM_ERROR("failed to reference surface %p\n", surface); + goto out_err3; + } + + /* XXX get the first 3 from the surface info */ + vfbs->base.base.bits_per_pixel = 32; + vfbs->base.base.pitch = width * 32 / 4; + vfbs->base.base.depth = 24; + vfbs->base.base.width = width; + vfbs->base.base.height = height; + vfbs->base.pin = NULL; + vfbs->base.unpin = NULL; + vfbs->surface = surface; + mutex_init(&vfbs->work_lock); + INIT_DELAYED_WORK(&vfbs->d_work, &vmw_framebuffer_present_fs_callback); + *out = &vfbs->base; + + return 0; + +out_err3: + drm_framebuffer_cleanup(&vfbs->base.base); +out_err2: + kfree(vfbs); +out_err1: + return ret; +} + +/* + * Dmabuf framebuffer code + */ + +#define vmw_framebuffer_to_vfbd(x) \ + 
container_of(x, struct vmw_framebuffer_dmabuf, base.base) + +struct vmw_framebuffer_dmabuf { + struct vmw_framebuffer base; + struct vmw_dma_buffer *buffer; +}; + +void vmw_framebuffer_dmabuf_destroy(struct drm_framebuffer *framebuffer) +{ + struct vmw_framebuffer_dmabuf *vfbd = + vmw_framebuffer_to_vfbd(framebuffer); + + drm_framebuffer_cleanup(framebuffer); + vmw_dmabuf_unreference(&vfbd->buffer); + + kfree(vfbd); +} + +int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer, + unsigned flags, unsigned color, + struct drm_clip_rect *clips, + unsigned num_clips) +{ + struct vmw_private *dev_priv = vmw_priv(framebuffer->dev); + struct drm_clip_rect norect; + struct { + uint32_t header; + SVGAFifoCmdUpdate body; + } *cmd; + int i, increment = 1; + + if (!num_clips || + !(dev_priv->fifo.capabilities & + SVGA_FIFO_CAP_SCREEN_OBJECT)) { + num_clips = 1; + clips = &norect; + norect.x1 = norect.y1 = 0; + norect.x2 = framebuffer->width; + norect.y2 = framebuffer->height; + } else if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) { + num_clips /= 2; + increment = 2; + } + + cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd) * num_clips); + if (unlikely(cmd == NULL)) { + DRM_ERROR("Fifo reserve failed.\n"); + return -ENOMEM; + } + + for (i = 0; i < num_clips; i++, clips += increment) { + cmd[i].header = cpu_to_le32(SVGA_CMD_UPDATE); + cmd[i].body.x = cpu_to_le32(clips[i].x1); + cmd[i].body.y = cpu_to_le32(clips[i].y1); + cmd[i].body.width = cpu_to_le32(clips[i].x2 - clips[i].x1); + cmd[i].body.height = cpu_to_le32(clips[i].y2 - clips[i].y1); + } + + vmw_fifo_commit(dev_priv, sizeof(*cmd) * num_clips); + + return 0; +} + +static struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = { + .destroy = vmw_framebuffer_dmabuf_destroy, + .dirty = vmw_framebuffer_dmabuf_dirty, + .create_handle = vmw_framebuffer_create_handle, +}; + +static int vmw_framebuffer_dmabuf_pin(struct vmw_framebuffer *vfb) +{ + struct vmw_private *dev_priv = vmw_priv(vfb->base.dev); + struct vmw_framebuffer_dmabuf *vfbd = + vmw_framebuffer_to_vfbd(&vfb->base); + int ret; + + vmw_overlay_pause_all(dev_priv); + + ret = vmw_dmabuf_to_start_of_vram(dev_priv, vfbd->buffer); + + if (dev_priv->capabilities & SVGA_CAP_MULTIMON) { + vmw_write(dev_priv, SVGA_REG_NUM_GUEST_DISPLAYS, 1); + vmw_write(dev_priv, SVGA_REG_DISPLAY_ID, 0); + vmw_write(dev_priv, SVGA_REG_DISPLAY_IS_PRIMARY, true); + vmw_write(dev_priv, SVGA_REG_DISPLAY_POSITION_X, 0); + vmw_write(dev_priv, SVGA_REG_DISPLAY_POSITION_Y, 0); + vmw_write(dev_priv, SVGA_REG_DISPLAY_WIDTH, 0); + vmw_write(dev_priv, SVGA_REG_DISPLAY_HEIGHT, 0); + vmw_write(dev_priv, SVGA_REG_DISPLAY_ID, SVGA_ID_INVALID); + + vmw_write(dev_priv, SVGA_REG_ENABLE, 1); + vmw_write(dev_priv, SVGA_REG_WIDTH, vfb->base.width); + vmw_write(dev_priv, SVGA_REG_HEIGHT, vfb->base.height); + vmw_write(dev_priv, SVGA_REG_BITS_PER_PIXEL, vfb->base.bits_per_pixel); + vmw_write(dev_priv, SVGA_REG_DEPTH, vfb->base.depth); + vmw_write(dev_priv, SVGA_REG_RED_MASK, 0x00ff0000); + vmw_write(dev_priv, SVGA_REG_GREEN_MASK, 0x0000ff00); + vmw_write(dev_priv, SVGA_REG_BLUE_MASK, 0x000000ff); + } else + WARN_ON(true); + + vmw_overlay_resume_all(dev_priv); + + return 0; +} + +static int vmw_framebuffer_dmabuf_unpin(struct vmw_framebuffer *vfb) +{ + struct vmw_private *dev_priv = vmw_priv(vfb->base.dev); + struct vmw_framebuffer_dmabuf *vfbd = + vmw_framebuffer_to_vfbd(&vfb->base); + + if (!vfbd->buffer) { + WARN_ON(!vfbd->buffer); + return 0; + } + + return vmw_dmabuf_from_vram(dev_priv, vfbd->buffer); +} + +int 
vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv, + struct vmw_dma_buffer *dmabuf, + struct vmw_framebuffer **out, + unsigned width, unsigned height) + +{ + struct drm_device *dev = dev_priv->dev; + struct vmw_framebuffer_dmabuf *vfbd; + int ret; + + vfbd = kzalloc(sizeof(*vfbd), GFP_KERNEL); + if (!vfbd) { + ret = -ENOMEM; + goto out_err1; + } + + ret = drm_framebuffer_init(dev, &vfbd->base.base, + &vmw_framebuffer_dmabuf_funcs); + if (ret) + goto out_err2; + + if (!vmw_dmabuf_reference(dmabuf)) { + DRM_ERROR("failed to reference dmabuf %p\n", dmabuf); + goto out_err3; + } + + /* XXX get the first 3 from the surface info */ + vfbd->base.base.bits_per_pixel = 32; + vfbd->base.base.pitch = width * 32 / 4; + vfbd->base.base.depth = 24; + vfbd->base.base.width = width; + vfbd->base.base.height = height; + vfbd->base.pin = vmw_framebuffer_dmabuf_pin; + vfbd->base.unpin = vmw_framebuffer_dmabuf_unpin; + vfbd->buffer = dmabuf; + *out = &vfbd->base; + + return 0; + +out_err3: + drm_framebuffer_cleanup(&vfbd->base.base); +out_err2: + kfree(vfbd); +out_err1: + return ret; +} + +/* + * Generic Kernel modesetting functions + */ + +static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_mode_fb_cmd *mode_cmd) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_framebuffer *vfb = NULL; + struct vmw_surface *surface = NULL; + struct vmw_dma_buffer *bo = NULL; + int ret; + + ret = vmw_user_surface_lookup(dev_priv, tfile, + mode_cmd->handle, &surface); + if (ret) + goto try_dmabuf; + + ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb, + mode_cmd->width, mode_cmd->height); + + /* vmw_user_surface_lookup takes one ref so does new_fb */ + vmw_surface_unreference(&surface); + + if (ret) { + DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret); + return NULL; + } + return &vfb->base; + +try_dmabuf: + DRM_INFO("%s: trying buffer\n", __func__); + + ret = vmw_user_dmabuf_lookup(tfile, mode_cmd->handle, &bo); + if (ret) { + DRM_ERROR("failed to find buffer: %i\n", ret); + return NULL; + } + + ret = vmw_kms_new_framebuffer_dmabuf(dev_priv, bo, &vfb, + mode_cmd->width, mode_cmd->height); + + /* vmw_user_dmabuf_lookup takes one ref so does new_fb */ + vmw_dmabuf_unreference(&bo); + + if (ret) { + DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret); + return NULL; + } + + return &vfb->base; +} + +static int vmw_kms_fb_changed(struct drm_device *dev) +{ + return 0; +} + +static struct drm_mode_config_funcs vmw_kms_funcs = { + .fb_create = vmw_kms_fb_create, + .fb_changed = vmw_kms_fb_changed, +}; + +int vmw_kms_init(struct vmw_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + int ret; + + drm_mode_config_init(dev); + dev->mode_config.funcs = &vmw_kms_funcs; + dev->mode_config.min_width = 640; + dev->mode_config.min_height = 480; + dev->mode_config.max_width = 2048; + dev->mode_config.max_height = 2048; + + ret = vmw_kms_init_legacy_display_system(dev_priv); + + return 0; +} + +int vmw_kms_close(struct vmw_private *dev_priv) +{ + /* + * Docs says we should take the lock before calling this function + * but since it destroys encoders and our destructor calls + * drm_encoder_cleanup which takes the lock we deadlock. 
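+	 * (Presumably dev->mode_config.mutex: the connector/encoder destroy
+	 * callbacks end up in drm_encoder_cleanup(), which acquires it again,
+	 * so taking it here would be a self-deadlock.)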
+ */ + drm_mode_config_cleanup(dev_priv->dev); + vmw_kms_close_legacy_display_system(dev_priv); + return 0; +} + +int vmw_kms_cursor_bypass_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vmw_cursor_bypass_arg *arg = data; + struct vmw_display_unit *du; + struct drm_mode_object *obj; + struct drm_crtc *crtc; + int ret = 0; + + + mutex_lock(&dev->mode_config.mutex); + if (arg->flags & DRM_VMW_CURSOR_BYPASS_ALL) { + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + du = vmw_crtc_to_du(crtc); + du->hotspot_x = arg->xhot; + du->hotspot_y = arg->yhot; + } + + mutex_unlock(&dev->mode_config.mutex); + return 0; + } + + obj = drm_mode_object_find(dev, arg->crtc_id, DRM_MODE_OBJECT_CRTC); + if (!obj) { + ret = -EINVAL; + goto out; + } + + crtc = obj_to_crtc(obj); + du = vmw_crtc_to_du(crtc); + + du->hotspot_x = arg->xhot; + du->hotspot_y = arg->yhot; + +out: + mutex_unlock(&dev->mode_config.mutex); + + return ret; +} + +int vmw_kms_save_vga(struct vmw_private *vmw_priv) +{ + /* + * setup a single multimon monitor with the size + * of 0x0, this stops the UI from resizing when we + * change the framebuffer size + */ + if (vmw_priv->capabilities & SVGA_CAP_MULTIMON) { + vmw_write(vmw_priv, SVGA_REG_NUM_GUEST_DISPLAYS, 1); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_ID, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_IS_PRIMARY, true); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_POSITION_X, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_POSITION_Y, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_WIDTH, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_HEIGHT, 0); + vmw_write(vmw_priv, SVGA_REG_DISPLAY_ID, SVGA_ID_INVALID); + } + + vmw_priv->vga_width = vmw_read(vmw_priv, SVGA_REG_WIDTH); + vmw_priv->vga_height = vmw_read(vmw_priv, SVGA_REG_HEIGHT); + vmw_priv->vga_bpp = vmw_read(vmw_priv, SVGA_REG_BITS_PER_PIXEL); + vmw_priv->vga_depth = vmw_read(vmw_priv, SVGA_REG_DEPTH); + vmw_priv->vga_pseudo = vmw_read(vmw_priv, SVGA_REG_PSEUDOCOLOR); + vmw_priv->vga_red_mask = vmw_read(vmw_priv, SVGA_REG_RED_MASK); + vmw_priv->vga_green_mask = vmw_read(vmw_priv, SVGA_REG_GREEN_MASK); + vmw_priv->vga_blue_mask = vmw_read(vmw_priv, SVGA_REG_BLUE_MASK); + + return 0; +} + +int vmw_kms_restore_vga(struct vmw_private *vmw_priv) +{ + vmw_write(vmw_priv, SVGA_REG_WIDTH, vmw_priv->vga_width); + vmw_write(vmw_priv, SVGA_REG_HEIGHT, vmw_priv->vga_height); + vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, vmw_priv->vga_bpp); + vmw_write(vmw_priv, SVGA_REG_DEPTH, vmw_priv->vga_depth); + vmw_write(vmw_priv, SVGA_REG_PSEUDOCOLOR, vmw_priv->vga_pseudo); + vmw_write(vmw_priv, SVGA_REG_RED_MASK, vmw_priv->vga_red_mask); + vmw_write(vmw_priv, SVGA_REG_GREEN_MASK, vmw_priv->vga_green_mask); + vmw_write(vmw_priv, SVGA_REG_BLUE_MASK, vmw_priv->vga_blue_mask); + + /* TODO check for multimon */ + vmw_write(vmw_priv, SVGA_REG_NUM_GUEST_DISPLAYS, 0); + + return 0; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h new file mode 100644 index 000000000000..8b95249f0531 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -0,0 +1,102 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef VMWGFX_KMS_H_ +#define VMWGFX_KMS_H_ + +#include "drmP.h" +#include "vmwgfx_drv.h" + + +#define vmw_framebuffer_to_vfb(x) \ + container_of(x, struct vmw_framebuffer, base) + +/** + * Base class for framebuffers + * + * @pin is called the when ever a crtc uses this framebuffer + * @unpin is called + */ +struct vmw_framebuffer { + struct drm_framebuffer base; + int (*pin)(struct vmw_framebuffer *fb); + int (*unpin)(struct vmw_framebuffer *fb); +}; + + +#define vmw_crtc_to_du(x) \ + container_of(x, struct vmw_display_unit, crtc) + +/* + * Basic cursor manipulation + */ +int vmw_cursor_update_image(struct vmw_private *dev_priv, + u32 *image, u32 width, u32 height, + u32 hotspotX, u32 hotspotY); +void vmw_cursor_update_position(struct vmw_private *dev_priv, + bool show, int x, int y); + +/** + * Base class display unit. + * + * Since the SVGA hw doesn't have a concept of a crtc, encoder or connector + * so the display unit is all of them at the same time. This is true for both + * legacy multimon and screen objects. + */ +struct vmw_display_unit { + struct drm_crtc crtc; + struct drm_encoder encoder; + struct drm_connector connector; + + struct vmw_surface *cursor_surface; + struct vmw_dma_buffer *cursor_dmabuf; + size_t cursor_age; + + int cursor_x; + int cursor_y; + + int hotspot_x; + int hotspot_y; + + unsigned unit; +}; + +/* + * Shared display unit functions - vmwgfx_kms.c + */ +void vmw_display_unit_cleanup(struct vmw_display_unit *du); +int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv, + uint32_t handle, uint32_t width, uint32_t height); +int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y); + +/* + * Legacy display unit functions - vmwgfx_ldu.h + */ +int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv); +int vmw_kms_close_legacy_display_system(struct vmw_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c new file mode 100644 index 000000000000..90891593bf6c --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -0,0 +1,516 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "vmwgfx_kms.h" + +#define vmw_crtc_to_ldu(x) \ + container_of(x, struct vmw_legacy_display_unit, base.crtc) +#define vmw_encoder_to_ldu(x) \ + container_of(x, struct vmw_legacy_display_unit, base.encoder) +#define vmw_connector_to_ldu(x) \ + container_of(x, struct vmw_legacy_display_unit, base.connector) + +struct vmw_legacy_display { + struct list_head active; + + unsigned num_active; + + struct vmw_framebuffer *fb; +}; + +/** + * Display unit using the legacy register interface. + */ +struct vmw_legacy_display_unit { + struct vmw_display_unit base; + + struct list_head active; + + unsigned unit; +}; + +static void vmw_ldu_destroy(struct vmw_legacy_display_unit *ldu) +{ + list_del_init(&ldu->active); + vmw_display_unit_cleanup(&ldu->base); + kfree(ldu); +} + + +/* + * Legacy Display Unit CRTC functions + */ + +static void vmw_ldu_crtc_save(struct drm_crtc *crtc) +{ +} + +static void vmw_ldu_crtc_restore(struct drm_crtc *crtc) +{ +} + +static void vmw_ldu_crtc_gamma_set(struct drm_crtc *crtc, + u16 *r, u16 *g, u16 *b, + uint32_t size) +{ +} + +static void vmw_ldu_crtc_destroy(struct drm_crtc *crtc) +{ + vmw_ldu_destroy(vmw_crtc_to_ldu(crtc)); +} + +static int vmw_ldu_commit_list(struct vmw_private *dev_priv) +{ + struct vmw_legacy_display *lds = dev_priv->ldu_priv; + struct vmw_legacy_display_unit *entry; + struct drm_crtc *crtc; + int i = 0; + + /* to stop the screen from changing size on resize */ + vmw_write(dev_priv, SVGA_REG_NUM_GUEST_DISPLAYS, 0); + for (i = 0; i < lds->num_active; i++) { + vmw_write(dev_priv, SVGA_REG_DISPLAY_ID, i); + vmw_write(dev_priv, SVGA_REG_DISPLAY_IS_PRIMARY, !i); + vmw_write(dev_priv, SVGA_REG_DISPLAY_POSITION_X, 0); + vmw_write(dev_priv, SVGA_REG_DISPLAY_POSITION_Y, 0); + vmw_write(dev_priv, SVGA_REG_DISPLAY_WIDTH, 0); + vmw_write(dev_priv, SVGA_REG_DISPLAY_HEIGHT, 0); + vmw_write(dev_priv, SVGA_REG_DISPLAY_ID, SVGA_ID_INVALID); + } + + /* Now set the mode */ + vmw_write(dev_priv, SVGA_REG_NUM_GUEST_DISPLAYS, lds->num_active); + i = 0; + list_for_each_entry(entry, &lds->active, active) { + crtc = &entry->base.crtc; + + vmw_write(dev_priv, SVGA_REG_DISPLAY_ID, i); + vmw_write(dev_priv, SVGA_REG_DISPLAY_IS_PRIMARY, !i); + vmw_write(dev_priv, SVGA_REG_DISPLAY_POSITION_X, crtc->x); + vmw_write(dev_priv, SVGA_REG_DISPLAY_POSITION_Y, 
crtc->y); + vmw_write(dev_priv, SVGA_REG_DISPLAY_WIDTH, crtc->mode.hdisplay); + vmw_write(dev_priv, SVGA_REG_DISPLAY_HEIGHT, crtc->mode.vdisplay); + vmw_write(dev_priv, SVGA_REG_DISPLAY_ID, SVGA_ID_INVALID); + + i++; + } + + return 0; +} + +static int vmw_ldu_del_active(struct vmw_private *vmw_priv, + struct vmw_legacy_display_unit *ldu) +{ + struct vmw_legacy_display *ld = vmw_priv->ldu_priv; + if (list_empty(&ldu->active)) + return 0; + + list_del_init(&ldu->active); + if (--(ld->num_active) == 0) { + BUG_ON(!ld->fb); + if (ld->fb->unpin) + ld->fb->unpin(ld->fb); + ld->fb = NULL; + } + + return 0; +} + +static int vmw_ldu_add_active(struct vmw_private *vmw_priv, + struct vmw_legacy_display_unit *ldu, + struct vmw_framebuffer *vfb) +{ + struct vmw_legacy_display *ld = vmw_priv->ldu_priv; + struct vmw_legacy_display_unit *entry; + struct list_head *at; + + if (!list_empty(&ldu->active)) + return 0; + + at = &ld->active; + list_for_each_entry(entry, &ld->active, active) { + if (entry->unit > ldu->unit) + break; + + at = &entry->active; + } + + list_add(&ldu->active, at); + if (ld->num_active++ == 0) { + BUG_ON(ld->fb); + if (vfb->pin) + vfb->pin(vfb); + ld->fb = vfb; + } + + return 0; +} + +static int vmw_ldu_crtc_set_config(struct drm_mode_set *set) +{ + struct vmw_private *dev_priv; + struct vmw_legacy_display_unit *ldu; + struct drm_connector *connector; + struct drm_display_mode *mode; + struct drm_encoder *encoder; + struct vmw_framebuffer *vfb; + struct drm_framebuffer *fb; + struct drm_crtc *crtc; + + if (!set) + return -EINVAL; + + if (!set->crtc) + return -EINVAL; + + /* get the ldu */ + crtc = set->crtc; + ldu = vmw_crtc_to_ldu(crtc); + vfb = set->fb ? vmw_framebuffer_to_vfb(set->fb) : NULL; + dev_priv = vmw_priv(crtc->dev); + + if (set->num_connectors > 1) { + DRM_ERROR("to many connectors\n"); + return -EINVAL; + } + + if (set->num_connectors == 1 && + set->connectors[0] != &ldu->base.connector) { + DRM_ERROR("connector doesn't match %p %p\n", + set->connectors[0], &ldu->base.connector); + return -EINVAL; + } + + /* ldu only supports one fb active at the time */ + if (dev_priv->ldu_priv->fb && vfb && + dev_priv->ldu_priv->fb != vfb) { + DRM_ERROR("Multiple framebuffers not supported\n"); + return -EINVAL; + } + + /* since they always map one to one these are safe */ + connector = &ldu->base.connector; + encoder = &ldu->base.encoder; + + /* should we turn the crtc off? 
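+	 * That is the case when the set carries no connectors, no mode or no
+	 * framebuffer: detach the encoder and fb and drop this unit from the
+	 * active list so the hardware layout is re-committed without it.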
*/ + if (set->num_connectors == 0 || !set->mode || !set->fb) { + + connector->encoder = NULL; + encoder->crtc = NULL; + crtc->fb = NULL; + + vmw_ldu_del_active(dev_priv, ldu); + + vmw_ldu_commit_list(dev_priv); + + return 0; + } + + + /* we now know we want to set a mode */ + mode = set->mode; + fb = set->fb; + + if (set->x + mode->hdisplay > fb->width || + set->y + mode->vdisplay > fb->height) { + DRM_ERROR("set outside of framebuffer\n"); + return -EINVAL; + } + + vmw_fb_off(dev_priv); + + crtc->fb = fb; + encoder->crtc = crtc; + connector->encoder = encoder; + crtc->x = set->x; + crtc->y = set->y; + crtc->mode = *mode; + + vmw_ldu_add_active(dev_priv, ldu, vfb); + + vmw_ldu_commit_list(dev_priv); + + return 0; +} + +static struct drm_crtc_funcs vmw_legacy_crtc_funcs = { + .save = vmw_ldu_crtc_save, + .restore = vmw_ldu_crtc_restore, + .cursor_set = vmw_du_crtc_cursor_set, + .cursor_move = vmw_du_crtc_cursor_move, + .gamma_set = vmw_ldu_crtc_gamma_set, + .destroy = vmw_ldu_crtc_destroy, + .set_config = vmw_ldu_crtc_set_config, +}; + +/* + * Legacy Display Unit encoder functions + */ + +static void vmw_ldu_encoder_destroy(struct drm_encoder *encoder) +{ + vmw_ldu_destroy(vmw_encoder_to_ldu(encoder)); +} + +static struct drm_encoder_funcs vmw_legacy_encoder_funcs = { + .destroy = vmw_ldu_encoder_destroy, +}; + +/* + * Legacy Display Unit connector functions + */ + +static void vmw_ldu_connector_dpms(struct drm_connector *connector, int mode) +{ +} + +static void vmw_ldu_connector_save(struct drm_connector *connector) +{ +} + +static void vmw_ldu_connector_restore(struct drm_connector *connector) +{ +} + +static enum drm_connector_status + vmw_ldu_connector_detect(struct drm_connector *connector) +{ + /* XXX vmwctrl should control connection status */ + if (vmw_connector_to_ldu(connector)->base.unit == 0) + return connector_status_connected; + return connector_status_disconnected; +} + +static struct drm_display_mode vmw_ldu_connector_builtin[] = { + /* 640x480@60Hz */ + { DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656, + 752, 800, 0, 480, 489, 492, 525, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, + /* 800x600@60Hz */ + { DRM_MODE("800x600", + DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, + 40000, 800, 840, 968, 1056, 0, 600, 601, 605, 628, + 0, DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1024x768@60Hz */ + { DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048, + 1184, 1344, 0, 768, 771, 777, 806, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, + /* 1152x864@75Hz */ + { DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216, + 1344, 1600, 0, 864, 865, 868, 900, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1280x768@60Hz */ + { DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344, + 1472, 1664, 0, 768, 771, 778, 798, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1280x800@60Hz */ + { DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352, + 1480, 1680, 0, 800, 803, 809, 831, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, + /* 1280x960@60Hz */ + { DRM_MODE("1280x960", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1376, + 1488, 1800, 0, 960, 961, 964, 1000, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1280x1024@60Hz */ + { DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1328, + 1440, 1688, 0, 1024, 1025, 1028, 1066, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1360x768@60Hz */ + { DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 85500, 1360, 1424, + 1536, 1792, 0, 768, 771, 777, 
795, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1440x1050@60Hz */ + { DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488, + 1632, 1864, 0, 1050, 1053, 1057, 1089, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1440x900@60Hz */ + { DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1520, + 1672, 1904, 0, 900, 903, 909, 934, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1600x1200@60Hz */ + { DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 162000, 1600, 1664, + 1856, 2160, 0, 1200, 1201, 1204, 1250, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1680x1050@60Hz */ + { DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784, + 1960, 2240, 0, 1050, 1053, 1059, 1089, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1792x1344@60Hz */ + { DRM_MODE("1792x1344", DRM_MODE_TYPE_DRIVER, 204750, 1792, 1920, + 2120, 2448, 0, 1344, 1345, 1348, 1394, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1853x1392@60Hz */ + { DRM_MODE("1856x1392", DRM_MODE_TYPE_DRIVER, 218250, 1856, 1952, + 2176, 2528, 0, 1392, 1393, 1396, 1439, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1920x1200@60Hz */ + { DRM_MODE("1920x1200", DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056, + 2256, 2592, 0, 1200, 1203, 1209, 1245, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 1920x1440@60Hz */ + { DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 234000, 1920, 2048, + 2256, 2600, 0, 1440, 1441, 1444, 1500, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* 2560x1600@60Hz */ + { DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2752, + 3032, 3504, 0, 1600, 1603, 1609, 1658, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, + /* Terminate */ + { DRM_MODE("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) }, +}; + +static int vmw_ldu_connector_fill_modes(struct drm_connector *connector, + uint32_t max_width, uint32_t max_height) +{ + struct drm_device *dev = connector->dev; + struct drm_display_mode *mode = NULL; + int i; + + for (i = 0; vmw_ldu_connector_builtin[i].type != 0; i++) { + if (vmw_ldu_connector_builtin[i].hdisplay > max_width || + vmw_ldu_connector_builtin[i].vdisplay > max_height) + continue; + + mode = drm_mode_duplicate(dev, &vmw_ldu_connector_builtin[i]); + if (!mode) + return 0; + mode->vrefresh = drm_mode_vrefresh(mode); + + drm_mode_probed_add(connector, mode); + } + + drm_mode_connector_list_update(connector); + + return 1; +} + +static int vmw_ldu_connector_set_property(struct drm_connector *connector, + struct drm_property *property, + uint64_t val) +{ + return 0; +} + +static void vmw_ldu_connector_destroy(struct drm_connector *connector) +{ + vmw_ldu_destroy(vmw_connector_to_ldu(connector)); +} + +static struct drm_connector_funcs vmw_legacy_connector_funcs = { + .dpms = vmw_ldu_connector_dpms, + .save = vmw_ldu_connector_save, + .restore = vmw_ldu_connector_restore, + .detect = vmw_ldu_connector_detect, + .fill_modes = vmw_ldu_connector_fill_modes, + .set_property = vmw_ldu_connector_set_property, + .destroy = vmw_ldu_connector_destroy, +}; + +static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) +{ + struct vmw_legacy_display_unit *ldu; + struct drm_device *dev = dev_priv->dev; + struct drm_connector *connector; + struct drm_encoder *encoder; + struct drm_crtc *crtc; + + ldu = kzalloc(sizeof(*ldu), GFP_KERNEL); + if (!ldu) + return -ENOMEM; + + ldu->unit = unit; + crtc = &ldu->base.crtc; + encoder = &ldu->base.encoder; + connector = &ldu->base.connector; + + drm_connector_init(dev, connector, 
&vmw_legacy_connector_funcs, + DRM_MODE_CONNECTOR_LVDS); + /* Initial status */ + if (unit == 0) + connector->status = connector_status_connected; + else + connector->status = connector_status_disconnected; + + drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs, + DRM_MODE_ENCODER_LVDS); + drm_mode_connector_attach_encoder(connector, encoder); + encoder->possible_crtcs = (1 << unit); + encoder->possible_clones = 0; + + INIT_LIST_HEAD(&ldu->active); + + drm_crtc_init(dev, crtc, &vmw_legacy_crtc_funcs); + + drm_connector_attach_property(connector, + dev->mode_config.dirty_info_property, + 1); + + return 0; +} + +int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv) +{ + if (dev_priv->ldu_priv) { + DRM_INFO("ldu system already on\n"); + return -EINVAL; + } + + dev_priv->ldu_priv = kmalloc(GFP_KERNEL, sizeof(*dev_priv->ldu_priv)); + + if (!dev_priv->ldu_priv) + return -ENOMEM; + + INIT_LIST_HEAD(&dev_priv->ldu_priv->active); + dev_priv->ldu_priv->num_active = 0; + dev_priv->ldu_priv->fb = NULL; + + drm_mode_create_dirty_info_property(dev_priv->dev); + + vmw_ldu_init(dev_priv, 0); + vmw_ldu_init(dev_priv, 1); + vmw_ldu_init(dev_priv, 2); + vmw_ldu_init(dev_priv, 3); + vmw_ldu_init(dev_priv, 4); + vmw_ldu_init(dev_priv, 5); + vmw_ldu_init(dev_priv, 6); + vmw_ldu_init(dev_priv, 7); + + return 0; +} + +int vmw_kms_close_legacy_display_system(struct vmw_private *dev_priv) +{ + if (!dev_priv->ldu_priv) + return -ENOSYS; + + BUG_ON(!list_empty(&dev_priv->ldu_priv->active)); + + kfree(dev_priv->ldu_priv); + + return 0; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c new file mode 100644 index 000000000000..bb6e6a096d25 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -0,0 +1,634 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "drmP.h" +#include "vmwgfx_drv.h" + +#include "ttm/ttm_placement.h" + +#include "svga_overlay.h" +#include "svga_escape.h" + +#define VMW_MAX_NUM_STREAMS 1 + +struct vmw_stream { + struct vmw_dma_buffer *buf; + bool claimed; + bool paused; + struct drm_vmw_control_stream_arg saved; +}; + +/** + * Overlay control + */ +struct vmw_overlay { + /* + * Each stream is a single overlay. In Xv these are called ports. + */ + struct mutex mutex; + struct vmw_stream stream[VMW_MAX_NUM_STREAMS]; +}; + +static inline struct vmw_overlay *vmw_overlay(struct drm_device *dev) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + return dev_priv ? dev_priv->overlay_priv : NULL; +} + +struct vmw_escape_header { + uint32_t cmd; + SVGAFifoCmdEscape body; +}; + +struct vmw_escape_video_flush { + struct vmw_escape_header escape; + SVGAEscapeVideoFlush flush; +}; + +static inline void fill_escape(struct vmw_escape_header *header, + uint32_t size) +{ + header->cmd = SVGA_CMD_ESCAPE; + header->body.nsid = SVGA_ESCAPE_NSID_VMWARE; + header->body.size = size; +} + +static inline void fill_flush(struct vmw_escape_video_flush *cmd, + uint32_t stream_id) +{ + fill_escape(&cmd->escape, sizeof(cmd->flush)); + cmd->flush.cmdType = SVGA_ESCAPE_VMWARE_VIDEO_FLUSH; + cmd->flush.streamId = stream_id; +} + +/** + * Pin or unpin a buffer in vram. + * + * @dev_priv: Driver private. + * @buf: DMA buffer to pin or unpin. + * @pin: Pin buffer in vram if true. + * @interruptible: Use interruptible wait. + * + * Takes the current masters ttm lock in read. + * + * Returns + * -ERESTARTSYS if interrupted by a signal. + */ +static int vmw_dmabuf_pin_in_vram(struct vmw_private *dev_priv, + struct vmw_dma_buffer *buf, + bool pin, bool interruptible) +{ + struct ttm_buffer_object *bo = &buf->base; + struct ttm_bo_global *glob = bo->glob; + struct ttm_placement *overlay_placement = &vmw_vram_placement; + int ret; + + ret = ttm_read_lock(&dev_priv->active_master->lock, interruptible); + if (unlikely(ret != 0)) + return ret; + + ret = ttm_bo_reserve(bo, interruptible, false, false, 0); + if (unlikely(ret != 0)) + goto err; + + if (buf->gmr_bound) { + vmw_gmr_unbind(dev_priv, buf->gmr_id); + spin_lock(&glob->lru_lock); + ida_remove(&dev_priv->gmr_ida, buf->gmr_id); + spin_unlock(&glob->lru_lock); + buf->gmr_bound = NULL; + } + + if (pin) + overlay_placement = &vmw_vram_ne_placement; + + ret = ttm_bo_validate(bo, overlay_placement, interruptible, false); + + ttm_bo_unreserve(bo); + +err: + ttm_read_unlock(&dev_priv->active_master->lock); + + return ret; +} + +/** + * Send put command to hw. + * + * Returns + * -ERESTARTSYS if interrupted by a signal. 
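+ *
+ * If no fifo space is available the reservation is retried after waiting
+ * for fifo progress with vmw_fallback_wait(); only an interruptible wait
+ * is allowed to fail with -ERESTARTSYS.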
+ */ +static int vmw_overlay_send_put(struct vmw_private *dev_priv, + struct vmw_dma_buffer *buf, + struct drm_vmw_control_stream_arg *arg, + bool interruptible) +{ + struct { + struct vmw_escape_header escape; + struct { + struct { + uint32_t cmdType; + uint32_t streamId; + } header; + struct { + uint32_t registerId; + uint32_t value; + } items[SVGA_VIDEO_PITCH_3 + 1]; + } body; + struct vmw_escape_video_flush flush; + } *cmds; + uint32_t offset; + int i, ret; + + for (;;) { + cmds = vmw_fifo_reserve(dev_priv, sizeof(*cmds)); + if (cmds) + break; + + ret = vmw_fallback_wait(dev_priv, false, true, 0, + interruptible, 3*HZ); + if (interruptible && ret == -ERESTARTSYS) + return ret; + else + BUG_ON(ret != 0); + } + + fill_escape(&cmds->escape, sizeof(cmds->body)); + cmds->body.header.cmdType = SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS; + cmds->body.header.streamId = arg->stream_id; + + for (i = 0; i <= SVGA_VIDEO_PITCH_3; i++) + cmds->body.items[i].registerId = i; + + offset = buf->base.offset + arg->offset; + + cmds->body.items[SVGA_VIDEO_ENABLED].value = true; + cmds->body.items[SVGA_VIDEO_FLAGS].value = arg->flags; + cmds->body.items[SVGA_VIDEO_DATA_OFFSET].value = offset; + cmds->body.items[SVGA_VIDEO_FORMAT].value = arg->format; + cmds->body.items[SVGA_VIDEO_COLORKEY].value = arg->color_key; + cmds->body.items[SVGA_VIDEO_SIZE].value = arg->size; + cmds->body.items[SVGA_VIDEO_WIDTH].value = arg->width; + cmds->body.items[SVGA_VIDEO_HEIGHT].value = arg->height; + cmds->body.items[SVGA_VIDEO_SRC_X].value = arg->src.x; + cmds->body.items[SVGA_VIDEO_SRC_Y].value = arg->src.y; + cmds->body.items[SVGA_VIDEO_SRC_WIDTH].value = arg->src.w; + cmds->body.items[SVGA_VIDEO_SRC_HEIGHT].value = arg->src.h; + cmds->body.items[SVGA_VIDEO_DST_X].value = arg->dst.x; + cmds->body.items[SVGA_VIDEO_DST_Y].value = arg->dst.y; + cmds->body.items[SVGA_VIDEO_DST_WIDTH].value = arg->dst.w; + cmds->body.items[SVGA_VIDEO_DST_HEIGHT].value = arg->dst.h; + cmds->body.items[SVGA_VIDEO_PITCH_1].value = arg->pitch[0]; + cmds->body.items[SVGA_VIDEO_PITCH_2].value = arg->pitch[1]; + cmds->body.items[SVGA_VIDEO_PITCH_3].value = arg->pitch[2]; + + fill_flush(&cmds->flush, arg->stream_id); + + vmw_fifo_commit(dev_priv, sizeof(*cmds)); + + return 0; +} + +/** + * Send stop command to hw. + * + * Returns + * -ERESTARTSYS if interrupted by a signal. + */ +static int vmw_overlay_send_stop(struct vmw_private *dev_priv, + uint32_t stream_id, + bool interruptible) +{ + struct { + struct vmw_escape_header escape; + SVGAEscapeVideoSetRegs body; + struct vmw_escape_video_flush flush; + } *cmds; + int ret; + + for (;;) { + cmds = vmw_fifo_reserve(dev_priv, sizeof(*cmds)); + if (cmds) + break; + + ret = vmw_fallback_wait(dev_priv, false, true, 0, + interruptible, 3*HZ); + if (interruptible && ret == -ERESTARTSYS) + return ret; + else + BUG_ON(ret != 0); + } + + fill_escape(&cmds->escape, sizeof(cmds->body)); + cmds->body.header.cmdType = SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS; + cmds->body.header.streamId = stream_id; + cmds->body.items[0].registerId = SVGA_VIDEO_ENABLED; + cmds->body.items[0].value = false; + fill_flush(&cmds->flush, stream_id); + + vmw_fifo_commit(dev_priv, sizeof(*cmds)); + + return 0; +} + +/** + * Stop or pause a stream. + * + * If the stream is paused the no evict flag is removed from the buffer + * but left in vram. This allows for instance mode_set to evict it + * should it need to. + * + * The caller must hold the overlay lock. + * + * @stream_id which stream to stop/pause. 
+ * @pause true to pause, false to stop completely. + */ +static int vmw_overlay_stop(struct vmw_private *dev_priv, + uint32_t stream_id, bool pause, + bool interruptible) +{ + struct vmw_overlay *overlay = dev_priv->overlay_priv; + struct vmw_stream *stream = &overlay->stream[stream_id]; + int ret; + + /* no buffer attached the stream is completely stopped */ + if (!stream->buf) + return 0; + + /* If the stream is paused this is already done */ + if (!stream->paused) { + ret = vmw_overlay_send_stop(dev_priv, stream_id, + interruptible); + if (ret) + return ret; + + /* We just remove the NO_EVICT flag so no -ENOMEM */ + ret = vmw_dmabuf_pin_in_vram(dev_priv, stream->buf, false, + interruptible); + if (interruptible && ret == -ERESTARTSYS) + return ret; + else + BUG_ON(ret != 0); + } + + if (!pause) { + vmw_dmabuf_unreference(&stream->buf); + stream->paused = false; + } else { + stream->paused = true; + } + + return 0; +} + +/** + * Update a stream and send any put or stop fifo commands needed. + * + * The caller must hold the overlay lock. + * + * Returns + * -ENOMEM if buffer doesn't fit in vram. + * -ERESTARTSYS if interrupted. + */ +static int vmw_overlay_update_stream(struct vmw_private *dev_priv, + struct vmw_dma_buffer *buf, + struct drm_vmw_control_stream_arg *arg, + bool interruptible) +{ + struct vmw_overlay *overlay = dev_priv->overlay_priv; + struct vmw_stream *stream = &overlay->stream[arg->stream_id]; + int ret = 0; + + if (!buf) + return -EINVAL; + + DRM_DEBUG(" %s: old %p, new %p, %spaused\n", __func__, + stream->buf, buf, stream->paused ? "" : "not "); + + if (stream->buf != buf) { + ret = vmw_overlay_stop(dev_priv, arg->stream_id, + false, interruptible); + if (ret) + return ret; + } else if (!stream->paused) { + /* If the buffers match and not paused then just send + * the put command, no need to do anything else. + */ + ret = vmw_overlay_send_put(dev_priv, buf, arg, interruptible); + if (ret == 0) + stream->saved = *arg; + else + BUG_ON(!interruptible); + + return ret; + } + + /* We don't start the old stream if we are interrupted. + * Might return -ENOMEM if it can't fit the buffer in vram. + */ + ret = vmw_dmabuf_pin_in_vram(dev_priv, buf, true, interruptible); + if (ret) + return ret; + + ret = vmw_overlay_send_put(dev_priv, buf, arg, interruptible); + if (ret) { + /* This one needs to happen no matter what. We only remove + * the NO_EVICT flag so this is safe from -ENOMEM. + */ + BUG_ON(vmw_dmabuf_pin_in_vram(dev_priv, buf, false, false) != 0); + return ret; + } + + if (stream->buf != buf) + stream->buf = vmw_dmabuf_reference(buf); + stream->saved = *arg; + + return 0; +} + +/** + * Stop all streams. + * + * Used by the fb code when starting. + * + * Takes the overlay lock. + */ +int vmw_overlay_stop_all(struct vmw_private *dev_priv) +{ + struct vmw_overlay *overlay = dev_priv->overlay_priv; + int i, ret; + + if (!overlay) + return 0; + + mutex_lock(&overlay->mutex); + + for (i = 0; i < VMW_MAX_NUM_STREAMS; i++) { + struct vmw_stream *stream = &overlay->stream[i]; + if (!stream->buf) + continue; + + ret = vmw_overlay_stop(dev_priv, i, false, false); + WARN_ON(ret != 0); + } + + mutex_unlock(&overlay->mutex); + + return 0; +} + +/** + * Try to resume all paused streams. + * + * Used by the kms code after moving a new scanout buffer to vram. + * + * Takes the overlay lock. 
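+ *
+ * A sketch of the intended pairing with vmw_overlay_pause_all()
+ * (illustrative; the real call sites live in the kms/fb code):
+ *
+ *	vmw_overlay_pause_all(dev_priv);
+ *	... move or validate the new scanout buffer into vram ...
+ *	vmw_overlay_resume_all(dev_priv);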
+ */ +int vmw_overlay_resume_all(struct vmw_private *dev_priv) +{ + struct vmw_overlay *overlay = dev_priv->overlay_priv; + int i, ret; + + if (!overlay) + return 0; + + mutex_lock(&overlay->mutex); + + for (i = 0; i < VMW_MAX_NUM_STREAMS; i++) { + struct vmw_stream *stream = &overlay->stream[i]; + if (!stream->paused) + continue; + + ret = vmw_overlay_update_stream(dev_priv, stream->buf, + &stream->saved, false); + if (ret != 0) + DRM_INFO("%s: *warning* failed to resume stream %i\n", + __func__, i); + } + + mutex_unlock(&overlay->mutex); + + return 0; +} + +/** + * Pauses all active streams. + * + * Used by the kms code when moving a new scanout buffer to vram. + * + * Takes the overlay lock. + */ +int vmw_overlay_pause_all(struct vmw_private *dev_priv) +{ + struct vmw_overlay *overlay = dev_priv->overlay_priv; + int i, ret; + + if (!overlay) + return 0; + + mutex_lock(&overlay->mutex); + + for (i = 0; i < VMW_MAX_NUM_STREAMS; i++) { + if (overlay->stream[i].paused) + DRM_INFO("%s: *warning* stream %i already paused\n", + __func__, i); + ret = vmw_overlay_stop(dev_priv, i, true, false); + WARN_ON(ret != 0); + } + + mutex_unlock(&overlay->mutex); + + return 0; +} + +int vmw_overlay_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_overlay *overlay = dev_priv->overlay_priv; + struct drm_vmw_control_stream_arg *arg = + (struct drm_vmw_control_stream_arg *)data; + struct vmw_dma_buffer *buf; + struct vmw_resource *res; + int ret; + + if (!overlay) + return -ENOSYS; + + ret = vmw_user_stream_lookup(dev_priv, tfile, &arg->stream_id, &res); + if (ret) + return ret; + + mutex_lock(&overlay->mutex); + + if (!arg->enabled) { + ret = vmw_overlay_stop(dev_priv, arg->stream_id, false, true); + goto out_unlock; + } + + ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &buf); + if (ret) + goto out_unlock; + + ret = vmw_overlay_update_stream(dev_priv, buf, arg, true); + + vmw_dmabuf_unreference(&buf); + +out_unlock: + mutex_unlock(&overlay->mutex); + vmw_resource_unreference(&res); + + return ret; +} + +int vmw_overlay_num_overlays(struct vmw_private *dev_priv) +{ + if (!dev_priv->overlay_priv) + return 0; + + return VMW_MAX_NUM_STREAMS; +} + +int vmw_overlay_num_free_overlays(struct vmw_private *dev_priv) +{ + struct vmw_overlay *overlay = dev_priv->overlay_priv; + int i, k; + + if (!overlay) + return 0; + + mutex_lock(&overlay->mutex); + + for (i = 0, k = 0; i < VMW_MAX_NUM_STREAMS; i++) + if (!overlay->stream[i].claimed) + k++; + + mutex_unlock(&overlay->mutex); + + return k; +} + +int vmw_overlay_claim(struct vmw_private *dev_priv, uint32_t *out) +{ + struct vmw_overlay *overlay = dev_priv->overlay_priv; + int i; + + if (!overlay) + return -ENOSYS; + + mutex_lock(&overlay->mutex); + + for (i = 0; i < VMW_MAX_NUM_STREAMS; i++) { + + if (overlay->stream[i].claimed) + continue; + + overlay->stream[i].claimed = true; + *out = i; + mutex_unlock(&overlay->mutex); + return 0; + } + + mutex_unlock(&overlay->mutex); + return -ESRCH; +} + +int vmw_overlay_unref(struct vmw_private *dev_priv, uint32_t stream_id) +{ + struct vmw_overlay *overlay = dev_priv->overlay_priv; + + BUG_ON(stream_id >= VMW_MAX_NUM_STREAMS); + + if (!overlay) + return -ENOSYS; + + mutex_lock(&overlay->mutex); + + WARN_ON(!overlay->stream[stream_id].claimed); + vmw_overlay_stop(dev_priv, stream_id, false, false); + overlay->stream[stream_id].claimed = false; + + mutex_unlock(&overlay->mutex); 
+	return 0;
+}
+
+int vmw_overlay_init(struct vmw_private *dev_priv)
+{
+	struct vmw_overlay *overlay;
+	int i;
+
+	if (dev_priv->overlay_priv)
+		return -EINVAL;
+
+	if (!(dev_priv->fifo.capabilities & SVGA_FIFO_CAP_VIDEO) &&
+	    (dev_priv->fifo.capabilities & SVGA_FIFO_CAP_ESCAPE)) {
+		DRM_INFO("hardware doesn't support overlays\n");
+		return -ENOSYS;
+	}
+
+	overlay = kmalloc(sizeof(*overlay), GFP_KERNEL);
+	if (!overlay)
+		return -ENOMEM;
+
+	memset(overlay, 0, sizeof(*overlay));
+	mutex_init(&overlay->mutex);
+	for (i = 0; i < VMW_MAX_NUM_STREAMS; i++) {
+		overlay->stream[i].buf = NULL;
+		overlay->stream[i].paused = false;
+		overlay->stream[i].claimed = false;
+	}
+
+	dev_priv->overlay_priv = overlay;
+
+	return 0;
+}
+
+int vmw_overlay_close(struct vmw_private *dev_priv)
+{
+	struct vmw_overlay *overlay = dev_priv->overlay_priv;
+	bool forgotten_buffer = false;
+	int i;
+
+	if (!overlay)
+		return -ENOSYS;
+
+	for (i = 0; i < VMW_MAX_NUM_STREAMS; i++) {
+		if (overlay->stream[i].buf) {
+			forgotten_buffer = true;
+			vmw_overlay_stop(dev_priv, i, false, false);
+		}
+	}
+
+	WARN_ON(forgotten_buffer);
+
+	dev_priv->overlay_priv = NULL;
+	kfree(overlay);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_reg.h b/drivers/gpu/drm/vmwgfx/vmwgfx_reg.h
new file mode 100644
index 000000000000..9d0dd3a342eb
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_reg.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * This file contains virtual hardware defines for kernel space.
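+ *
+ * The *_PORT defines below are byte offsets from the device's base
+ * I/O port. Access is assumed to follow the usual SVGA index/value
+ * protocol (sketch; "io_base" stands in for the probed base port):
+ *
+ *	outl(reg, io_base + VMWGFX_INDEX_PORT);
+ *	val = inl(io_base + VMWGFX_VALUE_PORT);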
+ */
+
+#ifndef _VMWGFX_REG_H_
+#define _VMWGFX_REG_H_
+
+#include <linux/types.h>
+
+#define VMWGFX_INDEX_PORT 0x0
+#define VMWGFX_VALUE_PORT 0x1
+#define VMWGFX_IRQSTATUS_PORT 0x8
+
+struct svga_guest_mem_descriptor {
+	__le32 ppn;
+	__le32 num_pages;
+};
+
+struct svga_fifo_cmd_fence {
+	__le32 fence;
+};
+
+#define SVGA_SYNC_GENERIC 1
+#define SVGA_SYNC_FIFOFULL 2
+
+#include "svga_types.h"
+
+#include "svga3d_reg.h"
+
+#endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
new file mode 100644
index 000000000000..a1ceed0c8e07
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -0,0 +1,1192 @@
+/**************************************************************************
+ *
+ * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + **************************************************************************/ + +#include "vmwgfx_drv.h" +#include "vmwgfx_drm.h" +#include "ttm/ttm_object.h" +#include "ttm/ttm_placement.h" +#include "drmP.h" + +#define VMW_RES_CONTEXT ttm_driver_type0 +#define VMW_RES_SURFACE ttm_driver_type1 +#define VMW_RES_STREAM ttm_driver_type2 + +struct vmw_user_context { + struct ttm_base_object base; + struct vmw_resource res; +}; + +struct vmw_user_surface { + struct ttm_base_object base; + struct vmw_surface srf; +}; + +struct vmw_user_dma_buffer { + struct ttm_base_object base; + struct vmw_dma_buffer dma; +}; + +struct vmw_bo_user_rep { + uint32_t handle; + uint64_t map_handle; +}; + +struct vmw_stream { + struct vmw_resource res; + uint32_t stream_id; +}; + +struct vmw_user_stream { + struct ttm_base_object base; + struct vmw_stream stream; +}; + +static inline struct vmw_dma_buffer * +vmw_dma_buffer(struct ttm_buffer_object *bo) +{ + return container_of(bo, struct vmw_dma_buffer, base); +} + +static inline struct vmw_user_dma_buffer * +vmw_user_dma_buffer(struct ttm_buffer_object *bo) +{ + struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo); + return container_of(vmw_bo, struct vmw_user_dma_buffer, dma); +} + +struct vmw_resource *vmw_resource_reference(struct vmw_resource *res) +{ + kref_get(&res->kref); + return res; +} + +static void vmw_resource_release(struct kref *kref) +{ + struct vmw_resource *res = + container_of(kref, struct vmw_resource, kref); + struct vmw_private *dev_priv = res->dev_priv; + + idr_remove(res->idr, res->id); + write_unlock(&dev_priv->resource_lock); + + if (likely(res->hw_destroy != NULL)) + res->hw_destroy(res); + + if (res->res_free != NULL) + res->res_free(res); + else + kfree(res); + + write_lock(&dev_priv->resource_lock); +} + +void vmw_resource_unreference(struct vmw_resource **p_res) +{ + struct vmw_resource *res = *p_res; + struct vmw_private *dev_priv = res->dev_priv; + + *p_res = NULL; + write_lock(&dev_priv->resource_lock); + kref_put(&res->kref, vmw_resource_release); + write_unlock(&dev_priv->resource_lock); +} + +static int vmw_resource_init(struct vmw_private *dev_priv, + struct vmw_resource *res, + struct idr *idr, + enum ttm_object_type obj_type, + void (*res_free) (struct vmw_resource *res)) +{ + int ret; + + kref_init(&res->kref); + res->hw_destroy = NULL; + res->res_free = res_free; + res->res_type = obj_type; + res->idr = idr; + res->avail = false; + res->dev_priv = dev_priv; + + do { + if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0)) + return -ENOMEM; + + write_lock(&dev_priv->resource_lock); + ret = idr_get_new_above(idr, res, 1, &res->id); + write_unlock(&dev_priv->resource_lock); + + } while (ret == -EAGAIN); + + return ret; +} + +/** + * vmw_resource_activate + * + * @res: Pointer to the newly created resource + * @hw_destroy: Destroy function. NULL if none. + * + * Activate a resource after the hardware has been made aware of it. + * Set tye destroy function to @destroy. Typically this frees the + * resource and destroys the hardware resources associated with it. + * Activate basically means that the function vmw_resource_lookup will + * find it. 
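+ *
+ * Typical lifecycle, mirroring vmw_context_init() and
+ * vmw_surface_init() below (sketch):
+ *
+ *	ret = vmw_resource_init(dev_priv, res, idr, type, res_free);
+ *	... reserve FIFO space and emit the define command ...
+ *	vmw_resource_activate(res, hw_destroy);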
+ */ + +static void vmw_resource_activate(struct vmw_resource *res, + void (*hw_destroy) (struct vmw_resource *)) +{ + struct vmw_private *dev_priv = res->dev_priv; + + write_lock(&dev_priv->resource_lock); + res->avail = true; + res->hw_destroy = hw_destroy; + write_unlock(&dev_priv->resource_lock); +} + +struct vmw_resource *vmw_resource_lookup(struct vmw_private *dev_priv, + struct idr *idr, int id) +{ + struct vmw_resource *res; + + read_lock(&dev_priv->resource_lock); + res = idr_find(idr, id); + if (res && res->avail) + kref_get(&res->kref); + else + res = NULL; + read_unlock(&dev_priv->resource_lock); + + if (unlikely(res == NULL)) + return NULL; + + return res; +} + +/** + * Context management: + */ + +static void vmw_hw_context_destroy(struct vmw_resource *res) +{ + + struct vmw_private *dev_priv = res->dev_priv; + struct { + SVGA3dCmdHeader header; + SVGA3dCmdDestroyContext body; + } *cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); + + if (unlikely(cmd == NULL)) { + DRM_ERROR("Failed reserving FIFO space for surface " + "destruction.\n"); + return; + } + + cmd->header.id = cpu_to_le32(SVGA_3D_CMD_CONTEXT_DESTROY); + cmd->header.size = cpu_to_le32(sizeof(cmd->body)); + cmd->body.cid = cpu_to_le32(res->id); + + vmw_fifo_commit(dev_priv, sizeof(*cmd)); +} + +static int vmw_context_init(struct vmw_private *dev_priv, + struct vmw_resource *res, + void (*res_free) (struct vmw_resource *res)) +{ + int ret; + + struct { + SVGA3dCmdHeader header; + SVGA3dCmdDefineContext body; + } *cmd; + + ret = vmw_resource_init(dev_priv, res, &dev_priv->context_idr, + VMW_RES_CONTEXT, res_free); + + if (unlikely(ret != 0)) { + if (res_free == NULL) + kfree(res); + else + res_free(res); + return ret; + } + + cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); + if (unlikely(cmd == NULL)) { + DRM_ERROR("Fifo reserve failed.\n"); + vmw_resource_unreference(&res); + return -ENOMEM; + } + + cmd->header.id = cpu_to_le32(SVGA_3D_CMD_CONTEXT_DEFINE); + cmd->header.size = cpu_to_le32(sizeof(cmd->body)); + cmd->body.cid = cpu_to_le32(res->id); + + vmw_fifo_commit(dev_priv, sizeof(*cmd)); + vmw_resource_activate(res, vmw_hw_context_destroy); + return 0; +} + +struct vmw_resource *vmw_context_alloc(struct vmw_private *dev_priv) +{ + struct vmw_resource *res = kmalloc(sizeof(*res), GFP_KERNEL); + int ret; + + if (unlikely(res == NULL)) + return NULL; + + ret = vmw_context_init(dev_priv, res, NULL); + return (ret == 0) ? res : NULL; +} + +/** + * User-space context management: + */ + +static void vmw_user_context_free(struct vmw_resource *res) +{ + struct vmw_user_context *ctx = + container_of(res, struct vmw_user_context, res); + + kfree(ctx); +} + +/** + * This function is called when user space has no more references on the + * base object. It releases the base-object's reference on the resource object. 
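+ *
+ * Two reference counts are involved: the ttm base object's count, held
+ * through user-space handles, and the resource kref. The define ioctl
+ * below takes the extra resource reference that this release drops
+ * again (sketch):
+ *
+ *	tmp = vmw_resource_reference(&ctx->res);
+ *	ret = ttm_base_object_init(tfile, &ctx->base, ...);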
+ */ + +static void vmw_user_context_base_release(struct ttm_base_object **p_base) +{ + struct ttm_base_object *base = *p_base; + struct vmw_user_context *ctx = + container_of(base, struct vmw_user_context, base); + struct vmw_resource *res = &ctx->res; + + *p_base = NULL; + vmw_resource_unreference(&res); +} + +int vmw_context_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_resource *res; + struct vmw_user_context *ctx; + struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + int ret = 0; + + res = vmw_resource_lookup(dev_priv, &dev_priv->context_idr, arg->cid); + if (unlikely(res == NULL)) + return -EINVAL; + + if (res->res_free != &vmw_user_context_free) { + ret = -EINVAL; + goto out; + } + + ctx = container_of(res, struct vmw_user_context, res); + if (ctx->base.tfile != tfile && !ctx->base.shareable) { + ret = -EPERM; + goto out; + } + + ttm_ref_object_base_unref(tfile, ctx->base.hash.key, TTM_REF_USAGE); +out: + vmw_resource_unreference(&res); + return ret; +} + +int vmw_context_define_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_user_context *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + struct vmw_resource *res; + struct vmw_resource *tmp; + struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + int ret; + + if (unlikely(ctx == NULL)) + return -ENOMEM; + + res = &ctx->res; + ctx->base.shareable = false; + ctx->base.tfile = NULL; + + ret = vmw_context_init(dev_priv, res, vmw_user_context_free); + if (unlikely(ret != 0)) + return ret; + + tmp = vmw_resource_reference(&ctx->res); + ret = ttm_base_object_init(tfile, &ctx->base, false, VMW_RES_CONTEXT, + &vmw_user_context_base_release, NULL); + + if (unlikely(ret != 0)) { + vmw_resource_unreference(&tmp); + goto out_err; + } + + arg->cid = res->id; +out_err: + vmw_resource_unreference(&res); + return ret; + +} + +int vmw_context_check(struct vmw_private *dev_priv, + struct ttm_object_file *tfile, + int id) +{ + struct vmw_resource *res; + int ret = 0; + + read_lock(&dev_priv->resource_lock); + res = idr_find(&dev_priv->context_idr, id); + if (res && res->avail) { + struct vmw_user_context *ctx = + container_of(res, struct vmw_user_context, res); + if (ctx->base.tfile != tfile && !ctx->base.shareable) + ret = -EPERM; + } else + ret = -EINVAL; + read_unlock(&dev_priv->resource_lock); + + return ret; +} + + +/** + * Surface management. 
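+ *
+ * A surface define is a single FIFO submission: the command header,
+ * per-face mip level counts, then one SVGA3dSize per mip level, so the
+ * reservation is sized as in vmw_surface_init() below (sketch):
+ *
+ *	submit_size = sizeof(*cmd) + srf->num_sizes * sizeof(SVGA3dSize);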
+ */ + +static void vmw_hw_surface_destroy(struct vmw_resource *res) +{ + + struct vmw_private *dev_priv = res->dev_priv; + struct { + SVGA3dCmdHeader header; + SVGA3dCmdDestroySurface body; + } *cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); + + if (unlikely(cmd == NULL)) { + DRM_ERROR("Failed reserving FIFO space for surface " + "destruction.\n"); + return; + } + + cmd->header.id = cpu_to_le32(SVGA_3D_CMD_SURFACE_DESTROY); + cmd->header.size = cpu_to_le32(sizeof(cmd->body)); + cmd->body.sid = cpu_to_le32(res->id); + + vmw_fifo_commit(dev_priv, sizeof(*cmd)); +} + +void vmw_surface_res_free(struct vmw_resource *res) +{ + struct vmw_surface *srf = container_of(res, struct vmw_surface, res); + + kfree(srf->sizes); + kfree(srf->snooper.image); + kfree(srf); +} + +int vmw_surface_init(struct vmw_private *dev_priv, + struct vmw_surface *srf, + void (*res_free) (struct vmw_resource *res)) +{ + int ret; + struct { + SVGA3dCmdHeader header; + SVGA3dCmdDefineSurface body; + } *cmd; + SVGA3dSize *cmd_size; + struct vmw_resource *res = &srf->res; + struct drm_vmw_size *src_size; + size_t submit_size; + uint32_t cmd_len; + int i; + + BUG_ON(res_free == NULL); + ret = vmw_resource_init(dev_priv, res, &dev_priv->surface_idr, + VMW_RES_SURFACE, res_free); + + if (unlikely(ret != 0)) { + res_free(res); + return ret; + } + + submit_size = sizeof(*cmd) + srf->num_sizes * sizeof(SVGA3dSize); + cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize); + + cmd = vmw_fifo_reserve(dev_priv, submit_size); + if (unlikely(cmd == NULL)) { + DRM_ERROR("Fifo reserve failed for create surface.\n"); + vmw_resource_unreference(&res); + return -ENOMEM; + } + + cmd->header.id = cpu_to_le32(SVGA_3D_CMD_SURFACE_DEFINE); + cmd->header.size = cpu_to_le32(cmd_len); + cmd->body.sid = cpu_to_le32(res->id); + cmd->body.surfaceFlags = cpu_to_le32(srf->flags); + cmd->body.format = cpu_to_le32(srf->format); + for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) { + cmd->body.face[i].numMipLevels = + cpu_to_le32(srf->mip_levels[i]); + } + + cmd += 1; + cmd_size = (SVGA3dSize *) cmd; + src_size = srf->sizes; + + for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) { + cmd_size->width = cpu_to_le32(src_size->width); + cmd_size->height = cpu_to_le32(src_size->height); + cmd_size->depth = cpu_to_le32(src_size->depth); + } + + vmw_fifo_commit(dev_priv, submit_size); + vmw_resource_activate(res, vmw_hw_surface_destroy); + return 0; +} + +static void vmw_user_surface_free(struct vmw_resource *res) +{ + struct vmw_surface *srf = container_of(res, struct vmw_surface, res); + struct vmw_user_surface *user_srf = + container_of(srf, struct vmw_user_surface, srf); + + kfree(srf->sizes); + kfree(srf->snooper.image); + kfree(user_srf); +} + +int vmw_user_surface_lookup(struct vmw_private *dev_priv, + struct ttm_object_file *tfile, + int sid, struct vmw_surface **out) +{ + struct vmw_resource *res; + struct vmw_surface *srf; + struct vmw_user_surface *user_srf; + + res = vmw_resource_lookup(dev_priv, &dev_priv->surface_idr, sid); + if (unlikely(res == NULL)) + return -EINVAL; + + if (res->res_free != &vmw_user_surface_free) + return -EINVAL; + + srf = container_of(res, struct vmw_surface, res); + user_srf = container_of(srf, struct vmw_user_surface, srf); + if (user_srf->base.tfile != tfile && !user_srf->base.shareable) + return -EPERM; + + *out = srf; + return 0; +} + +static void vmw_user_surface_base_release(struct ttm_base_object **p_base) +{ + struct ttm_base_object *base = *p_base; + struct vmw_user_surface *user_srf = + 
container_of(base, struct vmw_user_surface, base); + struct vmw_resource *res = &user_srf->srf.res; + + *p_base = NULL; + vmw_resource_unreference(&res); +} + +int vmw_surface_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_resource *res; + struct vmw_surface *srf; + struct vmw_user_surface *user_srf; + struct drm_vmw_surface_arg *arg = (struct drm_vmw_surface_arg *)data; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + int ret = 0; + + res = vmw_resource_lookup(dev_priv, &dev_priv->surface_idr, arg->sid); + if (unlikely(res == NULL)) + return -EINVAL; + + if (res->res_free != &vmw_user_surface_free) { + ret = -EINVAL; + goto out; + } + + srf = container_of(res, struct vmw_surface, res); + user_srf = container_of(srf, struct vmw_user_surface, srf); + if (user_srf->base.tfile != tfile && !user_srf->base.shareable) { + ret = -EPERM; + goto out; + } + + ttm_ref_object_base_unref(tfile, user_srf->base.hash.key, + TTM_REF_USAGE); +out: + vmw_resource_unreference(&res); + return ret; +} + +int vmw_surface_define_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_user_surface *user_srf = + kmalloc(sizeof(*user_srf), GFP_KERNEL); + struct vmw_surface *srf; + struct vmw_resource *res; + struct vmw_resource *tmp; + union drm_vmw_surface_create_arg *arg = + (union drm_vmw_surface_create_arg *)data; + struct drm_vmw_surface_create_req *req = &arg->req; + struct drm_vmw_surface_arg *rep = &arg->rep; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct drm_vmw_size __user *user_sizes; + int ret; + int i; + + if (unlikely(user_srf == NULL)) + return -ENOMEM; + + srf = &user_srf->srf; + res = &srf->res; + + srf->flags = req->flags; + srf->format = req->format; + memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels)); + srf->num_sizes = 0; + for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) + srf->num_sizes += srf->mip_levels[i]; + + if (srf->num_sizes > DRM_VMW_MAX_SURFACE_FACES * + DRM_VMW_MAX_MIP_LEVELS) { + ret = -EINVAL; + goto out_err0; + } + + srf->sizes = kmalloc(srf->num_sizes * sizeof(*srf->sizes), GFP_KERNEL); + if (unlikely(srf->sizes == NULL)) { + ret = -ENOMEM; + goto out_err0; + } + + user_sizes = (struct drm_vmw_size __user *)(unsigned long) + req->size_addr; + + ret = copy_from_user(srf->sizes, user_sizes, + srf->num_sizes * sizeof(*srf->sizes)); + if (unlikely(ret != 0)) + goto out_err1; + + user_srf->base.shareable = false; + user_srf->base.tfile = NULL; + + /** + * From this point, the generic resource management functions + * destroy the object on failure. 
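+	 *
+	 * In particular, errors from here on are handled by dropping
+	 * resource references rather than by kfree(), since
+	 * vmw_user_surface_free() now owns the allocation (sketch):
+	 *
+	 *	ret = vmw_surface_init(dev_priv, srf, vmw_user_surface_free);
+	 *	if (unlikely(ret != 0))
+	 *		return ret;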
+ */ + + ret = vmw_surface_init(dev_priv, srf, vmw_user_surface_free); + if (unlikely(ret != 0)) + return ret; + + tmp = vmw_resource_reference(&srf->res); + ret = ttm_base_object_init(tfile, &user_srf->base, + req->shareable, VMW_RES_SURFACE, + &vmw_user_surface_base_release, NULL); + + if (unlikely(ret != 0)) { + vmw_resource_unreference(&tmp); + vmw_resource_unreference(&res); + return ret; + } + + if (srf->flags & (1 << 9) && + srf->num_sizes == 1 && + srf->sizes[0].width == 64 && + srf->sizes[0].height == 64 && + srf->format == SVGA3D_A8R8G8B8) { + + srf->snooper.image = kmalloc(64 * 64 * 4, GFP_KERNEL); + /* clear the image */ + if (srf->snooper.image) + memset(srf->snooper.image, 0x00, 64 * 64 * 4); + else + DRM_ERROR("Failed to allocate cursor_image\n"); + + } else { + srf->snooper.image = NULL; + } + srf->snooper.crtc = NULL; + + rep->sid = res->id; + vmw_resource_unreference(&res); + return 0; +out_err1: + kfree(srf->sizes); +out_err0: + kfree(user_srf); + return ret; +} + +int vmw_surface_reference_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + union drm_vmw_surface_reference_arg *arg = + (union drm_vmw_surface_reference_arg *)data; + struct drm_vmw_surface_arg *req = &arg->req; + struct drm_vmw_surface_create_req *rep = &arg->rep; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_resource *res; + struct vmw_surface *srf; + struct vmw_user_surface *user_srf; + struct drm_vmw_size __user *user_sizes; + int ret; + + res = vmw_resource_lookup(dev_priv, &dev_priv->surface_idr, req->sid); + if (unlikely(res == NULL)) + return -EINVAL; + + if (res->res_free != &vmw_user_surface_free) { + ret = -EINVAL; + goto out; + } + + srf = container_of(res, struct vmw_surface, res); + user_srf = container_of(srf, struct vmw_user_surface, srf); + if (user_srf->base.tfile != tfile && !user_srf->base.shareable) { + DRM_ERROR("Tried to reference none shareable surface\n"); + ret = -EPERM; + goto out; + } + + ret = ttm_ref_object_add(tfile, &user_srf->base, TTM_REF_USAGE, NULL); + if (unlikely(ret != 0)) { + DRM_ERROR("Could not add a reference to a surface.\n"); + goto out; + } + + rep->flags = srf->flags; + rep->format = srf->format; + memcpy(rep->mip_levels, srf->mip_levels, sizeof(srf->mip_levels)); + user_sizes = (struct drm_vmw_size __user *)(unsigned long) + rep->size_addr; + + if (user_sizes) + ret = copy_to_user(user_sizes, srf->sizes, + srf->num_sizes * sizeof(*srf->sizes)); + if (unlikely(ret != 0)) { + DRM_ERROR("copy_to_user failed %p %u\n", + user_sizes, srf->num_sizes); + /** + * FIXME: Unreference surface here? + */ + goto out; + } +out: + vmw_resource_unreference(&res); + return ret; +} + +int vmw_surface_check(struct vmw_private *dev_priv, + struct ttm_object_file *tfile, + int id) +{ + struct vmw_resource *res; + int ret = 0; + + read_lock(&dev_priv->resource_lock); + res = idr_find(&dev_priv->surface_idr, id); + if (res && res->avail) { + struct vmw_surface *srf = + container_of(res, struct vmw_surface, res); + struct vmw_user_surface *usrf = + container_of(srf, struct vmw_user_surface, srf); + + if (usrf->base.tfile != tfile && !usrf->base.shareable) + ret = -EPERM; + } else + ret = -EINVAL; + read_unlock(&dev_priv->resource_lock); + + return ret; +} + +/** + * Buffer management. 
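+ *
+ * Buffer objects are accounted against the TTM memory global before
+ * they are initialized; the charge covers the driver structure plus
+ * the page array (sketch, mirroring vmw_dmabuf_init() below):
+ *
+ *	acc_size = vmw_dmabuf_acc_size(bdev->glob, num_pages);
+ *	ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false);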
+ */ + +static size_t vmw_dmabuf_acc_size(struct ttm_bo_global *glob, + unsigned long num_pages) +{ + static size_t bo_user_size = ~0; + + size_t page_array_size = + (num_pages * sizeof(void *) + PAGE_SIZE - 1) & PAGE_MASK; + + if (unlikely(bo_user_size == ~0)) { + bo_user_size = glob->ttm_bo_extra_size + + ttm_round_pot(sizeof(struct vmw_dma_buffer)); + } + + return bo_user_size + page_array_size; +} + +void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo) +{ + struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo); + struct ttm_bo_global *glob = bo->glob; + struct vmw_private *dev_priv = + container_of(bo->bdev, struct vmw_private, bdev); + + ttm_mem_global_free(glob->mem_glob, bo->acc_size); + if (vmw_bo->gmr_bound) { + vmw_gmr_unbind(dev_priv, vmw_bo->gmr_id); + spin_lock(&glob->lru_lock); + ida_remove(&dev_priv->gmr_ida, vmw_bo->gmr_id); + spin_unlock(&glob->lru_lock); + } + kfree(vmw_bo); +} + +int vmw_dmabuf_init(struct vmw_private *dev_priv, + struct vmw_dma_buffer *vmw_bo, + size_t size, struct ttm_placement *placement, + bool interruptible, + void (*bo_free) (struct ttm_buffer_object *bo)) +{ + struct ttm_bo_device *bdev = &dev_priv->bdev; + struct ttm_mem_global *mem_glob = bdev->glob->mem_glob; + size_t acc_size; + int ret; + + BUG_ON(!bo_free); + + acc_size = + vmw_dmabuf_acc_size(bdev->glob, + (size + PAGE_SIZE - 1) >> PAGE_SHIFT); + + ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false); + if (unlikely(ret != 0)) { + /* we must free the bo here as + * ttm_buffer_object_init does so as well */ + bo_free(&vmw_bo->base); + return ret; + } + + memset(vmw_bo, 0, sizeof(*vmw_bo)); + + INIT_LIST_HEAD(&vmw_bo->gmr_lru); + INIT_LIST_HEAD(&vmw_bo->validate_list); + vmw_bo->gmr_id = 0; + vmw_bo->gmr_bound = false; + + ret = ttm_bo_init(bdev, &vmw_bo->base, size, + ttm_bo_type_device, placement, + 0, 0, interruptible, + NULL, acc_size, bo_free); + return ret; +} + +static void vmw_user_dmabuf_destroy(struct ttm_buffer_object *bo) +{ + struct vmw_user_dma_buffer *vmw_user_bo = vmw_user_dma_buffer(bo); + struct vmw_dma_buffer *vmw_bo = &vmw_user_bo->dma; + struct ttm_bo_global *glob = bo->glob; + struct vmw_private *dev_priv = + container_of(bo->bdev, struct vmw_private, bdev); + + ttm_mem_global_free(glob->mem_glob, bo->acc_size); + if (vmw_bo->gmr_bound) { + vmw_gmr_unbind(dev_priv, vmw_bo->gmr_id); + spin_lock(&glob->lru_lock); + ida_remove(&dev_priv->gmr_ida, vmw_bo->gmr_id); + spin_unlock(&glob->lru_lock); + } + kfree(vmw_user_bo); +} + +static void vmw_user_dmabuf_release(struct ttm_base_object **p_base) +{ + struct vmw_user_dma_buffer *vmw_user_bo; + struct ttm_base_object *base = *p_base; + struct ttm_buffer_object *bo; + + *p_base = NULL; + + if (unlikely(base == NULL)) + return; + + vmw_user_bo = container_of(base, struct vmw_user_dma_buffer, base); + bo = &vmw_user_bo->dma.base; + ttm_bo_unref(&bo); +} + +int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + union drm_vmw_alloc_dmabuf_arg *arg = + (union drm_vmw_alloc_dmabuf_arg *)data; + struct drm_vmw_alloc_dmabuf_req *req = &arg->req; + struct drm_vmw_dmabuf_rep *rep = &arg->rep; + struct vmw_user_dma_buffer *vmw_user_bo; + struct ttm_buffer_object *tmp; + struct vmw_master *vmaster = vmw_master(file_priv->master); + int ret; + + vmw_user_bo = kzalloc(sizeof(*vmw_user_bo), GFP_KERNEL); + if (unlikely(vmw_user_bo == NULL)) + return -ENOMEM; + + ret = ttm_read_lock(&vmaster->lock, true); + if (unlikely(ret != 0)) { + 
kfree(vmw_user_bo); + return ret; + } + + ret = vmw_dmabuf_init(dev_priv, &vmw_user_bo->dma, req->size, + &vmw_vram_placement, true, + &vmw_user_dmabuf_destroy); + if (unlikely(ret != 0)) + return ret; + + tmp = ttm_bo_reference(&vmw_user_bo->dma.base); + ret = ttm_base_object_init(vmw_fpriv(file_priv)->tfile, + &vmw_user_bo->base, + false, + ttm_buffer_type, + &vmw_user_dmabuf_release, NULL); + if (unlikely(ret != 0)) { + ttm_bo_unref(&tmp); + } else { + rep->handle = vmw_user_bo->base.hash.key; + rep->map_handle = vmw_user_bo->dma.base.addr_space_offset; + rep->cur_gmr_id = vmw_user_bo->base.hash.key; + rep->cur_gmr_offset = 0; + } + ttm_bo_unref(&tmp); + + ttm_read_unlock(&vmaster->lock); + + return 0; +} + +int vmw_dmabuf_unref_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vmw_unref_dmabuf_arg *arg = + (struct drm_vmw_unref_dmabuf_arg *)data; + + return ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile, + arg->handle, + TTM_REF_USAGE); +} + +uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo, + uint32_t cur_validate_node) +{ + struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo); + + if (likely(vmw_bo->on_validate_list)) + return vmw_bo->cur_validate_node; + + vmw_bo->cur_validate_node = cur_validate_node; + vmw_bo->on_validate_list = true; + + return cur_validate_node; +} + +void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo) +{ + struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo); + + vmw_bo->on_validate_list = false; +} + +uint32_t vmw_dmabuf_gmr(struct ttm_buffer_object *bo) +{ + struct vmw_dma_buffer *vmw_bo; + + if (bo->mem.mem_type == TTM_PL_VRAM) + return SVGA_GMR_FRAMEBUFFER; + + vmw_bo = vmw_dma_buffer(bo); + + return (vmw_bo->gmr_bound) ? vmw_bo->gmr_id : SVGA_GMR_NULL; +} + +void vmw_dmabuf_set_gmr(struct ttm_buffer_object *bo, uint32_t id) +{ + struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo); + vmw_bo->gmr_bound = true; + vmw_bo->gmr_id = id; +} + +int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, + uint32_t handle, struct vmw_dma_buffer **out) +{ + struct vmw_user_dma_buffer *vmw_user_bo; + struct ttm_base_object *base; + + base = ttm_base_object_lookup(tfile, handle); + if (unlikely(base == NULL)) { + printk(KERN_ERR "Invalid buffer object handle 0x%08lx.\n", + (unsigned long)handle); + return -ESRCH; + } + + if (unlikely(base->object_type != ttm_buffer_type)) { + ttm_base_object_unref(&base); + printk(KERN_ERR "Invalid buffer object handle 0x%08lx.\n", + (unsigned long)handle); + return -EINVAL; + } + + vmw_user_bo = container_of(base, struct vmw_user_dma_buffer, base); + (void)ttm_bo_reference(&vmw_user_bo->dma.base); + ttm_base_object_unref(&base); + *out = &vmw_user_bo->dma; + + return 0; +} + +/** + * TODO: Implement a gmr id eviction mechanism. Currently we just fail + * when we're out of ids, causing GMR space to be allocated + * out of VRAM. 
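+ *
+ * Id allocation follows the usual ida retry pattern, matching
+ * vmw_gmr_id_alloc() below (sketch):
+ *
+ *	do {
+ *		if (ida_pre_get(&dev_priv->gmr_ida, GFP_KERNEL) == 0)
+ *			return -ENOMEM;
+ *		spin_lock(&glob->lru_lock);
+ *		ret = ida_get_new(&dev_priv->gmr_ida, &id);
+ *		spin_unlock(&glob->lru_lock);
+ *	} while (ret == -EAGAIN);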
+ */ + +int vmw_gmr_id_alloc(struct vmw_private *dev_priv, uint32_t *p_id) +{ + struct ttm_bo_global *glob = dev_priv->bdev.glob; + int id; + int ret; + + do { + if (unlikely(ida_pre_get(&dev_priv->gmr_ida, GFP_KERNEL) == 0)) + return -ENOMEM; + + spin_lock(&glob->lru_lock); + ret = ida_get_new(&dev_priv->gmr_ida, &id); + spin_unlock(&glob->lru_lock); + } while (ret == -EAGAIN); + + if (unlikely(ret != 0)) + return ret; + + if (unlikely(id >= dev_priv->max_gmr_ids)) { + spin_lock(&glob->lru_lock); + ida_remove(&dev_priv->gmr_ida, id); + spin_unlock(&glob->lru_lock); + return -EBUSY; + } + + *p_id = (uint32_t) id; + return 0; +} + +/* + * Stream managment + */ + +static void vmw_stream_destroy(struct vmw_resource *res) +{ + struct vmw_private *dev_priv = res->dev_priv; + struct vmw_stream *stream; + int ret; + + DRM_INFO("%s: unref\n", __func__); + stream = container_of(res, struct vmw_stream, res); + + ret = vmw_overlay_unref(dev_priv, stream->stream_id); + WARN_ON(ret != 0); +} + +static int vmw_stream_init(struct vmw_private *dev_priv, + struct vmw_stream *stream, + void (*res_free) (struct vmw_resource *res)) +{ + struct vmw_resource *res = &stream->res; + int ret; + + ret = vmw_resource_init(dev_priv, res, &dev_priv->stream_idr, + VMW_RES_STREAM, res_free); + + if (unlikely(ret != 0)) { + if (res_free == NULL) + kfree(stream); + else + res_free(&stream->res); + return ret; + } + + ret = vmw_overlay_claim(dev_priv, &stream->stream_id); + if (ret) { + vmw_resource_unreference(&res); + return ret; + } + + DRM_INFO("%s: claimed\n", __func__); + + vmw_resource_activate(&stream->res, vmw_stream_destroy); + return 0; +} + +/** + * User-space context management: + */ + +static void vmw_user_stream_free(struct vmw_resource *res) +{ + struct vmw_user_stream *stream = + container_of(res, struct vmw_user_stream, stream.res); + + kfree(stream); +} + +/** + * This function is called when user space has no more references on the + * base object. It releases the base-object's reference on the resource object. 
+ */ + +static void vmw_user_stream_base_release(struct ttm_base_object **p_base) +{ + struct ttm_base_object *base = *p_base; + struct vmw_user_stream *stream = + container_of(base, struct vmw_user_stream, base); + struct vmw_resource *res = &stream->stream.res; + + *p_base = NULL; + vmw_resource_unreference(&res); +} + +int vmw_stream_unref_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_resource *res; + struct vmw_user_stream *stream; + struct drm_vmw_stream_arg *arg = (struct drm_vmw_stream_arg *)data; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + int ret = 0; + + res = vmw_resource_lookup(dev_priv, &dev_priv->stream_idr, arg->stream_id); + if (unlikely(res == NULL)) + return -EINVAL; + + if (res->res_free != &vmw_user_stream_free) { + ret = -EINVAL; + goto out; + } + + stream = container_of(res, struct vmw_user_stream, stream.res); + if (stream->base.tfile != tfile) { + ret = -EINVAL; + goto out; + } + + ttm_ref_object_base_unref(tfile, stream->base.hash.key, TTM_REF_USAGE); +out: + vmw_resource_unreference(&res); + return ret; +} + +int vmw_stream_claim_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct vmw_user_stream *stream = kmalloc(sizeof(*stream), GFP_KERNEL); + struct vmw_resource *res; + struct vmw_resource *tmp; + struct drm_vmw_stream_arg *arg = (struct drm_vmw_stream_arg *)data; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + int ret; + + if (unlikely(stream == NULL)) + return -ENOMEM; + + res = &stream->stream.res; + stream->base.shareable = false; + stream->base.tfile = NULL; + + ret = vmw_stream_init(dev_priv, &stream->stream, vmw_user_stream_free); + if (unlikely(ret != 0)) + return ret; + + tmp = vmw_resource_reference(res); + ret = ttm_base_object_init(tfile, &stream->base, false, VMW_RES_STREAM, + &vmw_user_stream_base_release, NULL); + + if (unlikely(ret != 0)) { + vmw_resource_unreference(&tmp); + goto out_err; + } + + arg->stream_id = res->id; +out_err: + vmw_resource_unreference(&res); + return ret; +} + +int vmw_user_stream_lookup(struct vmw_private *dev_priv, + struct ttm_object_file *tfile, + uint32_t *inout_id, struct vmw_resource **out) +{ + struct vmw_user_stream *stream; + struct vmw_resource *res; + int ret; + + res = vmw_resource_lookup(dev_priv, &dev_priv->stream_idr, *inout_id); + if (unlikely(res == NULL)) + return -EINVAL; + + if (res->res_free != &vmw_user_stream_free) { + ret = -EINVAL; + goto err_ref; + } + + stream = container_of(res, struct vmw_user_stream, stream.res); + if (stream->base.tfile != tfile) { + ret = -EPERM; + goto err_ref; + } + + *inout_id = stream->stream.stream_id; + *out = res; + return 0; +err_ref: + vmw_resource_unreference(&res); + return ret; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c new file mode 100644 index 000000000000..e3df4adfb4d8 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c @@ -0,0 +1,99 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "drmP.h" +#include "vmwgfx_drv.h" + +int vmw_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *file_priv; + struct vmw_private *dev_priv; + + if (unlikely(vma->vm_pgoff < VMWGFX_FILE_PAGE_OFFSET)) { + if (vmw_fifo_mmap(filp, vma) == 0) + return 0; + return drm_mmap(filp, vma); + } + + file_priv = (struct drm_file *)filp->private_data; + dev_priv = vmw_priv(file_priv->minor->dev); + return ttm_bo_mmap(filp, vma, &dev_priv->bdev); +} + +static int vmw_ttm_mem_global_init(struct ttm_global_reference *ref) +{ + DRM_INFO("global init.\n"); + return ttm_mem_global_init(ref->object); +} + +static void vmw_ttm_mem_global_release(struct ttm_global_reference *ref) +{ + ttm_mem_global_release(ref->object); +} + +int vmw_ttm_global_init(struct vmw_private *dev_priv) +{ + struct ttm_global_reference *global_ref; + int ret; + + global_ref = &dev_priv->mem_global_ref; + global_ref->global_type = TTM_GLOBAL_TTM_MEM; + global_ref->size = sizeof(struct ttm_mem_global); + global_ref->init = &vmw_ttm_mem_global_init; + global_ref->release = &vmw_ttm_mem_global_release; + + ret = ttm_global_item_ref(global_ref); + if (unlikely(ret != 0)) { + DRM_ERROR("Failed setting up TTM memory accounting.\n"); + return ret; + } + + dev_priv->bo_global_ref.mem_glob = + dev_priv->mem_global_ref.object; + global_ref = &dev_priv->bo_global_ref.ref; + global_ref->global_type = TTM_GLOBAL_TTM_BO; + global_ref->size = sizeof(struct ttm_bo_global); + global_ref->init = &ttm_bo_global_init; + global_ref->release = &ttm_bo_global_release; + ret = ttm_global_item_ref(global_ref); + + if (unlikely(ret != 0)) { + DRM_ERROR("Failed setting up TTM buffer objects.\n"); + goto out_no_bo; + } + + return 0; +out_no_bo: + ttm_global_item_unref(&dev_priv->mem_global_ref); + return ret; +} + +void vmw_ttm_global_release(struct vmw_private *dev_priv) +{ + ttm_global_item_unref(&dev_priv->bo_global_ref.ref); + ttm_global_item_unref(&dev_priv->mem_global_ref); +} diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index d21b3469f6d7..89f725fe064d 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -101,6 +101,8 @@ source "drivers/staging/p9auth/Kconfig" source "drivers/staging/line6/Kconfig" +source "drivers/gpu/drm/vmwgfx/Kconfig" + source "drivers/gpu/drm/radeon/Kconfig" 
source "drivers/staging/octeon/Kconfig" diff --git a/include/drm/Kbuild b/include/drm/Kbuild index b940fdfa3b25..1e83f85be819 100644 --- a/include/drm/Kbuild +++ b/include/drm/Kbuild @@ -7,4 +7,5 @@ unifdef-y += r128_drm.h unifdef-y += radeon_drm.h unifdef-y += sis_drm.h unifdef-y += savage_drm.h +unifdef-y += vmwgfx_drm.h unifdef-y += via_drm.h diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h new file mode 100644 index 000000000000..2be7e1249b6f --- /dev/null +++ b/include/drm/vmwgfx_drm.h @@ -0,0 +1,574 @@ +/************************************************************************** + * + * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef __VMWGFX_DRM_H__ +#define __VMWGFX_DRM_H__ + +#define DRM_VMW_MAX_SURFACE_FACES 6 +#define DRM_VMW_MAX_MIP_LEVELS 24 + +#define DRM_VMW_EXT_NAME_LEN 128 + +#define DRM_VMW_GET_PARAM 0 +#define DRM_VMW_ALLOC_DMABUF 1 +#define DRM_VMW_UNREF_DMABUF 2 +#define DRM_VMW_CURSOR_BYPASS 3 +/* guarded by DRM_VMW_PARAM_NUM_STREAMS != 0*/ +#define DRM_VMW_CONTROL_STREAM 4 +#define DRM_VMW_CLAIM_STREAM 5 +#define DRM_VMW_UNREF_STREAM 6 +/* guarded by DRM_VMW_PARAM_3D == 1 */ +#define DRM_VMW_CREATE_CONTEXT 7 +#define DRM_VMW_UNREF_CONTEXT 8 +#define DRM_VMW_CREATE_SURFACE 9 +#define DRM_VMW_UNREF_SURFACE 10 +#define DRM_VMW_REF_SURFACE 11 +#define DRM_VMW_EXECBUF 12 +#define DRM_VMW_FIFO_DEBUG 13 +#define DRM_VMW_FENCE_WAIT 14 + + +/*************************************************************************/ +/** + * DRM_VMW_GET_PARAM - get device information. + * + * DRM_VMW_PARAM_FIFO_OFFSET: + * Offset to use to map the first page of the FIFO read-only. + * The fifo is mapped using the mmap() system call on the drm device. + * + * DRM_VMW_PARAM_OVERLAY_IOCTL: + * Does the driver support the overlay ioctl. + */ + +#define DRM_VMW_PARAM_NUM_STREAMS 0 +#define DRM_VMW_PARAM_NUM_FREE_STREAMS 1 +#define DRM_VMW_PARAM_3D 2 +#define DRM_VMW_PARAM_FIFO_OFFSET 3 + + +/** + * struct drm_vmw_getparam_arg + * + * @value: Returned value. //Out + * @param: Parameter to query. //In. + * + * Argument to the DRM_VMW_GET_PARAM Ioctl. 
+ */ + +struct drm_vmw_getparam_arg { + uint64_t value; + uint32_t param; + uint32_t pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_EXTENSION - Query device extensions. + */ + +/** + * struct drm_vmw_extension_rep + * + * @exists: The queried extension exists. + * @driver_ioctl_offset: Ioctl number of the first ioctl in the extension. + * @driver_sarea_offset: Offset to any space in the DRI SAREA + * used by the extension. + * @major: Major version number of the extension. + * @minor: Minor version number of the extension. + * @pl: Patch level version number of the extension. + * + * Output argument to the DRM_VMW_EXTENSION Ioctl. + */ + +struct drm_vmw_extension_rep { + int32_t exists; + uint32_t driver_ioctl_offset; + uint32_t driver_sarea_offset; + uint32_t major; + uint32_t minor; + uint32_t pl; + uint32_t pad64; +}; + +/** + * union drm_vmw_extension_arg + * + * @extension - Ascii name of the extension to be queried. //In + * @rep - Reply as defined above. //Out + * + * Argument to the DRM_VMW_EXTENSION Ioctl. + */ + +union drm_vmw_extension_arg { + char extension[DRM_VMW_EXT_NAME_LEN]; + struct drm_vmw_extension_rep rep; +}; + +/*************************************************************************/ +/** + * DRM_VMW_CREATE_CONTEXT - Create a host context. + * + * Allocates a device unique context id, and queues a create context command + * for the host. Does not wait for host completion. + */ + +/** + * struct drm_vmw_context_arg + * + * @cid: Device unique context ID. + * + * Output argument to the DRM_VMW_CREATE_CONTEXT Ioctl. + * Input argument to the DRM_VMW_UNREF_CONTEXT Ioctl. + */ + +struct drm_vmw_context_arg { + int32_t cid; + uint32_t pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_UNREF_CONTEXT - Create a host context. + * + * Frees a global context id, and queues a destroy host command for the host. + * Does not wait for host completion. The context ID can be used directly + * in the command stream and shows up as the same context ID on the host. + */ + +/*************************************************************************/ +/** + * DRM_VMW_CREATE_SURFACE - Create a host suface. + * + * Allocates a device unique surface id, and queues a create surface command + * for the host. Does not wait for host completion. The surface ID can be + * used directly in the command stream and shows up as the same surface + * ID on the host. + */ + +/** + * struct drm_wmv_surface_create_req + * + * @flags: Surface flags as understood by the host. + * @format: Surface format as understood by the host. + * @mip_levels: Number of mip levels for each face. + * An unused face should have 0 encoded. + * @size_addr: Address of a user-space array of sruct drm_vmw_size + * cast to an uint64_t for 32-64 bit compatibility. + * The size of the array should equal the total number of mipmap levels. + * @shareable: Boolean whether other clients (as identified by file descriptors) + * may reference this surface. + * + * Input data to the DRM_VMW_CREATE_SURFACE Ioctl. + * Output data from the DRM_VMW_REF_SURFACE Ioctl. + */ + +struct drm_vmw_surface_create_req { + uint32_t flags; + uint32_t format; + uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES]; + uint64_t size_addr; + int32_t shareable; + uint32_t pad64; +}; + +/** + * struct drm_wmv_surface_arg + * + * @sid: Surface id of created surface or surface to destroy or reference. 
+ * + * Output data from the DRM_VMW_CREATE_SURFACE Ioctl. + * Input argument to the DRM_VMW_UNREF_SURFACE Ioctl. + * Input argument to the DRM_VMW_REF_SURFACE Ioctl. + */ + +struct drm_vmw_surface_arg { + int32_t sid; + uint32_t pad64; +}; + +/** + * struct drm_vmw_size ioctl. + * + * @width - mip level width + * @height - mip level height + * @depth - mip level depth + * + * Description of a mip level. + * Input data to the DRM_WMW_CREATE_SURFACE Ioctl. + */ + +struct drm_vmw_size { + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t pad64; +}; + +/** + * union drm_vmw_surface_create_arg + * + * @rep: Output data as described above. + * @req: Input data as described above. + * + * Argument to the DRM_VMW_CREATE_SURFACE Ioctl. + */ + +union drm_vmw_surface_create_arg { + struct drm_vmw_surface_arg rep; + struct drm_vmw_surface_create_req req; +}; + +/*************************************************************************/ +/** + * DRM_VMW_REF_SURFACE - Reference a host surface. + * + * Puts a reference on a host surface with a give sid, as previously + * returned by the DRM_VMW_CREATE_SURFACE ioctl. + * A reference will make sure the surface isn't destroyed while we hold + * it and will allow the calling client to use the surface ID in the command + * stream. + * + * On successful return, the Ioctl returns the surface information given + * in the DRM_VMW_CREATE_SURFACE ioctl. + */ + +/** + * union drm_vmw_surface_reference_arg + * + * @rep: Output data as described above. + * @req: Input data as described above. + * + * Argument to the DRM_VMW_REF_SURFACE Ioctl. + */ + +union drm_vmw_surface_reference_arg { + struct drm_vmw_surface_create_req rep; + struct drm_vmw_surface_arg req; +}; + +/*************************************************************************/ +/** + * DRM_VMW_UNREF_SURFACE - Unreference a host surface. + * + * Clear a reference previously put on a host surface. + * When all references are gone, including the one implicitly placed + * on creation, + * a destroy surface command will be queued for the host. + * Does not wait for completion. + */ + +/*************************************************************************/ +/** + * DRM_VMW_EXECBUF + * + * Submit a command buffer for execution on the host, and return a + * fence sequence that when signaled, indicates that the command buffer has + * executed. + */ + +/** + * struct drm_vmw_execbuf_arg + * + * @commands: User-space address of a command buffer cast to an uint64_t. + * @command-size: Size in bytes of the command buffer. + * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an + * uint64_t. + * + * Argument to the DRM_VMW_EXECBUF Ioctl. + */ + +struct drm_vmw_execbuf_arg { + uint64_t commands; + uint32_t command_size; + uint32_t pad64; + uint64_t fence_rep; +}; + +/** + * struct drm_vmw_fence_rep + * + * @fence_seq: Fence sequence associated with a command submission. + * @error: This member should've been set to -EFAULT on submission. + * The following actions should be take on completion: + * error == -EFAULT: Fence communication failed. The host is synchronized. + * Use the last fence id read from the FIFO fence register. + * error != 0 && error != -EFAULT: + * Fence submission failed. The host is synchronized. Use the fence_seq member. + * error == 0: All is OK, The host may not be synchronized. + * Use the fence_seq member. + * + * Input / Output data to the DRM_VMW_EXECBUF Ioctl. 
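+ *
+ * The intended caller-side handling can be summarised as (sketch;
+ * read_fifo_fence_register() is a hypothetical helper that reads the
+ * FIFO fence register):
+ *
+ *	rep.error = -EFAULT;
+ *	... submit DRM_VMW_EXECBUF with fence_rep pointing at rep ...
+ *	if (rep.error == -EFAULT)
+ *		seq = read_fifo_fence_register();
+ *	else
+ *		seq = rep.fence_seq;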
+ +/*************************************************************************/ +/** + * DRM_VMW_ALLOC_DMABUF + * + * Allocate a DMA buffer that is also visible to the host. + * NOTE: The buffer is + * identified by a handle and an offset, which are private to the guest, but + * usable in the command stream. The guest kernel may translate these + * and patch up the command stream accordingly. In the future, the offset may + * be zero at all times, or it may disappear from the interface before it is + * fixed. + * + * The DMA buffer may stay user-space mapped in the guest at all times, + * and is thus suitable for sub-allocation. + * + * DMA buffers are mapped using the mmap() syscall on the drm device. + */ + +/** + * struct drm_vmw_alloc_dmabuf_req + * + * @size: Required minimum size of the buffer. + * + * Input data to the DRM_VMW_ALLOC_DMABUF Ioctl. + */ + +struct drm_vmw_alloc_dmabuf_req { + uint32_t size; + uint32_t pad64; +}; + +/** + * struct drm_vmw_dmabuf_rep + * + * @map_handle: Offset to use in the mmap() call used to map the buffer. + * @handle: Handle unique to this buffer. Used for unreferencing. + * @cur_gmr_id: GMR id to use in the command stream when this buffer is + * referenced. See note above. + * @cur_gmr_offset: Offset to use in the command stream when this buffer is + * referenced. See note above. + * + * Output data from the DRM_VMW_ALLOC_DMABUF Ioctl. + */ + +struct drm_vmw_dmabuf_rep { + uint64_t map_handle; + uint32_t handle; + uint32_t cur_gmr_id; + uint32_t cur_gmr_offset; + uint32_t pad64; +}; + +/** + * union drm_vmw_alloc_dmabuf_arg + * + * @req: Input data as described above. + * @rep: Output data as described above. + * + * Argument to the DRM_VMW_ALLOC_DMABUF Ioctl. + */ + +union drm_vmw_alloc_dmabuf_arg { + struct drm_vmw_alloc_dmabuf_req req; + struct drm_vmw_dmabuf_rep rep; +}; + +/*************************************************************************/ +/** + * DRM_VMW_UNREF_DMABUF - Free a DMA buffer. + * + */ + +/** + * struct drm_vmw_unref_dmabuf_arg + * + * @handle: Handle indicating what buffer to free. Obtained from the + * DRM_VMW_ALLOC_DMABUF Ioctl. + * + * Argument to the DRM_VMW_UNREF_DMABUF Ioctl. + */ + +struct drm_vmw_unref_dmabuf_arg { + uint32_t handle; + uint32_t pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_FIFO_DEBUG - Get last FIFO submission. + * + * This IOCTL copies the last FIFO submission directly out of the FIFO buffer. + */ + +/** + * struct drm_vmw_fifo_debug_arg + * + * @debug_buffer: User space address of a debug_buffer cast to a uint64_t //In + * @debug_buffer_size: Size in bytes of debug buffer //In + * @used_size: Number of bytes copied to the buffer //Out + * @did_not_fit: Boolean indicating that the fifo contents did not fit. //Out + * + * Argument to the DRM_VMW_FIFO_DEBUG Ioctl. + */ + +struct drm_vmw_fifo_debug_arg { + uint64_t debug_buffer; + uint32_t debug_buffer_size; + uint32_t used_size; + int32_t did_not_fit; + uint32_t pad64; +}; + +struct drm_vmw_fence_wait_arg { + uint64_t sequence; + uint64_t kernel_cookie; + int32_t cookie_valid; + int32_t pad64; +};
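The mapping flow described above (allocate, then mmap() the drm device at map_handle) could look roughly like the following hedged sketch; the function name and error handling are illustrative, and libdrm plus an open vmwgfx fd are assumed:

/* Hypothetical allocate-and-map sketch; not part of this patch. */
#include <string.h>
#include <sys/mman.h>
#include <xf86drm.h>

static void *vmw_dmabuf_alloc_map(int fd, uint32_t size, uint32_t *handle)
{
        union drm_vmw_alloc_dmabuf_arg arg;
        void *map;

        memset(&arg, 0, sizeof(arg));
        arg.req.size = size;
        if (drmCommandWriteRead(fd, DRM_VMW_ALLOC_DMABUF, &arg, sizeof(arg)))
                return NULL;

        *handle = arg.rep.handle;       /* keep it for DRM_VMW_UNREF_DMABUF */
        map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                   fd, arg.rep.map_handle);     /* map_handle is the mmap offset */
        return map == MAP_FAILED ? NULL : map;
}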
+ +/*************************************************************************/ +/** + * DRM_VMW_CONTROL_STREAM - Control overlays, aka streams. + * + * This IOCTL controls the overlay units of the svga device. + * The SVGA overlay units do not work like regular hardware units in + * that they do not automatically read back the contents of the given dma + * buffer. Instead they only read back for each call to this ioctl, and + * at any point between this call being made and a following call that + * either changes the buffer or disables the stream. + */ + +/** + * struct drm_vmw_rect + * + * Defines a rectangle. Used in the overlay ioctl to define + * source and destination rectangle. + */ + +struct drm_vmw_rect { + int32_t x; + int32_t y; + uint32_t w; + uint32_t h; +}; + +/** + * struct drm_vmw_control_stream_arg + * + * @stream_id: Stream to control + * @enabled: If false all following arguments are ignored. + * @handle: Handle to buffer for getting data from. + * @format: Format of the overlay as understood by the host. + * @width: Width of the overlay. + * @height: Height of the overlay. + * @size: Size of the overlay in bytes. + * @pitch: Array of pitches, the last two are only used for YUV12 formats. + * @offset: Offset from start of dma buffer to overlay. + * @src: Source rect, must be within the defined area above. + * @dst: Destination rect, x and y may be negative. + * + * Argument to the DRM_VMW_CONTROL_STREAM Ioctl. + */ + +struct drm_vmw_control_stream_arg { + uint32_t stream_id; + uint32_t enabled; + + uint32_t flags; + uint32_t color_key; + + uint32_t handle; + uint32_t offset; + int32_t format; + uint32_t size; + uint32_t width; + uint32_t height; + uint32_t pitch[3]; + + uint32_t pad64; + struct drm_vmw_rect src; + struct drm_vmw_rect dst; +}; + +/*************************************************************************/ +/** + * DRM_VMW_CURSOR_BYPASS - Give extra information about cursor bypass. + * + */ + +#define DRM_VMW_CURSOR_BYPASS_ALL (1 << 0) +#define DRM_VMW_CURSOR_BYPASS_FLAGS (1) + +/** + * struct drm_vmw_cursor_bypass_arg + * + * @flags: Flags. + * @crtc_id: Crtc id, only used if DRM_VMW_CURSOR_BYPASS_ALL isn't passed. + * @xpos: X position of cursor. + * @ypos: Y position of cursor. + * @xhot: X hotspot. + * @yhot: Y hotspot. + * + * Argument to the DRM_VMW_CURSOR_BYPASS Ioctl. + */ + +struct drm_vmw_cursor_bypass_arg { + uint32_t flags; + uint32_t crtc_id; + int32_t xpos; + int32_t ypos; + int32_t xhot; + int32_t yhot; +}; + +/*************************************************************************/ +/** + * DRM_VMW_CLAIM_STREAM - Claim a single stream. + */ + +/** + * struct drm_vmw_stream_arg + * + * @stream_id: Device unique stream ID. + * + * Output argument to the DRM_VMW_CLAIM_STREAM Ioctl. + * Input argument to the DRM_VMW_UNREF_STREAM Ioctl. + */ + +struct drm_vmw_stream_arg { + uint32_t stream_id; + uint32_t pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_UNREF_STREAM - Unclaim a stream. + * + * Return a single stream that was claimed by this process. Also makes + * sure that the stream has been stopped. + */ + +#endif -- cgit v1.2.3
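Putting the overlay pieces together, a user-space driver might claim a stream and point it at a previously allocated DMA buffer along the following lines. This is a hedged sketch only: the fourcc value, pitch math and libdrm wrappers are assumptions, not something this patch defines:

/* Hypothetical overlay setup sketch; not part of this patch. */
#include <string.h>
#include <xf86drm.h>

static int vmw_show_overlay(int fd, uint32_t buf_handle,
                            uint32_t width, uint32_t height, uint32_t size)
{
        struct drm_vmw_stream_arg stream;
        struct drm_vmw_control_stream_arg ctl;

        memset(&stream, 0, sizeof(stream));
        if (drmCommandWriteRead(fd, DRM_VMW_CLAIM_STREAM, &stream, sizeof(stream)))
                return -1;

        memset(&ctl, 0, sizeof(ctl));
        ctl.stream_id = stream.stream_id;
        ctl.enabled = 1;
        ctl.handle = buf_handle;        /* overlay data is read from this dmabuf */
        ctl.format = 0x32595559;        /* placeholder fourcc (YUY2) */
        ctl.width = width;
        ctl.height = height;
        ctl.size = size;
        ctl.pitch[0] = width * 2;       /* packed format: only the first pitch is used */
        ctl.src.w = ctl.dst.w = width;
        ctl.src.h = ctl.dst.h = height;

        return drmCommandWrite(fd, DRM_VMW_CONTROL_STREAM, &ctl, sizeof(ctl));
}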
From 4056c9a344d60ee96471a5f3b0a3c8a90371c8fd Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 3 Dec 2009 20:28:04 +0200 Subject: nfsd: Remove unused dprintk This doesn't appear to be useful. Signed-off-by: Boaz Harrosh Signed-off-by: J. Bruce Fields --- include/linux/nfsd/nfsfh.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index 8f641c908450..2973e1135343 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -20,7 +20,6 @@ # include #endif #include -#include /* * This is the old "dentry style" Linux NFSv2 file handle. */ @@ -329,9 +328,6 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) struct dentry *dentry = fhp->fh_dentry; struct inode *inode; - dfprintk(FILEOP, "nfsd: fh_lock(%s) locked = %d\n", - SVCFH_fmt(fhp), fhp->fh_locked); - BUG_ON(!dentry); if (fhp->fh_locked) { -- cgit v1.2.3 From a600ffcbb3743cf1296bee2a41d4824c719d7181 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 3 Dec 2009 20:28:35 +0200 Subject: sunrpc: Clean never used include files Remove includes of two headers never used by this file. Doing so exposed a missing #include in include/linux/sunrpc/rpc_rdma.h. I did not see any other dependent users, but if any exist they should be fixed, since these headers are totally irrelevant here. Signed-off-by: Boaz Harrosh Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/debug.h | 3 --- include/linux/sunrpc/rpc_rdma.h | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 10709cbe96fd..c2786f20016f 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -28,9 +28,6 @@ #ifdef __KERNEL__ -#include -#include - /* * Enable RPC debugging/profiling. */ diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 87b895d5c786..b78f16b1dea3 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -40,6 +40,8 @@ #ifndef _LINUX_SUNRPC_RPC_RDMA_H #define _LINUX_SUNRPC_RPC_RDMA_H +#include + struct rpcrdma_segment { __be32 rs_handle; /* Registered memory handle */ __be32 rs_length; /* Length of the chunk in bytes */ -- cgit v1.2.3 From d703158229329af7152d159753f849aa7bd55ee6 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 3 Dec 2009 20:28:47 +0200 Subject: nfsd: Fix independence of a few nfsd related headers A header should be compilation independent, i.e. it should pull in any header whose declarations it uses directly, and not force its users to re-include all of its dependencies all over again. [At the end of the day, what's the use of a header if it does not have more than one user?] Signed-off-by: Boaz Harrosh Signed-off-by: J. Bruce Fields
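One way to check the independence this commit is after is to compile a translation unit that includes nothing but the header in question; a hypothetical sketch, not part of the patch or of the kernel build system:

/* hdr_check.c - hypothetical self-containment check; build with the usual
 * kernel include paths. If this compiles, the header pulls in everything
 * its own declarations need. */
#include <linux/nfsd/nfsfh.h>

int nfsd_nfsfh_header_is_self_contained;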
--- include/linux/nfs_xdr.h | 1 + include/linux/nfsacl.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 62f63fb0c4c8..00a0c8170816 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -2,6 +2,7 @@ #define _LINUX_NFS_XDR_H #include +#include /* * To change the maximum rsize and wsize supported by the NFS client, adjust diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h index 43011b69297c..f321b578edeb 100644 --- a/include/linux/nfsacl.h +++ b/include/linux/nfsacl.h @@ -29,6 +29,7 @@ #ifdef __KERNEL__ #include +#include /* Maximum number of ACL entries over NFS */ #define NFS_ACL_MAX_ENTRIES 1024 -- cgit v1.2.3 From 72579ac9cd68081108277c31781b127d0f420d61 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 3 Dec 2009 20:28:59 +0200 Subject: nfsd: Headers Independence and include cleanups * Add includes that are directly used by headers * Remove includes that are not needed These are the changes made: [xdr.h] struct nfsd_readdirres has an embedded struct readdir_cd from nfsd.h; fixing that, we can drop the other includes [xdr4.h] embedded types are defined in both state.h and nfsd.h [syscall.h] After the export.h fix none of this stuff is needed. Fix extra space in a # include <> statement [stats.h] does not need it, but is exported to user-mode, so I don't touch it [state.h] embeds types from nfsfh.h, like struct knfsd_fh. Bringing that in eliminates the need for all other includes [nfsfh.h] directly manipulates types from sunrpc/svc.h. Removed other unused headers. [nfsd.h] removed unused header includes [export.h] uses lots of sunrpc/svc.h types and a single prototype declaration with a pointer from nfsfh.h, but all users of export.h need nfsfh.h anyway. Remove the now-unneeded include. [const.h] Unfixed (not independent) [cache.h] could do with a forward declaration of "struct svc_rqst;" from sunrpc/svc.h, but all users absolutely will need sunrpc/svc.h, so it is easier overall this way. Signed-off-by: Boaz Harrosh Signed-off-by: J. Bruce Fields --- include/linux/nfsd/cache.h | 3 +-- include/linux/nfsd/export.h | 2 +- include/linux/nfsd/nfsd.h | 4 ---- include/linux/nfsd/nfsfh.h | 3 +-- include/linux/nfsd/state.h | 4 +--- include/linux/nfsd/syscall.h | 8 +------- include/linux/nfsd/xdr.h | 3 +-- include/linux/nfsd/xdr4.h | 3 ++- 8 files changed, 8 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h index 3a3f58934f5e..a165425dea41 100644 --- a/include/linux/nfsd/cache.h +++ b/include/linux/nfsd/cache.h @@ -10,8 +10,7 @@ #ifndef NFSCACHE_H #define NFSCACHE_H -#include -#include +#include /* * Representation of a reply cache entry.
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index a6d9ef2bb34a..ef3d416fcf67 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -12,7 +12,7 @@ # include #ifdef __KERNEL__ -# include +# include #endif /* diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index e4518d090a8c..74f67c2aca34 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -11,13 +11,9 @@ #define LINUX_NFSD_NFSD_H #include -#include -#include -#include #include #include -#include #include #include /* diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index 2973e1135343..49523edbc510 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -16,8 +16,7 @@ # include #ifdef __KERNEL__ -# include -# include +# include #endif #include diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 5aadf8aa3a97..2af75686e0d3 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -37,9 +37,7 @@ #ifndef _NFSD4_STATE_H #define _NFSD4_STATE_H -#include -#include -#include +#include typedef struct { u32 cl_boot; diff --git a/include/linux/nfsd/syscall.h b/include/linux/nfsd/syscall.h index 7a3b565b898f..812bc1e160dc 100644 --- a/include/linux/nfsd/syscall.h +++ b/include/linux/nfsd/syscall.h @@ -9,14 +9,8 @@ #ifndef NFSD_SYSCALL_H #define NFSD_SYSCALL_H -# include -#ifdef __KERNEL__ -# include -#endif -#include -#include +#include #include -#include /* * Version of the syscall interface diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h index a0132ef58f21..58f824d854c2 100644 --- a/include/linux/nfsd/xdr.h +++ b/include/linux/nfsd/xdr.h @@ -7,9 +7,8 @@ #ifndef LINUX_NFSD_H #define LINUX_NFSD_H -#include #include -#include +#include struct nfsd_fhandle { struct svc_fh fh; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 73164c2b3d29..1bf266239c7e 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -39,7 +39,8 @@ #ifndef _LINUX_NFSD_XDR4_H #define _LINUX_NFSD_XDR4_H -#include +#include +#include #define NFSD4_MAX_TAGLEN 128 #define XDR_LEN(n) (((n) + 3) & ~3) -- cgit v1.2.3 From 9a74af21330c8d46efa977d088a62cc1bfa954e9 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 3 Dec 2009 20:30:56 +0200 Subject: nfsd: Move private headers to source directory Lots of include/linux/nfsd/* headers are only used by nfsd module. Move them to the source directory Signed-off-by: Boaz Harrosh Signed-off-by: J. 
Bruce Fields --- fs/nfsd/auth.c | 2 +- fs/nfsd/cache.h | 85 +++++++ fs/nfsd/export.c | 3 +- fs/nfsd/lockd.c | 2 +- fs/nfsd/nfs2acl.c | 7 +- fs/nfsd/nfs3acl.c | 7 +- fs/nfsd/nfs3proc.c | 4 +- fs/nfsd/nfs3xdr.c | 2 +- fs/nfsd/nfs4callback.c | 4 +- fs/nfsd/nfs4proc.c | 4 +- fs/nfsd/nfs4recover.c | 5 +- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/nfs4xdr.c | 3 +- fs/nfsd/nfscache.c | 4 +- fs/nfsd/nfsctl.c | 5 +- fs/nfsd/nfsd.h | 335 +++++++++++++++++++++++++++ fs/nfsd/nfsfh.c | 2 +- fs/nfsd/nfsproc.c | 4 +- fs/nfsd/nfssvc.c | 4 +- fs/nfsd/nfsxdr.c | 2 +- fs/nfsd/state.h | 409 ++++++++++++++++++++++++++++++++ fs/nfsd/stats.c | 4 +- fs/nfsd/vfs.c | 18 +- fs/nfsd/xdr.h | 176 ++++++++++++++ fs/nfsd/xdr3.h | 346 +++++++++++++++++++++++++++ fs/nfsd/xdr4.h | 564 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfsd/cache.h | 85 ------- include/linux/nfsd/nfsd.h | 335 --------------------------- include/linux/nfsd/state.h | 409 -------------------------------- include/linux/nfsd/xdr.h | 176 -------------- include/linux/nfsd/xdr3.h | 346 --------------------------- include/linux/nfsd/xdr4.h | 564 --------------------------------------------- 32 files changed, 1963 insertions(+), 1955 deletions(-) create mode 100644 fs/nfsd/cache.h create mode 100644 fs/nfsd/nfsd.h create mode 100644 fs/nfsd/state.h create mode 100644 fs/nfsd/xdr.h create mode 100644 fs/nfsd/xdr3.h create mode 100644 fs/nfsd/xdr4.h delete mode 100644 include/linux/nfsd/cache.h delete mode 100644 include/linux/nfsd/nfsd.h delete mode 100644 include/linux/nfsd/state.h delete mode 100644 include/linux/nfsd/xdr.h delete mode 100644 include/linux/nfsd/xdr3.h delete mode 100644 include/linux/nfsd/xdr4.h (limited to 'include') diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index ad354d284cf8..71209d4993d0 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -5,7 +5,7 @@ */ #include -#include +#include "nfsd.h" #include "auth.h" int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h new file mode 100644 index 000000000000..a165425dea41 --- /dev/null +++ b/fs/nfsd/cache.h @@ -0,0 +1,85 @@ +/* + * include/linux/nfsd/cache.h + * + * Request reply cache. This was heavily inspired by the + * implementation in 4.3BSD/4.4BSD. + * + * Copyright (C) 1995, 1996 Olaf Kirch + */ + +#ifndef NFSCACHE_H +#define NFSCACHE_H + +#include + +/* + * Representation of a reply cache entry. + */ +struct svc_cacherep { + struct hlist_node c_hash; + struct list_head c_lru; + + unsigned char c_state, /* unused, inprog, done */ + c_type, /* status, buffer */ + c_secure : 1; /* req came from port < 1024 */ + struct sockaddr_in c_addr; + __be32 c_xid; + u32 c_prot; + u32 c_proc; + u32 c_vers; + unsigned long c_timestamp; + union { + struct kvec u_vec; + __be32 u_status; + } c_u; +}; + +#define c_replvec c_u.u_vec +#define c_replstat c_u.u_status + +/* cache entry states */ +enum { + RC_UNUSED, + RC_INPROG, + RC_DONE +}; + +/* return values */ +enum { + RC_DROPIT, + RC_REPLY, + RC_DOIT, + RC_INTR +}; + +/* + * Cache types. + * We may want to add more types one day, e.g. for diropres and + * attrstat replies. Using cache entries with fixed length instead + * of buffer pointers may be more efficient. + */ +enum { + RC_NOCACHE, + RC_REPLSTAT, + RC_REPLBUFF, +}; + +/* + * If requests are retransmitted within this interval, they're dropped. 
+ */ +#define RC_DELAY (HZ/5) + +int nfsd_reply_cache_init(void); +void nfsd_reply_cache_shutdown(void); +int nfsd_cache_lookup(struct svc_rqst *, int); +void nfsd_cache_update(struct svc_rqst *, int, __be32 *); + +#ifdef CONFIG_NFSD_V4 +void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); +#else /* CONFIG_NFSD_V4 */ +static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) +{ +} +#endif /* CONFIG_NFSD_V4 */ + +#endif /* NFSCACHE_H */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 68e63f441444..cb3dae2fcd86 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -18,10 +18,11 @@ #include #include -#include #include #include +#include "nfsd.h" + #define NFSDDBG_FACILITY NFSDDBG_EXPORT typedef struct auth_domain svc_client; diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 801ef7104ff4..6f12777ed227 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -9,8 +9,8 @@ */ #include -#include #include +#include "nfsd.h" #include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_LOCKD diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index a54628de7715..874e2a94bf4f 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -6,10 +6,11 @@ * Copyright (C) 2002-2003 Andreas Gruenbacher */ -#include -#include -#include +#include "nfsd.h" +/* FIXME: nfsacl.h is a broken header */ #include +#include "cache.h" +#include "xdr3.h" #include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 2f5c61bea908..c6011ddbadc0 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -6,10 +6,11 @@ * Copyright (C) 2002-2003 Andreas Gruenbacher */ -#include -#include -#include +#include "nfsd.h" +/* FIXME: nfsacl.h is a broken header */ #include +#include "cache.h" +#include "xdr3.h" #include "vfs.h" #define RETURN_STATUS(st) { resp->status = (st); return (st); } diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index b694b4304544..90b19ca75b34 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -10,8 +10,8 @@ #include #include -#include -#include +#include "cache.h" +#include "xdr3.h" #include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 623e13aa6259..c523bb88c10b 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -9,7 +9,7 @@ */ #include -#include +#include "xdr3.h" #include "auth.h" #define NFSDDBG_FACILITY NFSDDBG_XDR diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4fe396071b61..f7a315827638 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -34,8 +34,8 @@ */ #include -#include -#include +#include "nfsd.h" +#include "state.h" #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 61f682c77e7f..e2b5666f25d1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -36,8 +36,8 @@ */ #include -#include -#include +#include "cache.h" +#include "xdr4.h" #include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 48742f243c25..6744e7f2da0e 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -33,12 +33,13 @@ * */ -#include -#include #include #include #include #include + +#include "nfsd.h" +#include "state.h" #include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1fe6e29fd500..2923e6c1da18 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -36,11 +36,11 @@ #include #include -#include #include #include #include #include +#include "xdr4.h" #include 
"vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2fa96821f5b5..cab978031100 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -43,10 +43,11 @@ #include #include #include -#include #include #include #include + +#include "xdr4.h" #include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_XDR diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 96694b8345ef..18aa9729a380 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -10,8 +10,8 @@ * Copyright (C) 1995, 1996 Olaf Kirch */ -#include -#include +#include "nfsd.h" +#include "cache.h" /* Size of reply cache. Common values are: * 4.3BSD: 128 diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index e4f49fd6af44..0415680d3f58 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -11,12 +11,13 @@ #include #include -#include -#include #include #include #include +#include "nfsd.h" +#include "cache.h" + /* * We have a single directory with 9 nodes in it. */ diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h new file mode 100644 index 000000000000..74f67c2aca34 --- /dev/null +++ b/fs/nfsd/nfsd.h @@ -0,0 +1,335 @@ +/* + * linux/include/linux/nfsd/nfsd.h + * + * Hodge-podge collection of knfsd-related stuff. + * I will sort this out later. + * + * Copyright (C) 1995-1997 Olaf Kirch + */ + +#ifndef LINUX_NFSD_NFSD_H +#define LINUX_NFSD_NFSD_H + +#include +#include + +#include +#include +#include +/* + * nfsd version + */ +#define NFSD_SUPPORTED_MINOR_VERSION 1 + +struct readdir_cd { + __be32 err; /* 0, nfserr, or nfserr_eof */ +}; + + +extern struct svc_program nfsd_program; +extern struct svc_version nfsd_version2, nfsd_version3, + nfsd_version4; +extern u32 nfsd_supported_minorversion; +extern struct mutex nfsd_mutex; +extern struct svc_serv *nfsd_serv; +extern spinlock_t nfsd_drc_lock; +extern unsigned int nfsd_drc_max_mem; +extern unsigned int nfsd_drc_mem_used; + +extern const struct seq_operations nfs_exports_op; + +/* + * Function prototypes. 
+ */ +int nfsd_svc(unsigned short port, int nrservs); +int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); + +int nfsd_nrthreads(void); +int nfsd_nrpools(void); +int nfsd_get_nrthreads(int n, int *); +int nfsd_set_nrthreads(int n, int *); + +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +#ifdef CONFIG_NFSD_V2_ACL +extern struct svc_version nfsd_acl_version2; +#else +#define nfsd_acl_version2 NULL +#endif +#ifdef CONFIG_NFSD_V3_ACL +extern struct svc_version nfsd_acl_version3; +#else +#define nfsd_acl_version3 NULL +#endif +#endif + +enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; +int nfsd_vers(int vers, enum vers_op change); +int nfsd_minorversion(u32 minorversion, enum vers_op change); +void nfsd_reset_versions(void); +int nfsd_create_serv(void); + +extern int nfsd_max_blksize; + +/* + * NFSv4 State + */ +#ifdef CONFIG_NFSD_V4 +extern unsigned int max_delegations; +int nfs4_state_init(void); +void nfsd4_free_slabs(void); +int nfs4_state_start(void); +void nfs4_state_shutdown(void); +time_t nfs4_lease_time(void); +void nfs4_reset_lease(time_t leasetime); +int nfs4_reset_recoverydir(char *recdir); +#else +static inline int nfs4_state_init(void) { return 0; } +static inline void nfsd4_free_slabs(void) { } +static inline int nfs4_state_start(void) { return 0; } +static inline void nfs4_state_shutdown(void) { } +static inline time_t nfs4_lease_time(void) { return 0; } +static inline void nfs4_reset_lease(time_t leasetime) { } +static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } +#endif + +/* + * lockd binding + */ +void nfsd_lockd_init(void); +void nfsd_lockd_shutdown(void); + + +/* + * These macros provide pre-xdr'ed values for faster operation. + */ +#define nfs_ok cpu_to_be32(NFS_OK) +#define nfserr_perm cpu_to_be32(NFSERR_PERM) +#define nfserr_noent cpu_to_be32(NFSERR_NOENT) +#define nfserr_io cpu_to_be32(NFSERR_IO) +#define nfserr_nxio cpu_to_be32(NFSERR_NXIO) +#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN) +#define nfserr_acces cpu_to_be32(NFSERR_ACCES) +#define nfserr_exist cpu_to_be32(NFSERR_EXIST) +#define nfserr_xdev cpu_to_be32(NFSERR_XDEV) +#define nfserr_nodev cpu_to_be32(NFSERR_NODEV) +#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR) +#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR) +#define nfserr_inval cpu_to_be32(NFSERR_INVAL) +#define nfserr_fbig cpu_to_be32(NFSERR_FBIG) +#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC) +#define nfserr_rofs cpu_to_be32(NFSERR_ROFS) +#define nfserr_mlink cpu_to_be32(NFSERR_MLINK) +#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP) +#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG) +#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY) +#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT) +#define nfserr_stale cpu_to_be32(NFSERR_STALE) +#define nfserr_remote cpu_to_be32(NFSERR_REMOTE) +#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH) +#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE) +#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC) +#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE) +#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP) +#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL) +#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT) +#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE) +#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX) +#define nfserr_denied cpu_to_be32(NFSERR_DENIED) +#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK) +#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED) +#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE) +#define 
nfserr_same cpu_to_be32(NFSERR_SAME) +#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE) +#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID) +#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE) +#define nfserr_moved cpu_to_be32(NFSERR_MOVED) +#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE) +#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH) +#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED) +#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID) +#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID) +#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID) +#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID) +#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK) +#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME) +#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH) +#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) +#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) +#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE) +#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD) +#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL) +#define nfserr_grace cpu_to_be32(NFSERR_GRACE) +#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE) +#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD) +#define nfserr_badname cpu_to_be32(NFSERR_BADNAME) +#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN) +#define nfserr_locked cpu_to_be32(NFSERR_LOCKED) +#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC) +#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE) +#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT) +#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST) +#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION) +#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT) +#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY) +#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION) +#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED) +#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY) +#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER) +#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE) +#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT) +#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT) +#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE) +#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED) +#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS) +#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG) +#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG) +#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE) +#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP) +#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND) +#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS) +#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION) +#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP) +#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY) +#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE) +#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY) +#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT) +#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION) +#define nfserr_encr_alg_unsupp 
cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP) +#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT) +#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP) +#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED) +#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE) +#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL) +#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) +#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) +#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) + +/* error codes for internal use */ +/* if a request fails due to kmalloc failure, it gets dropped. + * Client should resend eventually + */ +#define nfserr_dropit cpu_to_be32(30000) +/* end-of-file indicator in readdir */ +#define nfserr_eof cpu_to_be32(30001) +/* replay detected */ +#define nfserr_replay_me cpu_to_be32(11001) +/* nfs41 replay detected */ +#define nfserr_replay_cache cpu_to_be32(11002) + +/* Check for dir entries '.' and '..' */ +#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) + +/* + * Time of server startup + */ +extern struct timeval nfssvc_boot; + +#ifdef CONFIG_NFSD_V4 + +/* before processing a COMPOUND operation, we have to check that there + * is enough space in the buffer for XDR encode to succeed. otherwise, + * we might process an operation with side effects, and be unable to + * tell the client that the operation succeeded. + * + * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space + * needed to encode an "ordinary" _successful_ operation. (GETATTR, + * READ, READDIR, and READLINK have their own buffer checks.) if we + * fall below this level, we fail the next operation with NFS4ERR_RESOURCE. + * + * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space + * needed to encode an operation which has failed with NFS4ERR_RESOURCE. + * care is taken to ensure that we never fall below this level for any + * reason. 
+ */ +#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ +#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ + +#define NFSD_LEASE_TIME (nfs4_lease_time()) +#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ + +/* + * The following attributes are currently not supported by the NFSv4 server: + * ARCHIVE (deprecated anyway) + * HIDDEN (unlikely to be supported any time soon) + * MIMETYPE (unlikely to be supported any time soon) + * QUOTA_* (will be supported in a forthcoming patch) + * SYSTEM (unlikely to be supported any time soon) + * TIME_BACKUP (unlikely to be supported any time soon) + * TIME_CREATE (unlikely to be supported any time soon) + */ +#define NFSD4_SUPPORTED_ATTRS_WORD0 \ +(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \ + | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \ + | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \ + | FATTR4_WORD0_UNIQUE_HANDLES | FATTR4_WORD0_LEASE_TIME | FATTR4_WORD0_RDATTR_ERROR \ + | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_CANSETTIME | FATTR4_WORD0_CASE_INSENSITIVE \ + | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED \ + | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \ + | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS \ + | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \ + | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL) + +#define NFSD4_SUPPORTED_ATTRS_WORD1 \ +(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \ + | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \ + | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \ + | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \ + | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \ + | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID) + +#define NFSD4_SUPPORTED_ATTRS_WORD2 0 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ + NFSD4_SUPPORTED_ATTRS_WORD0 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ + NFSD4_SUPPORTED_ATTRS_WORD1 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) + +static inline u32 nfsd_suppattrs0(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 + : NFSD4_SUPPORTED_ATTRS_WORD0; +} + +static inline u32 nfsd_suppattrs1(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1 + : NFSD4_SUPPORTED_ATTRS_WORD1; +} + +static inline u32 nfsd_suppattrs2(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 + : NFSD4_SUPPORTED_ATTRS_WORD2; +} + +/* These will return ERR_INVAL if specified in GETATTR or READDIR. */ +#define NFSD_WRITEONLY_ATTRS_WORD1 \ +(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) + +/* These are the only attrs allowed in CREATE/OPEN/SETATTR. 
*/ +#define NFSD_WRITEABLE_ATTRS_WORD0 \ +(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) +#define NFSD_WRITEABLE_ATTRS_WORD1 \ +(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEABLE_ATTRS_WORD2 0 + +#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ + NFSD_WRITEABLE_ATTRS_WORD0 +/* + * we currently store the exclusive create verifier in the v_{a,m}time + * attributes so the client can't set these at create time using EXCLUSIVE4_1 + */ +#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \ + (NFSD_WRITEABLE_ATTRS_WORD1 & \ + ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)) +#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \ + NFSD_WRITEABLE_ATTRS_WORD2 + +#endif /* CONFIG_NFSD_V4 */ + +#endif /* LINUX_NFSD_NFSD_H */ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 739948165034..0eb1c59f5ab8 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -12,7 +12,7 @@ #include #include -#include +#include "nfsd.h" #include "vfs.h" #include "auth.h" diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index b6bd9e0d7cd0..21a5f793c3d1 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -9,8 +9,8 @@ #include -#include -#include +#include "cache.h" +#include "xdr.h" #include "vfs.h" typedef struct svc_rqst svc_rqst; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b2d7ffac0357..b520ce10bd15 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -15,11 +15,11 @@ #include #include -#include -#include #include #include #include +#include "nfsd.h" +#include "cache.h" #include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_SVC diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 5e0603da39e7..3bec831704af 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -6,7 +6,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch */ -#include +#include "xdr.h" #include "auth.h" #define NFSDDBG_FACILITY NFSDDBG_XDR diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h new file mode 100644 index 000000000000..2af75686e0d3 --- /dev/null +++ b/fs/nfsd/state.h @@ -0,0 +1,409 @@ +/* + * linux/include/nfsd/state.h + * + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith + * Andy Adamson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _NFSD4_STATE_H +#define _NFSD4_STATE_H + +#include + +typedef struct { + u32 cl_boot; + u32 cl_id; +} clientid_t; + +typedef struct { + u32 so_boot; + u32 so_stateownerid; + u32 so_fileid; +} stateid_opaque_t; + +typedef struct { + u32 si_generation; + stateid_opaque_t si_opaque; +} stateid_t; +#define si_boot si_opaque.so_boot +#define si_stateownerid si_opaque.so_stateownerid +#define si_fileid si_opaque.so_fileid + +#define STATEID_FMT "(%08x/%08x/%08x/%08x)" +#define STATEID_VAL(s) \ + (s)->si_boot, \ + (s)->si_stateownerid, \ + (s)->si_fileid, \ + (s)->si_generation + +struct nfsd4_cb_sequence { + /* args/res */ + u32 cbs_minorversion; + struct nfs4_client *cbs_clp; +}; + +struct nfs4_delegation { + struct list_head dl_perfile; + struct list_head dl_perclnt; + struct list_head dl_recall_lru; /* delegation recalled */ + atomic_t dl_count; /* ref count */ + struct nfs4_client *dl_client; + struct nfs4_file *dl_file; + struct file_lock *dl_flock; + struct file *dl_vfs_file; + u32 dl_type; + time_t dl_time; +/* For recall: */ + u32 dl_ident; + stateid_t dl_stateid; + struct knfsd_fh dl_fh; + int dl_retries; +}; + +/* client delegation callback info */ +struct nfs4_cb_conn { + /* SETCLIENTID info */ + struct sockaddr_storage cb_addr; + size_t cb_addrlen; + u32 cb_prog; + u32 cb_minorversion; + u32 cb_ident; /* minorversion 0 only */ + /* RPC client info */ + atomic_t cb_set; /* successful CB_NULL call */ + struct rpc_clnt * cb_client; +}; + +/* Maximum number of slots per session. 
160 is useful for long haul TCP */ +#define NFSD_MAX_SLOTS_PER_SESSION 160 +/* Maximum number of operations per session compound */ +#define NFSD_MAX_OPS_PER_COMPOUND 16 +/* Maximum session per slot cache size */ +#define NFSD_SLOT_CACHE_SIZE 1024 +/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ +#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 +#define NFSD_MAX_MEM_PER_SESSION \ + (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE) + +struct nfsd4_slot { + bool sl_inuse; + bool sl_cachethis; + u16 sl_opcnt; + u32 sl_seqid; + __be32 sl_status; + u32 sl_datalen; + char sl_data[]; +}; + +struct nfsd4_channel_attrs { + u32 headerpadsz; + u32 maxreq_sz; + u32 maxresp_sz; + u32 maxresp_cached; + u32 maxops; + u32 maxreqs; + u32 nr_rdma_attrs; + u32 rdma_attrs; +}; + +struct nfsd4_create_session { + clientid_t clientid; + struct nfs4_sessionid sessionid; + u32 seqid; + u32 flags; + struct nfsd4_channel_attrs fore_channel; + struct nfsd4_channel_attrs back_channel; + u32 callback_prog; + u32 uid; + u32 gid; +}; + +/* The single slot clientid cache structure */ +struct nfsd4_clid_slot { + u32 sl_seqid; + __be32 sl_status; + struct nfsd4_create_session sl_cr_ses; +}; + +struct nfsd4_session { + struct kref se_ref; + struct list_head se_hash; /* hash by sessionid */ + struct list_head se_perclnt; + u32 se_flags; + struct nfs4_client *se_client; /* for expire_client */ + struct nfs4_sessionid se_sessionid; + struct nfsd4_channel_attrs se_fchannel; + struct nfsd4_channel_attrs se_bchannel; + struct nfsd4_slot *se_slots[]; /* forward channel slots */ +}; + +static inline void +nfsd4_put_session(struct nfsd4_session *ses) +{ + extern void free_session(struct kref *kref); + kref_put(&ses->se_ref, free_session); +} + +static inline void +nfsd4_get_session(struct nfsd4_session *ses) +{ + kref_get(&ses->se_ref); +} + +/* formatted contents of nfs4_sessionid */ +struct nfsd4_sessionid { + clientid_t clientid; + u32 sequence; + u32 reserved; +}; + +#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */ + +/* + * struct nfs4_client - one per client. Clientids live here. + * o Each nfs4_client is hashed by clientid. + * + * o Each nfs4_clients is also hashed by name + * (the opaque quantity initially sent by the client to identify itself). 
+ * + * o cl_perclient list is used to ensure no dangling stateowner references + * when we expire the nfs4_client + */ +struct nfs4_client { + struct list_head cl_idhash; /* hash by cl_clientid.id */ + struct list_head cl_strhash; /* hash by cl_name */ + struct list_head cl_openowners; + struct list_head cl_delegations; + struct list_head cl_lru; /* tail queue */ + struct xdr_netobj cl_name; /* id generated by client */ + char cl_recdir[HEXDIR_LEN]; /* recovery dir */ + nfs4_verifier cl_verifier; /* generated by client */ + time_t cl_time; /* time of last lease renewal */ + struct sockaddr_storage cl_addr; /* client ipaddress */ + u32 cl_flavor; /* setclientid pseudoflavor */ + char *cl_principal; /* setclientid principal name */ + struct svc_cred cl_cred; /* setclientid principal */ + clientid_t cl_clientid; /* generated by server */ + nfs4_verifier cl_confirm; /* generated by server */ + struct nfs4_cb_conn cl_cb_conn; /* callback info */ + atomic_t cl_count; /* ref count */ + u32 cl_firststate; /* recovery dir creation */ + + /* for nfs41 */ + struct list_head cl_sessions; + struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ + u32 cl_exchange_flags; + struct nfs4_sessionid cl_sessionid; + + /* for nfs41 callbacks */ + /* We currently support a single back channel with a single slot */ + unsigned long cl_cb_slot_busy; + u32 cl_cb_seq_nr; + struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */ + struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ + /* wait here for slots */ +}; + +/* struct nfs4_client_reset + * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl + * upon lease reset, or from upcall to state_daemon (to read in state + * from non-volitile storage) upon reboot. + */ +struct nfs4_client_reclaim { + struct list_head cr_strhash; /* hash by cr_name */ + char cr_recdir[HEXDIR_LEN]; /* recover dir */ +}; + +static inline void +update_stateid(stateid_t *stateid) +{ + stateid->si_generation++; +} + +/* A reasonable value for REPLAY_ISIZE was estimated as follows: + * The OPEN response, typically the largest, requires + * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) + + * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) + + * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes + */ + +#define NFSD4_REPLAY_ISIZE 112 + +/* + * Replay buffer, where the result of the last seqid-mutating operation + * is cached. + */ +struct nfs4_replay { + __be32 rp_status; + unsigned int rp_buflen; + char *rp_buf; + unsigned intrp_allocated; + struct knfsd_fh rp_openfh; + char rp_ibuf[NFSD4_REPLAY_ISIZE]; +}; + +/* +* nfs4_stateowner can either be an open_owner, or a lock_owner +* +* so_idhash: stateid_hashtbl[] for open owner, lockstateid_hashtbl[] +* for lock_owner +* so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[] +* for lock_owner +* so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client +* struct is reaped. +* so_perfilestate: heads the list of nfs4_stateid (either open or lock) +* and is used to ensure no dangling nfs4_stateid references when we +* release a stateowner. +* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when +* close is called to reap associated byte-range locks +* so_close_lru: (open) stateowner is placed on this list instead of being +* reaped (when so_perfilestate is empty) to hold the last close replay. +* reaped by laundramat thread after lease period. 
+*/ +struct nfs4_stateowner { + struct kref so_ref; + struct list_head so_idhash; /* hash by so_id */ + struct list_head so_strhash; /* hash by op_name */ + struct list_head so_perclient; + struct list_head so_stateids; + struct list_head so_perstateid; /* for lockowners only */ + struct list_head so_close_lru; /* tail queue */ + time_t so_time; /* time of placement on so_close_lru */ + int so_is_open_owner; /* 1=openowner,0=lockowner */ + u32 so_id; + struct nfs4_client * so_client; + /* after increment in ENCODE_SEQID_OP_TAIL, represents the next + * sequence id expected from the client: */ + u32 so_seqid; + struct xdr_netobj so_owner; /* open owner name */ + int so_confirmed; /* successful OPEN_CONFIRM? */ + struct nfs4_replay so_replay; +}; + +/* +* nfs4_file: a file opened by some number of (open) nfs4_stateowners. +* o fi_perfile list is used to search for conflicting +* share_acces, share_deny on the file. +*/ +struct nfs4_file { + atomic_t fi_ref; + struct list_head fi_hash; /* hash by "struct inode *" */ + struct list_head fi_stateids; + struct list_head fi_delegations; + struct inode *fi_inode; + u32 fi_id; /* used with stateowner->so_id + * for stateid_hashtbl hash */ + bool fi_had_conflict; +}; + +/* +* nfs4_stateid can either be an open stateid or (eventually) a lock stateid +* +* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file +* +* st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry +* st_perfile: file_hashtbl[] entry. +* st_perfile_state: nfs4_stateowner->so_perfilestate +* st_perlockowner: (open stateid) list of lock nfs4_stateowners +* st_access_bmap: used only for open stateid +* st_deny_bmap: used only for open stateid +* st_openstp: open stateid lock stateid was derived from +* +* XXX: open stateids and lock stateids have diverged sufficiently that +* we should consider defining separate structs for the two cases. 
+*/ + +struct nfs4_stateid { + struct list_head st_hash; + struct list_head st_perfile; + struct list_head st_perstateowner; + struct list_head st_lockowners; + struct nfs4_stateowner * st_stateowner; + struct nfs4_file * st_file; + stateid_t st_stateid; + struct file * st_vfs_file; + unsigned long st_access_bmap; + unsigned long st_deny_bmap; + struct nfs4_stateid * st_openstp; +}; + +/* flags for preprocess_seqid_op() */ +#define HAS_SESSION 0x00000001 +#define CONFIRM 0x00000002 +#define OPEN_STATE 0x00000004 +#define LOCK_STATE 0x00000008 +#define RD_STATE 0x00000010 +#define WR_STATE 0x00000020 +#define CLOSE_STATE 0x00000040 + +#define seqid_mutating_err(err) \ + (((err) != nfserr_stale_clientid) && \ + ((err) != nfserr_bad_seqid) && \ + ((err) != nfserr_stale_stateid) && \ + ((err) != nfserr_bad_stateid)) + +struct nfsd4_compound_state; + +extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, + stateid_t *stateid, int flags, struct file **filp); +extern void nfs4_lock_state(void); +extern void nfs4_unlock_state(void); +extern int nfs4_in_grace(void); +extern __be32 nfs4_check_open_reclaim(clientid_t *clid); +extern void put_nfs4_client(struct nfs4_client *clp); +extern void nfs4_free_stateowner(struct kref *kref); +extern int set_callback_cred(void); +extern void nfsd4_probe_callback(struct nfs4_client *clp); +extern void nfsd4_cb_recall(struct nfs4_delegation *dp); +extern void nfs4_put_delegation(struct nfs4_delegation *dp); +extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); +extern void nfsd4_init_recdir(char *recdir_name); +extern int nfsd4_recdir_load(void); +extern void nfsd4_shutdown_recdir(void); +extern int nfs4_client_to_reclaim(const char *name); +extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); +extern void nfsd4_recdir_purge_old(void); +extern int nfsd4_create_clid_dir(struct nfs4_client *clp); +extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); + +static inline void +nfs4_put_stateowner(struct nfs4_stateowner *so) +{ + kref_put(&so->so_ref, nfs4_free_stateowner); +} + +static inline void +nfs4_get_stateowner(struct nfs4_stateowner *so) +{ + kref_get(&so->so_ref); +} + +#endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index e3e411e9fe4a..3fc69dfd3091 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -25,11 +25,11 @@ #include #include - #include -#include #include +#include "nfsd.h" + struct nfsd_stats nfsdstats; struct svc_stat nfsd_svcstats = { .program = &nfsd_program, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 81ce108c114e..04bdba12d21b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -22,23 +22,25 @@ #include #include #include -#include -#ifdef CONFIG_NFSD_V3 -#include -#endif /* CONFIG_NFSD_V3 */ #include #include #include #include +#include +#include +#include + +#ifdef CONFIG_NFSD_V3 +#include "xdr3.h" +#endif /* CONFIG_NFSD_V3 */ + #ifdef CONFIG_NFSD_V4 #include #include #endif /* CONFIG_NFSD_V4 */ -#include -#include -#include "vfs.h" -#include +#include "nfsd.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_FILEOP diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h new file mode 100644 index 000000000000..235ee5c3be54 --- /dev/null +++ b/fs/nfsd/xdr.h @@ -0,0 +1,176 @@ +/* + * linux/include/linux/nfsd/xdr.h + * + * XDR types for nfsd. This is mainly a typing exercise. 
+ */ + +#ifndef LINUX_NFSD_H +#define LINUX_NFSD_H + +#include +#include "nfsd.h" + +struct nfsd_fhandle { + struct svc_fh fh; +}; + +struct nfsd_sattrargs { + struct svc_fh fh; + struct iattr attrs; +}; + +struct nfsd_diropargs { + struct svc_fh fh; + char * name; + unsigned int len; +}; + +struct nfsd_readargs { + struct svc_fh fh; + __u32 offset; + __u32 count; + int vlen; +}; + +struct nfsd_writeargs { + svc_fh fh; + __u32 offset; + int len; + int vlen; +}; + +struct nfsd_createargs { + struct svc_fh fh; + char * name; + unsigned int len; + struct iattr attrs; +}; + +struct nfsd_renameargs { + struct svc_fh ffh; + char * fname; + unsigned int flen; + struct svc_fh tfh; + char * tname; + unsigned int tlen; +}; + +struct nfsd_readlinkargs { + struct svc_fh fh; + char * buffer; +}; + +struct nfsd_linkargs { + struct svc_fh ffh; + struct svc_fh tfh; + char * tname; + unsigned int tlen; +}; + +struct nfsd_symlinkargs { + struct svc_fh ffh; + char * fname; + unsigned int flen; + char * tname; + unsigned int tlen; + struct iattr attrs; +}; + +struct nfsd_readdirargs { + struct svc_fh fh; + __u32 cookie; + __u32 count; + __be32 * buffer; +}; + +struct nfsd_attrstat { + struct svc_fh fh; + struct kstat stat; +}; + +struct nfsd_diropres { + struct svc_fh fh; + struct kstat stat; +}; + +struct nfsd_readlinkres { + int len; +}; + +struct nfsd_readres { + struct svc_fh fh; + unsigned long count; + struct kstat stat; +}; + +struct nfsd_readdirres { + int count; + + struct readdir_cd common; + __be32 * buffer; + int buflen; + __be32 * offset; +}; + +struct nfsd_statfsres { + struct kstatfs stats; +}; + +/* + * Storage requirements for XDR arguments and results. + */ +union nfsd_xdrstore { + struct nfsd_sattrargs sattr; + struct nfsd_diropargs dirop; + struct nfsd_readargs read; + struct nfsd_writeargs write; + struct nfsd_createargs create; + struct nfsd_renameargs rename; + struct nfsd_linkargs link; + struct nfsd_symlinkargs symlink; + struct nfsd_readdirargs readdir; +}; + +#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) + + +int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *); +int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); +int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *, + struct nfsd_sattrargs *); +int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *, + struct nfsd_diropargs *); +int nfssvc_decode_readargs(struct svc_rqst *, __be32 *, + struct nfsd_readargs *); +int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *, + struct nfsd_writeargs *); +int nfssvc_decode_createargs(struct svc_rqst *, __be32 *, + struct nfsd_createargs *); +int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *, + struct nfsd_renameargs *); +int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *, + struct nfsd_readlinkargs *); +int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *, + struct nfsd_linkargs *); +int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *, + struct nfsd_symlinkargs *); +int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *, + struct nfsd_readdirargs *); +int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *); +int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *); +int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *); +int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *); +int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *); +int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *); 
+int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *); + +int nfssvc_encode_entry(void *, const char *name, + int namlen, loff_t offset, u64 ino, unsigned int); + +int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); + +/* Helper functions for NFSv2 ACL code */ +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp); +__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp); + +#endif /* LINUX_NFSD_H */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h new file mode 100644 index 000000000000..b330756973cf --- /dev/null +++ b/fs/nfsd/xdr3.h @@ -0,0 +1,346 @@ +/* + * linux/include/linux/nfsd/xdr3.h + * + * XDR types for NFSv3 in nfsd. + * + * Copyright (C) 1996-1998, Olaf Kirch + */ + +#ifndef _LINUX_NFSD_XDR3_H +#define _LINUX_NFSD_XDR3_H + +#include "xdr.h" + +struct nfsd3_sattrargs { + struct svc_fh fh; + struct iattr attrs; + int check_guard; + time_t guardtime; +}; + +struct nfsd3_diropargs { + struct svc_fh fh; + char * name; + unsigned int len; +}; + +struct nfsd3_accessargs { + struct svc_fh fh; + unsigned int access; +}; + +struct nfsd3_readargs { + struct svc_fh fh; + __u64 offset; + __u32 count; + int vlen; +}; + +struct nfsd3_writeargs { + svc_fh fh; + __u64 offset; + __u32 count; + int stable; + __u32 len; + int vlen; +}; + +struct nfsd3_createargs { + struct svc_fh fh; + char * name; + unsigned int len; + int createmode; + struct iattr attrs; + __be32 * verf; +}; + +struct nfsd3_mknodargs { + struct svc_fh fh; + char * name; + unsigned int len; + __u32 ftype; + __u32 major, minor; + struct iattr attrs; +}; + +struct nfsd3_renameargs { + struct svc_fh ffh; + char * fname; + unsigned int flen; + struct svc_fh tfh; + char * tname; + unsigned int tlen; +}; + +struct nfsd3_readlinkargs { + struct svc_fh fh; + char * buffer; +}; + +struct nfsd3_linkargs { + struct svc_fh ffh; + struct svc_fh tfh; + char * tname; + unsigned int tlen; +}; + +struct nfsd3_symlinkargs { + struct svc_fh ffh; + char * fname; + unsigned int flen; + char * tname; + unsigned int tlen; + struct iattr attrs; +}; + +struct nfsd3_readdirargs { + struct svc_fh fh; + __u64 cookie; + __u32 dircount; + __u32 count; + __be32 * verf; + __be32 * buffer; +}; + +struct nfsd3_commitargs { + struct svc_fh fh; + __u64 offset; + __u32 count; +}; + +struct nfsd3_getaclargs { + struct svc_fh fh; + int mask; +}; + +struct posix_acl; +struct nfsd3_setaclargs { + struct svc_fh fh; + int mask; + struct posix_acl *acl_access; + struct posix_acl *acl_default; +}; + +struct nfsd3_attrstat { + __be32 status; + struct svc_fh fh; + struct kstat stat; +}; + +/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */ +struct nfsd3_diropres { + __be32 status; + struct svc_fh dirfh; + struct svc_fh fh; +}; + +struct nfsd3_accessres { + __be32 status; + struct svc_fh fh; + __u32 access; +}; + +struct nfsd3_readlinkres { + __be32 status; + struct svc_fh fh; + __u32 len; +}; + +struct nfsd3_readres { + __be32 status; + struct svc_fh fh; + unsigned long count; + int eof; +}; + +struct nfsd3_writeres { + __be32 status; + struct svc_fh fh; + unsigned long count; + int committed; +}; + +struct nfsd3_renameres { + __be32 status; + struct svc_fh ffh; + struct svc_fh tfh; +}; + +struct nfsd3_linkres { + __be32 status; + struct svc_fh tfh; + struct svc_fh fh; +}; + +struct nfsd3_readdirres { + __be32 status; + struct svc_fh fh; + int count; + __be32 verf[2]; + + struct readdir_cd common; + __be32 * buffer; + int buflen; + __be32 * offset; + __be32 * offset1; + struct svc_rqst * rqstp; + 
+}; + +struct nfsd3_fsstatres { + __be32 status; + struct kstatfs stats; + __u32 invarsec; +}; + +struct nfsd3_fsinfores { + __be32 status; + __u32 f_rtmax; + __u32 f_rtpref; + __u32 f_rtmult; + __u32 f_wtmax; + __u32 f_wtpref; + __u32 f_wtmult; + __u32 f_dtpref; + __u64 f_maxfilesize; + __u32 f_properties; +}; + +struct nfsd3_pathconfres { + __be32 status; + __u32 p_link_max; + __u32 p_name_max; + __u32 p_no_trunc; + __u32 p_chown_restricted; + __u32 p_case_insensitive; + __u32 p_case_preserving; +}; + +struct nfsd3_commitres { + __be32 status; + struct svc_fh fh; +}; + +struct nfsd3_getaclres { + __be32 status; + struct svc_fh fh; + int mask; + struct posix_acl *acl_access; + struct posix_acl *acl_default; +}; + +/* dummy type for release */ +struct nfsd3_fhandle_pair { + __u32 dummy; + struct svc_fh fh1; + struct svc_fh fh2; +}; + +/* + * Storage requirements for XDR arguments and results. + */ +union nfsd3_xdrstore { + struct nfsd3_sattrargs sattrargs; + struct nfsd3_diropargs diropargs; + struct nfsd3_readargs readargs; + struct nfsd3_writeargs writeargs; + struct nfsd3_createargs createargs; + struct nfsd3_renameargs renameargs; + struct nfsd3_linkargs linkargs; + struct nfsd3_symlinkargs symlinkargs; + struct nfsd3_readdirargs readdirargs; + struct nfsd3_diropres diropres; + struct nfsd3_accessres accessres; + struct nfsd3_readlinkres readlinkres; + struct nfsd3_readres readres; + struct nfsd3_writeres writeres; + struct nfsd3_renameres renameres; + struct nfsd3_linkres linkres; + struct nfsd3_readdirres readdirres; + struct nfsd3_fsstatres fsstatres; + struct nfsd3_fsinfores fsinfores; + struct nfsd3_pathconfres pathconfres; + struct nfsd3_commitres commitres; + struct nfsd3_getaclres getaclres; +}; + +#define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) + +int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); +int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *, + struct nfsd3_sattrargs *); +int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *, + struct nfsd3_diropargs *); +int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *, + struct nfsd3_accessargs *); +int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *, + struct nfsd3_readargs *); +int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *, + struct nfsd3_writeargs *); +int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *, + struct nfsd3_createargs *); +int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *, + struct nfsd3_createargs *); +int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *, + struct nfsd3_mknodargs *); +int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *, + struct nfsd3_renameargs *); +int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *, + struct nfsd3_readlinkargs *); +int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *, + struct nfsd3_linkargs *); +int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *, + struct nfsd3_symlinkargs *); +int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *, + struct nfsd3_readdirargs *); +int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *, + struct nfsd3_readdirargs *); +int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *, + struct nfsd3_commitargs *); +int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *); +int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *, + struct nfsd3_attrstat *); +int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *, + struct nfsd3_attrstat *); +int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *, + struct nfsd3_diropres *); +int 
nfs3svc_encode_accessres(struct svc_rqst *, __be32 *, + struct nfsd3_accessres *); +int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *, + struct nfsd3_readlinkres *); +int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *); +int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *); +int nfs3svc_encode_createres(struct svc_rqst *, __be32 *, + struct nfsd3_diropres *); +int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *, + struct nfsd3_renameres *); +int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *, + struct nfsd3_linkres *); +int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *, + struct nfsd3_readdirres *); +int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *, + struct nfsd3_fsstatres *); +int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *, + struct nfsd3_fsinfores *); +int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *, + struct nfsd3_pathconfres *); +int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *, + struct nfsd3_commitres *); + +int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *, + struct nfsd3_attrstat *); +int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *, + struct nfsd3_fhandle_pair *); +int nfs3svc_encode_entry(void *, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int); +int nfs3svc_encode_entry_plus(void *, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int); +/* Helper functions for NFSv3 ACL code */ +__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, + struct svc_fh *fhp); +__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp); + + +#endif /* _LINUX_NFSD_XDR3_H */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h new file mode 100644 index 000000000000..83202a1cf07b --- /dev/null +++ b/fs/nfsd/xdr4.h @@ -0,0 +1,564 @@ +/* + * include/linux/nfsd/xdr4.h + * + * Server-side types for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith + * Andy Adamson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef _LINUX_NFSD_XDR4_H +#define _LINUX_NFSD_XDR4_H + +#include "state.h" +#include "nfsd.h" + +#define NFSD4_MAX_TAGLEN 128 +#define XDR_LEN(n) (((n) + 3) & ~3) + +struct nfsd4_compound_state { + struct svc_fh current_fh; + struct svc_fh save_fh; + struct nfs4_stateowner *replay_owner; + /* For sessions DRC */ + struct nfsd4_session *session; + struct nfsd4_slot *slot; + __be32 *datap; + size_t iovlen; + u32 minorversion; + u32 status; +}; + +static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) +{ + return cs->slot != NULL; +} + +struct nfsd4_change_info { + u32 atomic; + bool change_supported; + u32 before_ctime_sec; + u32 before_ctime_nsec; + u64 before_change; + u32 after_ctime_sec; + u32 after_ctime_nsec; + u64 after_change; +}; + +struct nfsd4_access { + u32 ac_req_access; /* request */ + u32 ac_supported; /* response */ + u32 ac_resp_access; /* response */ +}; + +struct nfsd4_close { + u32 cl_seqid; /* request */ + stateid_t cl_stateid; /* request+response */ + struct nfs4_stateowner * cl_stateowner; /* response */ +}; + +struct nfsd4_commit { + u64 co_offset; /* request */ + u32 co_count; /* request */ + nfs4_verifier co_verf; /* response */ +}; + +struct nfsd4_create { + u32 cr_namelen; /* request */ + char * cr_name; /* request */ + u32 cr_type; /* request */ + union { /* request */ + struct { + u32 namelen; + char *name; + } link; /* NF4LNK */ + struct { + u32 specdata1; + u32 specdata2; + } dev; /* NF4BLK, NF4CHR */ + } u; + u32 cr_bmval[3]; /* request */ + struct iattr cr_iattr; /* request */ + struct nfsd4_change_info cr_cinfo; /* response */ + struct nfs4_acl *cr_acl; +}; +#define cr_linklen u.link.namelen +#define cr_linkname u.link.name +#define cr_specdata1 u.dev.specdata1 +#define cr_specdata2 u.dev.specdata2 + +struct nfsd4_delegreturn { + stateid_t dr_stateid; +}; + +struct nfsd4_getattr { + u32 ga_bmval[3]; /* request */ + struct svc_fh *ga_fhp; /* response */ +}; + +struct nfsd4_link { + u32 li_namelen; /* request */ + char * li_name; /* request */ + struct nfsd4_change_info li_cinfo; /* response */ +}; + +struct nfsd4_lock_denied { + clientid_t ld_clientid; + struct nfs4_stateowner *ld_sop; + u64 ld_start; + u64 ld_length; + u32 ld_type; +}; + +struct nfsd4_lock { + /* request */ + u32 lk_type; + u32 lk_reclaim; /* boolean */ + u64 lk_offset; + u64 lk_length; + u32 lk_is_new; + union { + struct { + u32 open_seqid; + stateid_t open_stateid; + u32 lock_seqid; + clientid_t clientid; + struct xdr_netobj owner; + } new; + struct { + stateid_t lock_stateid; + u32 lock_seqid; + } old; + } v; + + /* response */ + union { + struct { + stateid_t stateid; + } ok; + struct nfsd4_lock_denied denied; + } u; + /* The lk_replay_owner is the open owner in the open_to_lock_owner + * case and the lock owner otherwise: */ + struct nfs4_stateowner *lk_replay_owner; +}; +#define lk_new_open_seqid v.new.open_seqid +#define lk_new_open_stateid v.new.open_stateid +#define lk_new_lock_seqid v.new.lock_seqid +#define lk_new_clientid v.new.clientid +#define lk_new_owner v.new.owner +#define lk_old_lock_stateid v.old.lock_stateid +#define lk_old_lock_seqid v.old.lock_seqid + +#define lk_rflags u.ok.rflags +#define lk_resp_stateid u.ok.stateid +#define lk_denied u.denied + + +struct nfsd4_lockt { + u32 lt_type; + clientid_t lt_clientid; + struct xdr_netobj lt_owner; + u64 lt_offset; + u64 lt_length; + struct nfs4_stateowner * lt_stateowner; + struct nfsd4_lock_denied lt_denied; +}; + + +struct nfsd4_locku { + u32 lu_type; + u32 lu_seqid; + stateid_t lu_stateid; + 
u64 lu_offset; + u64 lu_length; + struct nfs4_stateowner *lu_stateowner; +}; + + +struct nfsd4_lookup { + u32 lo_len; /* request */ + char * lo_name; /* request */ +}; + +struct nfsd4_putfh { + u32 pf_fhlen; /* request */ + char *pf_fhval; /* request */ +}; + +struct nfsd4_open { + u32 op_claim_type; /* request */ + struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ + u32 op_delegate_type; /* request - CLAIM_PREV only */ + stateid_t op_delegate_stateid; /* request - response */ + u32 op_create; /* request */ + u32 op_createmode; /* request */ + u32 op_bmval[3]; /* request */ + struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ + nfs4_verifier verf; /* EXCLUSIVE4 */ + clientid_t op_clientid; /* request */ + struct xdr_netobj op_owner; /* request */ + u32 op_seqid; /* request */ + u32 op_share_access; /* request */ + u32 op_share_deny; /* request */ + stateid_t op_stateid; /* response */ + u32 op_recall; /* recall */ + struct nfsd4_change_info op_cinfo; /* response */ + u32 op_rflags; /* response */ + int op_truncate; /* used during processing */ + struct nfs4_stateowner *op_stateowner; /* used during processing */ + struct nfs4_acl *op_acl; +}; +#define op_iattr iattr +#define op_verf verf + +struct nfsd4_open_confirm { + stateid_t oc_req_stateid /* request */; + u32 oc_seqid /* request */; + stateid_t oc_resp_stateid /* response */; + struct nfs4_stateowner * oc_stateowner; /* response */ +}; + +struct nfsd4_open_downgrade { + stateid_t od_stateid; + u32 od_seqid; + u32 od_share_access; + u32 od_share_deny; + struct nfs4_stateowner *od_stateowner; +}; + + +struct nfsd4_read { + stateid_t rd_stateid; /* request */ + u64 rd_offset; /* request */ + u32 rd_length; /* request */ + int rd_vlen; + struct file *rd_filp; + + struct svc_rqst *rd_rqstp; /* response */ + struct svc_fh * rd_fhp; /* response */ +}; + +struct nfsd4_readdir { + u64 rd_cookie; /* request */ + nfs4_verifier rd_verf; /* request */ + u32 rd_dircount; /* request */ + u32 rd_maxcount; /* request */ + u32 rd_bmval[3]; /* request */ + struct svc_rqst *rd_rqstp; /* response */ + struct svc_fh * rd_fhp; /* response */ + + struct readdir_cd common; + __be32 * buffer; + int buflen; + __be32 * offset; +}; + +struct nfsd4_release_lockowner { + clientid_t rl_clientid; + struct xdr_netobj rl_owner; +}; +struct nfsd4_readlink { + struct svc_rqst *rl_rqstp; /* request */ + struct svc_fh * rl_fhp; /* request */ +}; + +struct nfsd4_remove { + u32 rm_namelen; /* request */ + char * rm_name; /* request */ + struct nfsd4_change_info rm_cinfo; /* response */ +}; + +struct nfsd4_rename { + u32 rn_snamelen; /* request */ + char * rn_sname; /* request */ + u32 rn_tnamelen; /* request */ + char * rn_tname; /* request */ + struct nfsd4_change_info rn_sinfo; /* response */ + struct nfsd4_change_info rn_tinfo; /* response */ +}; + +struct nfsd4_secinfo { + u32 si_namelen; /* request */ + char *si_name; /* request */ + struct svc_export *si_exp; /* response */ +}; + +struct nfsd4_setattr { + stateid_t sa_stateid; /* request */ + u32 sa_bmval[3]; /* request */ + struct iattr sa_iattr; /* request */ + struct nfs4_acl *sa_acl; +}; + +struct nfsd4_setclientid { + nfs4_verifier se_verf; /* request */ + u32 se_namelen; /* request */ + char * se_name; /* request */ + u32 se_callback_prog; /* request */ + u32 se_callback_netid_len; /* request */ + char * se_callback_netid_val; /* request */ + u32 se_callback_addr_len; /* request */ + char * se_callback_addr_val; /* request */ + u32 se_callback_ident; /* request */ + clientid_t 
se_clientid; /* response */ + nfs4_verifier se_confirm; /* response */ +}; + +struct nfsd4_setclientid_confirm { + clientid_t sc_clientid; + nfs4_verifier sc_confirm; +}; + +/* also used for NVERIFY */ +struct nfsd4_verify { + u32 ve_bmval[3]; /* request */ + u32 ve_attrlen; /* request */ + char * ve_attrval; /* request */ +}; + +struct nfsd4_write { + stateid_t wr_stateid; /* request */ + u64 wr_offset; /* request */ + u32 wr_stable_how; /* request */ + u32 wr_buflen; /* request */ + int wr_vlen; + + u32 wr_bytes_written; /* response */ + u32 wr_how_written; /* response */ + nfs4_verifier wr_verifier; /* response */ +}; + +struct nfsd4_exchange_id { + nfs4_verifier verifier; + struct xdr_netobj clname; + u32 flags; + clientid_t clientid; + u32 seqid; + int spa_how; +}; + +struct nfsd4_sequence { + struct nfs4_sessionid sessionid; /* request/response */ + u32 seqid; /* request/response */ + u32 slotid; /* request/response */ + u32 maxslots; /* request/response */ + u32 cachethis; /* request */ +#if 0 + u32 target_maxslots; /* response */ + u32 status_flags; /* response */ +#endif /* not yet */ +}; + +struct nfsd4_destroy_session { + struct nfs4_sessionid sessionid; +}; + +struct nfsd4_op { + int opnum; + __be32 status; + union { + struct nfsd4_access access; + struct nfsd4_close close; + struct nfsd4_commit commit; + struct nfsd4_create create; + struct nfsd4_delegreturn delegreturn; + struct nfsd4_getattr getattr; + struct svc_fh * getfh; + struct nfsd4_link link; + struct nfsd4_lock lock; + struct nfsd4_lockt lockt; + struct nfsd4_locku locku; + struct nfsd4_lookup lookup; + struct nfsd4_verify nverify; + struct nfsd4_open open; + struct nfsd4_open_confirm open_confirm; + struct nfsd4_open_downgrade open_downgrade; + struct nfsd4_putfh putfh; + struct nfsd4_read read; + struct nfsd4_readdir readdir; + struct nfsd4_readlink readlink; + struct nfsd4_remove remove; + struct nfsd4_rename rename; + clientid_t renew; + struct nfsd4_secinfo secinfo; + struct nfsd4_setattr setattr; + struct nfsd4_setclientid setclientid; + struct nfsd4_setclientid_confirm setclientid_confirm; + struct nfsd4_verify verify; + struct nfsd4_write write; + struct nfsd4_release_lockowner release_lockowner; + + /* NFSv4.1 */ + struct nfsd4_exchange_id exchange_id; + struct nfsd4_create_session create_session; + struct nfsd4_destroy_session destroy_session; + struct nfsd4_sequence sequence; + } u; + struct nfs4_replay * replay; +}; + +struct nfsd4_compoundargs { + /* scratch variables for XDR decode */ + __be32 * p; + __be32 * end; + struct page ** pagelist; + int pagelen; + __be32 tmp[8]; + __be32 * tmpp; + struct tmpbuf { + struct tmpbuf *next; + void (*release)(const void *); + void *buf; + } *to_free; + + struct svc_rqst *rqstp; + + u32 taglen; + char * tag; + u32 minorversion; + u32 opcnt; + struct nfsd4_op *ops; + struct nfsd4_op iops[8]; +}; + +struct nfsd4_compoundres { + /* scratch variables for XDR encode */ + __be32 * p; + __be32 * end; + struct xdr_buf * xbuf; + struct svc_rqst * rqstp; + + u32 taglen; + char * tag; + u32 opcnt; + __be32 * tagp; /* tag, opcount encode location */ + struct nfsd4_compound_state cstate; +}; + +static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) +{ + struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; + return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; +} + +static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) +{ + return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp); +} + +#define NFS4_SVC_XDRSIZE 
sizeof(struct nfsd4_compoundargs) + +static inline void +set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) +{ + BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved); + cinfo->atomic = 1; + cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode); + if (cinfo->change_supported) { + cinfo->before_change = fhp->fh_pre_change; + cinfo->after_change = fhp->fh_post_change; + } else { + cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; + cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; + cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; + cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; + } +} + +int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *); +int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *, + struct nfsd4_compoundargs *); +int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, + struct nfsd4_compoundres *); +void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); +void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); +__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, __be32 *buffer, int *countp, + u32 *bmval, struct svc_rqst *, int ignore_crossmnt); +extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_setclientid *setclid); +extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_setclientid_confirm *setclientid_confirm); +extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); +extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, + struct nfsd4_sequence *seq); +extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, +struct nfsd4_exchange_id *); + extern __be32 nfsd4_create_session(struct svc_rqst *, + struct nfsd4_compound_state *, + struct nfsd4_create_session *); +extern __be32 nfsd4_sequence(struct svc_rqst *, + struct nfsd4_compound_state *, + struct nfsd4_sequence *); +extern __be32 nfsd4_destroy_session(struct svc_rqst *, + struct nfsd4_compound_state *, + struct nfsd4_destroy_session *); +extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, + struct nfsd4_open *open); +extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, + struct svc_fh *current_fh, struct nfsd4_open *open); +extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); +extern __be32 nfsd4_close(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_close *close); +extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_open_downgrade *od); +extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *, + struct nfsd4_lock *lock); +extern __be32 nfsd4_lockt(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_lockt *lockt); +extern __be32 nfsd4_locku(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_locku *locku); +extern __be32 +nfsd4_release_lockowner(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_release_lockowner *rlockowner); +extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *); +extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr); +extern __be32 nfsd4_renew(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, clientid_t *clid); +#endif + +/* + * Local variables: + * 
c-basic-offset: 8 + * End: + */ diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h deleted file mode 100644 index a165425dea41..000000000000 --- a/include/linux/nfsd/cache.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * include/linux/nfsd/cache.h - * - * Request reply cache. This was heavily inspired by the - * implementation in 4.3BSD/4.4BSD. - * - * Copyright (C) 1995, 1996 Olaf Kirch - */ - -#ifndef NFSCACHE_H -#define NFSCACHE_H - -#include - -/* - * Representation of a reply cache entry. - */ -struct svc_cacherep { - struct hlist_node c_hash; - struct list_head c_lru; - - unsigned char c_state, /* unused, inprog, done */ - c_type, /* status, buffer */ - c_secure : 1; /* req came from port < 1024 */ - struct sockaddr_in c_addr; - __be32 c_xid; - u32 c_prot; - u32 c_proc; - u32 c_vers; - unsigned long c_timestamp; - union { - struct kvec u_vec; - __be32 u_status; - } c_u; -}; - -#define c_replvec c_u.u_vec -#define c_replstat c_u.u_status - -/* cache entry states */ -enum { - RC_UNUSED, - RC_INPROG, - RC_DONE -}; - -/* return values */ -enum { - RC_DROPIT, - RC_REPLY, - RC_DOIT, - RC_INTR -}; - -/* - * Cache types. - * We may want to add more types one day, e.g. for diropres and - * attrstat replies. Using cache entries with fixed length instead - * of buffer pointers may be more efficient. - */ -enum { - RC_NOCACHE, - RC_REPLSTAT, - RC_REPLBUFF, -}; - -/* - * If requests are retransmitted within this interval, they're dropped. - */ -#define RC_DELAY (HZ/5) - -int nfsd_reply_cache_init(void); -void nfsd_reply_cache_shutdown(void); -int nfsd_cache_lookup(struct svc_rqst *, int); -void nfsd_cache_update(struct svc_rqst *, int, __be32 *); - -#ifdef CONFIG_NFSD_V4 -void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); -#else /* CONFIG_NFSD_V4 */ -static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) -{ -} -#endif /* CONFIG_NFSD_V4 */ - -#endif /* NFSCACHE_H */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h deleted file mode 100644 index 74f67c2aca34..000000000000 --- a/include/linux/nfsd/nfsd.h +++ /dev/null @@ -1,335 +0,0 @@ -/* - * linux/include/linux/nfsd/nfsd.h - * - * Hodge-podge collection of knfsd-related stuff. - * I will sort this out later. - * - * Copyright (C) 1995-1997 Olaf Kirch - */ - -#ifndef LINUX_NFSD_NFSD_H -#define LINUX_NFSD_NFSD_H - -#include -#include - -#include -#include -#include -/* - * nfsd version - */ -#define NFSD_SUPPORTED_MINOR_VERSION 1 - -struct readdir_cd { - __be32 err; /* 0, nfserr, or nfserr_eof */ -}; - - -extern struct svc_program nfsd_program; -extern struct svc_version nfsd_version2, nfsd_version3, - nfsd_version4; -extern u32 nfsd_supported_minorversion; -extern struct mutex nfsd_mutex; -extern struct svc_serv *nfsd_serv; -extern spinlock_t nfsd_drc_lock; -extern unsigned int nfsd_drc_max_mem; -extern unsigned int nfsd_drc_mem_used; - -extern const struct seq_operations nfs_exports_op; - -/* - * Function prototypes. 
- */ -int nfsd_svc(unsigned short port, int nrservs); -int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); - -int nfsd_nrthreads(void); -int nfsd_nrpools(void); -int nfsd_get_nrthreads(int n, int *); -int nfsd_set_nrthreads(int n, int *); - -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -#ifdef CONFIG_NFSD_V2_ACL -extern struct svc_version nfsd_acl_version2; -#else -#define nfsd_acl_version2 NULL -#endif -#ifdef CONFIG_NFSD_V3_ACL -extern struct svc_version nfsd_acl_version3; -#else -#define nfsd_acl_version3 NULL -#endif -#endif - -enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; -int nfsd_vers(int vers, enum vers_op change); -int nfsd_minorversion(u32 minorversion, enum vers_op change); -void nfsd_reset_versions(void); -int nfsd_create_serv(void); - -extern int nfsd_max_blksize; - -/* - * NFSv4 State - */ -#ifdef CONFIG_NFSD_V4 -extern unsigned int max_delegations; -int nfs4_state_init(void); -void nfsd4_free_slabs(void); -int nfs4_state_start(void); -void nfs4_state_shutdown(void); -time_t nfs4_lease_time(void); -void nfs4_reset_lease(time_t leasetime); -int nfs4_reset_recoverydir(char *recdir); -#else -static inline int nfs4_state_init(void) { return 0; } -static inline void nfsd4_free_slabs(void) { } -static inline int nfs4_state_start(void) { return 0; } -static inline void nfs4_state_shutdown(void) { } -static inline time_t nfs4_lease_time(void) { return 0; } -static inline void nfs4_reset_lease(time_t leasetime) { } -static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } -#endif - -/* - * lockd binding - */ -void nfsd_lockd_init(void); -void nfsd_lockd_shutdown(void); - - -/* - * These macros provide pre-xdr'ed values for faster operation. - */ -#define nfs_ok cpu_to_be32(NFS_OK) -#define nfserr_perm cpu_to_be32(NFSERR_PERM) -#define nfserr_noent cpu_to_be32(NFSERR_NOENT) -#define nfserr_io cpu_to_be32(NFSERR_IO) -#define nfserr_nxio cpu_to_be32(NFSERR_NXIO) -#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN) -#define nfserr_acces cpu_to_be32(NFSERR_ACCES) -#define nfserr_exist cpu_to_be32(NFSERR_EXIST) -#define nfserr_xdev cpu_to_be32(NFSERR_XDEV) -#define nfserr_nodev cpu_to_be32(NFSERR_NODEV) -#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR) -#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR) -#define nfserr_inval cpu_to_be32(NFSERR_INVAL) -#define nfserr_fbig cpu_to_be32(NFSERR_FBIG) -#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC) -#define nfserr_rofs cpu_to_be32(NFSERR_ROFS) -#define nfserr_mlink cpu_to_be32(NFSERR_MLINK) -#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP) -#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG) -#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY) -#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT) -#define nfserr_stale cpu_to_be32(NFSERR_STALE) -#define nfserr_remote cpu_to_be32(NFSERR_REMOTE) -#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH) -#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE) -#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC) -#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE) -#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP) -#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL) -#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT) -#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE) -#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX) -#define nfserr_denied cpu_to_be32(NFSERR_DENIED) -#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK) -#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED) -#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE) -#define 
nfserr_same cpu_to_be32(NFSERR_SAME) -#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE) -#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID) -#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE) -#define nfserr_moved cpu_to_be32(NFSERR_MOVED) -#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE) -#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH) -#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED) -#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID) -#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID) -#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID) -#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID) -#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK) -#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME) -#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH) -#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) -#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) -#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE) -#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD) -#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL) -#define nfserr_grace cpu_to_be32(NFSERR_GRACE) -#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE) -#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD) -#define nfserr_badname cpu_to_be32(NFSERR_BADNAME) -#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN) -#define nfserr_locked cpu_to_be32(NFSERR_LOCKED) -#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC) -#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE) -#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT) -#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST) -#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION) -#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT) -#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY) -#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION) -#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED) -#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY) -#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER) -#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE) -#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT) -#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT) -#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE) -#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED) -#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS) -#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG) -#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG) -#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE) -#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP) -#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND) -#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS) -#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION) -#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP) -#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY) -#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE) -#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY) -#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT) -#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION) -#define nfserr_encr_alg_unsupp 
cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP) -#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT) -#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP) -#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED) -#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE) -#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL) -#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) -#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) -#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) - -/* error codes for internal use */ -/* if a request fails due to kmalloc failure, it gets dropped. - * Client should resend eventually - */ -#define nfserr_dropit cpu_to_be32(30000) -/* end-of-file indicator in readdir */ -#define nfserr_eof cpu_to_be32(30001) -/* replay detected */ -#define nfserr_replay_me cpu_to_be32(11001) -/* nfs41 replay detected */ -#define nfserr_replay_cache cpu_to_be32(11002) - -/* Check for dir entries '.' and '..' */ -#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) - -/* - * Time of server startup - */ -extern struct timeval nfssvc_boot; - -#ifdef CONFIG_NFSD_V4 - -/* before processing a COMPOUND operation, we have to check that there - * is enough space in the buffer for XDR encode to succeed. otherwise, - * we might process an operation with side effects, and be unable to - * tell the client that the operation succeeded. - * - * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space - * needed to encode an "ordinary" _successful_ operation. (GETATTR, - * READ, READDIR, and READLINK have their own buffer checks.) if we - * fall below this level, we fail the next operation with NFS4ERR_RESOURCE. - * - * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space - * needed to encode an operation which has failed with NFS4ERR_RESOURCE. - * care is taken to ensure that we never fall below this level for any - * reason. 
- */ -#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ -#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ - -#define NFSD_LEASE_TIME (nfs4_lease_time()) -#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ - -/* - * The following attributes are currently not supported by the NFSv4 server: - * ARCHIVE (deprecated anyway) - * HIDDEN (unlikely to be supported any time soon) - * MIMETYPE (unlikely to be supported any time soon) - * QUOTA_* (will be supported in a forthcoming patch) - * SYSTEM (unlikely to be supported any time soon) - * TIME_BACKUP (unlikely to be supported any time soon) - * TIME_CREATE (unlikely to be supported any time soon) - */ -#define NFSD4_SUPPORTED_ATTRS_WORD0 \ -(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \ - | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \ - | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \ - | FATTR4_WORD0_UNIQUE_HANDLES | FATTR4_WORD0_LEASE_TIME | FATTR4_WORD0_RDATTR_ERROR \ - | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_CANSETTIME | FATTR4_WORD0_CASE_INSENSITIVE \ - | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED \ - | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \ - | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS \ - | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \ - | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL) - -#define NFSD4_SUPPORTED_ATTRS_WORD1 \ -(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \ - | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \ - | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \ - | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \ - | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \ - | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID) - -#define NFSD4_SUPPORTED_ATTRS_WORD2 0 - -#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ - NFSD4_SUPPORTED_ATTRS_WORD0 - -#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ - NFSD4_SUPPORTED_ATTRS_WORD1 - -#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ - (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) - -static inline u32 nfsd_suppattrs0(u32 minorversion) -{ - return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 - : NFSD4_SUPPORTED_ATTRS_WORD0; -} - -static inline u32 nfsd_suppattrs1(u32 minorversion) -{ - return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1 - : NFSD4_SUPPORTED_ATTRS_WORD1; -} - -static inline u32 nfsd_suppattrs2(u32 minorversion) -{ - return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 - : NFSD4_SUPPORTED_ATTRS_WORD2; -} - -/* These will return ERR_INVAL if specified in GETATTR or READDIR. */ -#define NFSD_WRITEONLY_ATTRS_WORD1 \ -(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) - -/* These are the only attrs allowed in CREATE/OPEN/SETATTR. 
*/ -#define NFSD_WRITEABLE_ATTRS_WORD0 \ -(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) -#define NFSD_WRITEABLE_ATTRS_WORD1 \ -(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) -#define NFSD_WRITEABLE_ATTRS_WORD2 0 - -#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ - NFSD_WRITEABLE_ATTRS_WORD0 -/* - * we currently store the exclusive create verifier in the v_{a,m}time - * attributes so the client can't set these at create time using EXCLUSIVE4_1 - */ -#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \ - (NFSD_WRITEABLE_ATTRS_WORD1 & \ - ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)) -#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \ - NFSD_WRITEABLE_ATTRS_WORD2 - -#endif /* CONFIG_NFSD_V4 */ - -#endif /* LINUX_NFSD_NFSD_H */ diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h deleted file mode 100644 index 2af75686e0d3..000000000000 --- a/include/linux/nfsd/state.h +++ /dev/null @@ -1,409 +0,0 @@ -/* - * linux/include/nfsd/state.h - * - * Copyright (c) 2001 The Regents of the University of Michigan. - * All rights reserved. - * - * Kendrick Smith - * Andy Adamson - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - */ - -#ifndef _NFSD4_STATE_H -#define _NFSD4_STATE_H - -#include - -typedef struct { - u32 cl_boot; - u32 cl_id; -} clientid_t; - -typedef struct { - u32 so_boot; - u32 so_stateownerid; - u32 so_fileid; -} stateid_opaque_t; - -typedef struct { - u32 si_generation; - stateid_opaque_t si_opaque; -} stateid_t; -#define si_boot si_opaque.so_boot -#define si_stateownerid si_opaque.so_stateownerid -#define si_fileid si_opaque.so_fileid - -#define STATEID_FMT "(%08x/%08x/%08x/%08x)" -#define STATEID_VAL(s) \ - (s)->si_boot, \ - (s)->si_stateownerid, \ - (s)->si_fileid, \ - (s)->si_generation - -struct nfsd4_cb_sequence { - /* args/res */ - u32 cbs_minorversion; - struct nfs4_client *cbs_clp; -}; - -struct nfs4_delegation { - struct list_head dl_perfile; - struct list_head dl_perclnt; - struct list_head dl_recall_lru; /* delegation recalled */ - atomic_t dl_count; /* ref count */ - struct nfs4_client *dl_client; - struct nfs4_file *dl_file; - struct file_lock *dl_flock; - struct file *dl_vfs_file; - u32 dl_type; - time_t dl_time; -/* For recall: */ - u32 dl_ident; - stateid_t dl_stateid; - struct knfsd_fh dl_fh; - int dl_retries; -}; - -/* client delegation callback info */ -struct nfs4_cb_conn { - /* SETCLIENTID info */ - struct sockaddr_storage cb_addr; - size_t cb_addrlen; - u32 cb_prog; - u32 cb_minorversion; - u32 cb_ident; /* minorversion 0 only */ - /* RPC client info */ - atomic_t cb_set; /* successful CB_NULL call */ - struct rpc_clnt * cb_client; -}; - -/* Maximum number of slots per session. 160 is useful for long haul TCP */ -#define NFSD_MAX_SLOTS_PER_SESSION 160 -/* Maximum number of operations per session compound */ -#define NFSD_MAX_OPS_PER_COMPOUND 16 -/* Maximum session per slot cache size */ -#define NFSD_SLOT_CACHE_SIZE 1024 -/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ -#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 -#define NFSD_MAX_MEM_PER_SESSION \ - (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE) - -struct nfsd4_slot { - bool sl_inuse; - bool sl_cachethis; - u16 sl_opcnt; - u32 sl_seqid; - __be32 sl_status; - u32 sl_datalen; - char sl_data[]; -}; - -struct nfsd4_channel_attrs { - u32 headerpadsz; - u32 maxreq_sz; - u32 maxresp_sz; - u32 maxresp_cached; - u32 maxops; - u32 maxreqs; - u32 nr_rdma_attrs; - u32 rdma_attrs; -}; - -struct nfsd4_create_session { - clientid_t clientid; - struct nfs4_sessionid sessionid; - u32 seqid; - u32 flags; - struct nfsd4_channel_attrs fore_channel; - struct nfsd4_channel_attrs back_channel; - u32 callback_prog; - u32 uid; - u32 gid; -}; - -/* The single slot clientid cache structure */ -struct nfsd4_clid_slot { - u32 sl_seqid; - __be32 sl_status; - struct nfsd4_create_session sl_cr_ses; -}; - -struct nfsd4_session { - struct kref se_ref; - struct list_head se_hash; /* hash by sessionid */ - struct list_head se_perclnt; - u32 se_flags; - struct nfs4_client *se_client; /* for expire_client */ - struct nfs4_sessionid se_sessionid; - struct nfsd4_channel_attrs se_fchannel; - struct nfsd4_channel_attrs se_bchannel; - struct nfsd4_slot *se_slots[]; /* forward channel slots */ -}; - -static inline void -nfsd4_put_session(struct nfsd4_session *ses) -{ - extern void free_session(struct kref *kref); - kref_put(&ses->se_ref, free_session); -} - -static inline void -nfsd4_get_session(struct nfsd4_session *ses) -{ - kref_get(&ses->se_ref); -} - -/* formatted contents of nfs4_sessionid */ -struct nfsd4_sessionid { - clientid_t clientid; - u32 sequence; - u32 reserved; -}; - -#define HEXDIR_LEN 33 /* hex version of 16 byte 
md5 of cl_name plus '\0' */ - -/* - * struct nfs4_client - one per client. Clientids live here. - * o Each nfs4_client is hashed by clientid. - * - * o Each nfs4_clients is also hashed by name - * (the opaque quantity initially sent by the client to identify itself). - * - * o cl_perclient list is used to ensure no dangling stateowner references - * when we expire the nfs4_client - */ -struct nfs4_client { - struct list_head cl_idhash; /* hash by cl_clientid.id */ - struct list_head cl_strhash; /* hash by cl_name */ - struct list_head cl_openowners; - struct list_head cl_delegations; - struct list_head cl_lru; /* tail queue */ - struct xdr_netobj cl_name; /* id generated by client */ - char cl_recdir[HEXDIR_LEN]; /* recovery dir */ - nfs4_verifier cl_verifier; /* generated by client */ - time_t cl_time; /* time of last lease renewal */ - struct sockaddr_storage cl_addr; /* client ipaddress */ - u32 cl_flavor; /* setclientid pseudoflavor */ - char *cl_principal; /* setclientid principal name */ - struct svc_cred cl_cred; /* setclientid principal */ - clientid_t cl_clientid; /* generated by server */ - nfs4_verifier cl_confirm; /* generated by server */ - struct nfs4_cb_conn cl_cb_conn; /* callback info */ - atomic_t cl_count; /* ref count */ - u32 cl_firststate; /* recovery dir creation */ - - /* for nfs41 */ - struct list_head cl_sessions; - struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ - u32 cl_exchange_flags; - struct nfs4_sessionid cl_sessionid; - - /* for nfs41 callbacks */ - /* We currently support a single back channel with a single slot */ - unsigned long cl_cb_slot_busy; - u32 cl_cb_seq_nr; - struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */ - struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ - /* wait here for slots */ -}; - -/* struct nfs4_client_reset - * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl - * upon lease reset, or from upcall to state_daemon (to read in state - * from non-volitile storage) upon reboot. - */ -struct nfs4_client_reclaim { - struct list_head cr_strhash; /* hash by cr_name */ - char cr_recdir[HEXDIR_LEN]; /* recover dir */ -}; - -static inline void -update_stateid(stateid_t *stateid) -{ - stateid->si_generation++; -} - -/* A reasonable value for REPLAY_ISIZE was estimated as follows: - * The OPEN response, typically the largest, requires - * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) + - * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) + - * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes - */ - -#define NFSD4_REPLAY_ISIZE 112 - -/* - * Replay buffer, where the result of the last seqid-mutating operation - * is cached. - */ -struct nfs4_replay { - __be32 rp_status; - unsigned int rp_buflen; - char *rp_buf; - unsigned intrp_allocated; - struct knfsd_fh rp_openfh; - char rp_ibuf[NFSD4_REPLAY_ISIZE]; -}; - -/* -* nfs4_stateowner can either be an open_owner, or a lock_owner -* -* so_idhash: stateid_hashtbl[] for open owner, lockstateid_hashtbl[] -* for lock_owner -* so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[] -* for lock_owner -* so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client -* struct is reaped. -* so_perfilestate: heads the list of nfs4_stateid (either open or lock) -* and is used to ensure no dangling nfs4_stateid references when we -* release a stateowner. 
-* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when -* close is called to reap associated byte-range locks -* so_close_lru: (open) stateowner is placed on this list instead of being -* reaped (when so_perfilestate is empty) to hold the last close replay. -* reaped by laundramat thread after lease period. -*/ -struct nfs4_stateowner { - struct kref so_ref; - struct list_head so_idhash; /* hash by so_id */ - struct list_head so_strhash; /* hash by op_name */ - struct list_head so_perclient; - struct list_head so_stateids; - struct list_head so_perstateid; /* for lockowners only */ - struct list_head so_close_lru; /* tail queue */ - time_t so_time; /* time of placement on so_close_lru */ - int so_is_open_owner; /* 1=openowner,0=lockowner */ - u32 so_id; - struct nfs4_client * so_client; - /* after increment in ENCODE_SEQID_OP_TAIL, represents the next - * sequence id expected from the client: */ - u32 so_seqid; - struct xdr_netobj so_owner; /* open owner name */ - int so_confirmed; /* successful OPEN_CONFIRM? */ - struct nfs4_replay so_replay; -}; - -/* -* nfs4_file: a file opened by some number of (open) nfs4_stateowners. -* o fi_perfile list is used to search for conflicting -* share_acces, share_deny on the file. -*/ -struct nfs4_file { - atomic_t fi_ref; - struct list_head fi_hash; /* hash by "struct inode *" */ - struct list_head fi_stateids; - struct list_head fi_delegations; - struct inode *fi_inode; - u32 fi_id; /* used with stateowner->so_id - * for stateid_hashtbl hash */ - bool fi_had_conflict; -}; - -/* -* nfs4_stateid can either be an open stateid or (eventually) a lock stateid -* -* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file -* -* st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry -* st_perfile: file_hashtbl[] entry. -* st_perfile_state: nfs4_stateowner->so_perfilestate -* st_perlockowner: (open stateid) list of lock nfs4_stateowners -* st_access_bmap: used only for open stateid -* st_deny_bmap: used only for open stateid -* st_openstp: open stateid lock stateid was derived from -* -* XXX: open stateids and lock stateids have diverged sufficiently that -* we should consider defining separate structs for the two cases. 
-*/ - -struct nfs4_stateid { - struct list_head st_hash; - struct list_head st_perfile; - struct list_head st_perstateowner; - struct list_head st_lockowners; - struct nfs4_stateowner * st_stateowner; - struct nfs4_file * st_file; - stateid_t st_stateid; - struct file * st_vfs_file; - unsigned long st_access_bmap; - unsigned long st_deny_bmap; - struct nfs4_stateid * st_openstp; -}; - -/* flags for preprocess_seqid_op() */ -#define HAS_SESSION 0x00000001 -#define CONFIRM 0x00000002 -#define OPEN_STATE 0x00000004 -#define LOCK_STATE 0x00000008 -#define RD_STATE 0x00000010 -#define WR_STATE 0x00000020 -#define CLOSE_STATE 0x00000040 - -#define seqid_mutating_err(err) \ - (((err) != nfserr_stale_clientid) && \ - ((err) != nfserr_bad_seqid) && \ - ((err) != nfserr_stale_stateid) && \ - ((err) != nfserr_bad_stateid)) - -struct nfsd4_compound_state; - -extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, - stateid_t *stateid, int flags, struct file **filp); -extern void nfs4_lock_state(void); -extern void nfs4_unlock_state(void); -extern int nfs4_in_grace(void); -extern __be32 nfs4_check_open_reclaim(clientid_t *clid); -extern void put_nfs4_client(struct nfs4_client *clp); -extern void nfs4_free_stateowner(struct kref *kref); -extern int set_callback_cred(void); -extern void nfsd4_probe_callback(struct nfs4_client *clp); -extern void nfsd4_cb_recall(struct nfs4_delegation *dp); -extern void nfs4_put_delegation(struct nfs4_delegation *dp); -extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); -extern void nfsd4_init_recdir(char *recdir_name); -extern int nfsd4_recdir_load(void); -extern void nfsd4_shutdown_recdir(void); -extern int nfs4_client_to_reclaim(const char *name); -extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); -extern void nfsd4_recdir_purge_old(void); -extern int nfsd4_create_clid_dir(struct nfs4_client *clp); -extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); - -static inline void -nfs4_put_stateowner(struct nfs4_stateowner *so) -{ - kref_put(&so->so_ref, nfs4_free_stateowner); -} - -static inline void -nfs4_get_stateowner(struct nfs4_stateowner *so) -{ - kref_get(&so->so_ref); -} - -#endif /* NFSD4_STATE_H */ diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h deleted file mode 100644 index 58f824d854c2..000000000000 --- a/include/linux/nfsd/xdr.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * linux/include/linux/nfsd/xdr.h - * - * XDR types for nfsd. This is mainly a typing exercise. 
- */ - -#ifndef LINUX_NFSD_H -#define LINUX_NFSD_H - -#include -#include - -struct nfsd_fhandle { - struct svc_fh fh; -}; - -struct nfsd_sattrargs { - struct svc_fh fh; - struct iattr attrs; -}; - -struct nfsd_diropargs { - struct svc_fh fh; - char * name; - unsigned int len; -}; - -struct nfsd_readargs { - struct svc_fh fh; - __u32 offset; - __u32 count; - int vlen; -}; - -struct nfsd_writeargs { - svc_fh fh; - __u32 offset; - int len; - int vlen; -}; - -struct nfsd_createargs { - struct svc_fh fh; - char * name; - unsigned int len; - struct iattr attrs; -}; - -struct nfsd_renameargs { - struct svc_fh ffh; - char * fname; - unsigned int flen; - struct svc_fh tfh; - char * tname; - unsigned int tlen; -}; - -struct nfsd_readlinkargs { - struct svc_fh fh; - char * buffer; -}; - -struct nfsd_linkargs { - struct svc_fh ffh; - struct svc_fh tfh; - char * tname; - unsigned int tlen; -}; - -struct nfsd_symlinkargs { - struct svc_fh ffh; - char * fname; - unsigned int flen; - char * tname; - unsigned int tlen; - struct iattr attrs; -}; - -struct nfsd_readdirargs { - struct svc_fh fh; - __u32 cookie; - __u32 count; - __be32 * buffer; -}; - -struct nfsd_attrstat { - struct svc_fh fh; - struct kstat stat; -}; - -struct nfsd_diropres { - struct svc_fh fh; - struct kstat stat; -}; - -struct nfsd_readlinkres { - int len; -}; - -struct nfsd_readres { - struct svc_fh fh; - unsigned long count; - struct kstat stat; -}; - -struct nfsd_readdirres { - int count; - - struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; -}; - -struct nfsd_statfsres { - struct kstatfs stats; -}; - -/* - * Storage requirements for XDR arguments and results. - */ -union nfsd_xdrstore { - struct nfsd_sattrargs sattr; - struct nfsd_diropargs dirop; - struct nfsd_readargs read; - struct nfsd_writeargs write; - struct nfsd_createargs create; - struct nfsd_renameargs rename; - struct nfsd_linkargs link; - struct nfsd_symlinkargs symlink; - struct nfsd_readdirargs readdir; -}; - -#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) - - -int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *); -int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); -int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *, - struct nfsd_sattrargs *); -int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *, - struct nfsd_diropargs *); -int nfssvc_decode_readargs(struct svc_rqst *, __be32 *, - struct nfsd_readargs *); -int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *, - struct nfsd_writeargs *); -int nfssvc_decode_createargs(struct svc_rqst *, __be32 *, - struct nfsd_createargs *); -int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *, - struct nfsd_renameargs *); -int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *, - struct nfsd_readlinkargs *); -int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *, - struct nfsd_linkargs *); -int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *, - struct nfsd_symlinkargs *); -int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *, - struct nfsd_readdirargs *); -int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *); -int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *); -int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *); -int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *); -int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *); -int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *); -int 
nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *); - -int nfssvc_encode_entry(void *, const char *name, - int namlen, loff_t offset, u64 ino, unsigned int); - -int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); - -/* Helper functions for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp); -__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp); - -#endif /* LINUX_NFSD_H */ diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h deleted file mode 100644 index 421eddd65a25..000000000000 --- a/include/linux/nfsd/xdr3.h +++ /dev/null @@ -1,346 +0,0 @@ -/* - * linux/include/linux/nfsd/xdr3.h - * - * XDR types for NFSv3 in nfsd. - * - * Copyright (C) 1996-1998, Olaf Kirch - */ - -#ifndef _LINUX_NFSD_XDR3_H -#define _LINUX_NFSD_XDR3_H - -#include - -struct nfsd3_sattrargs { - struct svc_fh fh; - struct iattr attrs; - int check_guard; - time_t guardtime; -}; - -struct nfsd3_diropargs { - struct svc_fh fh; - char * name; - unsigned int len; -}; - -struct nfsd3_accessargs { - struct svc_fh fh; - unsigned int access; -}; - -struct nfsd3_readargs { - struct svc_fh fh; - __u64 offset; - __u32 count; - int vlen; -}; - -struct nfsd3_writeargs { - svc_fh fh; - __u64 offset; - __u32 count; - int stable; - __u32 len; - int vlen; -}; - -struct nfsd3_createargs { - struct svc_fh fh; - char * name; - unsigned int len; - int createmode; - struct iattr attrs; - __be32 * verf; -}; - -struct nfsd3_mknodargs { - struct svc_fh fh; - char * name; - unsigned int len; - __u32 ftype; - __u32 major, minor; - struct iattr attrs; -}; - -struct nfsd3_renameargs { - struct svc_fh ffh; - char * fname; - unsigned int flen; - struct svc_fh tfh; - char * tname; - unsigned int tlen; -}; - -struct nfsd3_readlinkargs { - struct svc_fh fh; - char * buffer; -}; - -struct nfsd3_linkargs { - struct svc_fh ffh; - struct svc_fh tfh; - char * tname; - unsigned int tlen; -}; - -struct nfsd3_symlinkargs { - struct svc_fh ffh; - char * fname; - unsigned int flen; - char * tname; - unsigned int tlen; - struct iattr attrs; -}; - -struct nfsd3_readdirargs { - struct svc_fh fh; - __u64 cookie; - __u32 dircount; - __u32 count; - __be32 * verf; - __be32 * buffer; -}; - -struct nfsd3_commitargs { - struct svc_fh fh; - __u64 offset; - __u32 count; -}; - -struct nfsd3_getaclargs { - struct svc_fh fh; - int mask; -}; - -struct posix_acl; -struct nfsd3_setaclargs { - struct svc_fh fh; - int mask; - struct posix_acl *acl_access; - struct posix_acl *acl_default; -}; - -struct nfsd3_attrstat { - __be32 status; - struct svc_fh fh; - struct kstat stat; -}; - -/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */ -struct nfsd3_diropres { - __be32 status; - struct svc_fh dirfh; - struct svc_fh fh; -}; - -struct nfsd3_accessres { - __be32 status; - struct svc_fh fh; - __u32 access; -}; - -struct nfsd3_readlinkres { - __be32 status; - struct svc_fh fh; - __u32 len; -}; - -struct nfsd3_readres { - __be32 status; - struct svc_fh fh; - unsigned long count; - int eof; -}; - -struct nfsd3_writeres { - __be32 status; - struct svc_fh fh; - unsigned long count; - int committed; -}; - -struct nfsd3_renameres { - __be32 status; - struct svc_fh ffh; - struct svc_fh tfh; -}; - -struct nfsd3_linkres { - __be32 status; - struct svc_fh tfh; - struct svc_fh fh; -}; - -struct nfsd3_readdirres { - __be32 status; - struct svc_fh fh; - int count; - __be32 verf[2]; - - struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; - __be32 * offset1; - 
struct svc_rqst * rqstp; - -}; - -struct nfsd3_fsstatres { - __be32 status; - struct kstatfs stats; - __u32 invarsec; -}; - -struct nfsd3_fsinfores { - __be32 status; - __u32 f_rtmax; - __u32 f_rtpref; - __u32 f_rtmult; - __u32 f_wtmax; - __u32 f_wtpref; - __u32 f_wtmult; - __u32 f_dtpref; - __u64 f_maxfilesize; - __u32 f_properties; -}; - -struct nfsd3_pathconfres { - __be32 status; - __u32 p_link_max; - __u32 p_name_max; - __u32 p_no_trunc; - __u32 p_chown_restricted; - __u32 p_case_insensitive; - __u32 p_case_preserving; -}; - -struct nfsd3_commitres { - __be32 status; - struct svc_fh fh; -}; - -struct nfsd3_getaclres { - __be32 status; - struct svc_fh fh; - int mask; - struct posix_acl *acl_access; - struct posix_acl *acl_default; -}; - -/* dummy type for release */ -struct nfsd3_fhandle_pair { - __u32 dummy; - struct svc_fh fh1; - struct svc_fh fh2; -}; - -/* - * Storage requirements for XDR arguments and results. - */ -union nfsd3_xdrstore { - struct nfsd3_sattrargs sattrargs; - struct nfsd3_diropargs diropargs; - struct nfsd3_readargs readargs; - struct nfsd3_writeargs writeargs; - struct nfsd3_createargs createargs; - struct nfsd3_renameargs renameargs; - struct nfsd3_linkargs linkargs; - struct nfsd3_symlinkargs symlinkargs; - struct nfsd3_readdirargs readdirargs; - struct nfsd3_diropres diropres; - struct nfsd3_accessres accessres; - struct nfsd3_readlinkres readlinkres; - struct nfsd3_readres readres; - struct nfsd3_writeres writeres; - struct nfsd3_renameres renameres; - struct nfsd3_linkres linkres; - struct nfsd3_readdirres readdirres; - struct nfsd3_fsstatres fsstatres; - struct nfsd3_fsinfores fsinfores; - struct nfsd3_pathconfres pathconfres; - struct nfsd3_commitres commitres; - struct nfsd3_getaclres getaclres; -}; - -#define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) - -int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); -int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *, - struct nfsd3_sattrargs *); -int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *, - struct nfsd3_diropargs *); -int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *, - struct nfsd3_accessargs *); -int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *, - struct nfsd3_readargs *); -int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *, - struct nfsd3_writeargs *); -int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *, - struct nfsd3_createargs *); -int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *, - struct nfsd3_createargs *); -int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *, - struct nfsd3_mknodargs *); -int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *, - struct nfsd3_renameargs *); -int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *, - struct nfsd3_readlinkargs *); -int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *, - struct nfsd3_linkargs *); -int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *, - struct nfsd3_symlinkargs *); -int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *, - struct nfsd3_readdirargs *); -int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *, - struct nfsd3_readdirargs *); -int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *, - struct nfsd3_commitargs *); -int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *); -int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *, - struct nfsd3_attrstat *); -int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *, - struct nfsd3_attrstat *); -int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *, - struct 
nfsd3_diropres *); -int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *, - struct nfsd3_accessres *); -int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *, - struct nfsd3_readlinkres *); -int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *); -int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *); -int nfs3svc_encode_createres(struct svc_rqst *, __be32 *, - struct nfsd3_diropres *); -int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *, - struct nfsd3_renameres *); -int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *, - struct nfsd3_linkres *); -int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *, - struct nfsd3_readdirres *); -int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *, - struct nfsd3_fsstatres *); -int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *, - struct nfsd3_fsinfores *); -int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *, - struct nfsd3_pathconfres *); -int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *, - struct nfsd3_commitres *); - -int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *, - struct nfsd3_attrstat *); -int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *, - struct nfsd3_fhandle_pair *); -int nfs3svc_encode_entry(void *, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int); -int nfs3svc_encode_entry_plus(void *, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int); -/* Helper functions for NFSv3 ACL code */ -__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, - struct svc_fh *fhp); -__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp); - - -#endif /* _LINUX_NFSD_XDR3_H */ diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h deleted file mode 100644 index 1bf266239c7e..000000000000 --- a/include/linux/nfsd/xdr4.h +++ /dev/null @@ -1,564 +0,0 @@ -/* - * include/linux/nfsd/xdr4.h - * - * Server-side types for NFSv4. - * - * Copyright (c) 2002 The Regents of the University of Michigan. - * All rights reserved. - * - * Kendrick Smith - * Andy Adamson - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef _LINUX_NFSD_XDR4_H -#define _LINUX_NFSD_XDR4_H - -#include -#include - -#define NFSD4_MAX_TAGLEN 128 -#define XDR_LEN(n) (((n) + 3) & ~3) - -struct nfsd4_compound_state { - struct svc_fh current_fh; - struct svc_fh save_fh; - struct nfs4_stateowner *replay_owner; - /* For sessions DRC */ - struct nfsd4_session *session; - struct nfsd4_slot *slot; - __be32 *datap; - size_t iovlen; - u32 minorversion; - u32 status; -}; - -static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) -{ - return cs->slot != NULL; -} - -struct nfsd4_change_info { - u32 atomic; - bool change_supported; - u32 before_ctime_sec; - u32 before_ctime_nsec; - u64 before_change; - u32 after_ctime_sec; - u32 after_ctime_nsec; - u64 after_change; -}; - -struct nfsd4_access { - u32 ac_req_access; /* request */ - u32 ac_supported; /* response */ - u32 ac_resp_access; /* response */ -}; - -struct nfsd4_close { - u32 cl_seqid; /* request */ - stateid_t cl_stateid; /* request+response */ - struct nfs4_stateowner * cl_stateowner; /* response */ -}; - -struct nfsd4_commit { - u64 co_offset; /* request */ - u32 co_count; /* request */ - nfs4_verifier co_verf; /* response */ -}; - -struct nfsd4_create { - u32 cr_namelen; /* request */ - char * cr_name; /* request */ - u32 cr_type; /* request */ - union { /* request */ - struct { - u32 namelen; - char *name; - } link; /* NF4LNK */ - struct { - u32 specdata1; - u32 specdata2; - } dev; /* NF4BLK, NF4CHR */ - } u; - u32 cr_bmval[3]; /* request */ - struct iattr cr_iattr; /* request */ - struct nfsd4_change_info cr_cinfo; /* response */ - struct nfs4_acl *cr_acl; -}; -#define cr_linklen u.link.namelen -#define cr_linkname u.link.name -#define cr_specdata1 u.dev.specdata1 -#define cr_specdata2 u.dev.specdata2 - -struct nfsd4_delegreturn { - stateid_t dr_stateid; -}; - -struct nfsd4_getattr { - u32 ga_bmval[3]; /* request */ - struct svc_fh *ga_fhp; /* response */ -}; - -struct nfsd4_link { - u32 li_namelen; /* request */ - char * li_name; /* request */ - struct nfsd4_change_info li_cinfo; /* response */ -}; - -struct nfsd4_lock_denied { - clientid_t ld_clientid; - struct nfs4_stateowner *ld_sop; - u64 ld_start; - u64 ld_length; - u32 ld_type; -}; - -struct nfsd4_lock { - /* request */ - u32 lk_type; - u32 lk_reclaim; /* boolean */ - u64 lk_offset; - u64 lk_length; - u32 lk_is_new; - union { - struct { - u32 open_seqid; - stateid_t open_stateid; - u32 lock_seqid; - clientid_t clientid; - struct xdr_netobj owner; - } new; - struct { - stateid_t lock_stateid; - u32 lock_seqid; - } old; - } v; - - /* response */ - union { - struct { - stateid_t stateid; - } ok; - struct nfsd4_lock_denied denied; - } u; - /* The lk_replay_owner is the open owner in the open_to_lock_owner - * case and the lock owner otherwise: */ - struct nfs4_stateowner *lk_replay_owner; -}; -#define lk_new_open_seqid v.new.open_seqid -#define lk_new_open_stateid v.new.open_stateid -#define lk_new_lock_seqid v.new.lock_seqid -#define lk_new_clientid v.new.clientid -#define 
lk_new_owner v.new.owner -#define lk_old_lock_stateid v.old.lock_stateid -#define lk_old_lock_seqid v.old.lock_seqid - -#define lk_rflags u.ok.rflags -#define lk_resp_stateid u.ok.stateid -#define lk_denied u.denied - - -struct nfsd4_lockt { - u32 lt_type; - clientid_t lt_clientid; - struct xdr_netobj lt_owner; - u64 lt_offset; - u64 lt_length; - struct nfs4_stateowner * lt_stateowner; - struct nfsd4_lock_denied lt_denied; -}; - - -struct nfsd4_locku { - u32 lu_type; - u32 lu_seqid; - stateid_t lu_stateid; - u64 lu_offset; - u64 lu_length; - struct nfs4_stateowner *lu_stateowner; -}; - - -struct nfsd4_lookup { - u32 lo_len; /* request */ - char * lo_name; /* request */ -}; - -struct nfsd4_putfh { - u32 pf_fhlen; /* request */ - char *pf_fhval; /* request */ -}; - -struct nfsd4_open { - u32 op_claim_type; /* request */ - struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ - u32 op_delegate_type; /* request - CLAIM_PREV only */ - stateid_t op_delegate_stateid; /* request - response */ - u32 op_create; /* request */ - u32 op_createmode; /* request */ - u32 op_bmval[3]; /* request */ - struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ - nfs4_verifier verf; /* EXCLUSIVE4 */ - clientid_t op_clientid; /* request */ - struct xdr_netobj op_owner; /* request */ - u32 op_seqid; /* request */ - u32 op_share_access; /* request */ - u32 op_share_deny; /* request */ - stateid_t op_stateid; /* response */ - u32 op_recall; /* recall */ - struct nfsd4_change_info op_cinfo; /* response */ - u32 op_rflags; /* response */ - int op_truncate; /* used during processing */ - struct nfs4_stateowner *op_stateowner; /* used during processing */ - struct nfs4_acl *op_acl; -}; -#define op_iattr iattr -#define op_verf verf - -struct nfsd4_open_confirm { - stateid_t oc_req_stateid /* request */; - u32 oc_seqid /* request */; - stateid_t oc_resp_stateid /* response */; - struct nfs4_stateowner * oc_stateowner; /* response */ -}; - -struct nfsd4_open_downgrade { - stateid_t od_stateid; - u32 od_seqid; - u32 od_share_access; - u32 od_share_deny; - struct nfs4_stateowner *od_stateowner; -}; - - -struct nfsd4_read { - stateid_t rd_stateid; /* request */ - u64 rd_offset; /* request */ - u32 rd_length; /* request */ - int rd_vlen; - struct file *rd_filp; - - struct svc_rqst *rd_rqstp; /* response */ - struct svc_fh * rd_fhp; /* response */ -}; - -struct nfsd4_readdir { - u64 rd_cookie; /* request */ - nfs4_verifier rd_verf; /* request */ - u32 rd_dircount; /* request */ - u32 rd_maxcount; /* request */ - u32 rd_bmval[3]; /* request */ - struct svc_rqst *rd_rqstp; /* response */ - struct svc_fh * rd_fhp; /* response */ - - struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; -}; - -struct nfsd4_release_lockowner { - clientid_t rl_clientid; - struct xdr_netobj rl_owner; -}; -struct nfsd4_readlink { - struct svc_rqst *rl_rqstp; /* request */ - struct svc_fh * rl_fhp; /* request */ -}; - -struct nfsd4_remove { - u32 rm_namelen; /* request */ - char * rm_name; /* request */ - struct nfsd4_change_info rm_cinfo; /* response */ -}; - -struct nfsd4_rename { - u32 rn_snamelen; /* request */ - char * rn_sname; /* request */ - u32 rn_tnamelen; /* request */ - char * rn_tname; /* request */ - struct nfsd4_change_info rn_sinfo; /* response */ - struct nfsd4_change_info rn_tinfo; /* response */ -}; - -struct nfsd4_secinfo { - u32 si_namelen; /* request */ - char *si_name; /* request */ - struct svc_export *si_exp; /* response */ -}; - -struct nfsd4_setattr { - stateid_t sa_stateid; /* 
request */ - u32 sa_bmval[3]; /* request */ - struct iattr sa_iattr; /* request */ - struct nfs4_acl *sa_acl; -}; - -struct nfsd4_setclientid { - nfs4_verifier se_verf; /* request */ - u32 se_namelen; /* request */ - char * se_name; /* request */ - u32 se_callback_prog; /* request */ - u32 se_callback_netid_len; /* request */ - char * se_callback_netid_val; /* request */ - u32 se_callback_addr_len; /* request */ - char * se_callback_addr_val; /* request */ - u32 se_callback_ident; /* request */ - clientid_t se_clientid; /* response */ - nfs4_verifier se_confirm; /* response */ -}; - -struct nfsd4_setclientid_confirm { - clientid_t sc_clientid; - nfs4_verifier sc_confirm; -}; - -/* also used for NVERIFY */ -struct nfsd4_verify { - u32 ve_bmval[3]; /* request */ - u32 ve_attrlen; /* request */ - char * ve_attrval; /* request */ -}; - -struct nfsd4_write { - stateid_t wr_stateid; /* request */ - u64 wr_offset; /* request */ - u32 wr_stable_how; /* request */ - u32 wr_buflen; /* request */ - int wr_vlen; - - u32 wr_bytes_written; /* response */ - u32 wr_how_written; /* response */ - nfs4_verifier wr_verifier; /* response */ -}; - -struct nfsd4_exchange_id { - nfs4_verifier verifier; - struct xdr_netobj clname; - u32 flags; - clientid_t clientid; - u32 seqid; - int spa_how; -}; - -struct nfsd4_sequence { - struct nfs4_sessionid sessionid; /* request/response */ - u32 seqid; /* request/response */ - u32 slotid; /* request/response */ - u32 maxslots; /* request/response */ - u32 cachethis; /* request */ -#if 0 - u32 target_maxslots; /* response */ - u32 status_flags; /* response */ -#endif /* not yet */ -}; - -struct nfsd4_destroy_session { - struct nfs4_sessionid sessionid; -}; - -struct nfsd4_op { - int opnum; - __be32 status; - union { - struct nfsd4_access access; - struct nfsd4_close close; - struct nfsd4_commit commit; - struct nfsd4_create create; - struct nfsd4_delegreturn delegreturn; - struct nfsd4_getattr getattr; - struct svc_fh * getfh; - struct nfsd4_link link; - struct nfsd4_lock lock; - struct nfsd4_lockt lockt; - struct nfsd4_locku locku; - struct nfsd4_lookup lookup; - struct nfsd4_verify nverify; - struct nfsd4_open open; - struct nfsd4_open_confirm open_confirm; - struct nfsd4_open_downgrade open_downgrade; - struct nfsd4_putfh putfh; - struct nfsd4_read read; - struct nfsd4_readdir readdir; - struct nfsd4_readlink readlink; - struct nfsd4_remove remove; - struct nfsd4_rename rename; - clientid_t renew; - struct nfsd4_secinfo secinfo; - struct nfsd4_setattr setattr; - struct nfsd4_setclientid setclientid; - struct nfsd4_setclientid_confirm setclientid_confirm; - struct nfsd4_verify verify; - struct nfsd4_write write; - struct nfsd4_release_lockowner release_lockowner; - - /* NFSv4.1 */ - struct nfsd4_exchange_id exchange_id; - struct nfsd4_create_session create_session; - struct nfsd4_destroy_session destroy_session; - struct nfsd4_sequence sequence; - } u; - struct nfs4_replay * replay; -}; - -struct nfsd4_compoundargs { - /* scratch variables for XDR decode */ - __be32 * p; - __be32 * end; - struct page ** pagelist; - int pagelen; - __be32 tmp[8]; - __be32 * tmpp; - struct tmpbuf { - struct tmpbuf *next; - void (*release)(const void *); - void *buf; - } *to_free; - - struct svc_rqst *rqstp; - - u32 taglen; - char * tag; - u32 minorversion; - u32 opcnt; - struct nfsd4_op *ops; - struct nfsd4_op iops[8]; -}; - -struct nfsd4_compoundres { - /* scratch variables for XDR encode */ - __be32 * p; - __be32 * end; - struct xdr_buf * xbuf; - struct svc_rqst * rqstp; - - u32 taglen; - 
char * tag; - u32 opcnt; - __be32 * tagp; /* tag, opcount encode location */ - struct nfsd4_compound_state cstate; -}; - -static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) -{ - struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; - return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; -} - -static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) -{ - return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp); -} - -#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) - -static inline void -set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) -{ - BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved); - cinfo->atomic = 1; - cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode); - if (cinfo->change_supported) { - cinfo->before_change = fhp->fh_pre_change; - cinfo->after_change = fhp->fh_post_change; - } else { - cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; - cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; - cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; - cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; - } -} - -int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *); -int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *, - struct nfsd4_compoundargs *); -int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, - struct nfsd4_compoundres *); -void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); -void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); -__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, - u32 *bmval, struct svc_rqst *, int ignore_crossmnt); -extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, - struct nfsd4_setclientid *setclid); -extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, - struct nfsd4_setclientid_confirm *setclientid_confirm); -extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); -extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, - struct nfsd4_sequence *seq); -extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, -struct nfsd4_exchange_id *); - extern __be32 nfsd4_create_session(struct svc_rqst *, - struct nfsd4_compound_state *, - struct nfsd4_create_session *); -extern __be32 nfsd4_sequence(struct svc_rqst *, - struct nfsd4_compound_state *, - struct nfsd4_sequence *); -extern __be32 nfsd4_destroy_session(struct svc_rqst *, - struct nfsd4_compound_state *, - struct nfsd4_destroy_session *); -extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, - struct nfsd4_open *open); -extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, - struct svc_fh *current_fh, struct nfsd4_open *open); -extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); -extern __be32 nfsd4_close(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, - struct nfsd4_close *close); -extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, - struct nfsd4_open_downgrade *od); -extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *, - struct nfsd4_lock *lock); -extern __be32 nfsd4_lockt(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, - struct nfsd4_lockt *lockt); -extern __be32 nfsd4_locku(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, - 
struct nfsd4_locku *locku); -extern __be32 -nfsd4_release_lockowner(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, - struct nfsd4_release_lockowner *rlockowner); -extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *); -extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr); -extern __be32 nfsd4_renew(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, clientid_t *clid); -#endif - -/* - * Local variables: - * c-basic-offset: 8 - * End: - */ -- cgit v1.2.3 From e8e8753f7a32ce4f636771126fc8eba0dc4ad817 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 Dec 2009 12:53:32 -0500 Subject: nfsd: new interface to advertise export features Soon we will add the new V4ROOT flag, and allow the INSECURE flag to vary by pseudoflavor. It would be useful for nfs-utils (for example, for improved exportfs error reporting) to be able to know when this happens. Use this new interface for that purpose. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 21 +++++++++++++++++++++ include/linux/nfsd/export.h | 3 ++- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 0415680d3f58..e7051ac4dc73 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -31,6 +31,7 @@ enum { NFSD_Getfd, NFSD_Getfs, NFSD_List, + NFSD_Export_features, NFSD_Fh, NFSD_FO_UnlockIP, NFSD_FO_UnlockFS, @@ -149,6 +150,24 @@ static const struct file_operations exports_operations = { .owner = THIS_MODULE, }; +static int export_features_show(struct seq_file *m, void *v) +{ + seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS); + return 0; +} + +static int export_features_open(struct inode *inode, struct file *file) +{ + return single_open(file, export_features_show, NULL); +} + +static struct file_operations export_features_operations = { + .open = export_features_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); @@ -1306,6 +1325,8 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, + [NFSD_Export_features] = {"export_features", + &export_features_operations, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_FO_UnlockFS] = {"unlock_filesystem", diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index ef3d416fcf67..4f1df1d7312c 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -39,7 +39,8 @@ #define NFSEXP_FSID 0x2000 #define NFSEXP_CROSSMOUNT 0x4000 #define NFSEXP_NOACL 0x8000 /* reserved for possible ACL related use */ -#define NFSEXP_ALLFLAGS 0xFE3F +/* All flags that we claim to support. (Note we don't support NOACL.) */ +#define NFSEXP_ALLFLAGS 0x7E3F /* The flags that may vary depending on security flavor: */ #define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ -- cgit v1.2.3 From 12045a6ee9908b38b6d286530c7d816e39071346 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 8 Dec 2009 18:15:52 -0500 Subject: nfsd: let "insecure" flag vary by pseudoflavor This was an oversight; it should be among the export flags that can be allowed to vary by pseudoflavor. 
This allows an administrator to (for example) allow auth_sys mounts only from low ports, but allow auth_krb5 mounts to use any port. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsfh.c | 4 +++- include/linux/nfsd/export.h | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 0eb1c59f5ab8..951938d6c495 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -88,8 +88,10 @@ nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type) static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, struct svc_export *exp) { + int flags = nfsexp_flags(rqstp, exp); + /* Check if the request originated from a secure port. */ - if (!rqstp->rq_secure && EX_SECURE(exp)) { + if (!rqstp->rq_secure && (flags & NFSEXP_INSECURE_PORT)) { RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); dprintk(KERN_WARNING "nfsd: request from insecure port %s!\n", diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 4f1df1d7312c..4cafbe1255f0 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -44,7 +44,8 @@ /* The flags that may vary depending on security flavor: */ #define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ - | NFSEXP_ALLSQUASH) + | NFSEXP_ALLSQUASH \ + | NFSEXP_INSECURE_PORT) #ifdef __KERNEL__ @@ -109,7 +110,6 @@ struct svc_expkey { struct path ek_path; }; -#define EX_SECURE(exp) (!((exp)->ex_flags & NFSEXP_INSECURE_PORT)) #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) #define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) #define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) -- cgit v1.2.3 From 9e1b9b80721661bd63b3662453767b22cd614fe7 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Sat, 7 Nov 2009 21:03:54 +0000 Subject: module: make MODULE_SYMBOL_PREFIX into a CONFIG option The next commit will require the use of MODULE_SYMBOL_PREFIX in .tmp_exports-asm.S. Currently it is mixed in with C structure definitions in "asm/module.h". Move the definition of this arch option into Kconfig, so it can be easily accessed by any code. This also lets modpost.c use the same definition. Previously modpost relied on a hardcoded list of architectures in mk_elfconfig.c. A build test for blackfin, one of the two MODULE_SYMBOL_PREFIX archs, showed the generated code was unchanged. vmlinux was identical save for build ids, and an apparently randomized suffix on a single "__key" symbol in the kallsyms data). 
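The mechanism is easiest to see in isolation. Below is a minimal, self-contained sketch of the two-level token-pasting idiom this patch introduces for VMLINUX_SYMBOL(); in the real build SYMBOL_PREFIX arrives on the command line as -DSYMBOL_PREFIX=_ derived from CONFIG_SYMBOL_PREFIX, and the function name here is made up purely for illustration. modpost picks up the same CONFIG_SYMBOL_PREFIX definition from autoconf.h, which is why the hardcoded arch list in mk_elfconfig.c can go away.

/*
 * Standalone sketch, not kernel code: the prefix is hardcoded so the
 * example compiles on its own.  The two-level paste is needed so that
 * SYMBOL_PREFIX is macro-expanded before ## glues it onto the name.
 */
#include <stdio.h>

#define SYMBOL_PREFIX _			/* illustrative; normally from CONFIG_SYMBOL_PREFIX */
#define PASTE2(x, y) x##y
#define PASTE(x, y) PASTE2(x, y)
#define VMLINUX_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym)

/* Defines a function whose real linker name is "_example_symbol". */
static int VMLINUX_SYMBOL(example_symbol)(void)
{
	return 42;
}

int main(void)
{
	/* The call site uses the same macro, so it resolves to _example_symbol(). */
	printf("%d\n", VMLINUX_SYMBOL(example_symbol)());
	return 0;
}
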
Signed-off-by: Alan Jenkins Acked-by: Mike Frysinger (blackfin) CC: Sam Ravnborg Signed-off-by: Rusty Russell --- arch/blackfin/Kconfig | 4 ++++ arch/blackfin/include/asm/module.h | 2 -- arch/blackfin/kernel/vmlinux.lds.S | 2 -- arch/h8300/Kconfig | 4 ++++ arch/h8300/include/asm/module.h | 2 -- arch/h8300/kernel/vmlinux.lds.S | 1 - include/asm-generic/vmlinux.lds.h | 8 ++++++-- include/linux/module.h | 6 ++++-- scripts/Makefile.lib | 5 +++++ scripts/mod/Makefile | 2 +- scripts/mod/mk_elfconfig.c | 9 --------- scripts/mod/modpost.c | 9 +++++++++ 12 files changed, 33 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index ae6a60f10120..2180433213b7 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -5,6 +5,10 @@ mainmenu "Blackfin Kernel Configuration" +config SYMBOL_PREFIX + string + default "_" + config MMU def_bool n diff --git a/arch/blackfin/include/asm/module.h b/arch/blackfin/include/asm/module.h index 9c1cfffddd9b..4282b169ead9 100644 --- a/arch/blackfin/include/asm/module.h +++ b/arch/blackfin/include/asm/module.h @@ -7,8 +7,6 @@ #ifndef _ASM_BFIN_MODULE_H #define _ASM_BFIN_MODULE_H -#define MODULE_SYMBOL_PREFIX "_" - #define Elf_Shdr Elf32_Shdr #define Elf_Sym Elf32_Sym #define Elf_Ehdr Elf32_Ehdr diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 10e12539000e..f39707c6590d 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -4,8 +4,6 @@ * Licensed under the GPL-2 or later */ -#define VMLINUX_SYMBOL(_sym_) _##_sym_ - #include #include #include diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 9420648352b8..53cc669e6d59 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -10,6 +10,10 @@ config H8300 default y select HAVE_IDE +config SYMBOL_PREFIX + string + default "_" + config MMU bool default n diff --git a/arch/h8300/include/asm/module.h b/arch/h8300/include/asm/module.h index de23231f3196..8e46724b7c09 100644 --- a/arch/h8300/include/asm/module.h +++ b/arch/h8300/include/asm/module.h @@ -8,6 +8,4 @@ struct mod_arch_specific { }; #define Elf_Sym Elf32_Sym #define Elf_Ehdr Elf32_Ehdr -#define MODULE_SYMBOL_PREFIX "_" - #endif /* _ASM_H8/300_MODULE_H */ diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index b9e24907e6ea..03d356d96e5d 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -1,4 +1,3 @@ -#define VMLINUX_SYMBOL(_sym_) _##_sym_ #include #include diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b6e818f4b247..67e652068e0e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -52,8 +52,12 @@ #define LOAD_OFFSET 0 #endif -#ifndef VMLINUX_SYMBOL -#define VMLINUX_SYMBOL(_sym_) _sym_ +#ifndef SYMBOL_PREFIX +#define VMLINUX_SYMBOL(sym) sym +#else +#define PASTE2(x,y) x##y +#define PASTE(x,y) PASTE2(x,y) +#define VMLINUX_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym) #endif /* Align . to a 8 byte boundary equals to maximum function alignment. */ diff --git a/include/linux/module.h b/include/linux/module.h index 482efc865acf..6cb1a3cab5d3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -25,8 +25,10 @@ /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) -/* some toolchains uses a `_' prefix for all user symbols */ -#ifndef MODULE_SYMBOL_PREFIX +/* Some toolchains use a `_' prefix for all user symbols. 
*/ +#ifdef CONFIG_SYMBOL_PREFIX +#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX +#else #define MODULE_SYMBOL_PREFIX "" #endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index ffdafb26f539..224d85e72ef1 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -127,6 +127,11 @@ _c_flags += $(if $(patsubst n%,, \ $(CFLAGS_GCOV)) endif +ifdef CONFIG_SYMBOL_PREFIX +_cpp_flags += -DSYMBOL_PREFIX=$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX)) +endif + + # If building the kernel in a separate objtree expand all occurrences # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/'). diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index 11d69c35e5b4..ff954f8168c1 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -8,7 +8,7 @@ modpost-objs := modpost.o file2alias.o sumversion.o $(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h quiet_cmd_elfconfig = MKELF $@ - cmd_elfconfig = $(obj)/mk_elfconfig $(ARCH) < $< > $@ + cmd_elfconfig = $(obj)/mk_elfconfig < $< > $@ $(obj)/elfconfig.h: $(obj)/empty.o $(obj)/mk_elfconfig FORCE $(call if_changed,elfconfig) diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c index 6a96d47bd1e6..639bca7ba559 100644 --- a/scripts/mod/mk_elfconfig.c +++ b/scripts/mod/mk_elfconfig.c @@ -9,9 +9,6 @@ main(int argc, char **argv) unsigned char ei[EI_NIDENT]; union { short s; char c[2]; } endian_test; - if (argc != 2) { - fprintf(stderr, "Error: no arch\n"); - } if (fread(ei, 1, EI_NIDENT, stdin) != EI_NIDENT) { fprintf(stderr, "Error: input truncated\n"); return 1; @@ -55,12 +52,6 @@ main(int argc, char **argv) else exit(1); - if ((strcmp(argv[1], "h8300") == 0) - || (strcmp(argv[1], "blackfin") == 0)) - printf("#define MODULE_SYMBOL_PREFIX \"_\"\n"); - else - printf("#define MODULE_SYMBOL_PREFIX \"\"\n"); - return 0; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 801a16a17545..fb0f9b711af3 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -15,8 +15,17 @@ #include #include #include "modpost.h" +#include "../../include/linux/autoconf.h" #include "../../include/linux/license.h" +/* Some toolchains use a `_' prefix for all user symbols. */ +#ifdef CONFIG_SYMBOL_PREFIX +#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX +#else +#define MODULE_SYMBOL_PREFIX "" +#endif + + /* Are we using CONFIG_MODVERSIONS? */ int modversions = 0; /* Warn about undefined symbols? (do so if we have vmlinux) */ -- cgit v1.2.3 From e36c54582c6f14adc9e10473e2aec2cc4f0acc03 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 14 Dec 2009 15:58:33 -0500 Subject: tracing: Fix return of trace_dump_stack() The trace_dump_stack() returned a value for a void function. Also, added the missing stub for trace_dump_stack() when tracing is not configured. Reported-by: Ingo Molnar LKML-Reference: <20091214162713.GA31060@elte.hu> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 1 + kernel/trace/trace.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5ad4199fb073..f1dc752da0d2 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -527,6 +527,7 @@ trace_printk(const char *fmt, ...) 
__attribute__ ((format (printf, 1, 2))); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void ftrace_off_permanent(void) { } +static inline void trace_dump_stack(void) { } static inline int trace_printk(const char *fmt, ...) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bd7b969a729a..ee61915935d5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1158,7 +1158,7 @@ void trace_dump_stack(void) unsigned long flags; if (tracing_disabled || tracing_selftest_running) - return 0; + return; local_save_flags(flags); -- cgit v1.2.3 From 0b5ccb2ee250136dd7385b1c7da28417d0d4d32d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 15 Dec 2009 16:59:18 +0100 Subject: ipv6: reassembly: use seperate reassembly queues for conntrack and local delivery Currently the same reassembly queue might be used for packets reassembled by conntrack in different positions in the stack (PREROUTING/LOCAL_OUT), as well as local delivery. This can cause "packet jumps" when the fragment completing a reassembled packet is queued from a different position in the stack than the previous ones. Add a "user" identifier to the reassembly queue key to seperate the queues of each caller, similar to what we do for IPv4. Signed-off-by: Patrick McHardy --- include/net/ipv6.h | 7 +++++++ include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 13 +++++++++++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 7 ++++--- net/ipv6/reassembly.c | 5 ++++- 5 files changed, 27 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 92db8617d188..d6916035bcea 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -350,8 +350,15 @@ static inline int ipv6_prefix_equal(const struct in6_addr *a1, struct inet_frag_queue; +enum ip6_defrag_users { + IP6_DEFRAG_LOCAL_DELIVER, + IP6_DEFRAG_CONNTRACK_IN, + IP6_DEFRAG_CONNTRACK_OUT, +}; + struct ip6_create_arg { __be32 id; + u32 user; struct in6_addr *src; struct in6_addr *dst; }; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index abc55ad75c2b..1ee717eb5b09 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -9,7 +9,7 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; extern int nf_ct_frag6_init(void); extern void nf_ct_frag6_cleanup(void); -extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb); +extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user); extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, struct net_device *in, struct net_device *out, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 5f2ec208a8c3..c0a82fe78321 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -187,6 +187,16 @@ out: return nf_conntrack_confirm(skb); } +static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, + struct sk_buff *skb) +{ + if (hooknum == NF_INET_PRE_ROUTING) + return IP6_DEFRAG_CONNTRACK_IN; + else + return IP6_DEFRAG_CONNTRACK_OUT; + +} + static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, @@ -199,8 +209,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (skb->nfct) return NF_ACCEPT; - reasm = nf_ct_frag6_gather(skb); - + reasm = 
nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e0b9424fa1b2..312c20adc83f 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -168,13 +168,14 @@ out: /* Creation primitives. */ static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) +fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; unsigned int hash; arg.id = id; + arg.user = user; arg.src = src; arg.dst = dst; @@ -559,7 +560,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return 0; } -struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) +struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) { struct sk_buff *clone; struct net_device *dev = skb->dev; @@ -605,7 +606,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) nf_ct_frag6_evictor(); - fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); + fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4d98549a6868..3b3a95607125 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -72,6 +72,7 @@ struct frag_queue struct inet_frag_queue q; __be32 id; /* fragment id */ + u32 user; struct in6_addr saddr; struct in6_addr daddr; @@ -141,7 +142,7 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) struct ip6_create_arg *arg = a; fq = container_of(q, struct frag_queue, q); - return (fq->id == arg->id && + return (fq->id == arg->id && fq->user == arg->user && ipv6_addr_equal(&fq->saddr, arg->src) && ipv6_addr_equal(&fq->daddr, arg->dst)); } @@ -163,6 +164,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) struct ip6_create_arg *arg = a; fq->id = arg->id; + fq->user = arg->user; ipv6_addr_copy(&fq->saddr, arg->src); ipv6_addr_copy(&fq->daddr, arg->dst); } @@ -243,6 +245,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, unsigned int hash; arg.id = id; + arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; -- cgit v1.2.3 From 8fa9ff6849bb86c59cc2ea9faadf3cb2d5223497 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 15 Dec 2009 16:59:59 +0100 Subject: netfilter: fix crashes in bridge netfilter caused by fragment jumps When fragments from bridge netfilter are passed to IPv4 or IPv6 conntrack and a reassembly queue with the same fragment key already exists from reassembling a similar packet received on a different device (f.i. with multicasted fragments), the reassembled packet might continue on a different codepath than where the head fragment originated. This can cause crashes in bridge netfilter when a fragment received on a non-bridge device (and thus with skb->nf_bridge == NULL) continues through the bridge netfilter code. Add a new reassembly identifier for packets originating from bridge netfilter and use it to put those packets in insolated queues. 
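The effect of the extra identifier is that the reassembly queue key is no longer just (fragment id, source, destination): the defragmentation "user" becomes part of the match, so a fragment queued by conntrack on the bridge path can never complete a packet queued for local delivery. The following is a simplified, self-contained sketch of that key comparison; the struct and names are illustrative stand-ins, not the kernel's own types.

#include <stdbool.h>
#include <string.h>

/* Illustrative stand-ins for the kernel's defrag "user" identifiers. */
enum defrag_user {
	DEFRAG_LOCAL_DELIVER,
	DEFRAG_CONNTRACK_IN,
	DEFRAG_CONNTRACK_OUT,
	DEFRAG_CONNTRACK_BRIDGE_IN,
};

struct frag_queue_key {
	unsigned int id;		/* fragment id from the IPv6 fragment header */
	enum defrag_user user;		/* which code path is doing the reassembly */
	unsigned char saddr[16];
	unsigned char daddr[16];
};

/*
 * Two fragments only share a queue when the user matches as well, so
 * otherwise-identical fragments seen on different paths stay apart.
 */
static bool frag_queue_match(const struct frag_queue_key *a,
			     const struct frag_queue_key *b)
{
	return a->id == b->id &&
	       a->user == b->user &&
	       memcmp(a->saddr, b->saddr, sizeof(a->saddr)) == 0 &&
	       memcmp(a->daddr, b->daddr, sizeof(a->daddr)) == 0;
}

int main(void)
{
	struct frag_queue_key local = { .id = 7, .user = DEFRAG_LOCAL_DELIVER };
	struct frag_queue_key bridge = local;

	bridge.user = DEFRAG_CONNTRACK_BRIDGE_IN;
	/* Returns 0: same (id, saddr, daddr) but different users do not match. */
	return frag_queue_match(&local, &bridge) ? 1 : 0;
}
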
Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14805 Reported-and-Tested-by: Chong Qiao Signed-off-by: Patrick McHardy --- include/net/ip.h | 1 + include/net/ipv6.h | 1 + net/ipv4/netfilter/nf_defrag_ipv4.c | 21 +++++++++++++++++---- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 ++++++ 4 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index e6b9d12d5f62..85108cfbb1ae 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -337,6 +337,7 @@ enum ip_defrag_users { IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, IP_DEFRAG_CONNTRACK_OUT, + IP_DEFRAG_CONNTRACK_BRIDGE_IN, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d6916035bcea..ccab5946c830 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -354,6 +354,7 @@ enum ip6_defrag_users { IP6_DEFRAG_LOCAL_DELIVER, IP6_DEFRAG_CONNTRACK_IN, IP6_DEFRAG_CONNTRACK_OUT, + IP6_DEFRAG_CONNTRACK_BRIDGE_IN, }; struct ip6_create_arg { diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index fa2d6b6fc3e5..331ead3ebd1b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -34,6 +35,20 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } +static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, + struct sk_buff *skb) +{ +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && + skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + return IP_DEFRAG_CONNTRACK_BRIDGE_IN; +#endif + if (hooknum == NF_INET_PRE_ROUTING) + return IP_DEFRAG_CONNTRACK_IN; + else + return IP_DEFRAG_CONNTRACK_OUT; +} + static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, @@ -50,10 +65,8 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #endif /* Gather fragments. */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? 
- IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) + enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); + if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; } return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c0a82fe78321..0956ebabbff2 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -190,6 +191,11 @@ out: static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && + skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + return IP6_DEFRAG_CONNTRACK_BRIDGE_IN; +#endif if (hooknum == NF_INET_PRE_ROUTING) return IP6_DEFRAG_CONNTRACK_IN; else -- cgit v1.2.3 From 48f186124220794fce85ed1439fc32f16f69d3e2 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Mon, 14 Dec 2009 21:27:53 -0800 Subject: rpc: add rpc_queue_empty function Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + net/sunrpc/sched.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 1906782ec86b..9157405f9320 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -229,6 +229,7 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *, void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); void rpc_wake_up_status(struct rpc_wait_queue *, int); +int rpc_queue_empty(struct rpc_wait_queue *); void rpc_delay(struct rpc_task *, unsigned long); void * rpc_malloc(struct rpc_task *, size_t); void rpc_free(void *); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cef74ba0666c..89ea8e69ec78 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -384,6 +384,20 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r __rpc_do_wake_up_task(queue, task); } +/* + * Tests whether rpc queue is empty + */ +int rpc_queue_empty(struct rpc_wait_queue *queue) +{ + int res; + + spin_lock_bh(&queue->lock); + res = queue->qlen; + spin_unlock_bh(&queue->lock); + return (res == 0); +} +EXPORT_SYMBOL_GPL(rpc_queue_empty); + /* * Wake up a task on a specific queue */ -- cgit v1.2.3 From cf3b01b54880debb01ea7d471123da5887a7c2cb Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Mon, 14 Dec 2009 21:27:55 -0800 Subject: rpc: add a new priority in RPC task Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 9157405f9320..7bc7fd5291ce 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -173,7 +173,8 @@ struct rpc_task_setup { #define RPC_PRIORITY_LOW (-1) #define RPC_PRIORITY_NORMAL (0) #define RPC_PRIORITY_HIGH (1) -#define RPC_NR_PRIORITY (1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW) +#define RPC_PRIORITY_PRIVILEGED (2) +#define RPC_NR_PRIORITY (1 + RPC_PRIORITY_PRIVILEGED - RPC_PRIORITY_LOW) struct rpc_timer { struct timer_list timer; @@ -255,6 +256,16 @@ static inline int rpc_wait_for_completion_task(struct rpc_task *task) return __rpc_wait_for_completion_task(task, NULL); } +static inline void rpc_task_set_priority(struct 
rpc_task *task, unsigned char prio) +{ + task->tk_priority = prio - RPC_PRIORITY_LOW; +} + +static inline int rpc_task_has_priority(struct rpc_task *task, unsigned char prio) +{ + return (task->tk_priority + RPC_PRIORITY_LOW == prio); +} + #ifdef RPC_DEBUG static inline const char * rpc_qname(struct rpc_wait_queue *q) { -- cgit v1.2.3 From eb4c86c6a5adec423c9e615d4937fdddd06a16c5 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Wed, 9 Sep 2009 14:58:22 -0400 Subject: nfsd: introduce export flag for v4 pseudoroot NFSv4 differs from v2 and v3 in that it presents a single unified filesystem tree, whereas v2 and v3 exported multiple filesystem (whose roots could be found using a separate mount protocol). Our original NFSv4 server implementation asked the administrator to designate a single filesystem as the NFSv4 root, then to mount filesystems they wished to export underneath. (Often using bind mounts of already-existing filesystems.) This was conceptually simple, and allowed easy implementation, but created a serious obstacle to upgrading between v2/v3: since the paths to v4 filesystems were different, administrators would have to adjust all the paths in client-side mount commands when switching to v4. Various workarounds are possible. For example, the administrator could export "/" and designate it as the v4 root. However, the security risks of that approach are obvious, and in any case we shouldn't be requiring the administrator to take extra steps to fix this problem; instead, the server should present consistent paths across different versions by default. These patches take a modified version of that approach: we provide a new export option which exports only a subset of a filesystem. With this flag, it becomes safe for mountd to export "/" by default, with no need for additional configuration. We begin just by defining the new flag. Signed-off-by: Steve Dickson Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 1 + include/linux/nfsd/export.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index cb3dae2fcd86..c64d55f319bd 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1425,6 +1425,7 @@ static struct flags { { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, + { NFSEXP_V4ROOT, {"v4root", ""}}, #ifdef MSNFS { NFSEXP_MSNFS, {"msnfs", ""}}, #endif diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 4cafbe1255f0..41f0d4e25374 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -39,6 +39,16 @@ #define NFSEXP_FSID 0x2000 #define NFSEXP_CROSSMOUNT 0x4000 #define NFSEXP_NOACL 0x8000 /* reserved for possible ACL related use */ +/* + * The NFSEXP_V4ROOT flag causes the kernel to give access only to NFSv4 + * clients, and only to the single directory that is the root of the + * export; further lookup and readdir operations are treated as if every + * subdirectory was a mountpoint, and ignored if they are not themselves + * exported. This is used by nfsd and mountd to construct the NFSv4 + * pseudofilesystem, which provides access only to paths leading to each + * exported filesystem. + */ +#define NFSEXP_V4ROOT 0x10000 /* All flags that we claim to support. (Note we don't support NOACL.) 
*/ #define NFSEXP_ALLFLAGS 0x7E3F -- cgit v1.2.3 From f13c12c634e124d5d31f912b969d542a016d6105 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Dec 2009 19:43:11 +0100 Subject: perf_events: Fix perf_event_attr layout The miss-alignment of bp_addr created a 32bit hole, causing different structure packings on 32 and 64 bit machines. Fix that by moving __reserve_2 into that hole. Further, remove the useless struct and redundant __bp_reserve muck. Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <1260902591.8023.781.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 12 +++--------- kernel/perf_event.c | 2 +- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 64a53f74c9a9..5fcbf7d2712a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -211,17 +211,11 @@ struct perf_event_attr { __u32 wakeup_watermark; /* bytes before wakeup */ }; - struct { /* Hardware breakpoint info */ - __u64 bp_addr; - __u32 bp_type; - __u32 bp_len; - __u64 __bp_reserved_1; - __u64 __bp_reserved_2; - }; - __u32 __reserved_2; - __u64 __reserved_3; + __u64 bp_addr; + __u32 bp_type; + __u32 bp_len; }; /* diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 8823b0885183..0dd8e5d02c66 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4564,7 +4564,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (attr->type >= PERF_TYPE_MAX) return -EINVAL; - if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) + if (attr->__reserved_1 || attr->__reserved_2) return -EINVAL; if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) -- cgit v1.2.3 From f2511774863487e61b56a97da07ebf8dd61d7836 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 13 Dec 2009 20:29:01 +0100 Subject: PM: Add initcall_debug style timing for suspend/resume In order to diagnose overall suspend/resume times, we need basic instrumentation to break down the total time into per device timing, similar to initcall_debug. This patch adds the basic timing instrumentation, needed for a scritps/bootgraph.pl equivalent or humans. The bootgraph.pl program is still a work in progress, but is far enough along to know that this patch is sufficient. Signed-off-by: Arjan van de Ven Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 31 +++++++++++++++++++++++++++++++ include/linux/init.h | 2 ++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 8aa2443182d5..30f0ceebd36c 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "../base.h" #include "power.h" @@ -172,6 +173,13 @@ static int pm_op(struct device *dev, pm_message_t state) { int error = 0; + ktime_t calltime, delta, rettime; + + if (initcall_debug) { + pr_info("calling %s+ @ %i\n", + dev_name(dev), task_pid_nr(current)); + calltime = ktime_get(); + } switch (state.event) { #ifdef CONFIG_SUSPEND @@ -219,6 +227,14 @@ static int pm_op(struct device *dev, default: error = -EINVAL; } + + if (initcall_debug) { + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev), + error, (unsigned long long)ktime_to_ns(delta) >> 10); + } + return error; } @@ -236,6 +252,13 @@ static int pm_noirq_op(struct device *dev, pm_message_t state) { int error = 0; + ktime_t calltime, delta, rettime; + + if (initcall_debug) { + pr_info("calling %s_i+ @ %i\n", + dev_name(dev), task_pid_nr(current)); + calltime = ktime_get(); + } switch (state.event) { #ifdef CONFIG_SUSPEND @@ -283,6 +306,14 @@ static int pm_noirq_op(struct device *dev, default: error = -EINVAL; } + + if (initcall_debug) { + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + printk("initcall %s_i+ returned %d after %Ld usecs\n", dev_name(dev), + error, (unsigned long long)ktime_to_ns(delta) >> 10); + } + return error; } diff --git a/include/linux/init.h b/include/linux/init.h index ff8bde520d03..ab1d31f9352b 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -149,6 +149,8 @@ void prepare_namespace(void); extern void (*late_time_init)(void); +extern int initcall_debug; + #endif #ifndef MODULE -- cgit v1.2.3 From 3d8986c7585457c45fd349b2c542c7c1ecd20843 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 15 Dec 2009 14:09:03 -0500 Subject: nfsd: enable V4ROOT exports With the v4root option now enforced everywhere it should be, it is safe to advertise support for it to mountd. Signed-off-by: J. Bruce Fields --- include/linux/nfsd/export.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 41f0d4e25374..8ae78a61eea4 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -50,7 +50,7 @@ */ #define NFSEXP_V4ROOT 0x10000 /* All flags that we claim to support. (Note we don't support NOACL.) */ -#define NFSEXP_ALLFLAGS 0x7E3F +#define NFSEXP_ALLFLAGS 0x17E3F /* The flags that may vary depending on security flavor: */ #define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ -- cgit v1.2.3 From c7af6b0895229bd080b86afc91302b66f6df0378 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 4 Dec 2009 18:29:33 -0500 Subject: nfsd: remove unused field rq_reffh This field is never referenced anywhere else. I don't know what it was intended for. Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d1567d627557..5a3085b9b394 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -273,10 +273,6 @@ struct svc_rqst { struct auth_domain * rq_client; /* RPC peer info */ struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ struct svc_cacherep * rq_cacherep; /* cache info */ - struct knfsd_fh * rq_reffh; /* Referrence filehandle, used to - * determine what device number - * to report (real or virtual) - */ int rq_splice_ok; /* turned off in gss privacy * to prevent encrypting page * cache pages */ -- cgit v1.2.3 From 1557aca7904ed6fadd22cdc3364754070bb3d3c3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 4 Dec 2009 19:36:06 -0500 Subject: nfsd: move most of nfsfh.h to fs/nfsd Most of this can be trivially moved to a private header as well. Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 1 + fs/nfsd/nfsfh.h | 208 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/state.h | 1 + fs/nfsd/vfs.h | 2 + fs/nfsd/xdr.h | 1 + include/linux/nfsd/nfsfh.h | 199 ------------------------------------------- 6 files changed, 213 insertions(+), 199 deletions(-) create mode 100644 fs/nfsd/nfsfh.h (limited to 'include') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 7d5ba1b0ffcf..b26a3644fbb9 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -22,6 +22,7 @@ #include #include "nfsd.h" +#include "nfsfh.h" #define NFSDDBG_FACILITY NFSDDBG_EXPORT diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h new file mode 100644 index 000000000000..cdfb8c6a4206 --- /dev/null +++ b/fs/nfsd/nfsfh.h @@ -0,0 +1,208 @@ +/* Copyright (C) 1995, 1996, 1997 Olaf Kirch */ + +#ifndef _LINUX_NFSD_FH_INT_H +#define _LINUX_NFSD_FH_INT_H + +#include + +enum nfsd_fsid { + FSID_DEV = 0, + FSID_NUM, + FSID_MAJOR_MINOR, + FSID_ENCODE_DEV, + FSID_UUID4_INUM, + FSID_UUID8, + FSID_UUID16, + FSID_UUID16_INUM, +}; + +enum fsid_source { + FSIDSOURCE_DEV, + FSIDSOURCE_FSID, + FSIDSOURCE_UUID, +}; +extern enum fsid_source fsid_source(struct svc_fh *fhp); + + +/* This might look a little large to "inline" but in all calls except + * one, 'vers' is constant so moste of the function disappears. 
+ */ +static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, + u32 fsid, unsigned char *uuid) +{ + u32 *up; + switch(vers) { + case FSID_DEV: + fsidv[0] = htonl((MAJOR(dev)<<16) | + MINOR(dev)); + fsidv[1] = ino_t_to_u32(ino); + break; + case FSID_NUM: + fsidv[0] = fsid; + break; + case FSID_MAJOR_MINOR: + fsidv[0] = htonl(MAJOR(dev)); + fsidv[1] = htonl(MINOR(dev)); + fsidv[2] = ino_t_to_u32(ino); + break; + + case FSID_ENCODE_DEV: + fsidv[0] = new_encode_dev(dev); + fsidv[1] = ino_t_to_u32(ino); + break; + + case FSID_UUID4_INUM: + /* 4 byte fsid and inode number */ + up = (u32*)uuid; + fsidv[0] = ino_t_to_u32(ino); + fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3]; + break; + + case FSID_UUID8: + /* 8 byte fsid */ + up = (u32*)uuid; + fsidv[0] = up[0] ^ up[2]; + fsidv[1] = up[1] ^ up[3]; + break; + + case FSID_UUID16: + /* 16 byte fsid - NFSv3+ only */ + memcpy(fsidv, uuid, 16); + break; + + case FSID_UUID16_INUM: + /* 8 byte inode and 16 byte fsid */ + *(u64*)fsidv = (u64)ino; + memcpy(fsidv+2, uuid, 16); + break; + default: BUG(); + } +} + +static inline int key_len(int type) +{ + switch(type) { + case FSID_DEV: return 8; + case FSID_NUM: return 4; + case FSID_MAJOR_MINOR: return 12; + case FSID_ENCODE_DEV: return 8; + case FSID_UUID4_INUM: return 8; + case FSID_UUID8: return 8; + case FSID_UUID16: return 16; + case FSID_UUID16_INUM: return 24; + default: return 0; + } +} + +/* + * Shorthand for dprintk()'s + */ +extern char * SVCFH_fmt(struct svc_fh *fhp); + +/* + * Function prototypes + */ +__be32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int); +__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *); +__be32 fh_update(struct svc_fh *); +void fh_put(struct svc_fh *); + +static __inline__ struct svc_fh * +fh_copy(struct svc_fh *dst, struct svc_fh *src) +{ + WARN_ON(src->fh_dentry || src->fh_locked); + + *dst = *src; + return dst; +} + +static inline void +fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) +{ + dst->fh_size = src->fh_size; + memcpy(&dst->fh_base, &src->fh_base, src->fh_size); +} + +static __inline__ struct svc_fh * +fh_init(struct svc_fh *fhp, int maxsize) +{ + memset(fhp, 0, sizeof(*fhp)); + fhp->fh_maxsize = maxsize; + return fhp; +} + +#ifdef CONFIG_NFSD_V3 +/* + * Fill in the pre_op attr for the wcc data + */ +static inline void +fill_pre_wcc(struct svc_fh *fhp) +{ + struct inode *inode; + + inode = fhp->fh_dentry->d_inode; + if (!fhp->fh_pre_saved) { + fhp->fh_pre_mtime = inode->i_mtime; + fhp->fh_pre_ctime = inode->i_ctime; + fhp->fh_pre_size = inode->i_size; + fhp->fh_pre_change = inode->i_version; + fhp->fh_pre_saved = 1; + } +} + +extern void fill_post_wcc(struct svc_fh *); +#else +#define fill_pre_wcc(ignored) +#define fill_post_wcc(notused) +#endif /* CONFIG_NFSD_V3 */ + + +/* + * Lock a file handle/inode + * NOTE: both fh_lock and fh_unlock are done "by hand" in + * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once + * so, any changes here should be reflected there. 
+ */ + +static inline void +fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) +{ + struct dentry *dentry = fhp->fh_dentry; + struct inode *inode; + + BUG_ON(!dentry); + + if (fhp->fh_locked) { + printk(KERN_WARNING "fh_lock: %s/%s already locked!\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + return; + } + + inode = dentry->d_inode; + mutex_lock_nested(&inode->i_mutex, subclass); + fill_pre_wcc(fhp); + fhp->fh_locked = 1; +} + +static inline void +fh_lock(struct svc_fh *fhp) +{ + fh_lock_nested(fhp, I_MUTEX_NORMAL); +} + +/* + * Unlock a file handle/inode + */ +static inline void +fh_unlock(struct svc_fh *fhp) +{ + BUG_ON(!fhp->fh_dentry); + + if (fhp->fh_locked) { + fill_post_wcc(fhp); + mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex); + fhp->fh_locked = 0; + } +} + +#endif /* _LINUX_NFSD_FH_INT_H */ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 2af75686e0d3..775b8d281d6a 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -38,6 +38,7 @@ #define _NFSD4_STATE_H #include +#include "nfsfh.h" typedef struct { u32 cl_boot; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index f4fa6d351bbd..4b1de0a9ea75 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -5,6 +5,8 @@ #ifndef LINUX_NFSD_VFS_H #define LINUX_NFSD_VFS_H +#include "nfsfh.h" + /* * Flags for nfsd_permission */ diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 235ee5c3be54..87fe6f64b8f7 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -9,6 +9,7 @@ #include #include "nfsd.h" +#include "nfsfh.h" struct nfsd_fhandle { struct svc_fh fh; diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index 49523edbc510..65e333afaee4 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -162,205 +162,6 @@ typedef struct svc_fh { } svc_fh; -enum nfsd_fsid { - FSID_DEV = 0, - FSID_NUM, - FSID_MAJOR_MINOR, - FSID_ENCODE_DEV, - FSID_UUID4_INUM, - FSID_UUID8, - FSID_UUID16, - FSID_UUID16_INUM, -}; - -enum fsid_source { - FSIDSOURCE_DEV, - FSIDSOURCE_FSID, - FSIDSOURCE_UUID, -}; -extern enum fsid_source fsid_source(struct svc_fh *fhp); - - -/* This might look a little large to "inline" but in all calls except - * one, 'vers' is constant so moste of the function disappears. 
- */ -static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, - u32 fsid, unsigned char *uuid) -{ - u32 *up; - switch(vers) { - case FSID_DEV: - fsidv[0] = htonl((MAJOR(dev)<<16) | - MINOR(dev)); - fsidv[1] = ino_t_to_u32(ino); - break; - case FSID_NUM: - fsidv[0] = fsid; - break; - case FSID_MAJOR_MINOR: - fsidv[0] = htonl(MAJOR(dev)); - fsidv[1] = htonl(MINOR(dev)); - fsidv[2] = ino_t_to_u32(ino); - break; - - case FSID_ENCODE_DEV: - fsidv[0] = new_encode_dev(dev); - fsidv[1] = ino_t_to_u32(ino); - break; - - case FSID_UUID4_INUM: - /* 4 byte fsid and inode number */ - up = (u32*)uuid; - fsidv[0] = ino_t_to_u32(ino); - fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3]; - break; - - case FSID_UUID8: - /* 8 byte fsid */ - up = (u32*)uuid; - fsidv[0] = up[0] ^ up[2]; - fsidv[1] = up[1] ^ up[3]; - break; - - case FSID_UUID16: - /* 16 byte fsid - NFSv3+ only */ - memcpy(fsidv, uuid, 16); - break; - - case FSID_UUID16_INUM: - /* 8 byte inode and 16 byte fsid */ - *(u64*)fsidv = (u64)ino; - memcpy(fsidv+2, uuid, 16); - break; - default: BUG(); - } -} - -static inline int key_len(int type) -{ - switch(type) { - case FSID_DEV: return 8; - case FSID_NUM: return 4; - case FSID_MAJOR_MINOR: return 12; - case FSID_ENCODE_DEV: return 8; - case FSID_UUID4_INUM: return 8; - case FSID_UUID8: return 8; - case FSID_UUID16: return 16; - case FSID_UUID16_INUM: return 24; - default: return 0; - } -} - -/* - * Shorthand for dprintk()'s - */ -extern char * SVCFH_fmt(struct svc_fh *fhp); - -/* - * Function prototypes - */ -__be32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int); -__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *); -__be32 fh_update(struct svc_fh *); -void fh_put(struct svc_fh *); - -static __inline__ struct svc_fh * -fh_copy(struct svc_fh *dst, struct svc_fh *src) -{ - WARN_ON(src->fh_dentry || src->fh_locked); - - *dst = *src; - return dst; -} - -static inline void -fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) -{ - dst->fh_size = src->fh_size; - memcpy(&dst->fh_base, &src->fh_base, src->fh_size); -} - -static __inline__ struct svc_fh * -fh_init(struct svc_fh *fhp, int maxsize) -{ - memset(fhp, 0, sizeof(*fhp)); - fhp->fh_maxsize = maxsize; - return fhp; -} - -#ifdef CONFIG_NFSD_V3 -/* - * Fill in the pre_op attr for the wcc data - */ -static inline void -fill_pre_wcc(struct svc_fh *fhp) -{ - struct inode *inode; - - inode = fhp->fh_dentry->d_inode; - if (!fhp->fh_pre_saved) { - fhp->fh_pre_mtime = inode->i_mtime; - fhp->fh_pre_ctime = inode->i_ctime; - fhp->fh_pre_size = inode->i_size; - fhp->fh_pre_change = inode->i_version; - fhp->fh_pre_saved = 1; - } -} - -extern void fill_post_wcc(struct svc_fh *); -#else -#define fill_pre_wcc(ignored) -#define fill_post_wcc(notused) -#endif /* CONFIG_NFSD_V3 */ - - -/* - * Lock a file handle/inode - * NOTE: both fh_lock and fh_unlock are done "by hand" in - * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once - * so, any changes here should be reflected there. 
- */ - -static inline void -fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) -{ - struct dentry *dentry = fhp->fh_dentry; - struct inode *inode; - - BUG_ON(!dentry); - - if (fhp->fh_locked) { - printk(KERN_WARNING "fh_lock: %s/%s already locked!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); - return; - } - - inode = dentry->d_inode; - mutex_lock_nested(&inode->i_mutex, subclass); - fill_pre_wcc(fhp); - fhp->fh_locked = 1; -} - -static inline void -fh_lock(struct svc_fh *fhp) -{ - fh_lock_nested(fhp, I_MUTEX_NORMAL); -} - -/* - * Unlock a file handle/inode - */ -static inline void -fh_unlock(struct svc_fh *fhp) -{ - BUG_ON(!fhp->fh_dentry); - - if (fhp->fh_locked) { - fill_post_wcc(fhp); - mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex); - fhp->fh_locked = 0; - } -} #endif /* __KERNEL__ */ -- cgit v1.2.3 From c1e7c3ae59b065bf7ff24a05cb609b2f9e314db6 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 14 Dec 2009 21:45:19 +0000 Subject: bzip2/lzma/gzip: pre-boot malloc doesn't return NULL on failure The trivial malloc implementation used in the pre-boot environment by the decompressors returns a bad pointer on failure (falling through after calling error). This is doubly wrong - the callers expect malloc to return NULL on failure, second the error function is intended to be used by the decompressors to propagate errors to *their* callers. The decompressors have no access to any state set by the error function. Signed-off-by: Phillip Lougher LKML-Reference: <4b26b1ef.hIInb2AYPMtImAJO%phillip@lougher.demon.co.uk> Signed-off-by: H. Peter Anvin --- include/linux/decompress/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index 12ff8c3f1d05..5032b9a31ae7 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -25,7 +25,7 @@ static void *malloc(int size) void *p; if (size < 0) - error("Malloc error"); + return NULL; if (!malloc_ptr) malloc_ptr = free_mem_ptr; @@ -35,7 +35,7 @@ static void *malloc(int size) malloc_ptr += size; if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr) - error("Out of memory"); + return NULL; malloc_count++; return p; -- cgit v1.2.3 From 3a58176e4fa47d8232e04131b023f3f2ecd7084b Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 11 Dec 2009 15:23:22 +0800 Subject: ACPICA: Remove messages if predefined repair(s) are successful Repair mechanism was considered too wordy. Now, messages are only unconditionally emitted if the return object cannot be repaired. Existing messages for successful repairs were converted to ACPI_DEBUG_PRINT messages for now. ACPICA BZ 827. 
http://www.acpica.org/bugzilla/show_bug.cgi?id=827 Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/nspredef.c | 10 +++++----- drivers/acpi/acpica/nsrepair.c | 32 ++++++++++++++++++-------------- drivers/acpi/acpica/nsrepair2.c | 21 +++++++++++++-------- include/acpi/acoutput.h | 8 +++++--- 4 files changed, 41 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c index c6297f57fea8..e352467413f5 100644 --- a/drivers/acpi/acpica/nspredef.c +++ b/drivers/acpi/acpica/nspredef.c @@ -462,11 +462,11 @@ acpi_ns_check_package(struct acpi_predefined_data *data, if (count < expected_count) { goto package_too_small; } else if (count > expected_count) { - ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, - data->node_flags, - "Return Package is larger than needed - " - "found %u, expected %u", count, - expected_count)); + ACPI_DEBUG_PRINT((ACPI_DB_REPAIR, + "%s: Return Package is larger than needed - " + "found %u, expected %u\n", + data->pathname, count, + expected_count)); } /* Validate all elements of the returned package */ diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c index 062a016d4554..907f60c9c9e5 100644 --- a/drivers/acpi/acpica/nsrepair.c +++ b/drivers/acpi/acpica/nsrepair.c @@ -118,6 +118,8 @@ acpi_ns_repair_object(struct acpi_predefined_data *data, union acpi_operand_object *new_object; acpi_status status; + ACPI_FUNCTION_NAME(ns_repair_object); + /* * At this point, we know that the type of the returned object was not * one of the expected types for this predefined name. Attempt to @@ -171,19 +173,18 @@ acpi_ns_repair_object(struct acpi_predefined_data *data, return_object->common.reference_count--; } - ACPI_INFO_PREDEFINED((AE_INFO, data->pathname, data->node_flags, - "Converted %s to expected %s at index %u", - acpi_ut_get_object_type_name - (return_object), - acpi_ut_get_object_type_name(new_object), - package_index)); + ACPI_DEBUG_PRINT((ACPI_DB_REPAIR, + "%s: Converted %s to expected %s at index %u\n", + data->pathname, + acpi_ut_get_object_type_name(return_object), + acpi_ut_get_object_type_name(new_object), + package_index)); } else { - ACPI_INFO_PREDEFINED((AE_INFO, data->pathname, data->node_flags, - "Converted %s to expected %s", - acpi_ut_get_object_type_name - (return_object), - acpi_ut_get_object_type_name - (new_object))); + ACPI_DEBUG_PRINT((ACPI_DB_REPAIR, + "%s: Converted %s to expected %s\n", + data->pathname, + acpi_ut_get_object_type_name(return_object), + acpi_ut_get_object_type_name(new_object))); } /* Delete old object, install the new return object */ @@ -528,6 +529,8 @@ acpi_ns_repair_package_list(struct acpi_predefined_data *data, { union acpi_operand_object *pkg_obj_desc; + ACPI_FUNCTION_NAME(ns_repair_package_list); + /* * Create the new outer package and populate it. The new package will * have a single element, the lone subpackage. 
@@ -544,8 +547,9 @@ acpi_ns_repair_package_list(struct acpi_predefined_data *data, *obj_desc_ptr = pkg_obj_desc; data->flags |= ACPI_OBJECT_REPAIRED; - ACPI_INFO_PREDEFINED((AE_INFO, data->pathname, data->node_flags, - "Repaired Incorrectly formed Package")); + ACPI_DEBUG_PRINT((ACPI_DB_REPAIR, + "%s: Repaired incorrectly formed Package\n", + data->pathname)); return (AE_OK); } diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 846df7047a81..199333880c8d 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -255,6 +255,8 @@ acpi_ns_repair_FDE(struct acpi_predefined_data *data, u32 *dword_buffer; u32 i; + ACPI_FUNCTION_NAME(ns_repair_FDE); + switch (return_object->common.type) { case ACPI_TYPE_BUFFER: @@ -296,8 +298,9 @@ acpi_ns_repair_FDE(struct acpi_predefined_data *data, byte_buffer++; } - ACPI_INFO_PREDEFINED((AE_INFO, data->pathname, data->node_flags, - "Expanded Byte Buffer to expected DWord Buffer")); + ACPI_DEBUG_PRINT((ACPI_DB_REPAIR, + "%s Expanded Byte Buffer to expected DWord Buffer\n", + data->pathname)); break; default: @@ -445,6 +448,8 @@ acpi_ns_check_sorted_list(struct acpi_predefined_data *data, u32 previous_value; acpi_status status; + ACPI_FUNCTION_NAME(ns_check_sorted_list); + /* The top-level object must be a package */ if (return_object->common.type != ACPI_TYPE_PACKAGE) { @@ -460,8 +465,9 @@ acpi_ns_check_sorted_list(struct acpi_predefined_data *data, */ status = acpi_ns_remove_null_elements(return_object); if (status == AE_NULL_ENTRY) { - ACPI_INFO_PREDEFINED((AE_INFO, data->pathname, data->node_flags, - "NULL elements removed from package")); + ACPI_DEBUG_PRINT((ACPI_DB_REPAIR, + "%s: NULL elements removed from package\n", + data->pathname)); /* Exit if package is now zero length */ @@ -522,10 +528,9 @@ acpi_ns_check_sorted_list(struct acpi_predefined_data *data, data->flags |= ACPI_OBJECT_REPAIRED; - ACPI_INFO_PREDEFINED((AE_INFO, data->pathname, - data->node_flags, - "Repaired unsorted list - now sorted by %s", - sort_key_name)); + ACPI_DEBUG_PRINT((ACPI_DB_REPAIR, + "%s: Repaired unsorted list - now sorted by %s\n", + data->pathname, sort_key_name)); return (AE_OK); } diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h index 5c823d5ab783..d814da4b5365 100644 --- a/include/acpi/acoutput.h +++ b/include/acpi/acoutput.h @@ -85,7 +85,8 @@ #define ACPI_LV_INIT 0x00000001 #define ACPI_LV_DEBUG_OBJECT 0x00000002 #define ACPI_LV_INFO 0x00000004 -#define ACPI_LV_ALL_EXCEPTIONS 0x00000007 +#define ACPI_LV_REPAIR 0x00000008 +#define ACPI_LV_ALL_EXCEPTIONS 0x0000000F /* Trace verbosity level 1 [Standard Trace Level] */ @@ -143,6 +144,7 @@ #define ACPI_DB_INIT ACPI_DEBUG_LEVEL (ACPI_LV_INIT) #define ACPI_DB_DEBUG_OBJECT ACPI_DEBUG_LEVEL (ACPI_LV_DEBUG_OBJECT) #define ACPI_DB_INFO ACPI_DEBUG_LEVEL (ACPI_LV_INFO) +#define ACPI_DB_REPAIR ACPI_DEBUG_LEVEL (ACPI_LV_REPAIR) #define ACPI_DB_ALL_EXCEPTIONS ACPI_DEBUG_LEVEL (ACPI_LV_ALL_EXCEPTIONS) /* Trace level -- also used in the global "DebugLevel" */ @@ -174,8 +176,8 @@ /* Defaults for debug_level, debug and normal */ -#define ACPI_DEBUG_DEFAULT (ACPI_LV_INFO) -#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT) +#define ACPI_DEBUG_DEFAULT (ACPI_LV_INFO | ACPI_LV_REPAIR) +#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT | ACPI_LV_REPAIR) #define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL) #if defined (ACPI_DEBUG_OUTPUT) || !defined (ACPI_NO_ERROR_MESSAGES) -- cgit v1.2.3 From 
88e5071525ad6814be3a8a2792ce9e81a0cca22a Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 15 Dec 2009 12:45:15 +0800 Subject: ACPICA: Update version to 20091214 Version 20091214. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 5e1ad3cd1bbd..86e9735a96bd 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20091112 +#define ACPI_CA_VERSION 0x20091214 #include "actypes.h" #include "actbl.h" -- cgit v1.2.3 From 9065ce4500085b9ca66b19d3c4d21a73cb410173 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 17 Nov 2009 17:05:19 -0700 Subject: PNP: add interface to retrieve ACPI device from a PNPACPI device Add pnp_acpi_device(pnp_dev), which takes a PNP device and returns the associated ACPI device (or NULL, if the device is not a PNPACPI device). This allows us to write a PNP driver that can manage both traditional PNPBIOS and ACPI devices, treating ACPI-only functionality as an optional extension. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- drivers/pnp/pnpacpi/core.c | 3 ++- include/linux/pnp.h | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index b2348fc2378e..5314bf630bc4 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -144,7 +144,7 @@ static int pnpacpi_resume(struct pnp_dev *dev) } #endif -static struct pnp_protocol pnpacpi_protocol = { +struct pnp_protocol pnpacpi_protocol = { .name = "Plug and Play ACPI", .get = pnpacpi_get_resources, .set = pnpacpi_set_resources, @@ -154,6 +154,7 @@ static struct pnp_protocol pnpacpi_protocol = { .resume = pnpacpi_resume, #endif }; +EXPORT_SYMBOL(pnpacpi_protocol); static int __init pnpacpi_add_device(struct acpi_device *device) { diff --git a/include/linux/pnp.h b/include/linux/pnp.h index fddfafaed024..7c4193eb0072 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -334,6 +334,19 @@ extern struct pnp_protocol pnpbios_protocol; #define pnp_device_is_pnpbios(dev) 0 #endif +#ifdef CONFIG_PNPACPI +extern struct pnp_protocol pnpacpi_protocol; + +static inline struct acpi_device *pnp_acpi_device(struct pnp_dev *dev) +{ + if (dev->protocol == &pnpacpi_protocol) + return dev->data; + return NULL; +} +#else +#define pnp_acpi_device(dev) 0 +#endif + /* status */ #define PNP_READY 0x0000 #define PNP_ATTACHED 0x0001 -- cgit v1.2.3 From bb5b7c11263dbbe78253cd05945a6bf8f55add8e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Dec 2009 20:56:42 -0800 Subject: tcp: Revert per-route SACK/DSACK/TIMESTAMP changes. It creates a regression, triggering badness for SYN_RECV sockets, for example: [19148.022102] Badness at net/ipv4/inet_connection_sock.c:293 [19148.022570] NIP: c02a0914 LR: c02a0904 CTR: 00000000 [19148.023035] REGS: eeecbd30 TRAP: 0700 Not tainted (2.6.32) [19148.023496] MSR: 00029032 CR: 24002442 XER: 00000000 [19148.024012] TASK = eee9a820[1756] 'privoxy' THREAD: eeeca000 This is likely caused by the change in the 'estab' parameter passed to tcp_parse_options() when invoked by the functions in net/ipv4/tcp_minisocks.c But even if that is fixed, the ->conn_request() changes made in this patch series is fundamentally wrong. 
They try to use the listening socket's 'dst' to probe the route settings. The listening socket doesn't even have a route, and you can't get the right route (the child request one) until much later after we setup all of the state, and it must be done by hand. This stuff really isn't ready, so the best thing to do is a full revert. This reverts the following commits: f55017a93f1a74d50244b1254b9a2bd7ac9bbf7d 022c3f7d82f0f1c68018696f2f027b87b9bb45c2 1aba721eba1d84a2defce45b950272cee1e6c72a cda42ebd67ee5fdf09d7057b5a4584d36fe8a335 345cda2fd695534be5a4494f1b59da9daed33663 dc343475ed062e13fc260acccaab91d7d80fd5b2 05eaade2782fb0c90d3034fd7a7d5a16266182bb 6a2a2d6bf8581216e08be15fcb563cfd6c430e1e Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 6 ++---- include/net/dst.h | 2 +- include/net/tcp.h | 3 +-- net/ipv4/syncookies.c | 27 +++++++++++++-------------- net/ipv4/tcp_input.c | 24 ++++++++---------------- net/ipv4/tcp_ipv4.c | 21 +++++++++------------ net/ipv4/tcp_minisocks.c | 10 +++++----- net/ipv4/tcp_output.c | 18 +++++------------- net/ipv6/syncookies.c | 28 +++++++++++++--------------- net/ipv6/tcp_ipv6.c | 3 +-- 10 files changed, 58 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 14fc906ed602..05330fc5b436 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -368,11 +368,9 @@ enum { #define RTAX_MAX (__RTAX_MAX - 1) #define RTAX_FEATURE_ECN 0x00000001 -#define RTAX_FEATURE_NO_SACK 0x00000002 -#define RTAX_FEATURE_NO_TSTAMP 0x00000004 +#define RTAX_FEATURE_SACK 0x00000002 +#define RTAX_FEATURE_TIMESTAMP 0x00000004 #define RTAX_FEATURE_ALLFRAG 0x00000008 -#define RTAX_FEATURE_NO_WSCALE 0x00000010 -#define RTAX_FEATURE_NO_DSACK 0x00000020 struct rta_session { __u8 proto; diff --git a/include/net/dst.h b/include/net/dst.h index 387cb3cfde7e..39c4a5963e12 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -113,7 +113,7 @@ dst_metric(const struct dst_entry *dst, int metric) static inline u32 dst_feature(const struct dst_entry *dst, u32 feature) { - return (dst ? 
dst_metric(dst, RTAX_FEATURES) & feature : 0); + return dst_metric(dst, RTAX_FEATURES) & feature; } static inline u32 dst_mtu(const struct dst_entry *dst) diff --git a/include/net/tcp.h b/include/net/tcp.h index 1b6f7d348cee..34f5cc24d903 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -408,8 +408,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, u8 **hvpp, - int estab, - struct dst_entry *dst); + int estab); extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 26399ad2a289..66fd80ef2473 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -277,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + ret = NULL; req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) @@ -292,6 +299,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; ireq->ecn_ok = 0; + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -340,20 +353,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } } - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - /* Try to redo what tcp_v4_send_synack did. */ req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 12cab7d74dba..28e029632493 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3727,7 +3727,7 @@ old_ack: * the fast version below fails. 
*/ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - u8 **hvpp, int estab, struct dst_entry *dst) + u8 **hvpp, int estab) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); @@ -3766,8 +3766,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && sysctl_tcp_window_scaling && - !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) { + !estab && sysctl_tcp_window_scaling) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > 14) { @@ -3783,8 +3782,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps && - !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) { + (!estab && sysctl_tcp_timestamps))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -3792,8 +3790,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && sysctl_tcp_sack && - !dst_feature(dst, RTAX_FEATURE_NO_SACK)) { + !estab && sysctl_tcp_sack) { opt_rx->sack_ok = 1; tcp_sack_reset(opt_rx); } @@ -3878,7 +3875,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, if (tcp_parse_aligned_timestamp(tp, th)) return 1; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); return 1; } @@ -4133,10 +4130,8 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack && - !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4165,15 +4160,13 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack && - !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) @@ -5428,11 +5421,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); struct tcp_cookie_values *cvp = tp->cookie_values; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst); + tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0); if (th->ack) { /* rfc793: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 15e96030ce47..65b8ebfd078a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1262,20 +1262,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; #endif - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = 
saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(sk, skb); - - dst = inet_csk_route_req(sk, req); - if(!dst) - goto drop_and_free; - tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && @@ -1319,8 +1309,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(sk, skb); + if (security_inet_conn_request(sk, skb, req)) - goto drop_and_release; + goto drop_and_free; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1345,6 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && + (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 87accec8d097..f206ee5dda80 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -95,9 +95,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); int paws_reject = 0; + tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -526,9 +526,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; - if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) { - tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); + tmp_opt.saw_tstamp = 0; + if (th->doff > (sizeof(struct tcphdr)>>2)) { + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 93316a96d820..383ce237640f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -553,7 +553,6 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_cookie_values *cvp = tp->cookie_values; - struct dst_entry *dst = __sk_dst_get(sk); unsigned remaining = MAX_TCP_OPTION_SPACE; u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? 
tcp_cookie_size_check(cvp->cookie_desired) : @@ -581,22 +580,18 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && - !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) && - *md5 == NULL)) { + if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sysctl_tcp_window_scaling && - !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) { + if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sysctl_tcp_sack && - !dst_feature(dst, RTAX_FEATURE_NO_SACK))) { + if (likely(sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -2527,9 +2522,7 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps && - (!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ? - TCPOLEN_TSTAMP_ALIGNED : 0)); + (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk) != NULL) @@ -2555,8 +2548,7 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - (sysctl_tcp_window_scaling && - !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)), + sysctl_tcp_window_scaling, &rcv_wscale); tp->rx_opt.rcv_wscale = rcv_wscale; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 5b9af508b8f2..7208a06576c6 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -185,6 +185,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + ret = NULL; req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (!req) @@ -218,6 +225,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; ireq->ecn_ok = 0; + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; @@ -253,21 +266,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; } - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->window_clamp = tp->window_clamp ? 
:dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ee9cf62458d4..febfd595a40d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1169,7 +1169,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); __u32 isn = TCP_SKB_CB(skb)->when; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; @@ -1208,7 +1207,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && -- cgit v1.2.3 From 588f9ce6ca61ecb4663ee6ef2f75d2d96c73151e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Dec 2009 12:19:57 +0100 Subject: HWPOISON: Be more aggressive at freeing non LRU caches shake_page handles more types of page caches than lru_drain_all() - per cpu page allocator pages - per CPU LRU Stops early when the page became free. Used in followon patches. Signed-off-by: Andi Kleen --- include/linux/mm.h | 1 + mm/memory-failure.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d65ae4ba0e0..68c84bb2ad3f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1335,6 +1335,7 @@ extern void memory_failure(unsigned long pfn, int trapno); extern int __memory_failure(unsigned long pfn, int trapno, int ref); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; +extern void shake_page(struct page *p); extern atomic_long_t mce_bad_pages; #endif /* __KERNEL__ */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 50d4f8d7024a..38fcbb22eab9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -82,6 +82,28 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, return ret; } +/* + * When a unknown page type is encountered drain as many buffers as possible + * in the hope to turn the page into a LRU or free page, which we can handle. + */ +void shake_page(struct page *p) +{ + if (!PageSlab(p)) { + lru_add_drain_all(); + if (PageLRU(p)) + return; + drain_all_pages(); + if (PageLRU(p) || is_free_buddy_page(p)) + return; + } + /* + * Could call shrink_slab here (which would also + * shrink other caches). Unfortunately that might + * also access the corrupted page, which could be fatal. + */ +} +EXPORT_SYMBOL_GPL(shake_page); + /* * Kill all processes that have a poisoned page mapped and then isolate * the page. -- cgit v1.2.3 From 82ba011b9041dd31c15e4f63797b08aa0a288e61 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Dec 2009 12:19:57 +0100 Subject: HWPOISON: Turn ref argument into flags argument Now that "ref" is just a boolean turn it into a flags argument. First step is only a single flag that makes the code's intention more clear, but more may follow. 
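A minimal caller sketch of the new calling convention (the wrapper function below is hypothetical and not part of this series; only MF_COUNT_INCREASED and the __memory_failure() prototype come from the patch):

	/* Hypothetical caller: the pfn's page refcount has already been
	 * raised, so pass MF_COUNT_INCREASED rather than the old bare
	 * "ref" boolean. Future flags would simply be OR-ed in here. */
	static int report_poison_with_ref(unsigned long pfn, int trapno)
	{
		return __memory_failure(pfn, trapno, MF_COUNT_INCREASED);
	}
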
Signed-off-by: Andi Kleen --- include/linux/mm.h | 5 ++++- mm/madvise.c | 2 +- mm/memory-failure.c | 5 +++-- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 68c84bb2ad3f..135e19198cd3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1331,8 +1331,11 @@ extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, size_t size); extern void refund_locked_memory(struct mm_struct *mm, size_t size); +enum mf_flags { + MF_COUNT_INCREASED = 1 << 0, +}; extern void memory_failure(unsigned long pfn, int trapno); -extern int __memory_failure(unsigned long pfn, int trapno, int ref); +extern int __memory_failure(unsigned long pfn, int trapno, int flags); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p); diff --git a/mm/madvise.c b/mm/madvise.c index 18970aec0d2f..6ca34f0cd4aa 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -237,7 +237,7 @@ static int madvise_hwpoison(unsigned long start, unsigned long end) printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n", page_to_pfn(p), start); /* Ignore return value for now */ - __memory_failure(page_to_pfn(p), 0, 1); + __memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); } return ret; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4253e14fa709..3338c443272c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -737,7 +737,7 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn, ret != SWAP_SUCCESS, pfn); } -int __memory_failure(unsigned long pfn, int trapno, int ref) +int __memory_failure(unsigned long pfn, int trapno, int flags) { unsigned long lru_flag; struct page_state *ps; @@ -773,7 +773,8 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) * In fact it's dangerous to directly bump up page count from 0, * that may make page_freeze_refs()/page_unfreeze_refs() mismatch. */ - if (!ref && !get_page_unless_zero(compound_head(p))) { + if (!(flags & MF_COUNT_INCREASED) && + !get_page_unless_zero(compound_head(p))) { action_result(pfn, "free or high order kernel", IGNORED); return PageBuddy(compound_head(p)) ? 0 : -EBUSY; } -- cgit v1.2.3 From 847ce401df392b0704369fd3f75df614ac1414b4 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Dec 2009 12:19:58 +0100 Subject: HWPOISON: Add unpoisoning support The unpoisoning interface is useful for stress testing tools to reclaim poisoned pages (to prevent OOM) There is no hardware level unpoisioning, so this cannot be used for real memory errors, only for software injected errors. Note that it may leak pages silently - those who have been removed from LRU cache, but not isolated from page cache/swap cache at hwpoison time. Especially the stress test of dirty swap cache pages shall reboot system before exhausting memory. AK: Fix comments, add documentation, add printks, rename symbol Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen --- Documentation/vm/hwpoison.txt | 16 ++++++++-- include/linux/mm.h | 1 + include/linux/page-flags.h | 2 +- mm/hwpoison-inject.c | 36 +++++++++++++++++++---- mm/memory-failure.c | 68 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 114 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt index 3ffadf8da61f..f047e75acb23 100644 --- a/Documentation/vm/hwpoison.txt +++ b/Documentation/vm/hwpoison.txt @@ -98,10 +98,22 @@ madvise(MADV_POISON, ....) 
hwpoison-inject module through debugfs - /sys/debug/hwpoison/corrupt-pfn -Inject hwpoison fault at PFN echoed into this file +/sys/debug/hwpoison/ +corrupt-pfn + +Inject hwpoison fault at PFN echoed into this file. + +unpoison-pfn + +Software-unpoison page at PFN echoed into this file. This +way a page can be reused again. +This only works for Linux injected failures, not for real +memory failures. + +Note these injection interfaces are not stable and might change between +kernel versions Architecture specific MCE injector diff --git a/include/linux/mm.h b/include/linux/mm.h index 135e19198cd3..8cdb941fc7b5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1336,6 +1336,7 @@ enum mf_flags { }; extern void memory_failure(unsigned long pfn, int trapno); extern int __memory_failure(unsigned long pfn, int trapno, int flags); +extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 49e907bd067f..f9df6308af95 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -275,7 +275,7 @@ PAGEFLAG_FALSE(Uncached) #ifdef CONFIG_MEMORY_FAILURE PAGEFLAG(HWPoison, hwpoison) -TESTSETFLAG(HWPoison, hwpoison) +TESTSCFLAG(HWPoison, hwpoison) #define __PG_HWPOISON (1UL << PG_hwpoison) #else PAGEFLAG_FALSE(HWPoison) diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index e1d85137f086..6e35e563bf50 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -4,7 +4,7 @@ #include #include -static struct dentry *hwpoison_dir, *corrupt_pfn; +static struct dentry *hwpoison_dir; static int hwpoison_inject(void *data, u64 val) { @@ -14,7 +14,16 @@ static int hwpoison_inject(void *data, u64 val) return __memory_failure(val, 18, 0); } +static int hwpoison_unpoison(void *data, u64 val) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return unpoison_memory(val); +} + DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n"); +DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n"); static void pfn_inject_exit(void) { @@ -24,16 +33,31 @@ static void pfn_inject_exit(void) static int pfn_inject_init(void) { + struct dentry *dentry; + hwpoison_dir = debugfs_create_dir("hwpoison", NULL); if (hwpoison_dir == NULL) return -ENOMEM; - corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir, + + /* + * Note that the below poison/unpoison interfaces do not involve + * hardware status change, hence do not require hardware support. + * They are mainly for testing hwpoison in software level. + */ + dentry = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir, NULL, &hwpoison_fops); - if (corrupt_pfn == NULL) { - pfn_inject_exit(); - return -ENOMEM; - } + if (!dentry) + goto fail; + + dentry = debugfs_create_file("unpoison-pfn", 0600, hwpoison_dir, + NULL, &unpoison_fops); + if (!dentry) + goto fail; + return 0; +fail: + pfn_inject_exit(); + return -ENOMEM; } module_init(pfn_inject_init); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5055b940df5f..ed6e91c87a54 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -838,6 +838,16 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) * and in many cases impossible, so we just avoid it here. 
*/ lock_page_nosync(p); + + /* + * unpoison always clear PG_hwpoison inside page lock + */ + if (!PageHWPoison(p)) { + action_result(pfn, "unpoisoned", IGNORED); + res = 0; + goto out; + } + wait_on_page_writeback(p); /* @@ -893,3 +903,61 @@ void memory_failure(unsigned long pfn, int trapno) { __memory_failure(pfn, trapno, 0); } + +/** + * unpoison_memory - Unpoison a previously poisoned page + * @pfn: Page number of the to be unpoisoned page + * + * Software-unpoison a page that has been poisoned by + * memory_failure() earlier. + * + * This is only done on the software-level, so it only works + * for linux injected failures, not real hardware failures + * + * Returns 0 for success, otherwise -errno. + */ +int unpoison_memory(unsigned long pfn) +{ + struct page *page; + struct page *p; + int freeit = 0; + + if (!pfn_valid(pfn)) + return -ENXIO; + + p = pfn_to_page(pfn); + page = compound_head(p); + + if (!PageHWPoison(p)) { + pr_debug("MCE: Page was already unpoisoned %#lx\n", pfn); + return 0; + } + + if (!get_page_unless_zero(page)) { + if (TestClearPageHWPoison(p)) + atomic_long_dec(&mce_bad_pages); + pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn); + return 0; + } + + lock_page_nosync(page); + /* + * This test is racy because PG_hwpoison is set outside of page lock. + * That's acceptable because that won't trigger kernel panic. Instead, + * the PG_hwpoison page will be caught and isolated on the entrance to + * the free buddy page pool. + */ + if (TestClearPageHWPoison(p)) { + pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn); + atomic_long_dec(&mce_bad_pages); + freeit = 1; + } + unlock_page(page); + + put_page(page); + if (freeit) + put_page(page); + + return 0; +} +EXPORT_SYMBOL(unpoison_memory); -- cgit v1.2.3 From 1a9b5b7fe0c5dad8a635288882d36785dea742f9 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Dec 2009 12:19:59 +0100 Subject: mm: export stable page flags Rename get_uflags() to stable_page_flags() and make it a global function for use in the hwpoison page flags filter, which need to compare user page flags with the value provided by user space. Also move KPF_* to kernel-page-flags.h for use by user space tools. Acked-by: Matt Mackall Signed-off-by: Andi Kleen CC: Nick Piggin CC: Christoph Lameter Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen --- fs/proc/page.c | 45 +++----------------------------------- include/linux/kernel-page-flags.h | 46 +++++++++++++++++++++++++++++++++++++++ include/linux/page-flags.h | 2 ++ 3 files changed, 51 insertions(+), 42 deletions(-) create mode 100644 include/linux/kernel-page-flags.h (limited to 'include') diff --git a/fs/proc/page.c b/fs/proc/page.c index 5033ce0d254b..180cf5a0bd67 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "internal.h" @@ -71,52 +72,12 @@ static const struct file_operations proc_kpagecount_operations = { * physical page flags. 
*/ -/* These macros are used to decouple internal flags from exported ones */ - -#define KPF_LOCKED 0 -#define KPF_ERROR 1 -#define KPF_REFERENCED 2 -#define KPF_UPTODATE 3 -#define KPF_DIRTY 4 -#define KPF_LRU 5 -#define KPF_ACTIVE 6 -#define KPF_SLAB 7 -#define KPF_WRITEBACK 8 -#define KPF_RECLAIM 9 -#define KPF_BUDDY 10 - -/* 11-20: new additions in 2.6.31 */ -#define KPF_MMAP 11 -#define KPF_ANON 12 -#define KPF_SWAPCACHE 13 -#define KPF_SWAPBACKED 14 -#define KPF_COMPOUND_HEAD 15 -#define KPF_COMPOUND_TAIL 16 -#define KPF_HUGE 17 -#define KPF_UNEVICTABLE 18 -#define KPF_HWPOISON 19 -#define KPF_NOPAGE 20 - -#define KPF_KSM 21 - -/* kernel hacking assistances - * WARNING: subject to change, never rely on them! - */ -#define KPF_RESERVED 32 -#define KPF_MLOCKED 33 -#define KPF_MAPPEDTODISK 34 -#define KPF_PRIVATE 35 -#define KPF_PRIVATE_2 36 -#define KPF_OWNER_PRIVATE 37 -#define KPF_ARCH 38 -#define KPF_UNCACHED 39 - static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) { return ((kflags >> kbit) & 1) << ubit; } -static u64 get_uflags(struct page *page) +u64 stable_page_flags(struct page *page) { u64 k; u64 u; @@ -219,7 +180,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, else ppage = NULL; - if (put_user(get_uflags(ppage), out)) { + if (put_user(stable_page_flags(ppage), out)) { ret = -EFAULT; break; } diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h new file mode 100644 index 000000000000..bd92a89f4b0a --- /dev/null +++ b/include/linux/kernel-page-flags.h @@ -0,0 +1,46 @@ +#ifndef LINUX_KERNEL_PAGE_FLAGS_H +#define LINUX_KERNEL_PAGE_FLAGS_H + +/* + * Stable page flag bits exported to user space + */ + +#define KPF_LOCKED 0 +#define KPF_ERROR 1 +#define KPF_REFERENCED 2 +#define KPF_UPTODATE 3 +#define KPF_DIRTY 4 +#define KPF_LRU 5 +#define KPF_ACTIVE 6 +#define KPF_SLAB 7 +#define KPF_WRITEBACK 8 +#define KPF_RECLAIM 9 +#define KPF_BUDDY 10 + +/* 11-20: new additions in 2.6.31 */ +#define KPF_MMAP 11 +#define KPF_ANON 12 +#define KPF_SWAPCACHE 13 +#define KPF_SWAPBACKED 14 +#define KPF_COMPOUND_HEAD 15 +#define KPF_COMPOUND_TAIL 16 +#define KPF_HUGE 17 +#define KPF_UNEVICTABLE 18 +#define KPF_HWPOISON 19 +#define KPF_NOPAGE 20 + +#define KPF_KSM 21 + +/* kernel hacking assistances + * WARNING: subject to change, never rely on them! + */ +#define KPF_RESERVED 32 +#define KPF_MLOCKED 33 +#define KPF_MAPPEDTODISK 34 +#define KPF_PRIVATE 35 +#define KPF_PRIVATE_2 36 +#define KPF_OWNER_PRIVATE 37 +#define KPF_ARCH 38 +#define KPF_UNCACHED 39 + +#endif /* LINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f9df6308af95..feee2ba8d06a 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -282,6 +282,8 @@ PAGEFLAG_FALSE(HWPoison) #define __PG_HWPOISON 0 #endif +u64 stable_page_flags(struct page *page); + static inline int PageUptodate(struct page *page) { int ret = test_bit(PG_uptodate, &(page)->flags); -- cgit v1.2.3 From e42d9d5d47961fb5db0be65b56dd52fe7b2421f1 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Dec 2009 12:19:59 +0100 Subject: memcg: rename and export try_get_mem_cgroup_from_page() So that the hwpoison injector can get mem_cgroup for arbitrary page and thus know whether it is owned by some mem_cgroup task(s). 
[AK: Merged with latest git tree] CC: KOSAKI Motohiro CC: Hugh Dickins CC: Daisuke Nishimura CC: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen --- include/linux/memcontrol.h | 6 ++++++ mm/memcontrol.c | 11 ++++------- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bf9213b2db8f..fc9bae82ac42 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -68,6 +68,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); +extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); static inline @@ -189,6 +190,11 @@ mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to) { } +static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) +{ + return NULL; +} + static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e0c2066495e3..b5ac61ce7346 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1379,25 +1379,22 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) return container_of(css, struct mem_cgroup, css); } -static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page) +struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) { - struct mem_cgroup *mem; + struct mem_cgroup *mem = NULL; struct page_cgroup *pc; unsigned short id; swp_entry_t ent; VM_BUG_ON(!PageLocked(page)); - if (!PageSwapCache(page)) - return NULL; - pc = lookup_page_cgroup(page); lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { mem = pc->mem_cgroup; if (mem && !css_tryget(&mem->css)) mem = NULL; - } else { + } else if (PageSwapCache(page)) { ent.val = page_private(page); id = lookup_swap_cgroup(ent); rcu_read_lock(); @@ -1743,7 +1740,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, */ if (!PageSwapCache(page)) goto charge_cur_mm; - mem = try_get_mem_cgroup_from_swapcache(page); + mem = try_get_mem_cgroup_from_page(page); if (!mem) goto charge_cur_mm; *ptr = mem; -- cgit v1.2.3 From d324236b3333e87c8825b35f2104184734020d35 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Dec 2009 12:19:59 +0100 Subject: memcg: add accessor to mem_cgroup.css So that an outside user can free the reference count grabbed by try_get_mem_cgroup_from_page(). 
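A hedged sketch of how the two memcg helpers above are meant to pair up (the css_put() pairing is an assumption about the intended usage, not code from this series):

	struct mem_cgroup *memcg;

	/* page must be locked; see the VM_BUG_ON() in the helper */
	memcg = try_get_mem_cgroup_from_page(page);	/* takes a css reference */
	if (memcg) {
		/* ... decide whether the page belongs to the cgroup of interest ... */
		css_put(mem_cgroup_css(memcg));		/* drop the reference again */
	}
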
CC: KOSAKI Motohiro CC: Hugh Dickins CC: Daisuke Nishimura CC: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen --- include/linux/memcontrol.h | 7 +++++++ mm/memcontrol.c | 5 +++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fc9bae82ac42..2c30a1116d84 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -81,6 +81,8 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) return cgroup == mem; } +extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem); + extern int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr); extern void mem_cgroup_end_migration(struct mem_cgroup *mem, @@ -206,6 +208,11 @@ static inline int task_in_mem_cgroup(struct task_struct *task, return 1; } +static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) +{ + return NULL; +} + static inline int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b5ac61ce7346..9eee80d6d490 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -282,6 +282,11 @@ mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) return &mem->info.nodeinfo[nid]->zoneinfo[zid]; } +struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) +{ + return &mem->css; +} + static struct mem_cgroup_per_zone * page_cgroup_zoneinfo(struct page_cgroup *pc) { -- cgit v1.2.3 From facb6011f3993947283fa15d039dacb4ad140230 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Dec 2009 12:20:00 +0100 Subject: HWPOISON: Add soft page offline support This is a simpler, gentler variant of memory_failure() for soft page offlining controlled from user space. It doesn't kill anything, just tries to invalidate and if that doesn't work migrate the page away. This is useful for predictive failure analysis, where a page has a high rate of corrected errors, but hasn't gone bad yet. Instead it can be offlined early and avoided. The offlining is controlled from sysfs, including a new generic entry point for hard page offlining for symmetry too. We use the page isolate facility to prevent re-allocation race. Normally this is only used by memory hotplug. To avoid races with memory allocation I am using lock_system_sleep(). This avoids the situation where memory hotplug is about to isolate a page range and then hwpoison undoes that work. This is a big hammer currently, but the simplest solution currently. When the page is not free or LRU we try to free pages from slab and other caches. The slab freeing is currently quite dumb and does not try to focus on the specific slab cache which might own the page. This could be potentially improved later. Thanks to Fengguang Wu and Haicheng Li for some fixes. 
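For illustration, a small user-space sketch of driving the new soft-offline knob; the sysfs path and the hex physical-address input come from the ABI document added below, while the function name and error handling are hypothetical:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Ask the kernel to soft-offline the page containing paddr
	 * (a physical address, written as hex per the ABI document). */
	static int soft_offline(unsigned long long paddr)
	{
		char buf[32];
		int fd, len, ret = 0;

		fd = open("/sys/devices/system/memory/soft_offline_page", O_WRONLY);
		if (fd < 0)
			return -1;
		len = snprintf(buf, sizeof(buf), "0x%llx", paddr);
		if (write(fd, buf, len) != len)
			ret = -1;
		close(fd);
		return ret;
	}
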
[Added fix from Andrew Morton to adapt to new migrate_pages prototype] Signed-off-by: Andi Kleen --- .../ABI/testing/sysfs-memory-page-offline | 44 +++++ drivers/base/memory.c | 61 +++++++ include/linux/mm.h | 3 +- mm/hwpoison-inject.c | 2 +- mm/memory-failure.c | 194 ++++++++++++++++++++- 5 files changed, 297 insertions(+), 7 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-memory-page-offline (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-memory-page-offline b/Documentation/ABI/testing/sysfs-memory-page-offline new file mode 100644 index 000000000000..e14703f12fdf --- /dev/null +++ b/Documentation/ABI/testing/sysfs-memory-page-offline @@ -0,0 +1,44 @@ +What: /sys/devices/system/memory/soft_offline_page +Date: Sep 2009 +KernelVersion: 2.6.33 +Contact: andi@firstfloor.org +Description: + Soft-offline the memory page containing the physical address + written into this file. Input is a hex number specifying the + physical address of the page. The kernel will then attempt + to soft-offline it, by moving the contents elsewhere or + dropping it if possible. The page will then be placed + on the bad page list and never be reused. + + The offlining is done in kernel-specific granularity. + Normally it's the base page size of the kernel, but + this might change. + + The page must still be accessible and not poisoned. The + kernel will never kill anything for this, but rather + fail the offline. Return value is the size of the + input, or an error when the offlining failed. Reading + the file is not allowed. + +What: /sys/devices/system/memory/hard_offline_page +Date: Sep 2009 +KernelVersion: 2.6.33 +Contact: andi@firstfloor.org +Description: + Hard-offline the memory page containing the physical + address written into this file. Input is a hex number + specifying the physical address of the page. The + kernel will then attempt to hard-offline the page, by + trying to drop the page or killing any owner or + triggering IO errors if needed. Note this may kill + any processes owning the page. The kernel will avoid + accessing this page, assuming it's poisoned by the + hardware. + + The offlining is done in kernel-specific granularity. + Normally it's the base page size of the kernel, but + this might change. + + Return value is the size of the input, or an error when + the offlining failed. + Reading the file is not allowed. diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 989429cfed88..c4c8f2e1dd15 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -341,6 +341,64 @@ static inline int memory_probe_init(void) } #endif +#ifdef CONFIG_MEMORY_FAILURE +/* + * Support for offlining pages of memory + */ + +/* Soft offline a page */ +static ssize_t +store_soft_offline_page(struct class *class, const char *buf, size_t count) +{ + int ret; + u64 pfn; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (strict_strtoull(buf, 0, &pfn) < 0) + return -EINVAL; + pfn >>= PAGE_SHIFT; + if (!pfn_valid(pfn)) + return -ENXIO; + ret = soft_offline_page(pfn_to_page(pfn), 0); + return ret == 0 ? count : ret; +} + +/* Forcibly offline a page, including killing processes. */ +static ssize_t +store_hard_offline_page(struct class *class, const char *buf, size_t count) +{ + int ret; + u64 pfn; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (strict_strtoull(buf, 0, &pfn) < 0) + return -EINVAL; + pfn >>= PAGE_SHIFT; + ret = __memory_failure(pfn, 0, 0); + return ret ?
ret : count; +} + +static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page); +static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page); + +static __init int memory_fail_init(void) +{ + int err; + + err = sysfs_create_file(&memory_sysdev_class.kset.kobj, + &class_attr_soft_offline_page.attr); + if (!err) + err = sysfs_create_file(&memory_sysdev_class.kset.kobj, + &class_attr_hard_offline_page.attr); + return err; +} +#else +static inline int memory_fail_init(void) +{ + return 0; +} +#endif + /* * Note that phys_device is optional. It is here to allow for * differentiation between which *physical* devices each @@ -471,6 +529,9 @@ int __init memory_dev_init(void) } err = memory_probe_init(); + if (!ret) + ret = err; + err = memory_fail_init(); if (!ret) ret = err; err = block_size_init(); diff --git a/include/linux/mm.h b/include/linux/mm.h index 8cdb941fc7b5..849b4a61bd8f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1339,8 +1339,9 @@ extern int __memory_failure(unsigned long pfn, int trapno, int flags); extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; -extern void shake_page(struct page *p); +extern void shake_page(struct page *p, int access); extern atomic_long_t mce_bad_pages; +extern int soft_offline_page(struct page *page, int flags); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index c597f46ac18a..a77fe3f9e211 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -29,7 +29,7 @@ static int hwpoison_inject(void *data, u64 val) return 0; if (!PageLRU(p)) - shake_page(p); + shake_page(p, 0); /* * This implies unable to support non-LRU pages. */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b5c3b6bd511f..bcce28755832 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -41,6 +41,9 @@ #include #include #include +#include +#include +#include #include "internal.h" int sysctl_memory_failure_early_kill __read_mostly = 0; @@ -201,7 +204,7 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, * When a unknown page type is encountered drain as many buffers as possible * in the hope to turn the page into a LRU or free page, which we can handle. */ -void shake_page(struct page *p) +void shake_page(struct page *p, int access) { if (!PageSlab(p)) { lru_add_drain_all(); @@ -211,11 +214,19 @@ void shake_page(struct page *p) if (PageLRU(p) || is_free_buddy_page(p)) return; } + /* - * Could call shrink_slab here (which would also - * shrink other caches). Unfortunately that might - * also access the corrupted page, which could be fatal. + * Only all shrink_slab here (which would also + * shrink other caches) if access is not potentially fatal. */ + if (access) { + int nr; + do { + nr = shrink_slab(1000, GFP_KERNEL, 1000); + if (page_count(p) == 0) + break; + } while (nr > 10); + } } EXPORT_SYMBOL_GPL(shake_page); @@ -949,7 +960,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) * walked by the page reclaim code, however that's not a big loss. */ if (!PageLRU(p)) - shake_page(p); + shake_page(p, 0); if (!PageLRU(p)) { /* * shake_page could have turned it free. 
@@ -1099,3 +1110,176 @@ int unpoison_memory(unsigned long pfn) return 0; } EXPORT_SYMBOL(unpoison_memory); + +static struct page *new_page(struct page *p, unsigned long private, int **x) +{ + return alloc_pages(GFP_HIGHUSER_MOVABLE, 0); +} + +/* + * Safely get reference count of an arbitrary page. + * Returns 0 for a free page, -EIO for a zero refcount page + * that is not free, and 1 for any other page type. + * For 1 the page is returned with increased page count, otherwise not. + */ +static int get_any_page(struct page *p, unsigned long pfn, int flags) +{ + int ret; + + if (flags & MF_COUNT_INCREASED) + return 1; + + /* + * The lock_system_sleep prevents a race with memory hotplug, + * because the isolation assumes there's only a single user. + * This is a big hammer, a better would be nicer. + */ + lock_system_sleep(); + + /* + * Isolate the page, so that it doesn't get reallocated if it + * was free. + */ + set_migratetype_isolate(p); + if (!get_page_unless_zero(compound_head(p))) { + if (is_free_buddy_page(p)) { + pr_debug("get_any_page: %#lx free buddy page\n", pfn); + /* Set hwpoison bit while page is still isolated */ + SetPageHWPoison(p); + ret = 0; + } else { + pr_debug("get_any_page: %#lx: unknown zero refcount page type %lx\n", + pfn, p->flags); + ret = -EIO; + } + } else { + /* Not a free page */ + ret = 1; + } + unset_migratetype_isolate(p); + unlock_system_sleep(); + return ret; +} + +/** + * soft_offline_page - Soft offline a page. + * @page: page to offline + * @flags: flags. Same as memory_failure(). + * + * Returns 0 on success, otherwise negated errno. + * + * Soft offline a page, by migration or invalidation, + * without killing anything. This is for the case when + * a page is not corrupted yet (so it's still valid to access), + * but has had a number of corrected errors and is better taken + * out. + * + * The actual policy on when to do that is maintained by + * user space. + * + * This should never impact any application or cause data loss, + * however it might take some time. + * + * This is not a 100% solution for all memory, but tries to be + * ``good enough'' for the majority of memory. + */ +int soft_offline_page(struct page *page, int flags) +{ + int ret; + unsigned long pfn = page_to_pfn(page); + + ret = get_any_page(page, pfn, flags); + if (ret < 0) + return ret; + if (ret == 0) + goto done; + + /* + * Page cache page we can handle? + */ + if (!PageLRU(page)) { + /* + * Try to free it. + */ + put_page(page); + shake_page(page, 1); + + /* + * Did it turn free? + */ + ret = get_any_page(page, pfn, 0); + if (ret < 0) + return ret; + if (ret == 0) + goto done; + } + if (!PageLRU(page)) { + pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n", + pfn, page->flags); + return -EIO; + } + + lock_page(page); + wait_on_page_writeback(page); + + /* + * Synchronized using the page lock with memory_failure() + */ + if (PageHWPoison(page)) { + unlock_page(page); + put_page(page); + pr_debug("soft offline: %#lx page already poisoned\n", pfn); + return -EBUSY; + } + + /* + * Try to invalidate first. This should work for + * non dirty unmapped page cache pages. + */ + ret = invalidate_inode_page(page); + unlock_page(page); + + /* + * Drop count because page migration doesn't like raised + * counts. The page could get re-allocated, but if it becomes + * LRU the isolation will just fail. + * RED-PEN would be better to keep it isolated here, but we + * would need to fix isolation locking first. 
+ */ + put_page(page); + if (ret == 1) { + ret = 0; + pr_debug("soft_offline: %#lx: invalidated\n", pfn); + goto done; + } + + /* + * Simple invalidation didn't work. + * Try to migrate to a new page instead. migrate.c + * handles a large number of cases for us. + */ + ret = isolate_lru_page(page); + if (!ret) { + LIST_HEAD(pagelist); + + list_add(&page->lru, &pagelist); + ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); + if (ret) { + pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", + pfn, ret, page->flags); + if (ret > 0) + ret = -EIO; + } + } else { + pr_debug("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", + pfn, ret, page_count(page), page->flags); + } + if (ret) + return ret; + +done: + atomic_long_add(1, &mce_bad_pages); + SetPageHWPoison(page); + /* keep elevated page count for bad page */ + return ret; +} -- cgit v1.2.3 From afcf938ee0aac4ef95b1a23bac704c6fbeb26de6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Dec 2009 12:20:00 +0100 Subject: HWPOISON: Add a madvise() injector for soft page offlining Process based injection is much easier to handle for test programs, who can first bring a page into a specific state and then test. So add a new MADV_SOFT_OFFLINE to soft offline a page, similar to the existing hard offline injector. Signed-off-by: Andi Kleen --- include/asm-generic/mman-common.h | 1 + mm/madvise.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index 20111265afd8..3da9e2742fa0 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h @@ -40,6 +40,7 @@ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ #define MADV_HWPOISON 100 /* poison a page for testing */ +#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ diff --git a/mm/madvise.c b/mm/madvise.c index 7964e36ba915..319528b8db74 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -222,7 +223,7 @@ static long madvise_remove(struct vm_area_struct *vma, /* * Error injection support for memory error handling. 
*/ -static int madvise_hwpoison(unsigned long start, unsigned long end) +static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) { int ret = 0; @@ -233,6 +234,14 @@ static int madvise_hwpoison(unsigned long start, unsigned long end) int ret = get_user_pages_fast(start, 1, 0, &p); if (ret != 1) return ret; + if (bhv == MADV_SOFT_OFFLINE) { + printk(KERN_INFO "Soft offlining page %lx at %lx\n", + page_to_pfn(p), start); + ret = soft_offline_page(p, MF_COUNT_INCREASED); + if (ret) + break; + continue; + } printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n", page_to_pfn(p), start); /* Ignore return value for now */ @@ -333,8 +342,8 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) size_t len; #ifdef CONFIG_MEMORY_FAILURE - if (behavior == MADV_HWPOISON) - return madvise_hwpoison(start, start+len_in); + if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) + return madvise_hwpoison(behavior, start, start+len_in); #endif if (!madvise_behavior_valid(behavior)) return error; -- cgit v1.2.3 From 9905a43b2d563e6f89e4c63c4278ada03f2ebb14 Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Mon, 14 Dec 2009 00:58:57 +0100 Subject: backlight: Constify struct backlight_ops Signed-off-by: Emese Revfy Signed-off-by: Richard Purdie --- drivers/video/backlight/adp5520_bl.c | 2 +- drivers/video/backlight/adx_bl.c | 2 +- drivers/video/backlight/atmel-pwm-bl.c | 2 +- drivers/video/backlight/backlight.c | 2 +- drivers/video/backlight/corgi_lcd.c | 2 +- drivers/video/backlight/cr_bllcd.c | 2 +- drivers/video/backlight/da903x_bl.c | 2 +- drivers/video/backlight/generic_bl.c | 2 +- drivers/video/backlight/hp680_bl.c | 2 +- drivers/video/backlight/jornada720_bl.c | 2 +- drivers/video/backlight/kb3886_bl.c | 2 +- drivers/video/backlight/locomolcd.c | 2 +- drivers/video/backlight/mbp_nvidia_bl.c | 2 +- drivers/video/backlight/omap1_bl.c | 2 +- drivers/video/backlight/progear_bl.c | 2 +- drivers/video/backlight/pwm_bl.c | 2 +- drivers/video/backlight/tosa_bl.c | 2 +- drivers/video/backlight/wm831x_bl.c | 2 +- include/linux/backlight.h | 12 ++++++------ 19 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/video/backlight/adp5520_bl.c b/drivers/video/backlight/adp5520_bl.c index 4c10edecfb66..86d95c228adb 100644 --- a/drivers/video/backlight/adp5520_bl.c +++ b/drivers/video/backlight/adp5520_bl.c @@ -85,7 +85,7 @@ static int adp5520_bl_get_brightness(struct backlight_device *bl) return error ? 
data->current_brightness : reg_val; } -static struct backlight_ops adp5520_bl_ops = { +static const struct backlight_ops adp5520_bl_ops = { .update_status = adp5520_bl_update_status, .get_brightness = adp5520_bl_get_brightness, }; diff --git a/drivers/video/backlight/adx_bl.c b/drivers/video/backlight/adx_bl.c index 2c3bdfc620b7..d769b0bab21a 100644 --- a/drivers/video/backlight/adx_bl.c +++ b/drivers/video/backlight/adx_bl.c @@ -61,7 +61,7 @@ static int adx_backlight_check_fb(struct fb_info *fb) return 1; } -static struct backlight_ops adx_backlight_ops = { +static const struct backlight_ops adx_backlight_ops = { .options = 0, .update_status = adx_backlight_update_status, .get_brightness = adx_backlight_get_brightness, diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c index 2cf7ba52f67c..f625ffc69ad3 100644 --- a/drivers/video/backlight/atmel-pwm-bl.c +++ b/drivers/video/backlight/atmel-pwm-bl.c @@ -113,7 +113,7 @@ static int atmel_pwm_bl_init_pwm(struct atmel_pwm_bl *pwmbl) return pwm_channel_enable(&pwmbl->pwmc); } -static struct backlight_ops atmel_pwm_bl_ops = { +static const struct backlight_ops atmel_pwm_bl_ops = { .get_brightness = atmel_pwm_bl_get_intensity, .update_status = atmel_pwm_bl_set_intensity, }; diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 6615ac7fa60a..18829cf68b1b 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -269,7 +269,7 @@ EXPORT_SYMBOL(backlight_force_update); * ERR_PTR() or a pointer to the newly allocated device. */ struct backlight_device *backlight_device_register(const char *name, - struct device *parent, void *devdata, struct backlight_ops *ops) + struct device *parent, void *devdata, const struct backlight_ops *ops) { struct backlight_device *new_bd; int rc; diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c index 96774949cd30..b4bcf8043797 100644 --- a/drivers/video/backlight/corgi_lcd.c +++ b/drivers/video/backlight/corgi_lcd.c @@ -451,7 +451,7 @@ void corgi_lcd_limit_intensity(int limit) } EXPORT_SYMBOL(corgi_lcd_limit_intensity); -static struct backlight_ops corgi_bl_ops = { +static const struct backlight_ops corgi_bl_ops = { .get_brightness = corgi_bl_get_intensity, .update_status = corgi_bl_update_status, }; diff --git a/drivers/video/backlight/cr_bllcd.c b/drivers/video/backlight/cr_bllcd.c index b9fe62b475c6..2914bf104adf 100644 --- a/drivers/video/backlight/cr_bllcd.c +++ b/drivers/video/backlight/cr_bllcd.c @@ -108,7 +108,7 @@ static int cr_backlight_get_intensity(struct backlight_device *bd) return intensity; } -static struct backlight_ops cr_backlight_ops = { +static const struct backlight_ops cr_backlight_ops = { .get_brightness = cr_backlight_get_intensity, .update_status = cr_backlight_set_intensity, }; diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c index f2d76dae1eb3..74cdc640173d 100644 --- a/drivers/video/backlight/da903x_bl.c +++ b/drivers/video/backlight/da903x_bl.c @@ -95,7 +95,7 @@ static int da903x_backlight_get_brightness(struct backlight_device *bl) return data->current_brightness; } -static struct backlight_ops da903x_backlight_ops = { +static const struct backlight_ops da903x_backlight_ops = { .update_status = da903x_backlight_update_status, .get_brightness = da903x_backlight_get_brightness, }; diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c index 6d27f62fdcd0..e6d348e63596 100644 
--- a/drivers/video/backlight/generic_bl.c +++ b/drivers/video/backlight/generic_bl.c @@ -70,7 +70,7 @@ void corgibl_limit_intensity(int limit) } EXPORT_SYMBOL(corgibl_limit_intensity); -static struct backlight_ops genericbl_ops = { +static const struct backlight_ops genericbl_ops = { .options = BL_CORE_SUSPENDRESUME, .get_brightness = genericbl_get_intensity, .update_status = genericbl_send_intensity, diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c index 7fb4eefff80d..f7cc528d5be7 100644 --- a/drivers/video/backlight/hp680_bl.c +++ b/drivers/video/backlight/hp680_bl.c @@ -98,7 +98,7 @@ static int hp680bl_get_intensity(struct backlight_device *bd) return current_intensity; } -static struct backlight_ops hp680bl_ops = { +static const struct backlight_ops hp680bl_ops = { .get_brightness = hp680bl_get_intensity, .update_status = hp680bl_set_intensity, }; diff --git a/drivers/video/backlight/jornada720_bl.c b/drivers/video/backlight/jornada720_bl.c index 7aed2565c1bd..db9071fc5665 100644 --- a/drivers/video/backlight/jornada720_bl.c +++ b/drivers/video/backlight/jornada720_bl.c @@ -93,7 +93,7 @@ out: return ret; } -static struct backlight_ops jornada_bl_ops = { +static const struct backlight_ops jornada_bl_ops = { .get_brightness = jornada_bl_get_brightness, .update_status = jornada_bl_update_status, .options = BL_CORE_SUSPENDRESUME, diff --git a/drivers/video/backlight/kb3886_bl.c b/drivers/video/backlight/kb3886_bl.c index a38fda1742dd..939e7b830cf3 100644 --- a/drivers/video/backlight/kb3886_bl.c +++ b/drivers/video/backlight/kb3886_bl.c @@ -134,7 +134,7 @@ static int kb3886bl_get_intensity(struct backlight_device *bd) return kb3886bl_intensity; } -static struct backlight_ops kb3886bl_ops = { +static const struct backlight_ops kb3886bl_ops = { .get_brightness = kb3886bl_get_intensity, .update_status = kb3886bl_send_intensity, }; diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c index 6b488b8a7eee..00a9591b0003 100644 --- a/drivers/video/backlight/locomolcd.c +++ b/drivers/video/backlight/locomolcd.c @@ -141,7 +141,7 @@ static int locomolcd_get_intensity(struct backlight_device *bd) return current_intensity; } -static struct backlight_ops locomobl_data = { +static const struct backlight_ops locomobl_data = { .get_brightness = locomolcd_get_intensity, .update_status = locomolcd_set_intensity, }; diff --git a/drivers/video/backlight/mbp_nvidia_bl.c b/drivers/video/backlight/mbp_nvidia_bl.c index 9edb8d7c295f..581246894733 100644 --- a/drivers/video/backlight/mbp_nvidia_bl.c +++ b/drivers/video/backlight/mbp_nvidia_bl.c @@ -33,7 +33,7 @@ struct dmi_match_data { unsigned long iostart; unsigned long iolen; /* Backlight operations structure. */ - struct backlight_ops backlight_ops; + const struct backlight_ops backlight_ops; }; /* Module parameters. 
*/ diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c index 8693e5fcd2eb..409ca9643528 100644 --- a/drivers/video/backlight/omap1_bl.c +++ b/drivers/video/backlight/omap1_bl.c @@ -125,7 +125,7 @@ static int omapbl_get_intensity(struct backlight_device *dev) return bl->current_intensity; } -static struct backlight_ops omapbl_ops = { +static const struct backlight_ops omapbl_ops = { .get_brightness = omapbl_get_intensity, .update_status = omapbl_update_status, }; diff --git a/drivers/video/backlight/progear_bl.c b/drivers/video/backlight/progear_bl.c index 9edaf24fd82d..075786e05034 100644 --- a/drivers/video/backlight/progear_bl.c +++ b/drivers/video/backlight/progear_bl.c @@ -54,7 +54,7 @@ static int progearbl_get_intensity(struct backlight_device *bd) return intensity - HW_LEVEL_MIN; } -static struct backlight_ops progearbl_ops = { +static const struct backlight_ops progearbl_ops = { .get_brightness = progearbl_get_intensity, .update_status = progearbl_set_intensity, }; diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 887166267443..df9e0b32cf39 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -56,7 +56,7 @@ static int pwm_backlight_get_brightness(struct backlight_device *bl) return bl->props.brightness; } -static struct backlight_ops pwm_backlight_ops = { +static const struct backlight_ops pwm_backlight_ops = { .update_status = pwm_backlight_update_status, .get_brightness = pwm_backlight_get_brightness, }; diff --git a/drivers/video/backlight/tosa_bl.c b/drivers/video/backlight/tosa_bl.c index 43edbada12d1..e14ce4d469f5 100644 --- a/drivers/video/backlight/tosa_bl.c +++ b/drivers/video/backlight/tosa_bl.c @@ -72,7 +72,7 @@ static int tosa_bl_get_brightness(struct backlight_device *dev) return props->brightness; } -static struct backlight_ops bl_ops = { +static const struct backlight_ops bl_ops = { .get_brightness = tosa_bl_get_brightness, .update_status = tosa_bl_update_status, }; diff --git a/drivers/video/backlight/wm831x_bl.c b/drivers/video/backlight/wm831x_bl.c index 467bdb7efb23..e32add37a203 100644 --- a/drivers/video/backlight/wm831x_bl.c +++ b/drivers/video/backlight/wm831x_bl.c @@ -112,7 +112,7 @@ static int wm831x_backlight_get_brightness(struct backlight_device *bl) return data->current_brightness; } -static struct backlight_ops wm831x_backlight_ops = { +static const struct backlight_ops wm831x_backlight_ops = { .options = BL_CORE_SUSPENDRESUME, .update_status = wm831x_backlight_update_status, .get_brightness = wm831x_backlight_get_brightness, diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 0f5f57858a23..8c4f884db6b4 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -36,18 +36,18 @@ struct backlight_device; struct fb_info; struct backlight_ops { - unsigned int options; + const unsigned int options; #define BL_CORE_SUSPENDRESUME (1 << 0) /* Notify the backlight driver some property has changed */ - int (*update_status)(struct backlight_device *); + int (* const update_status)(struct backlight_device *); /* Return the current backlight brightness (accounting for power, fb_blank etc.) */ - int (*get_brightness)(struct backlight_device *); + int (* const get_brightness)(struct backlight_device *); /* Check if given framebuffer device is the one bound to this backlight; return 0 if not, !=0 if it is. If NULL, backlight always matches the fb. 
*/ - int (*check_fb)(struct fb_info *); + int (* const check_fb)(struct fb_info *); }; /* This structure defines all the properties of a backlight */ @@ -86,7 +86,7 @@ struct backlight_device { registered this device has been unloaded, and if class_get_devdata() points to something in the body of that driver, it is also invalid. */ struct mutex ops_lock; - struct backlight_ops *ops; + const struct backlight_ops *ops; /* The framebuffer notifier block */ struct notifier_block fb_notif; @@ -103,7 +103,7 @@ static inline void backlight_update_status(struct backlight_device *bd) } extern struct backlight_device *backlight_device_register(const char *name, - struct device *dev, void *devdata, struct backlight_ops *ops); + struct device *dev, void *devdata, const struct backlight_ops *ops); extern void backlight_device_unregister(struct backlight_device *bd); extern void backlight_force_update(struct backlight_device *bd, enum backlight_update_reason reason); -- cgit v1.2.3 From 3d1e463158febf6e047897597722f768b15350cd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Aug 2009 23:56:29 +0400 Subject: get rid of init_file() Signed-off-by: Al Viro --- fs/file_table.c | 30 ++---------------------------- include/linux/file.h | 3 --- 2 files changed, 2 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/fs/file_table.c b/fs/file_table.c index f906ac8c9a9f..602a9ee3023a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -171,32 +171,6 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, if (!file) return NULL; - init_file(file, mnt, dentry, mode, fop); - return file; -} -EXPORT_SYMBOL(alloc_file); - -/** - * init_file - initialize a 'struct file' - * @file: the already allocated 'struct file' to initialized - * @mnt: the vfsmount on which the file resides - * @dentry: the dentry representing this file - * @mode: the mode the file is opened with - * @fop: the 'struct file_operations' for this file - * - * Use this instead of setting the members directly. Doing so - * avoids making mistakes like forgetting the mntget() or - * forgetting to take a write on the mnt. - * - * Note: This is a crappy interface. It is here to make - * merging with the existing users of get_empty_filp() - * who have complex failure logic easier. All users - * of this should be moving to alloc_file(). 
- */ -int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, - fmode_t mode, const struct file_operations *fop) -{ - int error = 0; file->f_path.dentry = dentry; file->f_path.mnt = mntget(mnt); file->f_mapping = dentry->d_inode->i_mapping; @@ -210,13 +184,13 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, * that we can do debugging checks at __fput() */ if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { + int error = 0; file_take_write(file); error = mnt_clone_write(mnt); WARN_ON(error); } - return error; + return file; } -EXPORT_SYMBOL(init_file); void fput(struct file *file) { diff --git a/include/linux/file.h b/include/linux/file.h index 335a0a5c316e..6a8d3612eb2a 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -18,9 +18,6 @@ extern void drop_file_write_access(struct file *file); struct file_operations; struct vfsmount; struct dentry; -extern int init_file(struct file *, struct vfsmount *mnt, - struct dentry *dentry, fmode_t mode, - const struct file_operations *fop); extern struct file *alloc_file(struct vfsmount *, struct dentry *dentry, fmode_t mode, const struct file_operations *fop); -- cgit v1.2.3 From 2c48b9c45579a9b5e3e74694eebf3d2451f3dbd3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Aug 2009 00:52:35 +0400 Subject: switch alloc_file() to passing struct path ... and have the caller grab both mnt and dentry; kill leak in infiniband, while we are at it. Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 15 ++++++++------- drivers/infiniband/core/uverbs_main.c | 9 +++++++-- fs/anon_inodes.c | 18 +++++++++--------- fs/file_table.c | 13 ++++++------- fs/hugetlbfs/inode.c | 15 ++++++++------- fs/notify/inotify/inotify_user.c | 8 ++++++-- fs/pipe.c | 17 +++++++++-------- include/linux/file.h | 5 +++-- ipc/shm.c | 10 +++++----- mm/shmem.c | 14 ++++++++------ net/socket.c | 17 +++++++++-------- 11 files changed, 78 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 599b233bef75..5246285a95fb 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2200,7 +2200,7 @@ pfm_alloc_file(pfm_context_t *ctx) { struct file *file; struct inode *inode; - struct dentry *dentry; + struct path path; char name[32]; struct qstr this; @@ -2225,18 +2225,19 @@ pfm_alloc_file(pfm_context_t *ctx) /* * allocate a new dcache entry */ - dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); - if (!dentry) { + path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); + if (!path.dentry) { iput(inode); return ERR_PTR(-ENOMEM); } + path.mnt = mntget(pfmfs_mnt); - dentry->d_op = &pfmfs_dentry_operations; - d_add(dentry, inode); + path.dentry->d_op = &pfmfs_dentry_operations; + d_add(path.dentry, inode); - file = alloc_file(pfmfs_mnt, dentry, FMODE_READ, &pfm_file_ops); + file = alloc_file(&path, FMODE_READ, &pfm_file_ops); if (!file) { - dput(dentry); + path_put(&path); return ERR_PTR(-ENFILE); } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index aec0fbdfe7f0..5f284ffd430e 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -492,6 +492,7 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, int is_async, int *fd) { struct ib_uverbs_event_file *ev_file; + struct path path; struct file *filp; int ret; @@ -519,8 +520,10 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, * system 
call on a uverbs file, which will already have a * module reference. */ - filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root), - FMODE_READ, fops_get(&uverbs_event_fops)); + path.mnt = uverbs_event_mnt; + path.dentry = uverbs_event_mnt->mnt_root; + path_get(&path); + filp = alloc_file(&path, FMODE_READ, fops_get(&uverbs_event_fops)); if (!filp) { ret = -ENFILE; goto err_fd; @@ -531,6 +534,8 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, return filp; err_fd: + fops_put(&uverbs_event_fops); + path_put(&path); put_unused_fd(*fd); err: diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 2ca7a7cafdbf..94f5110c4655 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -88,7 +88,7 @@ struct file *anon_inode_getfile(const char *name, void *priv, int flags) { struct qstr this; - struct dentry *dentry; + struct path path; struct file *file; int error; @@ -106,10 +106,11 @@ struct file *anon_inode_getfile(const char *name, this.name = name; this.len = strlen(name); this.hash = 0; - dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); - if (!dentry) + path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); + if (!path.dentry) goto err_module; + path.mnt = mntget(anon_inode_mnt); /* * We know the anon_inode inode count is always greater than zero, * so we can avoid doing an igrab() and we can use an open-coded @@ -117,14 +118,13 @@ struct file *anon_inode_getfile(const char *name, */ atomic_inc(&anon_inode_inode->i_count); - dentry->d_op = &anon_inodefs_dentry_operations; + path.dentry->d_op = &anon_inodefs_dentry_operations; /* Do not publish this dentry inside the global dentry hash table */ - dentry->d_flags &= ~DCACHE_UNHASHED; - d_instantiate(dentry, anon_inode_inode); + path.dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; - file = alloc_file(anon_inode_mnt, dentry, - FMODE_READ | FMODE_WRITE, fops); + file = alloc_file(&path, FMODE_READ | FMODE_WRITE, fops); if (!file) goto err_dput; file->f_mapping = anon_inode_inode->i_mapping; @@ -137,7 +137,7 @@ struct file *anon_inode_getfile(const char *name, return file; err_dput: - dput(dentry); + path_put(&path); err_module: module_put(fops->owner); return ERR_PTR(error); diff --git a/fs/file_table.c b/fs/file_table.c index 602a9ee3023a..163cd28314e0 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -162,8 +162,8 @@ fail: * If all the callers of init_file() are eliminated, its * code should be moved into this function. */ -struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, - fmode_t mode, const struct file_operations *fop) +struct file *alloc_file(struct path *path, fmode_t mode, + const struct file_operations *fop) { struct file *file; @@ -171,9 +171,8 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, if (!file) return NULL; - file->f_path.dentry = dentry; - file->f_path.mnt = mntget(mnt); - file->f_mapping = dentry->d_inode->i_mapping; + file->f_path = *path; + file->f_mapping = path->dentry->d_inode->i_mapping; file->f_mode = mode; file->f_op = fop; @@ -183,10 +182,10 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, * visible. 
We do this for consistency, and so * that we can do debugging checks at __fput() */ - if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { + if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { int error = 0; file_take_write(file); - error = mnt_clone_write(mnt); + error = mnt_clone_write(path->mnt); WARN_ON(error); } return file; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 87a1258953b8..6bd41525cd71 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -922,7 +922,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, int error = -ENOMEM; struct file *file; struct inode *inode; - struct dentry *dentry, *root; + struct path path; + struct dentry *root; struct qstr quick_string; *user = NULL; @@ -944,10 +945,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, quick_string.name = name; quick_string.len = strlen(quick_string.name); quick_string.hash = 0; - dentry = d_alloc(root, &quick_string); - if (!dentry) + path.dentry = d_alloc(root, &quick_string); + if (!path.dentry) goto out_shm_unlock; + path.mnt = mntget(hugetlbfs_vfsmount); error = -ENOSPC; inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(), current_fsgid(), S_IFREG | S_IRWXUGO, 0); @@ -960,13 +962,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, acctflag)) goto out_inode; - d_instantiate(dentry, inode); + d_instantiate(path.dentry, inode); inode->i_size = size; inode->i_nlink = 0; error = -ENFILE; - file = alloc_file(hugetlbfs_vfsmount, dentry, - FMODE_WRITE | FMODE_READ, + file = alloc_file(&path, FMODE_WRITE | FMODE_READ, &hugetlbfs_file_operations); if (!file) goto out_dentry; /* inode is already attached */ @@ -977,7 +978,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, out_inode: iput(inode); out_dentry: - dput(dentry); + path_put(&path); out_shm_unlock: if (*user) { user_shm_unlock(size, *user); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 9e4f90042eaf..8271cf05c957 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -646,6 +646,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) struct fsnotify_group *group; struct user_struct *user; struct file *filp; + struct path path; int fd, ret; /* Check the IN_* constants for consistency. 
*/ @@ -675,8 +676,10 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) atomic_inc(&user->inotify_devs); - filp = alloc_file(inotify_mnt, dget(inotify_mnt->mnt_root), - FMODE_READ, &inotify_fops); + path.mnt = inotify_mnt; + path.dentry = inotify_mnt->mnt_root; + path_get(&path); + filp = alloc_file(&path, FMODE_READ, &inotify_fops); if (!filp) goto Enfile; @@ -689,6 +692,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) Enfile: ret = -ENFILE; + path_put(&path); atomic_dec(&user->inotify_devs); out_free_uid: free_uid(user); diff --git a/fs/pipe.c b/fs/pipe.c index ae17d026aaa3..81288bc2bcbb 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -974,7 +974,7 @@ struct file *create_write_pipe(int flags) int err; struct inode *inode; struct file *f; - struct dentry *dentry; + struct path path; struct qstr name = { .name = "" }; err = -ENFILE; @@ -983,21 +983,22 @@ struct file *create_write_pipe(int flags) goto err; err = -ENOMEM; - dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); - if (!dentry) + path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); + if (!path.dentry) goto err_inode; + path.mnt = mntget(pipe_mnt); - dentry->d_op = &pipefs_dentry_operations; + path.dentry->d_op = &pipefs_dentry_operations; /* * We dont want to publish this dentry into global dentry hash table. * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED * This permits a working /proc/$pid/fd/XXX on pipes */ - dentry->d_flags &= ~DCACHE_UNHASHED; - d_instantiate(dentry, inode); + path.dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(path.dentry, inode); err = -ENFILE; - f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops); + f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); if (!f) goto err_dentry; f->f_mapping = inode->i_mapping; @@ -1009,7 +1010,7 @@ struct file *create_write_pipe(int flags) err_dentry: free_pipe_info(inode); - dput(dentry); + path_put(&path); return ERR_PTR(err); err_inode: diff --git a/include/linux/file.h b/include/linux/file.h index 6a8d3612eb2a..5555508fd517 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -18,8 +18,9 @@ extern void drop_file_write_access(struct file *file); struct file_operations; struct vfsmount; struct dentry; -extern struct file *alloc_file(struct vfsmount *, struct dentry *dentry, - fmode_t mode, const struct file_operations *fop); +struct path; +extern struct file *alloc_file(struct path *, fmode_t mode, + const struct file_operations *fop); static inline void fput_light(struct file *file, int fput_needed) { diff --git a/ipc/shm.c b/ipc/shm.c index 11bec626c228..16e39230aa0d 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -878,8 +878,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) if (err) goto out_unlock; - path.dentry = dget(shp->shm_file->f_path.dentry); - path.mnt = shp->shm_file->f_path.mnt; + path = shp->shm_file->f_path; + path_get(&path); shp->shm_nattch++; size = i_size_read(path.dentry->d_inode); shm_unlock(shp); @@ -889,8 +889,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) if (!sfd) goto out_put_dentry; - file = alloc_file(path.mnt, path.dentry, f_mode, - is_file_hugepages(shp->shm_file) ? + file = alloc_file(&path, f_mode, + is_file_hugepages(shp->shm_file) ? 
&shm_file_operations_huge : &shm_file_operations); if (!file) @@ -950,7 +950,7 @@ out_unlock: out_free: kfree(sfd); out_put_dentry: - dput(path.dentry); + path_put(&path); goto out_nattch; } diff --git a/mm/shmem.c b/mm/shmem.c index ef8f47473c5a..d2ec7f029ff4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2626,7 +2626,8 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags int error; struct file *file; struct inode *inode; - struct dentry *dentry, *root; + struct path path; + struct dentry *root; struct qstr this; if (IS_ERR(shm_mnt)) @@ -2643,16 +2644,17 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags this.len = strlen(name); this.hash = 0; /* will go */ root = shm_mnt->mnt_root; - dentry = d_alloc(root, &this); - if (!dentry) + path.dentry = d_alloc(root, &this); + if (!path.dentry) goto put_memory; + path.mnt = mntget(shm_mnt); error = -ENOSPC; inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags); if (!inode) goto put_dentry; - d_instantiate(dentry, inode); + d_instantiate(path.dentry, inode); inode->i_size = size; inode->i_nlink = 0; /* It is unlinked */ #ifndef CONFIG_MMU @@ -2662,7 +2664,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags #endif error = -ENFILE; - file = alloc_file(shm_mnt, dentry, FMODE_WRITE | FMODE_READ, + file = alloc_file(&path, FMODE_WRITE | FMODE_READ, &shmem_file_operations); if (!file) goto put_dentry; @@ -2671,7 +2673,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags return file; put_dentry: - dput(dentry); + path_put(&path); put_memory: shmem_unacct_size(flags, size); return ERR_PTR(error); diff --git a/net/socket.c b/net/socket.c index eaaba3510e81..dbfdfa96d29b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -358,7 +358,7 @@ static const struct dentry_operations sockfs_dentry_operations = { static int sock_alloc_file(struct socket *sock, struct file **f, int flags) { struct qstr name = { .name = "" }; - struct dentry *dentry; + struct path path; struct file *file; int fd; @@ -366,28 +366,29 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); - if (unlikely(!dentry)) { + path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + if (unlikely(!path.dentry)) { put_unused_fd(fd); return -ENOMEM; } + path.mnt = mntget(sock_mnt); - dentry->d_op = &sockfs_dentry_operations; + path.dentry->d_op = &sockfs_dentry_operations; /* * We dont want to push this dentry into global dentry hash table. 
* We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED * This permits a working /proc/$pid/fd/XXX on sockets */ - dentry->d_flags &= ~DCACHE_UNHASHED; - d_instantiate(dentry, SOCK_INODE(sock)); + path.dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; - file = alloc_file(sock_mnt, dentry, FMODE_READ | FMODE_WRITE, + file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); if (unlikely(!file)) { /* drop dentry, keep inode */ atomic_inc(&path.dentry->d_inode->i_count); - dput(dentry); + path_put(&path); put_unused_fd(fd); return -ENFILE; } -- cgit v1.2.3 From e9496ff46a20a8592fdc7bdaaf41b45eb808d310 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Aug 2009 18:44:32 +0400 Subject: fix mismerge with Trond's stuff (create_mnt_ns() export is gone now) Signed-off-by: Al Viro --- fs/namespace.c | 3 +-- fs/nfs/super.c | 8 -------- include/linux/mnt_namespace.h | 1 - 3 files changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index 7d70d63ceb29..faab1273281e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2068,7 +2068,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, * create_mnt_ns - creates a private namespace and adds a root filesystem * @mnt: pointer to the new root filesystem mountpoint */ -struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) +static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) { struct mnt_namespace *new_ns; @@ -2080,7 +2080,6 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) } return new_ns; } -EXPORT_SYMBOL(create_mnt_ns); SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char __user *, type, unsigned long, flags, void __user *, data) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ce907efc5508..d5b112bcf3de 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2648,21 +2648,13 @@ out_freepage: static int nfs_follow_remote_path(struct vfsmount *root_mnt, const char *export_path, struct vfsmount *mnt_target) { - struct mnt_namespace *ns_private; struct nameidata nd; struct super_block *s; int ret; - ns_private = create_mnt_ns(root_mnt); - ret = PTR_ERR(ns_private); - if (IS_ERR(ns_private)) - goto out_mntput; - ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, export_path, LOOKUP_FOLLOW, &nd); - put_mnt_ns(ns_private); - if (ret != 0) goto out_err; diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index d74785c2393a..d9ebf1037dfa 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -23,7 +23,6 @@ struct proc_mounts { struct fs_struct; -extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt); extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); -- cgit v1.2.3 From e81e3f4dca6c54116a24aec217d2c15c6f58ada5 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 4 Dec 2009 15:47:36 -0500 Subject: fs: move get_empty_filp() deffinition to internal.h All users outside of fs/ of get_empty_filp() have been removed. This patch moves the definition from the include/ directory to internal.h so no new users crop up and removes the EXPORT_SYMBOL. I'd love to see open intents stop using it too, but that's a problem for another day and a smarter developer! 
Signed-off-by: Eric Paris Acked-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/file_table.c | 2 ++ fs/internal.h | 1 + fs/namei.c | 2 ++ fs/open.c | 2 ++ include/linux/fs.h | 1 - 5 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/file_table.c b/fs/file_table.c index 163cd28314e0..361d76be8295 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -24,6 +24,8 @@ #include +#include "internal.h" + /* sysctl tunables... */ struct files_stat_struct files_stat = { .max_files = NR_FILE diff --git a/fs/internal.h b/fs/internal.h index 515175b8b72e..f67cd141d9a8 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -79,6 +79,7 @@ extern void chroot_fs_refs(struct path *, struct path *); * file_table.c */ extern void mark_files_ro(struct super_block *); +extern struct file *get_empty_filp(void); /* * super.c diff --git a/fs/namei.c b/fs/namei.c index 8c8b379b94a4..1fc038b117be 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -35,6 +35,8 @@ #include #include +#include "internal.h" + #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) /* [Feb-1997 T. Schoebel-Theuer] diff --git a/fs/open.c b/fs/open.c index b4b31d277f3a..d95651e8be9e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -31,6 +31,8 @@ #include #include +#include "internal.h" + int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { int retval = -ENODEV; diff --git a/include/linux/fs.h b/include/linux/fs.h index a057f48eb156..cdc23be4edde 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2189,7 +2189,6 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); extern void file_kill(struct file *f); #ifdef CONFIG_BLOCK -- cgit v1.2.3 From 1429b3eca23818f87f9fa569a15d9816de81f698 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Dec 2009 06:38:01 -0500 Subject: Untangling ima mess, part 3: kill dead code in ima Kill the 'update' argument of ima_path_check(), kill dead code in ima. Current rules: ima counters are bumped at the same time when the file switches from put_filp() fodder to fput() one. Which happens exactly in two places - alloc_file() and __dentry_open(). Nothing else needs to do that at all. 
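Under the new prototype a caller simply passes the path and the access mask; there is no IMA_COUNT_* flag left to get wrong, because the counters are bumped only where the file actually changes hands. A minimal sketch of a caller (the helper name is made up; the call itself mirrors the do_filp_open() and nfsd_permission() hunks below):

static int measure_opened_file(struct file *filp)
{
	return ima_path_check(&filp->f_path,
			      filp->f_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
}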
Signed-off-by: Al Viro --- fs/namei.c | 4 +-- fs/nfsd/vfs.c | 3 +-- include/linux/ima.h | 12 ++------- security/integrity/ima/ima_main.c | 52 +++------------------------------------ 4 files changed, 9 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index c530e5d32f12..a765e7a741f4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1686,7 +1686,7 @@ do_last: path_put(&nd.root); if (!IS_ERR(filp)) { error = ima_path_check(&filp->f_path, filp->f_mode & - (MAY_READ | MAY_WRITE | MAY_EXEC), 0); + (MAY_READ | MAY_WRITE | MAY_EXEC)); if (error) { fput(filp); filp = ERR_PTR(error); @@ -1747,7 +1747,7 @@ ok: filp = nameidata_to_filp(&nd, open_flag); if (!IS_ERR(filp)) { error = ima_path_check(&filp->f_path, filp->f_mode & - (MAY_READ | MAY_WRITE | MAY_EXEC), 0); + (MAY_READ | MAY_WRITE | MAY_EXEC)); if (error) { fput(filp); filp = ERR_PTR(error); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c9942b39654e..936f08400db6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2122,8 +2122,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, */ path.mnt = exp->ex_path.mnt; path.dentry = dentry; - err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC), - IMA_COUNT_LEAVE); + err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC)); nfsd_out: return err? nfserrno(err) : 0; } diff --git a/include/linux/ima.h b/include/linux/ima.h index 0e3f2a4c25f6..99dc6d5cf7e5 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -13,18 +13,14 @@ #include struct linux_binprm; -#define IMA_COUNT_UPDATE 1 -#define IMA_COUNT_LEAVE 0 - #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct path *path, int mask, int update_counts); +extern int ima_path_check(struct path *path, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern void ima_counts_get(struct file *file); -extern void ima_counts_put(struct path *path, int mask); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -42,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct path *path, int mask, int update_counts) +static inline int ima_path_check(struct path *path, int mask) { return 0; } @@ -62,9 +58,5 @@ static inline void ima_counts_get(struct file *file) return; } -static inline void ima_counts_put(struct path *path, int mask) -{ - return; -} #endif /* CONFIG_IMA_H */ #endif /* _LINUX_IMA_H */ diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index e041233b4d2a..16dc57d247d0 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -49,20 +49,13 @@ static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode) iint->writecount++; } -/* - * Update the counts given open flags instead of fmode - */ -static void ima_inc_counts_flags(struct ima_iint_cache *iint, int flags) -{ - ima_inc_counts(iint, (__force fmode_t)((flags+1) & O_ACCMODE)); -} - /* * Decrement ima counts */ static void ima_dec_counts(struct ima_iint_cache *iint, struct inode *inode, - fmode_t mode) + struct file *file) { + mode_t mode = file->f_mode; BUG_ON(!mutex_is_locked(&iint->mutex)); iint->opencount--; @@ -92,12 +85,6 @@ static void ima_dec_counts(struct ima_iint_cache *iint, struct inode *inode, } } -static void ima_dec_counts_flags(struct 
ima_iint_cache *iint, - struct inode *inode, int flags) -{ - ima_dec_counts(iint, inode, (__force fmode_t)((flags+1) & O_ACCMODE)); -} - /** * ima_file_free - called on __fput() * @file: pointer to file structure being freed @@ -117,7 +104,7 @@ void ima_file_free(struct file *file) return; mutex_lock(&iint->mutex); - ima_dec_counts(iint, inode, file->f_mode); + ima_dec_counts(iint, inode, file); mutex_unlock(&iint->mutex); kref_put(&iint->refcount, iint_free); } @@ -183,7 +170,7 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file, * Always return 0 and audit dentry_open failures. * (Return code will be based upon measurement appraisal.) */ -int ima_path_check(struct path *path, int mask, int update_counts) +int ima_path_check(struct path *path, int mask) { struct inode *inode = path->dentry->d_inode; struct ima_iint_cache *iint; @@ -197,8 +184,6 @@ int ima_path_check(struct path *path, int mask, int update_counts) return 0; mutex_lock(&iint->mutex); - if (update_counts) - ima_inc_counts_flags(iint, mask); rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); if (rc < 0) @@ -268,35 +253,6 @@ out: return rc; } -/* - * ima_counts_put - decrement file counts - * - * File counts are incremented in ima_path_check. On file open - * error, such as ETXTBSY, decrement the counts to prevent - * unnecessary imbalance messages. - */ -void ima_counts_put(struct path *path, int mask) -{ - struct inode *inode = path->dentry->d_inode; - struct ima_iint_cache *iint; - - /* The inode may already have been freed, freeing the iint - * with it. Verify the inode is not NULL before dereferencing - * it. - */ - if (!ima_initialized || !inode || !S_ISREG(inode->i_mode)) - return; - iint = ima_iint_find_get(inode); - if (!iint) - return; - - mutex_lock(&iint->mutex); - ima_dec_counts_flags(iint, inode, mask); - mutex_unlock(&iint->mutex); - - kref_put(&iint->refcount, iint_free); -} - /* * ima_counts_get - increment file counts * -- cgit v1.2.3 From 431547b3c4533b8c7fd150ab36980b9a3147797b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Nov 2009 09:52:56 +0000 Subject: sanitize xattr handler prototypes Add a flags argument to struct xattr_handler and pass it to all xattr handler methods. This allows using the same methods for multiple handlers, e.g. for the ACL methods which perform exactly the same action for the access and default ACLs, just using a different underlying attribute. With a little more groundwork it'll also allow sharing the methods for the regular user/trusted/secure handlers in extN, ocfs2 and jffs2 like it's already done for xfs in this patch. Also change the inode argument to the handlers to a dentry to allow using the handlers mechnism for filesystems that require it later, e.g. cifs. 
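The new handler shape is easiest to see next to a conversion; the btrfs hunks below are the first one, and the sketch here condenses the same pattern (all examplefs_* names are placeholders): a single get/set pair serves both ACL attributes, the handler's ->flags field carries the ACL type into the methods, and the methods now receive a dentry instead of an inode.

static int examplefs_xattr_acl_get(struct dentry *dentry, const char *name,
				   void *value, size_t size, int type)
{
	/* type is ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT, taken from ->flags */
	return examplefs_acl_to_xattr(dentry->d_inode, type, value, size);
}

static int examplefs_xattr_acl_set(struct dentry *dentry, const char *name,
				   const void *value, size_t size,
				   int flags, int type)
{
	return examplefs_acl_from_xattr(dentry->d_inode, type, value, size);
}

struct xattr_handler examplefs_xattr_acl_access_handler = {
	.prefix	= POSIX_ACL_XATTR_ACCESS,
	.flags	= ACL_TYPE_ACCESS,
	.get	= examplefs_xattr_acl_get,
	.set	= examplefs_xattr_acl_set,
};

struct xattr_handler examplefs_xattr_acl_default_handler = {
	.prefix	= POSIX_ACL_XATTR_DEFAULT,
	.flags	= ACL_TYPE_DEFAULT,
	.get	= examplefs_xattr_acl_get,
	.set	= examplefs_xattr_acl_set,
};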
[with GFS2 bits updated by Steven Whitehouse ] Signed-off-by: Christoph Hellwig Reviewed-by: James Morris Acked-by: Joel Becker Signed-off-by: Al Viro --- fs/btrfs/acl.c | 47 ++++++------------------ fs/ext2/acl.c | 79 +++++++++++++--------------------------- fs/ext2/xattr.c | 11 ++++-- fs/ext2/xattr_security.c | 16 ++++---- fs/ext2/xattr_trusted.c | 16 ++++---- fs/ext2/xattr_user.c | 25 +++++++------ fs/ext3/acl.c | 74 ++++++++++++------------------------- fs/ext3/xattr.c | 31 +++++++++------- fs/ext3/xattr_security.c | 20 +++++----- fs/ext3/xattr_trusted.c | 18 ++++----- fs/ext3/xattr_user.c | 25 +++++++------ fs/ext4/acl.c | 74 ++++++++++++------------------------- fs/ext4/xattr.c | 31 +++++++++------- fs/ext4/xattr_security.c | 20 +++++----- fs/ext4/xattr_trusted.c | 20 +++++----- fs/ext4/xattr_user.c | 25 +++++++------ fs/gfs2/acl.c | 16 +++++--- fs/gfs2/inode.c | 3 +- fs/gfs2/xattr.c | 69 +++++++++++++---------------------- fs/gfs2/xattr.h | 7 ++-- fs/jffs2/acl.c | 65 +++++++++++---------------------- fs/jffs2/security.c | 18 +++++---- fs/jffs2/xattr.c | 6 ++- fs/jffs2/xattr_trusted.c | 18 +++++---- fs/jffs2/xattr_user.c | 18 +++++---- fs/ocfs2/acl.c | 87 +++++++++++++------------------------------- fs/ocfs2/xattr.c | 72 ++++++++++++++++++------------------ fs/reiserfs/xattr.c | 36 +++++++++--------- fs/reiserfs/xattr_acl.c | 69 ++++++++++------------------------- fs/reiserfs/xattr_security.c | 21 ++++++----- fs/reiserfs/xattr_trusted.c | 21 ++++++----- fs/reiserfs/xattr_user.c | 21 ++++++----- fs/xattr.c | 28 +++++++------- fs/xfs/linux-2.6/xfs_acl.c | 57 ++++++++++------------------- fs/xfs/linux-2.6/xfs_xattr.c | 71 ++++++++---------------------------- fs/xfs/xfs_acl.h | 3 +- include/linux/xattr.h | 13 ++++--- mm/shmem.c | 19 +++++----- mm/shmem_acl.c | 78 ++++++++++----------------------------- 39 files changed, 533 insertions(+), 815 deletions(-) (limited to 'include') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 361604244271..52cbe47022bf 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -73,13 +73,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) return acl; } -static int btrfs_xattr_get_acl(struct inode *inode, int type, - void *value, size_t size) +static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name, + void *value, size_t size, int type) { struct posix_acl *acl; int ret = 0; - acl = btrfs_get_acl(inode, type); + acl = btrfs_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); @@ -151,8 +151,8 @@ out: return ret; } -static int btrfs_xattr_set_acl(struct inode *inode, int type, - const void *value, size_t size) +static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { int ret = 0; struct posix_acl *acl = NULL; @@ -167,38 +167,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type, } } - ret = btrfs_set_acl(inode, acl, type); + ret = btrfs_set_acl(dentry->d_inode, acl, type); posix_acl_release(acl); return ret; } - -static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name, - void *value, size_t size) -{ - return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size); -} - -static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); -} - -static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name, - void *value, size_t size) -{ - return 
btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size); -} - -static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); -} - int btrfs_check_acl(struct inode *inode, int mask) { struct posix_acl *acl; @@ -303,14 +278,16 @@ int btrfs_acl_chmod(struct inode *inode) struct xattr_handler btrfs_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, - .get = btrfs_xattr_acl_default_get, - .set = btrfs_xattr_acl_default_set, + .flags = ACL_TYPE_DEFAULT, + .get = btrfs_xattr_acl_get, + .set = btrfs_xattr_acl_set, }; struct xattr_handler btrfs_xattr_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, - .get = btrfs_xattr_acl_access_get, - .set = btrfs_xattr_acl_access_set, + .flags = ACL_TYPE_ACCESS, + .get = btrfs_xattr_acl_get, + .set = btrfs_xattr_acl_set, }; #else /* CONFIG_BTRFS_FS_POSIX_ACL */ diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index a63d44256a70..a99e54318c3d 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -339,12 +339,12 @@ ext2_acl_chmod(struct inode *inode) * Extended attribut handlers */ static size_t -ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext2_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (!test_opt(dentry->d_sb, POSIX_ACL)) return 0; if (list && size <= list_size) memcpy(list, POSIX_ACL_XATTR_ACCESS, size); @@ -352,12 +352,12 @@ ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, } static size_t -ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext2_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (!test_opt(dentry->d_sb, POSIX_ACL)) return 0; if (list && size <= list_size) memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); @@ -365,15 +365,18 @@ ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, } static int -ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, + size_t size, int type) { struct posix_acl *acl; int error; - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(dentry->d_sb, POSIX_ACL)) return -EOPNOTSUPP; - acl = ext2_get_acl(inode, type); + acl = ext2_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -385,33 +388,17 @@ ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) } static int -ext2_xattr_get_acl_access(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); -} - -static int -ext2_xattr_get_acl_default(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); -} - -static int -ext2_xattr_set_acl(struct inode *inode, int type, const void *value, - size_t size) +ext2_xattr_set_acl(struct dentry *dentry, const 
char *name, const void *value, + size_t size, int flags, int type) { struct posix_acl *acl; int error; - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(dentry->d_sb, POSIX_ACL)) return -EOPNOTSUPP; - if (!is_owner_or_cap(inode)) + if (!is_owner_or_cap(dentry->d_inode)) return -EPERM; if (value) { @@ -426,41 +413,25 @@ ext2_xattr_set_acl(struct inode *inode, int type, const void *value, } else acl = NULL; - error = ext2_set_acl(inode, type, acl); + error = ext2_set_acl(dentry->d_inode, type, acl); release_and_out: posix_acl_release(acl); return error; } -static int -ext2_xattr_set_acl_access(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); -} - -static int -ext2_xattr_set_acl_default(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); -} - struct xattr_handler ext2_xattr_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, .list = ext2_xattr_list_acl_access, - .get = ext2_xattr_get_acl_access, - .set = ext2_xattr_set_acl_access, + .get = ext2_xattr_get_acl, + .set = ext2_xattr_set_acl, }; struct xattr_handler ext2_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, .list = ext2_xattr_list_acl_default, - .get = ext2_xattr_get_acl_default, - .set = ext2_xattr_set_acl_default, + .get = ext2_xattr_get_acl, + .set = ext2_xattr_set_acl, }; diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 7913531ec6d5..904f00642f84 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -60,6 +60,7 @@ #include #include #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -249,8 +250,9 @@ cleanup: * used / required on success. 
*/ static int -ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) +ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) { + struct inode *inode = dentry->d_inode; struct buffer_head *bh = NULL; struct ext2_xattr_entry *entry; char *end; @@ -300,9 +302,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", ext2_xattr_handler(entry->e_name_index); if (handler) { - size_t size = handler->list(inode, buffer, rest, + size_t size = handler->list(dentry, buffer, rest, entry->e_name, - entry->e_name_len); + entry->e_name_len, + handler->flags); if (buffer) { if (size > rest) { error = -ERANGE; @@ -330,7 +333,7 @@ cleanup: ssize_t ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) { - return ext2_xattr_list(dentry->d_inode, buffer, size); + return ext2_xattr_list(dentry, buffer, size); } /* diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index 70c0dbdcdcb7..c8155845ac05 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -11,8 +11,8 @@ #include "xattr.h" static size_t -ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const int prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -26,22 +26,22 @@ ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, } static int -ext2_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext2_xattr_security_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, + return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name, buffer, size); } static int -ext2_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext2_xattr_security_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name, + return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name, value, size, flags); } diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index e8219f8eae9f..2a26d71f4771 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -13,8 +13,8 @@ #include "xattr.h" static size_t -ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -31,22 +31,22 @@ ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, } static int -ext2_xattr_trusted_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext2_xattr_trusted_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, + return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name, buffer, size); } static int -ext2_xattr_trusted_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) 
+ext2_xattr_trusted_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, + return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name, value, size, flags); } diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 92495d28c62f..3f6caf3684b4 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -12,13 +12,13 @@ #include "xattr.h" static size_t -ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return 0; if (list && total_len <= list_size) { @@ -30,27 +30,28 @@ ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, } static int -ext2_xattr_user_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext2_xattr_user_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); + return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER, + name, buffer, size); } static int -ext2_xattr_user_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext2_xattr_user_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, - value, size, flags); + return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER, + name, value, size, flags); } struct xattr_handler ext2_xattr_user_handler = { diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index c9b0df376b5f..82ba34158661 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -366,12 +366,12 @@ out: * Extended attribute handlers */ static size_t -ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, - const char *name, size_t name_len) +ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len, + const char *name, size_t name_len, int type) { const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (!test_opt(dentry->d_sb, POSIX_ACL)) return 0; if (list && size <= list_len) memcpy(list, POSIX_ACL_XATTR_ACCESS, size); @@ -379,12 +379,12 @@ ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, } static size_t -ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, - const char *name, size_t name_len) +ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len, + const char *name, size_t name_len, int type) { const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (!test_opt(dentry->d_sb, POSIX_ACL)) return 0; if (list && size <= list_len) memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); @@ -392,15 +392,18 @@ ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t 
list_len, } static int -ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, + size_t size, int type) { struct posix_acl *acl; int error; - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(dentry->d_sb, POSIX_ACL)) return -EOPNOTSUPP; - acl = ext3_get_acl(inode, type); + acl = ext3_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -412,31 +415,16 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) } static int -ext3_xattr_get_acl_access(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); -} - -static int -ext3_xattr_get_acl_default(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); -} - -static int -ext3_xattr_set_acl(struct inode *inode, int type, const void *value, - size_t size) +ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags, int type) { + struct inode *inode = dentry->d_inode; handle_t *handle; struct posix_acl *acl; int error, retries = 0; + if (strcmp(name, "") != 0) + return -EINVAL; if (!test_opt(inode->i_sb, POSIX_ACL)) return -EOPNOTSUPP; if (!is_owner_or_cap(inode)) @@ -468,34 +456,18 @@ release_and_out: return error; } -static int -ext3_xattr_set_acl_access(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); -} - -static int -ext3_xattr_set_acl_default(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); -} - struct xattr_handler ext3_xattr_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, .list = ext3_xattr_list_acl_access, - .get = ext3_xattr_get_acl_access, - .set = ext3_xattr_set_acl_access, + .get = ext3_xattr_get_acl, + .set = ext3_xattr_set_acl, }; struct xattr_handler ext3_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, .list = ext3_xattr_list_acl_default, - .get = ext3_xattr_get_acl_default, - .set = ext3_xattr_set_acl_default, + .get = ext3_xattr_get_acl, + .set = ext3_xattr_set_acl, }; diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 387d92d00b97..66895ccf76c7 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -99,7 +99,7 @@ static struct buffer_head *ext3_xattr_cache_find(struct inode *, struct mb_cache_entry **); static void ext3_xattr_rehash(struct ext3_xattr_header *, struct ext3_xattr_entry *); -static int ext3_xattr_list(struct inode *inode, char *buffer, +static int ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size); static struct mb_cache *ext3_xattr_cache; @@ -147,7 +147,7 @@ ext3_xattr_handler(int name_index) ssize_t ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) { - return ext3_xattr_list(dentry->d_inode, buffer, size); + return ext3_xattr_list(dentry, buffer, size); } static int @@ -332,7 +332,7 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name, } static int 
-ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry, +ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry, char *buffer, size_t buffer_size) { size_t rest = buffer_size; @@ -342,9 +342,10 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry, ext3_xattr_handler(entry->e_name_index); if (handler) { - size_t size = handler->list(inode, buffer, rest, + size_t size = handler->list(dentry, buffer, rest, entry->e_name, - entry->e_name_len); + entry->e_name_len, + handler->flags); if (buffer) { if (size > rest) return -ERANGE; @@ -357,8 +358,9 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry, } static int -ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) +ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) { + struct inode *inode = dentry->d_inode; struct buffer_head *bh = NULL; int error; @@ -383,7 +385,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) goto cleanup; } ext3_xattr_cache_insert(bh); - error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); + error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); cleanup: brelse(bh); @@ -392,8 +394,9 @@ cleanup: } static int -ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) +ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) { + struct inode *inode = dentry->d_inode; struct ext3_xattr_ibody_header *header; struct ext3_inode *raw_inode; struct ext3_iloc iloc; @@ -411,7 +414,7 @@ ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) error = ext3_xattr_check_names(IFIRST(header), end); if (error) goto cleanup; - error = ext3_xattr_list_entries(inode, IFIRST(header), + error = ext3_xattr_list_entries(dentry, IFIRST(header), buffer, buffer_size); cleanup: @@ -430,12 +433,12 @@ cleanup: * used / required on success. 
*/ static int -ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) +ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) { int i_error, b_error; - down_read(&EXT3_I(inode)->xattr_sem); - i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size); + down_read(&EXT3_I(dentry->d_inode)->xattr_sem); + i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size); if (i_error < 0) { b_error = 0; } else { @@ -443,11 +446,11 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) buffer += i_error; buffer_size -= i_error; } - b_error = ext3_xattr_block_list(inode, buffer, buffer_size); + b_error = ext3_xattr_block_list(dentry, buffer, buffer_size); if (b_error < 0) i_error = 0; } - up_read(&EXT3_I(inode)->xattr_sem); + up_read(&EXT3_I(dentry->d_inode)->xattr_sem); return i_error + b_error; } diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index 37b81097bdf2..474348788dd9 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c @@ -12,8 +12,8 @@ #include "xattr.h" static size_t -ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -28,23 +28,23 @@ ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, } static int -ext3_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext3_xattr_security_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name, - buffer, size); + return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY, + name, buffer, size); } static int -ext3_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext3_xattr_security_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name, - value, size, flags); + return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY, + name, value, size, flags); } int diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c index c7c41a410c4b..e5562845ed96 100644 --- a/fs/ext3/xattr_trusted.c +++ b/fs/ext3/xattr_trusted.c @@ -14,8 +14,8 @@ #include "xattr.h" static size_t -ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -32,22 +32,22 @@ ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, } static int -ext3_xattr_trusted_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext3_xattr_trusted_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, - buffer, size); + return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, + name, buffer, size); } static int -ext3_xattr_trusted_set(struct inode *inode, 
const char *name, - const void *value, size_t size, int flags) +ext3_xattr_trusted_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, + return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name, value, size, flags); } diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index 430fe63b31b3..3bcfe9ee0a68 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c @@ -13,13 +13,13 @@ #include "xattr.h" static size_t -ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return 0; if (list && total_len <= list_size) { @@ -31,26 +31,27 @@ ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, } static int -ext3_xattr_user_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer, + size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size); + return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER, + name, buffer, size); } static int -ext3_xattr_user_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext3_xattr_user_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, - value, size, flags); + return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER, + name, value, size, flags); } struct xattr_handler ext3_xattr_user_handler = { diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 0df88b2a69b0..8a2a29d35a6f 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -364,12 +364,12 @@ out: * Extended attribute handlers */ static size_t -ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, - const char *name, size_t name_len) +ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len, + const char *name, size_t name_len, int type) { const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (!test_opt(dentry->d_sb, POSIX_ACL)) return 0; if (list && size <= list_len) memcpy(list, POSIX_ACL_XATTR_ACCESS, size); @@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, } static size_t -ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, - const char *name, size_t name_len) +ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len, + const char *name, size_t name_len, int type) { const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (!test_opt(dentry->d_sb, POSIX_ACL)) return 0; if (list && size <= list_len) memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); @@ -390,15 +390,18 @@ 
ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, } static int -ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, + size_t size, int type) { struct posix_acl *acl; int error; - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(dentry->d_sb, POSIX_ACL)) return -EOPNOTSUPP; - acl = ext4_get_acl(inode, type); + acl = ext4_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) } static int -ext4_xattr_get_acl_access(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); -} - -static int -ext4_xattr_get_acl_default(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); -} - -static int -ext4_xattr_set_acl(struct inode *inode, int type, const void *value, - size_t size) +ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags, int type) { + struct inode *inode = dentry->d_inode; handle_t *handle; struct posix_acl *acl; int error, retries = 0; + if (strcmp(name, "") != 0) + return -EINVAL; if (!test_opt(inode->i_sb, POSIX_ACL)) return -EOPNOTSUPP; if (!is_owner_or_cap(inode)) @@ -466,34 +454,18 @@ release_and_out: return error; } -static int -ext4_xattr_set_acl_access(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); -} - -static int -ext4_xattr_set_acl_default(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); -} - struct xattr_handler ext4_xattr_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, .list = ext4_xattr_list_acl_access, - .get = ext4_xattr_get_acl_access, - .set = ext4_xattr_set_acl_access, + .get = ext4_xattr_get_acl, + .set = ext4_xattr_set_acl, }; struct xattr_handler ext4_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, .list = ext4_xattr_list_acl_default, - .get = ext4_xattr_get_acl_default, - .set = ext4_xattr_set_acl_default, + .get = ext4_xattr_get_acl, + .set = ext4_xattr_set_acl, }; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 910bf9a59cb3..83218bebbc7c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *, struct mb_cache_entry **); static void ext4_xattr_rehash(struct ext4_xattr_header *, struct ext4_xattr_entry *); -static int ext4_xattr_list(struct inode *inode, char *buffer, +static int ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size); static struct mb_cache *ext4_xattr_cache; @@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index) ssize_t ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) { - return ext4_xattr_list(dentry->d_inode, buffer, size); + return ext4_xattr_list(dentry, buffer, size); } static int @@ -325,7 +325,7 @@ ext4_xattr_get(struct inode *inode, int 
name_index, const char *name, } static int -ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, +ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry, char *buffer, size_t buffer_size) { size_t rest = buffer_size; @@ -335,9 +335,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, ext4_xattr_handler(entry->e_name_index); if (handler) { - size_t size = handler->list(inode, buffer, rest, + size_t size = handler->list(dentry, buffer, rest, entry->e_name, - entry->e_name_len); + entry->e_name_len, + handler->flags); if (buffer) { if (size > rest) return -ERANGE; @@ -350,8 +351,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, } static int -ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) +ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) { + struct inode *inode = dentry->d_inode; struct buffer_head *bh = NULL; int error; @@ -376,7 +378,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) goto cleanup; } ext4_xattr_cache_insert(bh); - error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); + error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); cleanup: brelse(bh); @@ -385,8 +387,9 @@ cleanup: } static int -ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) +ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) { + struct inode *inode = dentry->d_inode; struct ext4_xattr_ibody_header *header; struct ext4_inode *raw_inode; struct ext4_iloc iloc; @@ -404,7 +407,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) error = ext4_xattr_check_names(IFIRST(header), end); if (error) goto cleanup; - error = ext4_xattr_list_entries(inode, IFIRST(header), + error = ext4_xattr_list_entries(dentry, IFIRST(header), buffer, buffer_size); cleanup: @@ -423,12 +426,12 @@ cleanup: * used / required on success. 
*/ static int -ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) +ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) { int i_error, b_error; - down_read(&EXT4_I(inode)->xattr_sem); - i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); + down_read(&EXT4_I(dentry->d_inode)->xattr_sem); + i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size); if (i_error < 0) { b_error = 0; } else { @@ -436,11 +439,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) buffer += i_error; buffer_size -= i_error; } - b_error = ext4_xattr_block_list(inode, buffer, buffer_size); + b_error = ext4_xattr_block_list(dentry, buffer, buffer_size); if (b_error < 0) i_error = 0; } - up_read(&EXT4_I(inode)->xattr_sem); + up_read(&EXT4_I(dentry->d_inode)->xattr_sem); return i_error + b_error; } diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index ca5f89fc6cae..983c253999a7 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -12,8 +12,8 @@ #include "xattr.h" static size_t -ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; const size_t total_len = prefix_len + name_len + 1; @@ -28,23 +28,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, } static int -ext4_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext4_xattr_security_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, - buffer, size); + return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY, + name, buffer, size); } static int -ext4_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext4_xattr_security_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, - value, size, flags); + return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY, + name, value, size, flags); } int diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index ac1a52cf2a37..15b50edc6587 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -14,8 +14,8 @@ #include "xattr.h" static size_t -ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, } static int -ext4_xattr_trusted_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer, + size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, - buffer, size); + return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED, + name, buffer, size); } static int -ext4_xattr_trusted_set(struct inode 
*inode, const char *name, - const void *value, size_t size, int flags) +ext4_xattr_trusted_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, - value, size, flags); + return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED, + name, value, size, flags); } struct xattr_handler ext4_xattr_trusted_handler = { diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index d91aa61b42aa..c4ce05746ce1 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -13,13 +13,13 @@ #include "xattr.h" static size_t -ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return 0; if (list && total_len <= list_size) { @@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, } static int -ext4_xattr_user_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext4_xattr_user_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); + return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER, + name, buffer, size); } static int -ext4_xattr_user_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext4_xattr_user_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, - value, size, flags); + return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER, + name, value, size, flags); } struct xattr_handler ext4_xattr_user_handler = { diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 3eb1ea846173..87ee309d4c24 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -126,7 +126,7 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) error = posix_acl_to_xattr(acl, data, len); if (error < 0) goto out; - error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0); + error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); if (!error) set_cached_acl(inode, type, acl); out: @@ -232,9 +232,10 @@ static int gfs2_acl_type(const char *name) return -EINVAL; } -static int gfs2_xattr_system_get(struct inode *inode, const char *name, - void *buffer, size_t size) +static int gfs2_xattr_system_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int xtype) { + struct inode *inode = dentry->d_inode; struct posix_acl *acl; int type; int error; @@ -255,9 +256,11 @@ static int gfs2_xattr_system_get(struct inode *inode, const char *name, return error; } -static int gfs2_xattr_system_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +static int gfs2_xattr_system_set(struct dentry *dentry, const char *name, + const void *value, 
size_t size, int flags, + int xtype) { + struct inode *inode = dentry->d_inode; struct gfs2_sbd *sdp = GFS2_SB(inode); struct posix_acl *acl = NULL; int error = 0, type; @@ -319,7 +322,7 @@ static int gfs2_xattr_system_set(struct inode *inode, const char *name, } set_acl: - error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0); + error = __gfs2_xattr_set(inode, name, value, size, 0, GFS2_EATYPE_SYS); if (!error) { if (acl) set_cached_acl(inode, type, acl); @@ -334,6 +337,7 @@ out: struct xattr_handler gfs2_xattr_system_handler = { .prefix = XATTR_SYSTEM_PREFIX, + .flags = GFS2_EATYPE_SYS, .get = gfs2_xattr_system_get, .set = gfs2_xattr_system_set, }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 26ba2a4c4a2d..3ff32fa793da 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -801,7 +801,8 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) return err; } - err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0); + err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0, + GFS2_EATYPE_SECURITY); kfree(value); kfree(name); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 912f5cbc4740..8a04108e0c22 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -567,18 +567,17 @@ out: /** * gfs2_xattr_get - Get a GFS2 extended attribute * @inode: The inode - * @type: The type of extended attribute * @name: The name of the extended attribute * @buffer: The buffer to write the result into * @size: The size of the buffer + * @type: The type of extended attribute * * Returns: actual size of data on success, -errno on error */ - -int gfs2_xattr_get(struct inode *inode, int type, const char *name, - void *buffer, size_t size) +static int gfs2_xattr_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { - struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_inode *ip = GFS2_I(dentry->d_inode); struct gfs2_ea_location el; int error; @@ -1119,7 +1118,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) /** * gfs2_xattr_remove - Remove a GFS2 extended attribute - * @inode: The inode + * @ip: The inode * @type: The type of the extended attribute * @name: The name of the extended attribute * @@ -1130,9 +1129,8 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) * Returns: 0, or errno on failure */ -static int gfs2_xattr_remove(struct inode *inode, int type, const char *name) +static int gfs2_xattr_remove(struct gfs2_inode *ip, int type, const char *name) { - struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_ea_location el; int error; @@ -1156,24 +1154,24 @@ static int gfs2_xattr_remove(struct inode *inode, int type, const char *name) } /** - * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute - * @inode: The inode - * @type: The type of the extended attribute + * __gfs2_xattr_set - Set (or remove) a GFS2 extended attribute + * @ip: The inode * @name: The name of the extended attribute * @value: The value of the extended attribute (NULL for remove) * @size: The size of the @value argument * @flags: Create or Replace + * @type: The type of the extended attribute * * See gfs2_xattr_remove() for details of the removal of xattrs. 
* * Returns: 0 or errno on failure */ -int gfs2_xattr_set(struct inode *inode, int type, const char *name, - const void *value, size_t size, int flags) +int __gfs2_xattr_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags, int type) { - struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_ea_location el; unsigned int namel = strlen(name); int error; @@ -1184,7 +1182,7 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name, return -ERANGE; if (value == NULL) - return gfs2_xattr_remove(inode, type, name); + return gfs2_xattr_remove(ip, type, name); if (ea_check_size(sdp, namel, size)) return -ERANGE; @@ -1224,6 +1222,13 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name, return error; } +static int gfs2_xattr_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + return __gfs2_xattr_set(dentry->d_inode, name, value, + size, flags, type); +} + static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, char *data) { @@ -1529,40 +1534,18 @@ out_alloc: return error; } -static int gfs2_xattr_user_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size); -} - -static int gfs2_xattr_user_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags); -} - -static int gfs2_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size); -} - -static int gfs2_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags); -} - static struct xattr_handler gfs2_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, - .get = gfs2_xattr_user_get, - .set = gfs2_xattr_user_set, + .flags = GFS2_EATYPE_USR, + .get = gfs2_xattr_get, + .set = gfs2_xattr_set, }; static struct xattr_handler gfs2_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .get = gfs2_xattr_security_get, - .set = gfs2_xattr_security_set, + .flags = GFS2_EATYPE_SECURITY, + .get = gfs2_xattr_get, + .set = gfs2_xattr_set, }; struct xattr_handler *gfs2_xattr_handlers[] = { diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h index 8d6ae5813c4d..d392f8358f2f 100644 --- a/fs/gfs2/xattr.h +++ b/fs/gfs2/xattr.h @@ -53,10 +53,9 @@ struct gfs2_ea_location { struct gfs2_ea_header *el_prev; }; -extern int gfs2_xattr_get(struct inode *inode, int type, const char *name, - void *buffer, size_t size); -extern int gfs2_xattr_set(struct inode *inode, int type, const char *name, - const void *value, size_t size, int flags); +extern int __gfs2_xattr_set(struct inode *inode, const char *name, + const void *value, size_t size, + int flags, int type); extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); extern int gfs2_ea_dealloc(struct gfs2_inode *ip); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 7edb62e97419..7cdc3196476a 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -350,8 +350,8 @@ int jffs2_acl_chmod(struct inode *inode) return rc; } -static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +static size_t 
jffs2_acl_access_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) { const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS); @@ -360,8 +360,8 @@ static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t return retlen; } -static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +static size_t jffs2_acl_default_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) { const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT); @@ -370,12 +370,16 @@ static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_ return retlen; } -static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_t size) +static int jffs2_acl_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { struct posix_acl *acl; int rc; - acl = jffs2_get_acl(inode, type); + if (name[0] != '\0') + return -EINVAL; + + acl = jffs2_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (!acl) @@ -386,26 +390,15 @@ static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_ return rc; } -static int jffs2_acl_access_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) -{ - if (name[0] != '\0') - return -EINVAL; - return jffs2_acl_getxattr(inode, ACL_TYPE_ACCESS, buffer, size); -} - -static int jffs2_acl_default_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) -{ - if (name[0] != '\0') - return -EINVAL; - return jffs2_acl_getxattr(inode, ACL_TYPE_DEFAULT, buffer, size); -} - -static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, size_t size) +static int jffs2_acl_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { struct posix_acl *acl; int rc; - if (!is_owner_or_cap(inode)) + if (name[0] != '\0') + return -EINVAL; + if (!is_owner_or_cap(dentry->d_inode)) return -EPERM; if (value) { @@ -420,38 +413,24 @@ static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, } else { acl = NULL; } - rc = jffs2_set_acl(inode, type, acl); + rc = jffs2_set_acl(dentry->d_inode, type, acl); out: posix_acl_release(acl); return rc; } -static int jffs2_acl_access_setxattr(struct inode *inode, const char *name, - const void *buffer, size_t size, int flags) -{ - if (name[0] != '\0') - return -EINVAL; - return jffs2_acl_setxattr(inode, ACL_TYPE_ACCESS, buffer, size); -} - -static int jffs2_acl_default_setxattr(struct inode *inode, const char *name, - const void *buffer, size_t size, int flags) -{ - if (name[0] != '\0') - return -EINVAL; - return jffs2_acl_setxattr(inode, ACL_TYPE_DEFAULT, buffer, size); -} - struct xattr_handler jffs2_acl_access_xattr_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_DEFAULT, .list = jffs2_acl_access_listxattr, - .get = jffs2_acl_access_getxattr, - .set = jffs2_acl_access_setxattr, + .get = jffs2_acl_getxattr, + .set = jffs2_acl_setxattr, }; struct xattr_handler jffs2_acl_default_xattr_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, .list = jffs2_acl_default_listxattr, - .get = jffs2_acl_default_getxattr, - .set = jffs2_acl_default_setxattr, + .get = jffs2_acl_getxattr, + .set = jffs2_acl_setxattr, }; diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c index 02c39c64ecb3..eaccee058583 100644 --- a/fs/jffs2/security.c +++ 
b/fs/jffs2/security.c @@ -44,26 +44,28 @@ int jffs2_init_security(struct inode *inode, struct inode *dir) } /* ---- XATTR Handler for "security.*" ----------------- */ -static int jffs2_security_getxattr(struct inode *inode, const char *name, - void *buffer, size_t size) +static int jffs2_security_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (!strcmp(name, "")) return -EINVAL; - return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size); + return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY, + name, buffer, size); } -static int jffs2_security_setxattr(struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) +static int jffs2_security_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) { if (!strcmp(name, "")) return -EINVAL; - return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags); + return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY, + name, buffer, size, flags); } -static size_t jffs2_security_listxattr(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +static size_t jffs2_security_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) { size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1; diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 4b107881acd5..9e75c62c85d6 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -990,9 +990,11 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size) if (!xhandle) continue; if (buffer) { - rc = xhandle->list(inode, buffer+len, size-len, xd->xname, xd->name_len); + rc = xhandle->list(dentry, buffer+len, size-len, + xd->xname, xd->name_len, xd->flags); } else { - rc = xhandle->list(inode, NULL, 0, xd->xname, xd->name_len); + rc = xhandle->list(dentry, NULL, 0, xd->xname, + xd->name_len, xd->flags); } if (rc < 0) goto out; diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c index 8ec5765ef348..3e5a5e356e05 100644 --- a/fs/jffs2/xattr_trusted.c +++ b/fs/jffs2/xattr_trusted.c @@ -16,24 +16,26 @@ #include #include "nodelist.h" -static int jffs2_trusted_getxattr(struct inode *inode, const char *name, - void *buffer, size_t size) +static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (!strcmp(name, "")) return -EINVAL; - return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size); + return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED, + name, buffer, size); } -static int jffs2_trusted_setxattr(struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) +static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) { if (!strcmp(name, "")) return -EINVAL; - return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags); + return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED, + name, buffer, size, flags); } -static size_t jffs2_trusted_listxattr(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) { size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1; diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c index 
8bbeab90ada1..8544af67dffe 100644 --- a/fs/jffs2/xattr_user.c +++ b/fs/jffs2/xattr_user.c @@ -16,24 +16,26 @@ #include #include "nodelist.h" -static int jffs2_user_getxattr(struct inode *inode, const char *name, - void *buffer, size_t size) +static int jffs2_user_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (!strcmp(name, "")) return -EINVAL; - return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size); + return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_USER, + name, buffer, size); } -static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) +static int jffs2_user_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) { if (!strcmp(name, "")) return -EINVAL; - return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags); + return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_USER, + name, buffer, size, flags); } -static size_t jffs2_user_listxattr(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +static size_t jffs2_user_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) { size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1; diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index fbeaec762103..e3e47415d851 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -331,13 +331,14 @@ cleanup: return ret; } -static size_t ocfs2_xattr_list_acl_access(struct inode *inode, +static size_t ocfs2_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len, const char *name, - size_t name_len) + size_t name_len, + int type) { - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) @@ -348,13 +349,14 @@ static size_t ocfs2_xattr_list_acl_access(struct inode *inode, return size; } -static size_t ocfs2_xattr_list_acl_default(struct inode *inode, +static size_t ocfs2_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len, const char *name, - size_t name_len) + size_t name_len, + int type) { - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) @@ -365,19 +367,19 @@ static size_t ocfs2_xattr_list_acl_default(struct inode *inode, return size; } -static int ocfs2_xattr_get_acl(struct inode *inode, - int type, - void *buffer, - size_t size) +static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); struct posix_acl *acl; int ret; + if (strcmp(name, "") != 0) + return -EINVAL; if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return -EOPNOTSUPP; - acl = ocfs2_get_acl(inode, type); + acl = ocfs2_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -388,35 +390,16 @@ static int ocfs2_xattr_get_acl(struct inode *inode, return ret; } -static int ocfs2_xattr_get_acl_access(struct inode *inode, - const char *name, - void *buffer, - size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); -} - -static int ocfs2_xattr_get_acl_default(struct inode 
*inode, - const char *name, - void *buffer, - size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); -} - -static int ocfs2_xattr_set_acl(struct inode *inode, - int type, - const void *value, - size_t size) +static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { + struct inode *inode = dentry->d_inode; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl; int ret = 0; + if (strcmp(name, "") != 0) + return -EINVAL; if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return -EOPNOTSUPP; @@ -442,38 +425,18 @@ cleanup: return ret; } -static int ocfs2_xattr_set_acl_access(struct inode *inode, - const char *name, - const void *value, - size_t size, - int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); -} - -static int ocfs2_xattr_set_acl_default(struct inode *inode, - const char *name, - const void *value, - size_t size, - int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); -} - struct xattr_handler ocfs2_xattr_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, .list = ocfs2_xattr_list_acl_access, - .get = ocfs2_xattr_get_acl_access, - .set = ocfs2_xattr_set_acl_access, + .get = ocfs2_xattr_get_acl, + .set = ocfs2_xattr_set_acl, }; struct xattr_handler ocfs2_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, .list = ocfs2_xattr_list_acl_default, - .get = ocfs2_xattr_get_acl_default, - .set = ocfs2_xattr_set_acl_default, + .get = ocfs2_xattr_get_acl, + .set = ocfs2_xattr_set_acl, }; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index fe3419068df2..43c114831c0d 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -205,8 +205,6 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, int offset, struct ocfs2_xattr_value_root **xv, struct buffer_head **bh); -static int ocfs2_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags); static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) { @@ -6978,9 +6976,9 @@ int ocfs2_init_security_and_acl(struct inode *dir, ret = ocfs2_init_security_get(inode, dir, &si); if (!ret) { - ret = ocfs2_xattr_security_set(inode, si.name, - si.value, si.value_len, - XATTR_CREATE); + ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, + si.name, si.value, si.value_len, + XATTR_CREATE); if (ret) { mlog_errno(ret); goto leave; @@ -7008,9 +7006,9 @@ leave: /* * 'security' attributes support */ -static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, +static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len) + size_t name_len, int type) { const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -7023,23 +7021,23 @@ static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, return total_len; } -static int ocfs2_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) +static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name, - buffer, size); + return 
ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY, + name, buffer, size); } -static int ocfs2_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value, - size, flags); + return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY, + name, value, size, flags); } int ocfs2_init_security_get(struct inode *inode, @@ -7076,9 +7074,9 @@ struct xattr_handler ocfs2_xattr_security_handler = { /* * 'trusted' attributes support */ -static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, +static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len) + size_t name_len, int type) { const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -7091,23 +7089,23 @@ static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, return total_len; } -static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name, - void *buffer, size_t size) +static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, - buffer, size); + return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED, + name, buffer, size); } -static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value, - size, flags); + return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED, + name, value, size, flags); } struct xattr_handler ocfs2_xattr_trusted_handler = { @@ -7120,13 +7118,13 @@ struct xattr_handler ocfs2_xattr_trusted_handler = { /* * 'user' attributes support */ -static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, +static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len) + size_t name_len, int type) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) return 0; @@ -7139,31 +7137,31 @@ static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, return total_len; } -static int ocfs2_xattr_user_get(struct inode *inode, const char *name, - void *buffer, size_t size) +static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); if (strcmp(name, "") == 0) return -EINVAL; if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) return -EOPNOTSUPP; - return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, + return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name, buffer, size); } -static int ocfs2_xattr_user_set(struct inode *inode, const char *name, - const void 
*value, size_t size, int flags) +static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); if (strcmp(name, "") == 0) return -EINVAL; if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) return -EOPNOTSUPP; - return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value, - size, flags); + return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER, + name, value, size, flags); } struct xattr_handler ocfs2_xattr_user_handler = { diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 58aa8e75f7f5..8c7033a8b67e 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -48,6 +48,7 @@ #include #include #include +#include #define PRIVROOT_NAME ".reiserfs_priv" #define XAROOT_NAME "xattrs" @@ -726,15 +727,14 @@ ssize_t reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, size_t size) { - struct inode *inode = dentry->d_inode; struct xattr_handler *handler; - handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); + handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); - if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) + if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) return -EOPNOTSUPP; - return handler->get(inode, name, buffer, size); + return handler->get(dentry, name, buffer, size, handler->flags); } /* @@ -746,15 +746,14 @@ int reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { - struct inode *inode = dentry->d_inode; struct xattr_handler *handler; - handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); + handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); - if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) + if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) return -EOPNOTSUPP; - return handler->set(inode, name, value, size, flags); + return handler->set(dentry, name, value, size, flags, handler->flags); } /* @@ -764,21 +763,20 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, */ int reiserfs_removexattr(struct dentry *dentry, const char *name) { - struct inode *inode = dentry->d_inode; struct xattr_handler *handler; - handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); + handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); - if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) + if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) return -EOPNOTSUPP; - return handler->set(inode, name, NULL, 0, XATTR_REPLACE); + return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags); } struct listxattr_buf { size_t size; size_t pos; char *buf; - struct inode *inode; + struct dentry *dentry; }; static int listxattr_filler(void *buf, const char *name, int namelen, @@ -789,17 +787,19 @@ static int listxattr_filler(void *buf, const char *name, int namelen, if (name[0] != '.' || (namelen != 1 && (name[1] != '.' 
|| namelen != 2))) { struct xattr_handler *handler; - handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr, + handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr, name); if (!handler) /* Unsupported xattr name */ return 0; if (b->buf) { - size = handler->list(b->inode, b->buf + b->pos, - b->size, name, namelen); + size = handler->list(b->dentry, b->buf + b->pos, + b->size, name, namelen, + handler->flags); if (size > b->size) return -ERANGE; } else { - size = handler->list(b->inode, NULL, 0, name, namelen); + size = handler->list(b->dentry, NULL, 0, name, + namelen, handler->flags); } b->pos += size; @@ -820,7 +820,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) int err = 0; loff_t pos = 0; struct listxattr_buf buf = { - .inode = dentry->d_inode, + .dentry = dentry, .buf = buffer, .size = buffer ? size : 0, }; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 35d6e672a279..cc32e6ada67b 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -15,8 +15,10 @@ static int reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct posix_acl *acl); static int -xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) +posix_acl_set(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags, int type) { + struct inode *inode = dentry->d_inode; struct posix_acl *acl; int error, error2; struct reiserfs_transaction_handle th; @@ -60,15 +62,16 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) } static int -xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +posix_acl_get(struct dentry *dentry, const char *name, void *buffer, + size_t size, int type) { struct posix_acl *acl; int error; - if (!reiserfs_posixacl(inode->i_sb)) + if (!reiserfs_posixacl(dentry->d_sb)) return -EOPNOTSUPP; - acl = reiserfs_get_acl(inode, type); + acl = reiserfs_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -482,30 +485,12 @@ int reiserfs_acl_chmod(struct inode *inode) return error; } -static int -posix_acl_access_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1) - return -EINVAL; - return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); -} - -static int -posix_acl_access_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1) - return -EINVAL; - return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); -} - -static size_t posix_acl_access_list(struct inode *inode, char *list, +static size_t posix_acl_access_list(struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len) + size_t name_len, int type) { const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); - if (!reiserfs_posixacl(inode->i_sb)) + if (!reiserfs_posixacl(dentry->d_sb)) return 0; if (list && size <= list_size) memcpy(list, POSIX_ACL_XATTR_ACCESS, size); @@ -514,35 +499,18 @@ static size_t posix_acl_access_list(struct inode *inode, char *list, struct xattr_handler reiserfs_posix_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, - .get = posix_acl_access_get, - .set = posix_acl_access_set, + .flags = ACL_TYPE_ACCESS, + .get = posix_acl_get, + .set = posix_acl_set, .list = posix_acl_access_list, }; -static int -posix_acl_default_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strlen(name) != 
sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) - return -EINVAL; - return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); -} - -static int -posix_acl_default_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) - return -EINVAL; - return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); -} - -static size_t posix_acl_default_list(struct inode *inode, char *list, +static size_t posix_acl_default_list(struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len) + size_t name_len, int type) { const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); - if (!reiserfs_posixacl(inode->i_sb)) + if (!reiserfs_posixacl(dentry->d_sb)) return 0; if (list && size <= list_size) memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); @@ -551,7 +519,8 @@ static size_t posix_acl_default_list(struct inode *inode, char *list, struct xattr_handler reiserfs_posix_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, - .get = posix_acl_default_get, - .set = posix_acl_default_set, + .flags = ACL_TYPE_DEFAULT, + .get = posix_acl_get, + .set = posix_acl_set, .list = posix_acl_default_list, }; diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index a92c8792c0f6..d8b5bfcbdd30 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -8,36 +8,37 @@ #include static int -security_get(struct inode *inode, const char *name, void *buffer, size_t size) +security_get(struct dentry *dentry, const char *name, void *buffer, size_t size, + int handler_flags) { if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) return -EINVAL; - if (IS_PRIVATE(inode)) + if (IS_PRIVATE(dentry->d_inode)) return -EPERM; - return reiserfs_xattr_get(inode, name, buffer, size); + return reiserfs_xattr_get(dentry->d_inode, name, buffer, size); } static int -security_set(struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) +security_set(struct dentry *dentry, const char *name, const void *buffer, + size_t size, int flags, int handler_flags) { if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) return -EINVAL; - if (IS_PRIVATE(inode)) + if (IS_PRIVATE(dentry->d_inode)) return -EPERM; - return reiserfs_xattr_set(inode, name, buffer, size, flags); + return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags); } -static size_t security_list(struct inode *inode, char *list, size_t list_len, - const char *name, size_t namelen) +static size_t security_list(struct dentry *dentry, char *list, size_t list_len, + const char *name, size_t namelen, int handler_flags) { const size_t len = namelen + 1; - if (IS_PRIVATE(inode)) + if (IS_PRIVATE(dentry->d_inode)) return 0; if (list && len <= list_len) { diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index a865042f75e2..5b08aaca3daf 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -8,36 +8,37 @@ #include static int -trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) +trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size, + int handler_flags) { if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) return -EINVAL; - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) + if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode)) return -EPERM; - return reiserfs_xattr_get(inode, name, buffer, size); + return reiserfs_xattr_get(dentry->d_inode, name, buffer, size); } static int -trusted_set(struct inode *inode, const char *name, const 
void *buffer, - size_t size, int flags) +trusted_set(struct dentry *dentry, const char *name, const void *buffer, + size_t size, int flags, int handler_flags) { if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) return -EINVAL; - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) + if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode)) return -EPERM; - return reiserfs_xattr_set(inode, name, buffer, size, flags); + return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags); } -static size_t trusted_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int handler_flags) { const size_t len = name_len + 1; - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) + if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode)) return 0; if (list && len <= list_size) { diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index e3238dc4f3db..75d59c49b911 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -7,34 +7,35 @@ #include static int -user_get(struct inode *inode, const char *name, void *buffer, size_t size) +user_get(struct dentry *dentry, const char *name, void *buffer, size_t size, + int handler_flags) { if (strlen(name) < sizeof(XATTR_USER_PREFIX)) return -EINVAL; - if (!reiserfs_xattrs_user(inode->i_sb)) + if (!reiserfs_xattrs_user(dentry->d_sb)) return -EOPNOTSUPP; - return reiserfs_xattr_get(inode, name, buffer, size); + return reiserfs_xattr_get(dentry->d_inode, name, buffer, size); } static int -user_set(struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) +user_set(struct dentry *dentry, const char *name, const void *buffer, + size_t size, int flags, int handler_flags) { if (strlen(name) < sizeof(XATTR_USER_PREFIX)) return -EINVAL; - if (!reiserfs_xattrs_user(inode->i_sb)) + if (!reiserfs_xattrs_user(dentry->d_sb)) return -EOPNOTSUPP; - return reiserfs_xattr_set(inode, name, buffer, size, flags); + return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags); } -static size_t user_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +static size_t user_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int handler_flags) { const size_t len = name_len + 1; - if (!reiserfs_xattrs_user(inode->i_sb)) + if (!reiserfs_xattrs_user(dentry->d_sb)) return 0; if (list && len <= list_size) { memcpy(list, name, name_len); diff --git a/fs/xattr.c b/fs/xattr.c index 6d4f6d3449fb..46f87e828b48 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -615,12 +615,11 @@ ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) { struct xattr_handler *handler; - struct inode *inode = dentry->d_inode; - handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); + handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (!handler) return -EOPNOTSUPP; - return handler->get(inode, name, buffer, size); + return handler->get(dentry, name, buffer, size, handler->flags); } /* @@ -630,18 +629,20 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { - struct inode *inode = dentry->d_inode; - struct xattr_handler *handler, **handlers = inode->i_sb->s_xattr; + struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr; unsigned int size = 0; if 
(!buffer) { - for_each_xattr_handler(handlers, handler) - size += handler->list(inode, NULL, 0, NULL, 0); + for_each_xattr_handler(handlers, handler) { + size += handler->list(dentry, NULL, 0, NULL, 0, + handler->flags); + } } else { char *buf = buffer; for_each_xattr_handler(handlers, handler) { - size = handler->list(inode, buf, buffer_size, NULL, 0); + size = handler->list(dentry, buf, buffer_size, + NULL, 0, handler->flags); if (size > buffer_size) return -ERANGE; buf += size; @@ -659,14 +660,13 @@ int generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct xattr_handler *handler; - struct inode *inode = dentry->d_inode; if (size == 0) value = ""; /* empty EA, do not remove */ - handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); + handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (!handler) return -EOPNOTSUPP; - return handler->set(inode, name, value, size, flags); + return handler->set(dentry, name, value, size, 0, handler->flags); } /* @@ -677,12 +677,12 @@ int generic_removexattr(struct dentry *dentry, const char *name) { struct xattr_handler *handler; - struct inode *inode = dentry->d_inode; - handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); + handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (!handler) return -EOPNOTSUPP; - return handler->set(inode, name, NULL, 0, XATTR_REPLACE); + return handler->set(dentry, name, NULL, 0, + XATTR_REPLACE, handler->flags); } EXPORT_SYMBOL(generic_getxattr); diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index 69e598b6986f..2512125dfa7c 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -354,37 +354,14 @@ xfs_acl_chmod(struct inode *inode) return error; } -/* - * System xattr handlers. - * - * Currently Posix ACLs are the only system namespace extended attribute - * handlers supported by XFS, so we just implement the handlers here. - * If we ever support other system extended attributes this will need - * some refactoring. 
- */ - static int -xfs_decode_acl(const char *name) -{ - if (strcmp(name, "posix_acl_access") == 0) - return ACL_TYPE_ACCESS; - else if (strcmp(name, "posix_acl_default") == 0) - return ACL_TYPE_DEFAULT; - return -EINVAL; -} - -static int -xfs_xattr_system_get(struct inode *inode, const char *name, - void *value, size_t size) +xfs_xattr_acl_get(struct dentry *dentry, const char *name, + void *value, size_t size, int type) { struct posix_acl *acl; - int type, error; - - type = xfs_decode_acl(name); - if (type < 0) - return type; + int error; - acl = xfs_get_acl(inode, type); + acl = xfs_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -397,15 +374,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name, } static int -xfs_xattr_system_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +xfs_xattr_acl_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { + struct inode *inode = dentry->d_inode; struct posix_acl *acl = NULL; - int error = 0, type; + int error = 0; - type = xfs_decode_acl(name); - if (type < 0) - return type; if (flags & XATTR_CREATE) return -EINVAL; if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) @@ -462,8 +437,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name, return error; } -struct xattr_handler xfs_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .get = xfs_xattr_system_get, - .set = xfs_xattr_system_set, +struct xattr_handler xfs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .get = xfs_xattr_acl_get, + .set = xfs_xattr_acl_set, +}; + +struct xattr_handler xfs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .get = xfs_xattr_acl_get, + .set = xfs_xattr_acl_set, }; diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c index 497c7fb75cc1..0b1878857fc3 100644 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ b/fs/xfs/linux-2.6/xfs_xattr.c @@ -30,10 +30,10 @@ static int -__xfs_xattr_get(struct inode *inode, const char *name, +xfs_xattr_get(struct dentry *dentry, const char *name, void *value, size_t size, int xflags) { - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(dentry->d_inode); int error, asize = size; if (strcmp(name, "") == 0) @@ -52,10 +52,10 @@ __xfs_xattr_get(struct inode *inode, const char *name, } static int -__xfs_xattr_set(struct inode *inode, const char *name, const void *value, +xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t size, int flags, int xflags) { - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(dentry->d_inode); if (strcmp(name, "") == 0) return -EINVAL; @@ -71,75 +71,34 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value, return -xfs_attr_set(ip, name, (void *)value, size, xflags); } -static int -xfs_xattr_user_get(struct inode *inode, const char *name, - void *value, size_t size) -{ - return __xfs_xattr_get(inode, name, value, size, 0); -} - -static int -xfs_xattr_user_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return __xfs_xattr_set(inode, name, value, size, flags, 0); -} - static struct xattr_handler xfs_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, - .get = xfs_xattr_user_get, - .set = xfs_xattr_user_set, + .flags = 0, /* no flags implies user namespace */ + .get = xfs_xattr_get, + .set = xfs_xattr_set, }; - -static int 
-xfs_xattr_trusted_get(struct inode *inode, const char *name, - void *value, size_t size) -{ - return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT); -} - -static int -xfs_xattr_trusted_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT); -} - static struct xattr_handler xfs_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, - .get = xfs_xattr_trusted_get, - .set = xfs_xattr_trusted_set, + .flags = ATTR_ROOT, + .get = xfs_xattr_get, + .set = xfs_xattr_set, }; - -static int -xfs_xattr_secure_get(struct inode *inode, const char *name, - void *value, size_t size) -{ - return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE); -} - -static int -xfs_xattr_secure_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE); -} - static struct xattr_handler xfs_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .get = xfs_xattr_secure_get, - .set = xfs_xattr_secure_set, + .flags = ATTR_SECURE, + .get = xfs_xattr_get, + .set = xfs_xattr_set, }; - struct xattr_handler *xfs_xattr_handlers[] = { &xfs_xattr_user_handler, &xfs_xattr_trusted_handler, &xfs_xattr_security_handler, #ifdef CONFIG_XFS_POSIX_ACL - &xfs_xattr_system_handler, + &xfs_xattr_acl_access_handler, + &xfs_xattr_acl_default_handler, #endif NULL }; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 947b150df8ed..00fd357c3e46 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -49,7 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode); extern int posix_acl_access_exists(struct inode *inode); extern int posix_acl_default_exists(struct inode *inode); -extern struct xattr_handler xfs_xattr_system_handler; +extern struct xattr_handler xfs_xattr_acl_access_handler; +extern struct xattr_handler xfs_xattr_acl_default_handler; #else # define xfs_check_acl NULL # define xfs_get_acl(inode, type) NULL diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 5c84af8c5f6f..fb9b7e6e1e2d 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -38,12 +38,13 @@ struct dentry; struct xattr_handler { char *prefix; - size_t (*list)(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len); - int (*get)(struct inode *inode, const char *name, void *buffer, - size_t size); - int (*set)(struct inode *inode, const char *name, const void *buffer, - size_t size, int flags); + int flags; /* fs private flags passed back to the handlers */ + size_t (*list)(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int handler_flags); + int (*get)(struct dentry *dentry, const char *name, void *buffer, + size_t size, int handler_flags); + int (*set)(struct dentry *dentry, const char *name, const void *buffer, + size_t size, int flags, int handler_flags); }; ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); diff --git a/mm/shmem.c b/mm/shmem.c index adf8033afd52..3cd32c2ea0a0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2042,27 +2042,28 @@ static const struct inode_operations shmem_symlink_inode_operations = { * filesystem level, though. 
*/ -static size_t shmem_xattr_security_list(struct inode *inode, char *list, +static size_t shmem_xattr_security_list(struct dentry *dentry, char *list, size_t list_len, const char *name, - size_t name_len) + size_t name_len, int handler_flags) { - return security_inode_listsecurity(inode, list, list_len); + return security_inode_listsecurity(dentry->d_inode, list, list_len); } -static int shmem_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) +static int shmem_xattr_security_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int handler_flags) { if (strcmp(name, "") == 0) return -EINVAL; - return xattr_getsecurity(inode, name, buffer, size); + return xattr_getsecurity(dentry->d_inode, name, buffer, size); } -static int shmem_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +static int shmem_xattr_security_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int handler_flags) { if (strcmp(name, "") == 0) return -EINVAL; - return security_inode_setsecurity(inode, name, value, size, flags); + return security_inode_setsecurity(dentry->d_inode, name, value, + size, flags); } static struct xattr_handler shmem_xattr_security_handler = { diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c index df2c87fdae50..f8d5330ec0d7 100644 --- a/mm/shmem_acl.c +++ b/mm/shmem_acl.c @@ -63,86 +63,48 @@ struct generic_acl_operations shmem_acl_ops = { .setacl = shmem_set_acl, }; -/** - * shmem_list_acl_access, shmem_get_acl_access, shmem_set_acl_access, - * shmem_xattr_acl_access_handler - plumbing code to implement the - * system.posix_acl_access xattr using the generic acl functions. - */ - static size_t -shmem_list_acl_access(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +shmem_xattr_list_acl(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { - return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, - list, list_size); + return generic_acl_list(dentry->d_inode, &shmem_acl_ops, + type, list, list_size); } static int -shmem_get_acl_access(struct inode *inode, const char *name, void *buffer, - size_t size) +shmem_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, + size_t size, int type) { if (strcmp(name, "") != 0) return -EINVAL; - return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, buffer, - size); + return generic_acl_get(dentry->d_inode, &shmem_acl_ops, type, + buffer, size); } static int -shmem_set_acl_access(struct inode *inode, const char *name, const void *value, - size_t size, int flags) +shmem_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags, int type) { if (strcmp(name, "") != 0) return -EINVAL; - return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, value, - size); + return generic_acl_set(dentry->d_inode, &shmem_acl_ops, type, + value, size); } struct xattr_handler shmem_xattr_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, - .list = shmem_list_acl_access, - .get = shmem_get_acl_access, - .set = shmem_set_acl_access, + .flags = ACL_TYPE_ACCESS, + .list = shmem_xattr_list_acl, + .get = shmem_xattr_get_acl, + .set = shmem_xattr_set_acl, }; -/** - * shmem_list_acl_default, shmem_get_acl_default, shmem_set_acl_default, - * shmem_xattr_acl_default_handler - plumbing code to implement the - * system.posix_acl_default xattr using the generic acl functions. 
- */ - -static size_t -shmem_list_acl_default(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) -{ - return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, - list, list_size); -} - -static int -shmem_get_acl_default(struct inode *inode, const char *name, void *buffer, - size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, buffer, - size); -} - -static int -shmem_set_acl_default(struct inode *inode, const char *name, const void *value, - size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, value, - size); -} - struct xattr_handler shmem_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, - .list = shmem_list_acl_default, - .get = shmem_get_acl_default, - .set = shmem_set_acl_default, + .flags = ACL_TYPE_DEFAULT, + .list = shmem_xattr_list_acl, + .get = shmem_xattr_get_acl, + .set = shmem_xattr_set_acl, }; /** -- cgit v1.2.3 From 1c7c474c31aea6d5cb2fb35f31d9e9e91ae466b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2009 16:44:44 +0100 Subject: make generic_acl slightly more generic Now that we cache the ACL pointers in the generic inode all the generic_acl cruft can go away and generic_acl.c can directly implement xattr handlers dealing with the full Posix ACL semantics for in-memory filesystems. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/generic_acl.c | 158 +++++++++++++++++++++++++------------------- include/linux/generic_acl.h | 41 +++--------- include/linux/shmem_fs.h | 16 ----- mm/Makefile | 1 - mm/shmem.c | 17 +++-- mm/shmem_acl.c | 133 ------------------------------------- 6 files changed, 109 insertions(+), 257 deletions(-) delete mode 100644 mm/shmem_acl.c (limited to 'include') diff --git a/fs/generic_acl.c b/fs/generic_acl.c index e0b53aa7bbec..55458031e501 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -1,62 +1,58 @@ /* - * fs/generic_acl.c - * * (C) 2005 Andreas Gruenbacher * * This file is released under the GPL. + * + * Generic ACL support for in-memory filesystems. 
*/ #include #include #include +#include +#include -/** - * generic_acl_list - Generic xattr_handler->list() operation - * @ops: Filesystem specific getacl and setacl callbacks - */ -size_t -generic_acl_list(struct inode *inode, struct generic_acl_operations *ops, - int type, char *list, size_t list_size) + +static size_t +generic_acl_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { struct posix_acl *acl; - const char *name; + const char *xname; size_t size; - acl = ops->getacl(inode, type); + acl = get_cached_acl(dentry->d_inode, type); if (!acl) return 0; posix_acl_release(acl); - switch(type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - break; - - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - break; - - default: - return 0; + switch (type) { + case ACL_TYPE_ACCESS: + xname = POSIX_ACL_XATTR_ACCESS; + break; + case ACL_TYPE_DEFAULT: + xname = POSIX_ACL_XATTR_DEFAULT; + break; + default: + return 0; } - size = strlen(name) + 1; + size = strlen(xname) + 1; if (list && size <= list_size) - memcpy(list, name, size); + memcpy(list, xname, size); return size; } -/** - * generic_acl_get - Generic xattr_handler->get() operation - * @ops: Filesystem specific getacl and setacl callbacks - */ -int -generic_acl_get(struct inode *inode, struct generic_acl_operations *ops, - int type, void *buffer, size_t size) +static int +generic_acl_get(struct dentry *dentry, const char *name, void *buffer, + size_t size, int type) { struct posix_acl *acl; int error; - acl = ops->getacl(inode, type); + if (strcmp(name, "") != 0) + return -EINVAL; + + acl = get_cached_acl(dentry->d_inode, type); if (!acl) return -ENODATA; error = posix_acl_to_xattr(acl, buffer, size); @@ -65,17 +61,16 @@ generic_acl_get(struct inode *inode, struct generic_acl_operations *ops, return error; } -/** - * generic_acl_set - Generic xattr_handler->set() operation - * @ops: Filesystem specific getacl and setacl callbacks - */ -int -generic_acl_set(struct inode *inode, struct generic_acl_operations *ops, - int type, const void *value, size_t size) +static int +generic_acl_set(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags, int type) { + struct inode *inode = dentry->d_inode; struct posix_acl *acl = NULL; int error; + if (strcmp(name, "") != 0) + return -EINVAL; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; if (!is_owner_or_cap(inode)) @@ -91,28 +86,27 @@ generic_acl_set(struct inode *inode, struct generic_acl_operations *ops, error = posix_acl_valid(acl); if (error) goto failed; - switch(type) { - case ACL_TYPE_ACCESS: - mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); - if (error < 0) - goto failed; - inode->i_mode = mode; - if (error == 0) { - posix_acl_release(acl); - acl = NULL; - } - break; - - case ACL_TYPE_DEFAULT: - if (!S_ISDIR(inode->i_mode)) { - error = -EINVAL; - goto failed; - } - break; + switch (type) { + case ACL_TYPE_ACCESS: + mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + goto failed; + inode->i_mode = mode; + if (error == 0) { + posix_acl_release(acl); + acl = NULL; + } + break; + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) { + error = -EINVAL; + goto failed; + } + break; } } - ops->setacl(inode, type, acl); + set_cached_acl(inode, type, acl); error = 0; failed: posix_acl_release(acl); @@ -121,14 +115,12 @@ failed: /** * generic_acl_init - Take care of acl inheritance at @inode create time - * @ops: Filesystem specific getacl and 
setacl callbacks * * Files created inside a directory with a default ACL inherit the * directory's default ACL. */ int -generic_acl_init(struct inode *inode, struct inode *dir, - struct generic_acl_operations *ops) +generic_acl_init(struct inode *inode, struct inode *dir) { struct posix_acl *acl = NULL; mode_t mode = inode->i_mode; @@ -136,7 +128,7 @@ generic_acl_init(struct inode *inode, struct inode *dir, inode->i_mode = mode & ~current_umask(); if (!S_ISLNK(inode->i_mode)) - acl = ops->getacl(dir, ACL_TYPE_DEFAULT); + acl = get_cached_acl(dir, ACL_TYPE_DEFAULT); if (acl) { struct posix_acl *clone; @@ -145,7 +137,7 @@ generic_acl_init(struct inode *inode, struct inode *dir, error = -ENOMEM; if (!clone) goto cleanup; - ops->setacl(inode, ACL_TYPE_DEFAULT, clone); + set_cached_acl(inode, ACL_TYPE_DEFAULT, clone); posix_acl_release(clone); } clone = posix_acl_clone(acl, GFP_KERNEL); @@ -156,7 +148,7 @@ generic_acl_init(struct inode *inode, struct inode *dir, if (error >= 0) { inode->i_mode = mode; if (error > 0) - ops->setacl(inode, ACL_TYPE_ACCESS, clone); + set_cached_acl(inode, ACL_TYPE_ACCESS, clone); } posix_acl_release(clone); } @@ -169,20 +161,19 @@ cleanup: /** * generic_acl_chmod - change the access acl of @inode upon chmod() - * @ops: FIlesystem specific getacl and setacl callbacks * * A chmod also changes the permissions of the owner, group/mask, and * other ACL entries. */ int -generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops) +generic_acl_chmod(struct inode *inode) { struct posix_acl *acl, *clone; int error = 0; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - acl = ops->getacl(inode, ACL_TYPE_ACCESS); + acl = get_cached_acl(inode, ACL_TYPE_ACCESS); if (acl) { clone = posix_acl_clone(acl, GFP_KERNEL); posix_acl_release(acl); @@ -190,8 +181,37 @@ generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops) return -ENOMEM; error = posix_acl_chmod_masq(clone, inode->i_mode); if (!error) - ops->setacl(inode, ACL_TYPE_ACCESS, clone); + set_cached_acl(inode, ACL_TYPE_ACCESS, clone); posix_acl_release(clone); } return error; } + +int +generic_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS); + + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } + return -EAGAIN; +} + +struct xattr_handler generic_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .list = generic_acl_list, + .get = generic_acl_get, + .set = generic_acl_set, +}; + +struct xattr_handler generic_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .list = generic_acl_list, + .get = generic_acl_get, + .set = generic_acl_set, +}; diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h index 886f5faa08cb..ca666d18ed67 100644 --- a/include/linux/generic_acl.h +++ b/include/linux/generic_acl.h @@ -1,36 +1,15 @@ -/* - * include/linux/generic_acl.h - * - * (C) 2005 Andreas Gruenbacher - * - * This file is released under the GPL. - */ +#ifndef LINUX_GENERIC_ACL_H +#define LINUX_GENERIC_ACL_H -#ifndef GENERIC_ACL_H -#define GENERIC_ACL_H +#include -#include -#include +struct inode; -/** - * struct generic_acl_operations - filesystem operations - * - * Filesystems must make these operations available to the generic - * operations. 
- */ -struct generic_acl_operations { - struct posix_acl *(*getacl)(struct inode *, int); - void (*setacl)(struct inode *, int, struct posix_acl *); -}; +extern struct xattr_handler generic_acl_access_handler; +extern struct xattr_handler generic_acl_default_handler; -size_t generic_acl_list(struct inode *, struct generic_acl_operations *, int, - char *, size_t); -int generic_acl_get(struct inode *, struct generic_acl_operations *, int, - void *, size_t); -int generic_acl_set(struct inode *, struct generic_acl_operations *, int, - const void *, size_t); -int generic_acl_init(struct inode *, struct inode *, - struct generic_acl_operations *); -int generic_acl_chmod(struct inode *, struct generic_acl_operations *); +int generic_acl_init(struct inode *, struct inode *); +int generic_acl_chmod(struct inode *); +int generic_check_acl(struct inode *inode, int mask); -#endif +#endif /* LINUX_GENERIC_ACL_H */ diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index deee7afd8d66..e164291fb3e7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -41,20 +41,4 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) extern int init_tmpfs(void); extern int shmem_fill_super(struct super_block *sb, void *data, int silent); -#ifdef CONFIG_TMPFS_POSIX_ACL -int shmem_check_acl(struct inode *, int); -int shmem_acl_init(struct inode *, struct inode *); - -extern struct xattr_handler shmem_xattr_acl_access_handler; -extern struct xattr_handler shmem_xattr_acl_default_handler; - -extern struct generic_acl_operations shmem_acl_ops; - -#else -static inline int shmem_acl_init(struct inode *inode, struct inode *dir) -{ - return 0; -} -#endif /* CONFIG_TMPFS_POSIX_ACL */ - #endif diff --git a/mm/Makefile b/mm/Makefile index 82131d0f8d85..7a68d2ab5560 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o -obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_KSM) += ksm.o diff --git a/mm/shmem.c b/mm/shmem.c index 3cd32c2ea0a0..f8485062f3ba 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -41,6 +41,7 @@ static struct vfsmount *shm_mnt; #include #include +#include #include #include #include @@ -809,7 +810,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) error = inode_setattr(inode, attr); #ifdef CONFIG_TMPFS_POSIX_ACL if (!error && (attr->ia_valid & ATTR_MODE)) - error = generic_acl_chmod(inode, &shmem_acl_ops); + error = generic_acl_chmod(inode); #endif if (page) page_cache_release(page); @@ -1823,11 +1824,13 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) return error; } } - error = shmem_acl_init(inode, dir); +#ifdef CONFIG_TMPFS_POSIX_ACL + error = generic_acl_init(inode, dir); if (error) { iput(inode); return error; } +#endif if (dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; if (S_ISDIR(mode)) @@ -2074,8 +2077,8 @@ static struct xattr_handler shmem_xattr_security_handler = { }; static struct xattr_handler *shmem_xattr_handlers[] = { - &shmem_xattr_acl_access_handler, - &shmem_xattr_acl_default_handler, + &generic_acl_access_handler, + &generic_acl_default_handler, &shmem_xattr_security_handler, NULL }; @@ -2454,7 +2457,7 @@ static const struct inode_operations shmem_inode_operations = { .getxattr = generic_getxattr, .listxattr = generic_listxattr, .removexattr = 
generic_removexattr, - .check_acl = shmem_check_acl, + .check_acl = generic_check_acl, #endif }; @@ -2477,7 +2480,7 @@ static const struct inode_operations shmem_dir_inode_operations = { .getxattr = generic_getxattr, .listxattr = generic_listxattr, .removexattr = generic_removexattr, - .check_acl = shmem_check_acl, + .check_acl = generic_check_acl, #endif }; @@ -2488,7 +2491,7 @@ static const struct inode_operations shmem_special_inode_operations = { .getxattr = generic_getxattr, .listxattr = generic_listxattr, .removexattr = generic_removexattr, - .check_acl = shmem_check_acl, + .check_acl = generic_check_acl, #endif }; diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c deleted file mode 100644 index f8d5330ec0d7..000000000000 --- a/mm/shmem_acl.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * mm/shmem_acl.c - * - * (C) 2005 Andreas Gruenbacher - * - * This file is released under the GPL. - */ - -#include -#include -#include -#include - -/** - * shmem_get_acl - generic_acl_operations->getacl() operation - */ -static struct posix_acl * -shmem_get_acl(struct inode *inode, int type) -{ - struct posix_acl *acl = NULL; - - spin_lock(&inode->i_lock); - switch(type) { - case ACL_TYPE_ACCESS: - acl = posix_acl_dup(inode->i_acl); - break; - - case ACL_TYPE_DEFAULT: - acl = posix_acl_dup(inode->i_default_acl); - break; - } - spin_unlock(&inode->i_lock); - - return acl; -} - -/** - * shmem_set_acl - generic_acl_operations->setacl() operation - */ -static void -shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl) -{ - struct posix_acl *free = NULL; - - spin_lock(&inode->i_lock); - switch(type) { - case ACL_TYPE_ACCESS: - free = inode->i_acl; - inode->i_acl = posix_acl_dup(acl); - break; - - case ACL_TYPE_DEFAULT: - free = inode->i_default_acl; - inode->i_default_acl = posix_acl_dup(acl); - break; - } - spin_unlock(&inode->i_lock); - posix_acl_release(free); -} - -struct generic_acl_operations shmem_acl_ops = { - .getacl = shmem_get_acl, - .setacl = shmem_set_acl, -}; - -static size_t -shmem_xattr_list_acl(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) -{ - return generic_acl_list(dentry->d_inode, &shmem_acl_ops, - type, list, list_size); -} - -static int -shmem_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, - size_t size, int type) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return generic_acl_get(dentry->d_inode, &shmem_acl_ops, type, - buffer, size); -} - -static int -shmem_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags, int type) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return generic_acl_set(dentry->d_inode, &shmem_acl_ops, type, - value, size); -} - -struct xattr_handler shmem_xattr_acl_access_handler = { - .prefix = POSIX_ACL_XATTR_ACCESS, - .flags = ACL_TYPE_ACCESS, - .list = shmem_xattr_list_acl, - .get = shmem_xattr_get_acl, - .set = shmem_xattr_set_acl, -}; - -struct xattr_handler shmem_xattr_acl_default_handler = { - .prefix = POSIX_ACL_XATTR_DEFAULT, - .flags = ACL_TYPE_DEFAULT, - .list = shmem_xattr_list_acl, - .get = shmem_xattr_get_acl, - .set = shmem_xattr_set_acl, -}; - -/** - * shmem_acl_init - Inizialize the acl(s) of a new inode - */ -int -shmem_acl_init(struct inode *inode, struct inode *dir) -{ - return generic_acl_init(inode, dir, &shmem_acl_ops); -} - -/** - * shmem_check_acl - check_acl() callback for generic_permission() - */ -int -shmem_check_acl(struct inode *inode, int mask) -{ - struct posix_acl *acl = shmem_get_acl(inode, 
ACL_TYPE_ACCESS); - - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - return -EAGAIN; -} -- cgit v1.2.3 From 1e431f5ce78f3ae8254d725060288b78ff74f086 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2009 16:44:53 +0100 Subject: cleanup blockdev_direct_IO locking Currently the locking in blockdev_direct_IO is a mess, we have three different locking types and very confusing checks for some of them. The most complicated one is DIO_OWN_LOCKING for reads, which happens to not actually be used. This patch gets rid of the DIO_OWN_LOCKING - as mentioned above the read case is unused anyway, and the write side is almost identical to DIO_NO_LOCKING. The difference is that DIO_NO_LOCKING always sets the create argument for the get_blocks callback to zero, but we can easily move that to the actual get_blocks callbacks. There are four users of the DIO_NO_LOCKING mode: gfs already ignores the create argument and thus is fine with the new version, ocfs2 only errors out if create were ever set, and we can remove this dead code now, the block device code only ever uses create for an error message if we are fully beyond the device which can never happen, and last but not least XFS will need the new behavour for writes. Now we can replace the lock_type variable with a flags one, where no flag means the DIO_NO_LOCKING behaviour and DIO_LOCKING is kept as the first flag. Separate out the check for not allowing to fill holes into a separate flag, although for now both flags always get set at the same time. Also revamp the documentation of the locking scheme to actually make sense. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/direct-io.c | 129 ++++++++++++++++++-------------------------- fs/ocfs2/aops.c | 34 ++---------- fs/xfs/linux-2.6/xfs_aops.c | 20 +++---- include/linux/fs.h | 22 +++----- 4 files changed, 71 insertions(+), 134 deletions(-) (limited to 'include') diff --git a/fs/direct-io.c b/fs/direct-io.c index b912270942fa..7dde0df8e8b6 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -53,13 +53,6 @@ * * If blkfactor is zero then the user's request was aligned to the filesystem's * blocksize. - * - * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems. - * This determines whether we need to do the fancy locking which prevents - * direct-IO from being able to read uninitialised disk blocks. If its zero - * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is - * not held for the entire direct write (taken briefly, initially, during a - * direct read though, but its never held for the duration of a direct-IO). 
*/ struct dio { @@ -68,7 +61,7 @@ struct dio { struct inode *inode; int rw; loff_t i_size; /* i_size when submitted */ - int lock_type; /* doesn't change */ + int flags; /* doesn't change */ unsigned blkbits; /* doesn't change */ unsigned blkfactor; /* When we're using an alignment which is finer than the filesystem's soft @@ -240,7 +233,8 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) if (dio->end_io && dio->result) dio->end_io(dio->iocb, offset, transferred, dio->map_bh.b_private); - if (dio->lock_type == DIO_LOCKING) + + if (dio->flags & DIO_LOCKING) /* lockdep: non-owner release */ up_read_non_owner(&dio->inode->i_alloc_sem); @@ -515,21 +509,24 @@ static int get_more_blocks(struct dio *dio) map_bh->b_state = 0; map_bh->b_size = fs_count << dio->inode->i_blkbits; + /* + * For writes inside i_size on a DIO_SKIP_HOLES filesystem we + * forbid block creations: only overwrites are permitted. + * We will return early to the caller once we see an + * unmapped buffer head returned, and the caller will fall + * back to buffered I/O. + * + * Otherwise the decision is left to the get_blocks method, + * which may decide to handle it or also return an unmapped + * buffer head. + */ create = dio->rw & WRITE; - if (dio->lock_type == DIO_LOCKING) { + if (dio->flags & DIO_SKIP_HOLES) { if (dio->block_in_file < (i_size_read(dio->inode) >> dio->blkbits)) create = 0; - } else if (dio->lock_type == DIO_NO_LOCKING) { - create = 0; } - /* - * For writes inside i_size we forbid block creations: only - * overwrites are permitted. We fall back to buffered writes - * at a higher level for inside-i_size block-instantiating - * writes. - */ ret = (*dio->get_block)(dio->inode, fs_startblk, map_bh, create); } @@ -1039,7 +1036,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, * we can let i_mutex go now that its achieved its purpose * of protecting us from looking up uninitialized blocks. */ - if ((rw == READ) && (dio->lock_type == DIO_LOCKING)) + if (rw == READ && (dio->flags & DIO_LOCKING)) mutex_unlock(&dio->inode->i_mutex); /* @@ -1086,30 +1083,28 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, /* * This is a library function for use by filesystem drivers. - * The locking rules are governed by the dio_lock_type parameter. * - * DIO_NO_LOCKING (no locking, for raw block device access) - * For writes, i_mutex is not held on entry; it is never taken. + * The locking rules are governed by the flags parameter: + * - if the flags value contains DIO_LOCKING we use a fancy locking + * scheme for dumb filesystems. + * For writes this function is called under i_mutex and returns with + * i_mutex held, for reads, i_mutex is not held on entry, but it is + * taken and dropped again before returning. + * For reads and writes i_alloc_sem is taken in shared mode and released + * on I/O completion (which may happen asynchronously after returning to + * the caller). * - * DIO_LOCKING (simple locking for regular files) - * For writes we are called under i_mutex and return with i_mutex held, even - * though it is internally dropped. - * For reads, i_mutex is not held on entry, but it is taken and dropped before - * returning. - * - * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of - * uninitialised data, allowing parallel direct readers and writers) - * For writes we are called without i_mutex, return without it, never touch it. - * For reads we are called under i_mutex and return with i_mutex held, even - * though it may be internally dropped. 
- * - * Additional i_alloc_sem locking requirements described inline below. + * - if the flags value does NOT contain DIO_LOCKING we don't use any + * internal locking but rather rely on the filesystem to synchronize + * direct I/O reads/writes versus each other and truncate. + * For reads and writes both i_mutex and i_alloc_sem are not held on + * entry and are never taken. */ ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - int dio_lock_type) + int flags) { int seg; size_t size; @@ -1120,8 +1115,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ssize_t retval = -EINVAL; loff_t end = offset; struct dio *dio; - int release_i_mutex = 0; - int acquire_i_mutex = 0; if (rw & WRITE) rw = WRITE_ODIRECT_PLUG; @@ -1156,43 +1149,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (!dio) goto out; - /* - * For block device access DIO_NO_LOCKING is used, - * neither readers nor writers do any locking at all - * For regular files using DIO_LOCKING, - * readers need to grab i_mutex and i_alloc_sem - * writers need to grab i_alloc_sem only (i_mutex is already held) - * For regular files using DIO_OWN_LOCKING, - * neither readers nor writers take any locks here - */ - dio->lock_type = dio_lock_type; - if (dio_lock_type != DIO_NO_LOCKING) { + dio->flags = flags; + if (dio->flags & DIO_LOCKING) { /* watch out for a 0 len io from a tricksy fs */ if (rw == READ && end > offset) { - struct address_space *mapping; + struct address_space *mapping = + iocb->ki_filp->f_mapping; - mapping = iocb->ki_filp->f_mapping; - if (dio_lock_type != DIO_OWN_LOCKING) { - mutex_lock(&inode->i_mutex); - release_i_mutex = 1; - } + /* will be released by direct_io_worker */ + mutex_lock(&inode->i_mutex); retval = filemap_write_and_wait_range(mapping, offset, end - 1); if (retval) { + mutex_unlock(&inode->i_mutex); kfree(dio); goto out; } - - if (dio_lock_type == DIO_OWN_LOCKING) { - mutex_unlock(&inode->i_mutex); - acquire_i_mutex = 1; - } } - if (dio_lock_type == DIO_LOCKING) - /* lockdep: not the owner will release it */ - down_read_non_owner(&inode->i_alloc_sem); + /* + * Will be released at I/O completion, possibly in a + * different thread. + */ + down_read_non_owner(&inode->i_alloc_sem); } /* @@ -1210,24 +1190,19 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again for DIO_LOCKING. - * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by - * it's own meaner. + * + * NOTE: filesystems with their own locking have to handle this + * on their own. 
*/ - if (unlikely(retval < 0 && (rw & WRITE))) { - loff_t isize = i_size_read(inode); - - if (end > isize && dio_lock_type == DIO_LOCKING) - vmtruncate(inode, isize); + if (dio->flags & DIO_LOCKING) { + if (unlikely((rw & WRITE) && retval < 0)) { + loff_t isize = i_size_read(inode); + if (end > isize ) + vmtruncate(inode, isize); + } } - if (rw == READ && dio_lock_type == DIO_LOCKING) - release_i_mutex = 0; - out: - if (release_i_mutex) - mutex_unlock(&inode->i_mutex); - else if (acquire_i_mutex) - mutex_lock(&inode->i_mutex); return retval; } EXPORT_SYMBOL(__blockdev_direct_IO); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index deb2b132ae5e..3dae4a13f6e4 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -547,6 +547,9 @@ bail: * * called like this: dio->get_blocks(dio->inode, fs_startblk, * fs_count, map_bh, dio->rw == WRITE); + * + * Note that we never bother to allocate blocks here, and thus ignore the + * create argument. */ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -563,14 +566,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); - /* - * Any write past EOF is not allowed because we'd be extending. - */ - if (create && (iblock + max_blocks) > inode_blocks) { - ret = -EIO; - goto bail; - } - /* This figures out the size of the next contiguous block, and * our logical offset */ ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, @@ -582,15 +577,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, goto bail; } - if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) { - ocfs2_error(inode->i_sb, - "Inode %llu has a hole at block %llu\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - (unsigned long long)iblock); - ret = -EROFS; - goto bail; - } - /* We should already CoW the refcounted extent. */ BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); /* @@ -601,20 +587,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, */ if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) map_bh(bh_result, inode->i_sb, p_blkno); - else { - /* - * ocfs2_prepare_inode_for_write() should have caught - * the case where we'd be filling a hole and triggered - * a buffered write instead. - */ - if (create) { - ret = -EIO; - mlog_errno(ret); - goto bail; - } - + else clear_buffer_mapped(bh_result); - } /* make sure we don't map more than max_blocks blocks here as that's all the kernel will handle at this point. */ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index d798c54296eb..66abe36c1213 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1474,19 +1474,13 @@ xfs_vm_direct_IO( bdev = xfs_find_bdev_for_inode(XFS_I(inode)); - if (rw == WRITE) { - iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); - ret = blockdev_direct_IO_own_locking(rw, iocb, inode, - bdev, iov, offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct); - } else { - iocb->private = xfs_alloc_ioend(inode, IOMAP_READ); - ret = blockdev_direct_IO_no_locking(rw, iocb, inode, - bdev, iov, offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct); - } + iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? 
+ IOMAP_UNWRITTEN : IOMAP_READ); + + ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + xfs_end_io_direct); if (unlikely(ret != -EIOCBQUEUED && iocb->private)) xfs_destroy_ioend(iocb->private); diff --git a/include/linux/fs.h b/include/linux/fs.h index cdc23be4edde..7c8ff12d1995 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2263,9 +2263,11 @@ ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, int lock_type); enum { - DIO_LOCKING = 1, /* need locking between buffered and direct access */ - DIO_NO_LOCKING, /* bdev; no locking at all between buffered/direct */ - DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */ + /* need locking between buffered and direct access */ + DIO_LOCKING = 0x01, + + /* filesystem does not support filling holes */ + DIO_SKIP_HOLES = 0x02, }; static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, @@ -2274,7 +2276,8 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_LOCKING); + nr_segs, get_block, end_io, + DIO_LOCKING | DIO_SKIP_HOLES); } static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, @@ -2283,16 +2286,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_NO_LOCKING); -} - -static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, - struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) -{ - return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_OWN_LOCKING); + nr_segs, get_block, end_io, 0); } #endif -- cgit v1.2.3 From efbbd05a595343a413964ad85a2ad359b7b7efbd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:40 +0100 Subject: sched: Add pre and post wakeup hooks As will be apparent in the next patch, we need a pre wakeup hook for sched_fair task migration, hence rename the post wakeup hook and one pre wakeup. 
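As a rough illustration (not part of the patch), a scheduling class wires the renamed hooks up like the hypothetical sketch below; only the member names and prototypes follow the diff, the demo_* identifiers and empty bodies are made up.

#include <linux/sched.h>

/* Illustrative only: a hypothetical scheduling class using the renamed
 * hooks. task_waking() runs before the task is migrated, while the old
 * rq->lock is still held; task_woken() runs once the task is runnable
 * on its new runqueue. */
static void demo_task_waking(struct rq *this_rq, struct task_struct *task)
{
}

static void demo_task_woken(struct rq *this_rq, struct task_struct *task)
{
}

static const struct sched_class demo_sched_class = {
        /* ... the usual enqueue/dequeue/pick_next callbacks ... */
        .task_waking    = demo_task_waking,     /* new pre-wakeup hook */
        .task_woken     = demo_task_woken,      /* renamed from .task_wake_up */
};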
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170518.114746117@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 ++- kernel/sched.c | 12 ++++++++---- kernel/sched_rt.c | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c858f38e81a..2c9fa1ccebff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1091,7 +1091,8 @@ struct sched_class { enum cpu_idle_type idle); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); - void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); + void (*task_waking) (struct rq *this_rq, struct task_struct *task); + void (*task_woken) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, const struct cpumask *newmask); diff --git a/kernel/sched.c b/kernel/sched.c index 297dc441ff96..6c571bdd5658 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2412,6 +2412,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, if (task_contributes_to_load(p)) rq->nr_uninterruptible--; p->state = TASK_WAKING; + + if (p->sched_class->task_waking) + p->sched_class->task_waking(rq, p); + __task_rq_unlock(rq); cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); @@ -2475,8 +2479,8 @@ out_running: p->state = TASK_RUNNING; #ifdef CONFIG_SMP - if (p->sched_class->task_wake_up) - p->sched_class->task_wake_up(rq, p); + if (p->sched_class->task_woken) + p->sched_class->task_woken(rq, p); if (unlikely(rq->idle_stamp)) { u64 delta = rq->clock - rq->idle_stamp; @@ -2666,8 +2670,8 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) trace_sched_wakeup_new(rq, p, 1); check_preempt_curr(rq, p, WF_FORK); #ifdef CONFIG_SMP - if (p->sched_class->task_wake_up) - p->sched_class->task_wake_up(rq, p); + if (p->sched_class->task_woken) + p->sched_class->task_woken(rq, p); #endif task_rq_unlock(rq, &flags); } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d2ea2828164e..f48328ac216f 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq) * If we are not running and we are not going to reschedule soon, we should * try to push tasks away now */ -static void task_wake_up_rt(struct rq *rq, struct task_struct *p) +static void task_woken_rt(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && @@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = { .rq_offline = rq_offline_rt, .pre_schedule = pre_schedule_rt, .post_schedule = post_schedule_rt, - .task_wake_up = task_wake_up_rt, + .task_woken = task_woken_rt, .switched_from = switched_from_rt, #endif -- cgit v1.2.3 From 88ec22d3edb72b261f8628226cd543589a6d5e1b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:41 +0100 Subject: sched: Remove the cfs_rq dependency from set_task_cpu() In order to remove the cfs_rq dependency from set_task_cpu() we need to ensure the task is cfs_rq invariant for all callsites. The simple approach is to substract cfs_rq->min_vruntime from se->vruntime on dequeue, and add cfs_rq->min_vruntime on enqueue. However, this has the downside of breaking FAIR_SLEEPERS since we loose the old vruntime as we only maintain the relative position. 
To solve this, we observe that we only migrate runnable tasks, we do this using deactivate_task(.sleep=0) and activate_task(.wakeup=0), therefore we can restrain the min_vruntime invariance to that state. The only other case is wakeup balancing, since we want to maintain the old vruntime we cannot make it relative on dequeue, but since we don't migrate inactive tasks, we can do so right before we activate it again. This is where we need the new pre-wakeup hook, we need to call this while still holding the old rq->lock. We could fold it into ->select_task_rq(), but since that has multiple callsites and would obfuscate the locking requirements, that seems like a fudge. This leaves the fork() case, simply make sure that ->task_fork() leaves the ->vruntime in a relative state. This covers all cases where set_task_cpu() gets called, and ensures it sees a relative vruntime. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170518.191697025@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 6 +----- kernel/sched_fair.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 46 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c9fa1ccebff..973b2b89f86d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1116,7 +1116,7 @@ struct sched_class { struct task_struct *task); #ifdef CONFIG_FAIR_GROUP_SCHED - void (*moved_group) (struct task_struct *p); + void (*moved_group) (struct task_struct *p, int on_rq); #endif }; diff --git a/kernel/sched.c b/kernel/sched.c index 6c571bdd5658..f92ce63edfff 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2038,8 +2038,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { int old_cpu = task_cpu(p); - struct cfs_rq *old_cfsrq = task_cfs_rq(p), - *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); #ifdef CONFIG_SCHED_DEBUG /* @@ -2056,8 +2054,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); } - p->se.vruntime -= old_cfsrq->min_vruntime - - new_cfsrq->min_vruntime; __set_task_cpu(p, new_cpu); } @@ -10102,7 +10098,7 @@ void sched_move_task(struct task_struct *tsk) #ifdef CONFIG_FAIR_GROUP_SCHED if (tsk->sched_class->moved_group) - tsk->sched_class->moved_group(tsk); + tsk->sched_class->moved_group(tsk, on_rq); #endif if (unlikely(running)) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ec1d2715620c..42ac3c9f66f6 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -510,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq, exec_clock, delta_exec); delta_exec_weighted = calc_delta_fair(delta_exec, curr); + curr->vruntime += delta_exec_weighted; update_min_vruntime(cfs_rq); } @@ -765,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) se->vruntime = vruntime; } +#define ENQUEUE_WAKEUP 1 +#define ENQUEUE_MIGRATE 2 + static void -enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) +enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { + /* + * Update the normalized vruntime before updating min_vruntime + * through callig update_curr(). 
+ */ + if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE)) + se->vruntime += cfs_rq->min_vruntime; + /* * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); account_entity_enqueue(cfs_rq, se); - if (wakeup) { + if (flags & ENQUEUE_WAKEUP) { place_entity(cfs_rq, se, 0); enqueue_sleeper(cfs_rq, se); } @@ -828,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) __dequeue_entity(cfs_rq, se); account_entity_dequeue(cfs_rq, se); update_min_vruntime(cfs_rq); + + /* + * Normalize the entity after updating the min_vruntime because the + * update can refer to the ->curr item and we need to reflect this + * movement in our normalized position. + */ + if (!sleep) + se->vruntime -= cfs_rq->min_vruntime; } /* @@ -1038,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; + int flags = 0; + + if (wakeup) + flags |= ENQUEUE_WAKEUP; + if (p->state == TASK_WAKING) + flags |= ENQUEUE_MIGRATE; for_each_sched_entity(se) { if (se->on_rq) break; cfs_rq = cfs_rq_of(se); - enqueue_entity(cfs_rq, se, wakeup); - wakeup = 1; + enqueue_entity(cfs_rq, se, flags); + flags = ENQUEUE_WAKEUP; } hrtick_update(rq); @@ -1120,6 +1145,14 @@ static void yield_task_fair(struct rq *rq) #ifdef CONFIG_SMP +static void task_waking_fair(struct rq *rq, struct task_struct *p) +{ + struct sched_entity *se = &p->se; + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + se->vruntime -= cfs_rq->min_vruntime; +} + #ifdef CONFIG_FAIR_GROUP_SCHED /* * effective_load() calculates the load change as seen from the root_task_group @@ -1978,6 +2011,8 @@ static void task_fork_fair(struct task_struct *p) resched_task(rq->curr); } + se->vruntime -= cfs_rq->min_vruntime; + raw_spin_unlock_irqrestore(&rq->lock, flags); } @@ -2031,12 +2066,13 @@ static void set_curr_task_fair(struct rq *rq) } #ifdef CONFIG_FAIR_GROUP_SCHED -static void moved_group_fair(struct task_struct *p) +static void moved_group_fair(struct task_struct *p, int on_rq) { struct cfs_rq *cfs_rq = task_cfs_rq(p); update_curr(cfs_rq); - place_entity(cfs_rq, &p->se, 1); + if (!on_rq) + place_entity(cfs_rq, &p->se, 1); } #endif @@ -2076,6 +2112,8 @@ static const struct sched_class fair_sched_class = { .move_one_task = move_one_task_fair, .rq_online = rq_online_fair, .rq_offline = rq_offline_fair, + + .task_waking = task_waking_fair, #endif .set_curr_task = set_curr_task_fair, -- cgit v1.2.3 From 70023de88c58a81a730ab4d13c51a30e537ec76e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 29 Oct 2009 11:04:28 +0800 Subject: ACPI: Add a generic API for _OSC -v2 v2->v1: .improve debug info as suggedted by Bjorn,Kenji .API is using uuid string as suggested by Alexey Add an API to execute _OSC. A lot of devices can have this method, so add a generic API. 
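A condensed caller sketch, not part of the patch: the UUID below is borrowed from the PCI host-bridge conversion later in this series, and the capability words are placeholders; real callers fill in their own support/control bits.

#include <linux/acpi.h>
#include <linux/slab.h>

/* Hypothetical user of the new acpi_run_osc() helper. */
static char demo_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766";

static acpi_status demo_query_osc(acpi_handle handle, u32 *granted)
{
        u32 capbuf[3];
        struct acpi_osc_context context = {
                .uuid_str    = demo_uuid_str,
                .rev         = 1,
                .cap.length  = sizeof(capbuf),
                .cap.pointer = capbuf,
        };
        acpi_status status;

        capbuf[OSC_QUERY_TYPE]   = OSC_QUERY_ENABLE;    /* query, don't commit */
        capbuf[OSC_SUPPORT_TYPE] = 0;                   /* placeholder support bits */
        capbuf[OSC_CONTROL_TYPE] = 0;                   /* placeholder control bits */

        status = acpi_run_osc(handle, &context);
        if (ACPI_SUCCESS(status)) {
                /* third dword of the returned buffer carries the granted bits */
                *granted = ((u32 *)context.ret.pointer)[OSC_CONTROL_TYPE];
                kfree(context.ret.pointer);     /* freed by the caller on success */
        }
        return status;
}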
Signed-off-by: Shaohua Li Signed-off-by: Len Brown --- drivers/acpi/bus.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 9 ++++ 2 files changed, 131 insertions(+) (limited to 'include') diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 741191524353..12240be58f27 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -344,6 +344,128 @@ bool acpi_bus_can_wakeup(acpi_handle handle) EXPORT_SYMBOL(acpi_bus_can_wakeup); +static void acpi_print_osc_error(acpi_handle handle, + struct acpi_osc_context *context, char *error) +{ + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER}; + int i; + + if (ACPI_FAILURE(acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer))) + printk(KERN_DEBUG "%s\n", error); + else { + printk(KERN_DEBUG "%s:%s\n", (char *)buffer.pointer, error); + kfree(buffer.pointer); + } + printk(KERN_DEBUG"_OSC request data:"); + for (i = 0; i < context->cap.length; i += sizeof(u32)) + printk("%x ", *((u32 *)(context->cap.pointer + i))); + printk("\n"); +} + +static u8 hex_val(unsigned char c) +{ + return isdigit(c) ? c - '0' : toupper(c) - 'A' + 10; +} + +static acpi_status acpi_str_to_uuid(char *str, u8 *uuid) +{ + int i; + static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21, + 24, 26, 28, 30, 32, 34}; + + if (strlen(str) != 36) + return AE_BAD_PARAMETER; + for (i = 0; i < 36; i++) { + if (i == 8 || i == 13 || i == 18 || i == 23) { + if (str[i] != '-') + return AE_BAD_PARAMETER; + } else if (!isxdigit(str[i])) + return AE_BAD_PARAMETER; + } + for (i = 0; i < 16; i++) { + uuid[i] = hex_val(str[opc_map_to_uuid[i]]) << 4; + uuid[i] |= hex_val(str[opc_map_to_uuid[i] + 1]); + } + return AE_OK; +} + +acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context) +{ + acpi_status status; + struct acpi_object_list input; + union acpi_object in_params[4]; + union acpi_object *out_obj; + u8 uuid[16]; + u32 errors; + + if (!context) + return AE_ERROR; + if (ACPI_FAILURE(acpi_str_to_uuid(context->uuid_str, uuid))) + return AE_ERROR; + context->ret.length = ACPI_ALLOCATE_BUFFER; + context->ret.pointer = NULL; + + /* Setting up input parameters */ + input.count = 4; + input.pointer = in_params; + in_params[0].type = ACPI_TYPE_BUFFER; + in_params[0].buffer.length = 16; + in_params[0].buffer.pointer = uuid; + in_params[1].type = ACPI_TYPE_INTEGER; + in_params[1].integer.value = context->rev; + in_params[2].type = ACPI_TYPE_INTEGER; + in_params[2].integer.value = context->cap.length/sizeof(u32); + in_params[3].type = ACPI_TYPE_BUFFER; + in_params[3].buffer.length = context->cap.length; + in_params[3].buffer.pointer = context->cap.pointer; + + status = acpi_evaluate_object(handle, "_OSC", &input, &context->ret); + if (ACPI_FAILURE(status)) + return status; + + /* return buffer should have the same length as cap buffer */ + if (context->ret.length != context->cap.length) + return AE_NULL_OBJECT; + + out_obj = context->ret.pointer; + if (out_obj->type != ACPI_TYPE_BUFFER) { + acpi_print_osc_error(handle, context, + "_OSC evaluation returned wrong type"); + status = AE_TYPE; + goto out_kfree; + } + /* Need to ignore the bit0 in result code */ + errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); + if (errors) { + if (errors & OSC_REQUEST_ERROR) + acpi_print_osc_error(handle, context, + "_OSC request failed"); + if (errors & OSC_INVALID_UUID_ERROR) + acpi_print_osc_error(handle, context, + "_OSC invalid UUID"); + if (errors & OSC_INVALID_REVISION_ERROR) + acpi_print_osc_error(handle, context, + "_OSC invalid revision"); + if 
(errors & OSC_CAPABILITIES_MASK_ERROR) { + if (((u32 *)context->cap.pointer)[OSC_QUERY_TYPE] + & OSC_QUERY_ENABLE) + goto out_success; + status = AE_SUPPORT; + goto out_kfree; + } + status = AE_ERROR; + goto out_kfree; + } +out_success: + return AE_OK; + +out_kfree: + kfree(context->ret.pointer); + context->ret.pointer = NULL; + return status; +} +EXPORT_SYMBOL(acpi_run_osc); + /* -------------------------------------------------------------------------- Event Management -------------------------------------------------------------------------- */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index dfcd920c3e54..3247e09db20d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -253,6 +253,13 @@ void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); #endif /* CONFIG_PM_SLEEP */ +struct acpi_osc_context { + char *uuid_str; /* uuid string */ + int rev; + struct acpi_buffer cap; /* arg2/arg3 */ + struct acpi_buffer ret; /* free by caller if success */ +}; + #define OSC_QUERY_TYPE 0 #define OSC_SUPPORT_TYPE 1 #define OSC_CONTROL_TYPE 2 @@ -265,6 +272,8 @@ void __init acpi_s4_no_nvs(void); #define OSC_INVALID_REVISION_ERROR 8 #define OSC_CAPABILITIES_MASK_ERROR 16 +acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); + /* _OSC DW1 Definition (OS Support Fields) */ #define OSC_EXT_PCI_CONFIG_SUPPORT 1 #define OSC_ACTIVE_STATE_PWR_SUPPORT 2 -- cgit v1.2.3 From 3a9622dc4659af44a8098a233f65c51e495ff0a5 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 29 Oct 2009 11:04:50 +0800 Subject: ACPI: cleanup pci_root _OSC code. Signed-off-by: Shaohua Li Signed-off-by: Len Brown --- drivers/acpi/pci_root.c | 76 +++++++++---------------------------------------- include/linux/acpi.h | 5 ++-- 2 files changed, 17 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 1af808171d46..101cce3681d1 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -202,72 +202,24 @@ static void acpi_pci_bridge_scan(struct acpi_device *device) } } -static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, - 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; +static u8 pci_osc_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766"; static acpi_status acpi_pci_run_osc(acpi_handle handle, const u32 *capbuf, u32 *retval) { + struct acpi_osc_context context = { + .uuid_str = pci_osc_uuid_str, + .rev = 1, + .cap.length = 12, + .cap.pointer = (void *)capbuf, + }; acpi_status status; - struct acpi_object_list input; - union acpi_object in_params[4]; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *out_obj; - u32 errors; - - /* Setting up input parameters */ - input.count = 4; - input.pointer = in_params; - in_params[0].type = ACPI_TYPE_BUFFER; - in_params[0].buffer.length = 16; - in_params[0].buffer.pointer = OSC_UUID; - in_params[1].type = ACPI_TYPE_INTEGER; - in_params[1].integer.value = 1; - in_params[2].type = ACPI_TYPE_INTEGER; - in_params[2].integer.value = 3; - in_params[3].type = ACPI_TYPE_BUFFER; - in_params[3].buffer.length = 12; - in_params[3].buffer.pointer = (u8 *)capbuf; - - status = acpi_evaluate_object(handle, "_OSC", &input, &output); - if (ACPI_FAILURE(status)) - return status; - if (!output.length) - return AE_NULL_OBJECT; - - out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_DEBUG "_OSC evaluation returned wrong type\n"); - status = AE_TYPE; - goto out_kfree; - } - /* Need to ignore the 
bit0 in result code */ - errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) { - if (errors & OSC_REQUEST_ERROR) - printk(KERN_DEBUG "_OSC request failed\n"); - if (errors & OSC_INVALID_UUID_ERROR) - printk(KERN_DEBUG "_OSC invalid UUID\n"); - if (errors & OSC_INVALID_REVISION_ERROR) - printk(KERN_DEBUG "_OSC invalid revision\n"); - if (errors & OSC_CAPABILITIES_MASK_ERROR) { - if (capbuf[OSC_QUERY_TYPE] & OSC_QUERY_ENABLE) - goto out_success; - printk(KERN_DEBUG - "Firmware did not grant requested _OSC control\n"); - status = AE_SUPPORT; - goto out_kfree; - } - status = AE_ERROR; - goto out_kfree; + status = acpi_run_osc(handle, &context); + if (ACPI_SUCCESS(status)) { + *retval = *((u32 *)(context.ret.pointer + 8)); + kfree(context.ret.pointer); } -out_success: - *retval = *((u32 *)(out_obj->buffer.pointer + 8)); - status = AE_OK; - -out_kfree: - kfree(output.pointer); return status; } @@ -277,10 +229,10 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 flags) u32 support_set, result, capbuf[3]; /* do _OSC query for all possible controls */ - support_set = root->osc_support_set | (flags & OSC_SUPPORT_MASKS); + support_set = root->osc_support_set | (flags & OSC_PCI_SUPPORT_MASKS); capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; capbuf[OSC_SUPPORT_TYPE] = support_set; - capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; + capbuf[OSC_CONTROL_TYPE] = OSC_PCI_CONTROL_MASKS; status = acpi_pci_run_osc(root->device->handle, capbuf, &result); if (ACPI_SUCCESS(status)) { @@ -427,7 +379,7 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) if (ACPI_FAILURE(status)) return status; - control_req = (flags & OSC_CONTROL_MASKS); + control_req = (flags & OSC_PCI_CONTROL_MASKS); if (!control_req) return AE_TYPE; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3247e09db20d..535beecc37cf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -263,7 +263,6 @@ struct acpi_osc_context { #define OSC_QUERY_TYPE 0 #define OSC_SUPPORT_TYPE 1 #define OSC_CONTROL_TYPE 2 -#define OSC_SUPPORT_MASKS 0x1f /* _OSC DW0 Definition */ #define OSC_QUERY_ENABLE 1 @@ -274,12 +273,14 @@ struct acpi_osc_context { acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); +/* PCI defined _OSC bits */ /* _OSC DW1 Definition (OS Support Fields) */ #define OSC_EXT_PCI_CONFIG_SUPPORT 1 #define OSC_ACTIVE_STATE_PWR_SUPPORT 2 #define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4 #define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8 #define OSC_MSI_SUPPORT 16 +#define OSC_PCI_SUPPORT_MASKS 0x1f /* _OSC DW1 Definition (OS Control Fields) */ #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1 @@ -288,7 +289,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_PCI_EXPRESS_AER_CONTROL 8 #define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16 -#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ +#define OSC_PCI_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ OSC_SHPC_NATIVE_HP_CONTROL | \ OSC_PCI_EXPRESS_PME_CONTROL | \ OSC_PCI_EXPRESS_AER_CONTROL | \ -- cgit v1.2.3 From 3563ff964fdc36358cef0330936fdac28e65142a Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 29 Oct 2009 11:05:05 +0800 Subject: ACPI: Add platform-wide _OSC support. 
Signed-off-by: Shaohua Li Signed-off-by: Len Brown --- drivers/acpi/bus.c | 26 ++++++++++++++++++++++++++ include/linux/acpi.h | 7 +++++++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 12240be58f27..65f7e335f122 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -466,6 +466,30 @@ out_kfree: } EXPORT_SYMBOL(acpi_run_osc); +static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; +static void acpi_bus_osc_support(void) +{ + u32 capbuf[2]; + struct acpi_osc_context context = { + .uuid_str = sb_uuid_str, + .rev = 1, + .cap.length = 8, + .cap.pointer = capbuf, + }; + acpi_handle handle; + + capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + capbuf[OSC_SUPPORT_TYPE] = OSC_SB_PR3_SUPPORT; /* _PR3 is in use */ +#ifdef CONFIG_ACPI_PROCESSOR_AGGREGATOR + capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PAD_SUPPORT; +#endif + if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) + return; + if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) + kfree(context.ret.pointer); + /* do we need to check the returned cap? Sounds no */ +} + /* -------------------------------------------------------------------------- Event Management -------------------------------------------------------------------------- */ @@ -856,6 +880,8 @@ static int __init acpi_bus_init(void) status = acpi_ec_ecdt_probe(); /* Ignore result. Not having an ECDT is not fatal. */ + acpi_bus_osc_support(); + status = acpi_initialize_objects(ACPI_FULL_INITIALIZATION); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX "Unable to initialize ACPI objects\n"); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 535beecc37cf..e11090d462d2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -273,6 +273,13 @@ struct acpi_osc_context { acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); +/* platform-wide _OSC bits */ +#define OSC_SB_PAD_SUPPORT 1 +#define OSC_SB_PPC_OST_SUPPORT 2 +#define OSC_SB_PR3_SUPPORT 4 +#define OSC_SB_CPUHP_OST_SUPPORT 8 +#define OSC_SB_APEI_SUPPORT 16 + /* PCI defined _OSC bits */ /* _OSC DW1 Definition (OS Support Fields) */ #define OSC_EXT_PCI_CONFIG_SUPPORT 1 -- cgit v1.2.3 From 2ee1abad73a12df5521cd3f017f081f1f684a361 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Nov 2009 18:03:15 +0000 Subject: xfs: improve metadata I/O merging in the elevator Change all async metadata buffers to use [READ|WRITE]_META I/O types so that the I/O doesn't get issued immediately. This allows merging of adjacent metadata requests but still prioritises them over bulk data. This shows a 10-15% improvement in sequential create speed of small files. Don't include the log buffers in this classification - leave them as sync types so they are issued immediately. 
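For illustration only, the request-type selection in _xfs_buf_ioapply() after this change boils down to roughly the helper below; the flag plumbing is simplified, and the XBF_* flags and READ/WRITE_* request types come from xfs_buf.h and fs.h as modified by this patch.

/* Rough restatement of the rw selection; not a drop-in replacement
 * for the real _xfs_buf_ioapply() logic. */
static int demo_buf_rw(unsigned int flags)
{
        if (flags & XBF_ORDERED)                /* barrier writes */
                return WRITE_BARRIER;
        if (flags & XBF_LOG_BUFFER)             /* log I/O: dispatch immediately */
                return (flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
        if (flags & _XBF_RUN_QUEUES)            /* async metadata: allow merging */
                return (flags & XBF_WRITE) ? WRITE_META : READ_META;
        return (flags & XBF_WRITE) ? WRITE :    /* plain data */
               (flags & XBF_READ_AHEAD) ? READA : READ;
}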
Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 6 +++++- fs/xfs/linux-2.6/xfs_buf.h | 1 + fs/xfs/xfs_log.c | 2 ++ include/linux/fs.h | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index b4c7d4248aac..162359b664ca 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1149,10 +1149,14 @@ _xfs_buf_ioapply( if (bp->b_flags & XBF_ORDERED) { ASSERT(!(bp->b_flags & XBF_READ)); rw = WRITE_BARRIER; - } else if (bp->b_flags & _XBF_RUN_QUEUES) { + } else if (bp->b_flags & XBF_LOG_BUFFER) { ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); bp->b_flags &= ~_XBF_RUN_QUEUES; rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; + } else if (bp->b_flags & _XBF_RUN_QUEUES) { + ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); + bp->b_flags &= ~_XBF_RUN_QUEUES; + rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META; } else { rw = (bp->b_flags & XBF_WRITE) ? WRITE : (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a509f4addc2a..a34c7b54822d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -55,6 +55,7 @@ typedef enum { XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ XBF_ORDERED = (1 << 11), /* use ordered writes */ XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ + XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */ /* flags used only as arguments to access routines */ XBF_LOCK = (1 << 14), /* lock requested */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4cb1792040e3..600b5b06aaeb 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1441,6 +1441,7 @@ xlog_sync(xlog_t *log, XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); + bp->b_flags |= XBF_LOG_BUFFER; /* * Do an ordered write for the log block. * Its unnecessary to flush the first split block in the log wrap case. @@ -1478,6 +1479,7 @@ xlog_sync(xlog_t *log, XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); + bp->b_flags |= XBF_LOG_BUFFER; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) XFS_BUF_ORDERED(bp); dptr = XFS_BUF_PTR(bp); diff --git a/include/linux/fs.h b/include/linux/fs.h index b23a7018eb90..cf7fc8a7fe6a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -152,6 +152,7 @@ struct inodes_stat_t { #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) #define WRITE_ODIRECT_PLUG (WRITE | (1 << BIO_RW_SYNCIO)) +#define WRITE_META (WRITE | (1 << BIO_RW_META)) #define SWRITE_SYNC_PLUG \ (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) -- cgit v1.2.3 From 6e1415467614e854fee660ff6648bd10fa976e95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 15 Dec 2009 19:27:45 +0000 Subject: NOMMU: Optimise away the {dac_,}mmap_min_addr tests In NOMMU mode clamp dac_mmap_min_addr to zero to cause the tests on it to be skipped by the compiler. We do this as the minimum mmap address doesn't make any sense in NOMMU mode. mmap_min_addr and round_hint_to_min() can be discarded entirely in NOMMU mode. 
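To illustrate the optimisation (a sketch, not part of the patch): with dac_mmap_min_addr fixed at 0UL on !CONFIG_MMU, a check like the stand-in below compares an unsigned address against zero, so gcc can drop the branch at compile time.

#include <linux/errno.h>
#include <linux/security.h>

/* Simplified stand-in for the real users of dac_mmap_min_addr. On a
 * NOMMU build the macro expands to 0UL, "addr < 0UL" is never true,
 * and the whole test is eliminated. */
static inline int demo_check_min_addr(unsigned long addr)
{
        if (addr < dac_mmap_min_addr)
                return -EACCES;
        return 0;
}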
Signed-off-by: David Howells Acked-by: Eric Paris Signed-off-by: James Morris --- include/linux/security.h | 7 +++++++ kernel/sysctl.c | 2 ++ mm/Kconfig | 1 + security/Makefile | 3 ++- 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 466cbadbd1ef..2c627d361c02 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -95,8 +95,13 @@ struct seq_file; extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); +#ifdef CONFIG_MMU extern unsigned long mmap_min_addr; extern unsigned long dac_mmap_min_addr; +#else +#define dac_mmap_min_addr 0UL +#endif + /* * Values used in the task_security_ops calls */ @@ -121,6 +126,7 @@ struct request_sock; #define LSM_UNSAFE_PTRACE 2 #define LSM_UNSAFE_PTRACE_CAP 4 +#ifdef CONFIG_MMU /* * If a hint addr is less than mmap_min_addr change hint to be as * low as possible but still greater than mmap_min_addr @@ -135,6 +141,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) } extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#endif #ifdef CONFIG_SECURITY diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 45e4bef0012a..856a24eadf7e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1214,6 +1214,7 @@ static struct ctl_table vm_table[] = { .proc_handler = proc_dointvec_jiffies, }, #endif +#ifdef CONFIG_MMU { .procname = "mmap_min_addr", .data = &dac_mmap_min_addr, @@ -1221,6 +1222,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = mmap_min_addr_handler, }, +#endif #ifdef CONFIG_NUMA { .procname = "numa_zonelist_order", diff --git a/mm/Kconfig b/mm/Kconfig index 43ea8c3a2bbf..ee9f3e0f2b69 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -221,6 +221,7 @@ config KSM config DEFAULT_MMAP_MIN_ADDR int "Low address space to protect from user allocation" + depends on MMU default 4096 help This is the portion of low virtual memory which should be protected diff --git a/security/Makefile b/security/Makefile index bb44e350c618..da20a193c8dd 100644 --- a/security/Makefile +++ b/security/Makefile @@ -8,7 +8,8 @@ subdir-$(CONFIG_SECURITY_SMACK) += smack subdir-$(CONFIG_SECURITY_TOMOYO) += tomoyo # always enable default capabilities -obj-y += commoncap.o min_addr.o +obj-y += commoncap.o +obj-$(CONFIG_MMU) += min_addr.o # Object file lists obj-$(CONFIG_SECURITY) += security.o capability.o -- cgit v1.2.3 From 47376ceba54600cec4dd9e7c4fe8b98e4269633a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Dec 2009 23:25:50 +0100 Subject: reiserfs: Fix reiserfs lock <-> inode mutex dependency inversion The reiserfs lock -> inode mutex dependency gets inverted when we relax the lock while walking to the tree. To fix this, use a specialized version of reiserfs_mutex_lock_safe that takes care of mutex subclasses. Then we can grab the inode mutex with I_MUTEX_XATTR subclass without any reiserfs lock dependency. This fixes the following report: [ INFO: possible circular locking dependency detected ] 2.6.32-06793-gf405425-dirty #2 ------------------------------------------------------- mv/18566 is trying to acquire lock: (&REISERFS_SB(s)->lock){+.+.+.}, at: [] reiserfs_write_lock+0x28= /0x40 but task is already holding lock: (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [] reiserfs_for_each_xattr+0x10c/0x380 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&sb->s_type->i_mutex_key#5/3){+.+.+.}: [] validate_chain+0xa23/0xf70 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] mutex_lock_nested+0x5f/0x2b0 [] reiserfs_for_each_xattr+0x84/0x380 [] reiserfs_delete_xattrs+0x15/0x50 [] reiserfs_delete_inode+0x8f/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] do_unlinkat+0xdb/0x160 [] sys_unlink+0x10/0x20 [] sysenter_do_call+0x12/0x36 -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [] validate_chain+0xf68/0xf70 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] mutex_lock_nested+0x5f/0x2b0 [] reiserfs_write_lock+0x28/0x40 [] search_by_key+0x1f7b/0x21b0 [] search_by_entry_key+0x1f/0x3b0 [] reiserfs_find_entry+0x77/0x400 [] reiserfs_lookup+0x85/0x130 [] __lookup_hash+0xb4/0x110 [] lookup_one_len+0xb3/0x100 [] reiserfs_for_each_xattr+0x120/0x380 [] reiserfs_delete_xattrs+0x15/0x50 [] reiserfs_delete_inode+0x8f/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] dentry_iput+0x6f/0xf0 [] d_kill+0x24/0x50 [] dput+0x5b/0x120 [] sys_renameat+0x1b9/0x230 [] sys_rename+0x28/0x30 [] sysenter_do_call+0x12/0x36 other info that might help us debug this: 2 locks held by mv/18566: #0: (&sb->s_type->i_mutex_key#5/1){+.+.+.}, at: [] lock_rename+0xcc/0xd0 #1: (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [] reiserfs_for_each_xattr+0x10c/0x380 stack backtrace: Pid: 18566, comm: mv Tainted: G C 2.6.32-06793-gf405425-dirty #2 Call Trace: [] ? printk+0x18/0x1e [] print_circular_bug+0xc0/0xd0 [] validate_chain+0xf68/0xf70 [] ? trace_hardirqs_off+0xb/0x10 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] ? reiserfs_write_lock+0x28/0x40 [] mutex_lock_nested+0x5f/0x2b0 [] ? reiserfs_write_lock+0x28/0x40 [] ? reiserfs_write_lock+0x28/0x40 [] ? schedule+0x27a/0x440 [] reiserfs_write_lock+0x28/0x40 [] search_by_key+0x1f7b/0x21b0 [] ? __lock_acquire+0x506/0xa70 [] ? lock_release_non_nested+0x1e7/0x340 [] ? reiserfs_write_lock+0x28/0x40 [] ? trace_hardirqs_on_caller+0x124/0x170 [] ? trace_hardirqs_on+0xb/0x10 [] ? T.316+0x15/0x1a0 [] ? sched_clock_cpu+0x9d/0x100 [] search_by_entry_key+0x1f/0x3b0 [] ? __mutex_unlock_slowpath+0x9a/0x120 [] ? trace_hardirqs_on_caller+0x124/0x170 [] reiserfs_find_entry+0x77/0x400 [] reiserfs_lookup+0x85/0x130 [] ? sched_clock_cpu+0x9d/0x100 [] __lookup_hash+0xb4/0x110 [] lookup_one_len+0xb3/0x100 [] reiserfs_for_each_xattr+0x120/0x380 [] ? delete_one_xattr+0x0/0x1c0 [] ? math_error+0x22/0x150 [] ? reiserfs_write_lock+0x28/0x40 [] reiserfs_delete_xattrs+0x15/0x50 [] ? reiserfs_write_lock+0x28/0x40 [] reiserfs_delete_inode+0x8f/0x140 [] ? generic_delete_inode+0x5f/0x150 [] ? reiserfs_delete_inode+0x0/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] dentry_iput+0x6f/0xf0 [] d_kill+0x24/0x50 [] dput+0x5b/0x120 [] sys_renameat+0x1b9/0x230 [] ? sched_clock_cpu+0x9d/0x100 [] ? trace_hardirqs_off+0xb/0x10 [] ? cpu_clock+0x4e/0x60 [] ? do_page_fault+0x155/0x370 [] ? up_read+0x16/0x30 [] ? 
do_page_fault+0x155/0x370 [] sys_rename+0x28/0x30 [] sysenter_do_call+0x12/0x36 Reported-by: Alexander Beregalov Signed-off-by: Frederic Weisbecker Cc: Chris Mason Cc: Ingo Molnar Cc: Thomas Gleixner --- fs/reiserfs/xattr.c | 3 ++- include/linux/reiserfs_fs.h | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 58aa8e75f7f5..8891cd88a3f4 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -243,7 +243,8 @@ static int reiserfs_for_each_xattr(struct inode *inode, goto out_dir; } - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); + reiserfs_mutex_lock_nested_safe(&dir->d_inode->i_mutex, I_MUTEX_XATTR, + inode->i_sb); buf.xadir = dir; err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos); while ((err == 0 || err == -ENOSPC) && buf.count) { diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index a05b4a20768d..4351b49e2b1e 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -97,6 +97,15 @@ static inline void reiserfs_mutex_lock_safe(struct mutex *m, reiserfs_write_lock(s); } +static inline void +reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, + struct super_block *s) +{ + reiserfs_write_unlock(s); + mutex_lock_nested(m, subclass); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.2.3 From 329962503692b42d8088f31584e42d52db179d52 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 15 Dec 2009 17:59:02 -0800 Subject: x86: Fix checking of SRAT when node 0 ram is not from 0 Found one system that boot from socket1 instead of socket0, SRAT get rejected... [ 0.000000] SRAT: Node 1 PXM 0 0-a0000 [ 0.000000] SRAT: Node 1 PXM 0 100000-80000000 [ 0.000000] SRAT: Node 1 PXM 0 100000000-2080000000 [ 0.000000] SRAT: Node 0 PXM 1 2080000000-4080000000 [ 0.000000] SRAT: Node 2 PXM 2 4080000000-6080000000 [ 0.000000] SRAT: Node 3 PXM 3 6080000000-8080000000 [ 0.000000] SRAT: Node 4 PXM 4 8080000000-a080000000 [ 0.000000] SRAT: Node 5 PXM 5 a080000000-c080000000 [ 0.000000] SRAT: Node 6 PXM 6 c080000000-e080000000 [ 0.000000] SRAT: Node 7 PXM 7 e080000000-10080000000 ... [ 0.000000] NUMA: Allocated memnodemap from 500000 - 701040 [ 0.000000] NUMA: Using 20 for the hash shift. [ 0.000000] Adding active range (0, 0x2080000, 0x4080000) 0 entries of 3200 used [ 0.000000] Adding active range (1, 0x0, 0x96) 1 entries of 3200 used [ 0.000000] Adding active range (1, 0x100, 0x7f750) 2 entries of 3200 used [ 0.000000] Adding active range (1, 0x100000, 0x2080000) 3 entries of 3200 used [ 0.000000] Adding active range (2, 0x4080000, 0x6080000) 4 entries of 3200 used [ 0.000000] Adding active range (3, 0x6080000, 0x8080000) 5 entries of 3200 used [ 0.000000] Adding active range (4, 0x8080000, 0xa080000) 6 entries of 3200 used [ 0.000000] Adding active range (5, 0xa080000, 0xc080000) 7 entries of 3200 used [ 0.000000] Adding active range (6, 0xc080000, 0xe080000) 8 entries of 3200 used [ 0.000000] Adding active range (7, 0xe080000, 0x10080000) 9 entries of 3200 used [ 0.000000] SRAT: PXMs only cover 917504MB of your 1048566MB e820 RAM. Not used. [ 0.000000] SRAT: SRAT not used. the early_node_map is not sorted because node0 with non zero start come first. so try to sort it right away after all regions are registered. 
also fixs refression by 8716273c (x86: Export srat physical topology) -v2: make it more solid to handle cross node case like node0 [0,4g), [8,12g) and node1 [4g, 8g), [12g, 16g) -v3: update comments. Reported-and-tested-by: Jens Axboe Signed-off-by: Yinghai Lu LKML-Reference: <4B2579D2.3010201@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/mm/srat_32.c | 2 ++ arch/x86/mm/srat_64.c | 4 +++- include/linux/mm.h | 3 +++ mm/page_alloc.c | 4 ++-- 4 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 6f8aa33031c7..9324f13492d5 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void) e820_register_active_regions(chunk->nid, chunk->start_pfn, min(chunk->end_pfn, max_pfn)); } + /* for out of order entries in SRAT */ + sort_node_map(); for_each_online_node(nid) { unsigned long start = node_start_pfn[nid]; diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index d89075489664..a27124185fc1 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -317,7 +317,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) unsigned long s = nodes[i].start >> PAGE_SHIFT; unsigned long e = nodes[i].end >> PAGE_SHIFT; pxmram += e - s; - pxmram -= absent_pages_in_range(s, e); + pxmram -= __absent_pages_in_range(i, s, e); if ((long)pxmram < 0) pxmram = 0; } @@ -373,6 +373,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) for_each_node_mask(i, nodes_parsed) e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, nodes[i].end >> PAGE_SHIFT); + /* for out of order entries in SRAT */ + sort_node_map(); if (!nodes_cover_memory(nodes)) { bad_srat(); return -1; diff --git a/include/linux/mm.h b/include/linux/mm.h index 24c395694f4d..20a2036f3a94 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1022,6 +1022,9 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); +void sort_node_map(void); +unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, + unsigned long end_pfn); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); extern void get_pfn_range_for_nid(unsigned int nid, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2bc2ac63f41e..873c86308b4e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3573,7 +3573,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid, * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, * then all holes in the requested range will be accounted for. 
*/ -static unsigned long __meminit __absent_pages_in_range(int nid, +unsigned long __meminit __absent_pages_in_range(int nid, unsigned long range_start_pfn, unsigned long range_end_pfn) { @@ -4102,7 +4102,7 @@ static int __init cmp_node_active_region(const void *a, const void *b) } /* sort the node_map by start_pfn */ -static void __init sort_node_map(void) +void __init sort_node_map(void) { sort(early_node_map, (size_t)nr_nodemap_entries, sizeof(struct node_active_region), -- cgit v1.2.3 From a4636818f8e0991f32d9528f39cf4f3d6a7d30a3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 17 Dec 2009 11:43:29 -0600 Subject: cpumask: rename tsk_cpumask to tsk_cpus_allowed Noone uses this wrapper yet, and Ingo asked that it be kept consistent with current task_struct usage. (One user crept in via linux-next: fixed) Signed-off-by: Rusty Russell Cc: Tejun Heo --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- include/linux/sched.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a9df9441a9a2..f125e5c551c0 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1136,7 +1136,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) return -ENOMEM; - cpumask_copy(oldmask, tsk_cpumask(current)); + cpumask_copy(oldmask, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); if (smp_processor_id() != pol->cpu) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 244c287a5ac1..4d7adb282bdd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1555,7 +1555,7 @@ struct task_struct { }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpumask(tsk) (&(tsk)->cpus_allowed) +#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT -- cgit v1.2.3 From 2d1c861871d767153538a77c498752b36d4bb4b8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 9 Dec 2009 17:52:13 +1100 Subject: PCI/cardbus: Add a fixup hook and fix powerpc The cardbus code creates PCI devices without ever going through the necessary fixup bits and pieces that normal PCI devices go through. There's in fact a commented out call to pcibios_fixup_bus() in there, it's commented because ... it doesn't work. I could make pcibios_fixup_bus() do the right thing on powerpc easily but I felt it cleaner instead to provide a specific hook pci_fixup_cardbus for which a weak empty implementation is provided by the PCI core. This fixes cardbus on powerbooks and probably all other PowerPC platforms which was broken completely for ever on some platforms and since 2.6.31 on others such as PowerBooks when we made the DMA ops mandatory (since those are setup by the fixups). 
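A sketch of what an arch-side override looks like: the PCI core keeps only the empty weak stub, and an architecture supplies a strong definition. The body below just forwards to the powerpc per-device fixup pass used in this patch; other architectures would do their own setup here.

#include <linux/pci.h>

/* Illustrative strong override of the weak pci_fixup_cardbus() stub;
 * pcibios_setup_bus_devices() is the powerpc helper that wires up DMA
 * ops and OF nodes for the hot-added cardbus devices. */
void pci_fixup_cardbus(struct pci_bus *bus)
{
        pcibios_setup_bus_devices(bus);
}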
Acked-by: Dominik Brodowski Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Jesse Barnes --- arch/powerpc/kernel/pci-common.c | 13 +++++++++++++ drivers/pci/pci.c | 5 +++++ drivers/pcmcia/cardbus.c | 2 +- include/linux/pci.h | 3 +++ 4 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e8dfdbd9327a..cadbed679fbb 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1107,6 +1107,12 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { struct dev_archdata *sd = &dev->dev.archdata; + /* Cardbus can call us to add new devices to a bus, so ignore + * those who are already fully discovered + */ + if (dev->is_added) + continue; + /* Setup OF node pointer in archdata */ sd->of_node = pci_device_to_OF_node(dev); @@ -1147,6 +1153,13 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) } EXPORT_SYMBOL(pcibios_fixup_bus); +void __devinit pci_fixup_cardbus(struct pci_bus *bus) +{ + /* Now fixup devices on that bus */ + pcibios_setup_bus_devices(bus); +} + + static int skip_isa_ioresource_align(struct pci_dev *dev) { if ((ppc_pci_flags & PPC_PCI_CAN_SKIP_ISA_ALIGN) && diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d50522bf16b1..864e703cf737 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2798,6 +2798,11 @@ int __attribute__ ((weak)) pci_ext_cfg_avail(struct pci_dev *dev) return 1; } +void __weak pci_fixup_cardbus(struct pci_bus *bus) +{ +} +EXPORT_SYMBOL(pci_fixup_cardbus); + static int __init pci_setup(char *str) { while (str) { diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index cdf50f3bc2df..d99f846451a3 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -222,7 +222,7 @@ int __ref cb_alloc(struct pcmcia_socket *s) unsigned int max, pass; s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0)); -/* pcibios_fixup_bus(bus); */ + pci_fixup_cardbus(bus); max = bus->secondary; for (pass = 0; pass < 2; pass++) diff --git a/include/linux/pci.h b/include/linux/pci.h index bf1e67080849..5da0690d9cee 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -566,6 +566,9 @@ void pcibios_align_resource(void *, struct resource *, resource_size_t, resource_size_t); void pcibios_update_irq(struct pci_dev *, int irq); +/* Weak but can be overriden by arch */ +void pci_fixup_cardbus(struct pci_bus *); + /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); -- cgit v1.2.3 From 4f924ba5b5aaf1477daafeae779495d39549186d Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Thu, 17 Dec 2009 00:08:33 +0900 Subject: sony-laptop: add AVMode key mapping Some models are equipped with an "AVMode" function key that sends sony-laptop: Unknown event: 0x100 0xa1 sony-laptop: Unknown event: 0x100 0x21 for press and release respectively. Cc: "Matthew W. S. 
Bell" Cc: Dmitry Torokhov Signed-off-by: Mattia Dongili Signed-off-by: Len Brown --- drivers/platform/x86/sony-laptop.c | 4 ++++ include/linux/sonypi.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index a2a742c8ff7e..9710f70040ba 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -232,6 +232,7 @@ static int sony_laptop_input_index[] = { 56, /* 69 SONYPI_EVENT_VOLUME_INC_PRESSED */ 57, /* 70 SONYPI_EVENT_VOLUME_DEC_PRESSED */ -1, /* 71 SONYPI_EVENT_BRIGHTNESS_PRESSED */ + 58, /* 72 SONYPI_EVENT_MEDIA_PRESSED */ }; static int sony_laptop_input_keycode_map[] = { @@ -293,6 +294,7 @@ static int sony_laptop_input_keycode_map[] = { KEY_F15, /* 55 SONYPI_EVENT_SETTINGKEY_PRESSED */ KEY_VOLUMEUP, /* 56 SONYPI_EVENT_VOLUME_INC_PRESSED */ KEY_VOLUMEDOWN, /* 57 SONYPI_EVENT_VOLUME_DEC_PRESSED */ + KEY_MEDIA, /* 58 SONYPI_EVENT_MEDIA_PRESSED */ }; /* release buttons after a short delay if pressed */ @@ -890,6 +892,8 @@ static struct sony_nc_event sony_100_events[] = { { 0x0C, SONYPI_EVENT_FNKEY_RELEASED }, { 0x9f, SONYPI_EVENT_CD_EJECT_PRESSED }, { 0x1f, SONYPI_EVENT_ANYBUTTON_RELEASED }, + { 0xa1, SONYPI_EVENT_MEDIA_PRESSED }, + { 0x21, SONYPI_EVENT_ANYBUTTON_RELEASED }, { 0, 0 }, }; diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h index 34c4475ac4a2..4f95c1aac2fd 100644 --- a/include/linux/sonypi.h +++ b/include/linux/sonypi.h @@ -111,6 +111,7 @@ #define SONYPI_EVENT_VOLUME_INC_PRESSED 69 #define SONYPI_EVENT_VOLUME_DEC_PRESSED 70 #define SONYPI_EVENT_BRIGHTNESS_PRESSED 71 +#define SONYPI_EVENT_MEDIA_PRESSED 72 /* get/set brightness */ #define SONYPI_IOCGBRT _IOR('v', 0, __u8) -- cgit v1.2.3 From 234da7bcdc7aaa935846534c3b726dbc79a9cdd5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Dec 2009 20:21:05 +0100 Subject: sched: Teach might_sleep() about preemptible RCU In practice, it is harmless to voluntarily sleep in a rcu_read_lock() section if we are running under preempt rcu, but it is illegal if we build a kernel running non-preemptable rcu. Currently, might_sleep() doesn't notice sleepable operations under rcu_read_lock() sections if we are running under preemptable rcu because preempt_count() is left untouched after rcu_read_lock() in this case. But we want developers who test their changes under such config to notice the "sleeping while atomic" issues. So we add rcu_read_lock_nesting to prempt_count() in might_sleep() checks. [ v2: Handle rcu-tiny ] Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. 
McKenney Cc: Peter Zijlstra LKML-Reference: <1260991265-8451-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/rcutiny.h | 5 +++++ include/linux/rcutree.h | 11 +++++++++++ kernel/sched.c | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c4ba9a78721e..96cc307ed9f4 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -101,4 +101,9 @@ static inline void exit_rcu(void) { } +static inline int rcu_preempt_depth(void) +{ + return 0; +} + #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c93eee5911b0..8044b1b94333 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,6 +45,12 @@ extern void __rcu_read_unlock(void); extern void synchronize_rcu(void); extern void exit_rcu(void); +/* + * Defined as macro as it is a very low level header + * included from areas that don't even know about current + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock(void) @@ -63,6 +69,11 @@ static inline void exit_rcu(void) { } +static inline int rcu_preempt_depth(void) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock_bh(void) diff --git a/kernel/sched.c b/kernel/sched.c index af7dfa74e6bb..7be88a7be047 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9682,7 +9682,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP static inline int preempt_count_equals(int preempt_offset) { - int nested = preempt_count() & ~PREEMPT_ACTIVE; + int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); } -- cgit v1.2.3 From 5b74ed4729ad2b2017453add68104a83206caefb Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 16 Dec 2009 10:09:45 -0500 Subject: perf events: Remove unused perf_counter.h header file Since nothing includes the file and it's also not exported to user space, remove it. Signed-off-by: Robert P. J. Day Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 444 ------------------------------------------- 1 file changed, 444 deletions(-) delete mode 100644 include/linux/perf_counter.h (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h deleted file mode 100644 index e3fb25606706..000000000000 --- a/include/linux/perf_counter.h +++ /dev/null @@ -1,444 +0,0 @@ -/* - * NOTE: this file will be removed in a future kernel release, it is - * provided as a courtesy copy of user-space code that relies on the - * old (pre-rename) symbols and constants. - * - * Performance events: - * - * Copyright (C) 2008-2009, Thomas Gleixner - * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra - * - * Data type definitions, declarations, prototypes. 
- * - * Started by: Thomas Gleixner and Ingo Molnar - * - * For licencing details see kernel-base/COPYING - */ -#ifndef _LINUX_PERF_COUNTER_H -#define _LINUX_PERF_COUNTER_H - -#include -#include -#include - -/* - * User-space ABI bits: - */ - -/* - * attr.type - */ -enum perf_type_id { - PERF_TYPE_HARDWARE = 0, - PERF_TYPE_SOFTWARE = 1, - PERF_TYPE_TRACEPOINT = 2, - PERF_TYPE_HW_CACHE = 3, - PERF_TYPE_RAW = 4, - - PERF_TYPE_MAX, /* non-ABI */ -}; - -/* - * Generalized performance counter event types, used by the - * attr.event_id parameter of the sys_perf_counter_open() - * syscall: - */ -enum perf_hw_id { - /* - * Common hardware events, generalized by the kernel: - */ - PERF_COUNT_HW_CPU_CYCLES = 0, - PERF_COUNT_HW_INSTRUCTIONS = 1, - PERF_COUNT_HW_CACHE_REFERENCES = 2, - PERF_COUNT_HW_CACHE_MISSES = 3, - PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_HW_BRANCH_MISSES = 5, - PERF_COUNT_HW_BUS_CYCLES = 6, - - PERF_COUNT_HW_MAX, /* non-ABI */ -}; - -/* - * Generalized hardware cache counters: - * - * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x - * { read, write, prefetch } x - * { accesses, misses } - */ -enum perf_hw_cache_id { - PERF_COUNT_HW_CACHE_L1D = 0, - PERF_COUNT_HW_CACHE_L1I = 1, - PERF_COUNT_HW_CACHE_LL = 2, - PERF_COUNT_HW_CACHE_DTLB = 3, - PERF_COUNT_HW_CACHE_ITLB = 4, - PERF_COUNT_HW_CACHE_BPU = 5, - - PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_id { - PERF_COUNT_HW_CACHE_OP_READ = 0, - PERF_COUNT_HW_CACHE_OP_WRITE = 1, - PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, - - PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_result_id { - PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, - PERF_COUNT_HW_CACHE_RESULT_MISS = 1, - - PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ -}; - -/* - * Special "software" counters provided by the kernel, even if the hardware - * does not support performance counters. These counters measure various - * physical and sw events of the kernel (and allow the profiling of them as - * well): - */ -enum perf_sw_ids { - PERF_COUNT_SW_CPU_CLOCK = 0, - PERF_COUNT_SW_TASK_CLOCK = 1, - PERF_COUNT_SW_PAGE_FAULTS = 2, - PERF_COUNT_SW_CONTEXT_SWITCHES = 3, - PERF_COUNT_SW_CPU_MIGRATIONS = 4, - PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, - PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, - PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, - PERF_COUNT_SW_EMULATION_FAULTS = 8, - - PERF_COUNT_SW_MAX, /* non-ABI */ -}; - -/* - * Bits that can be set in attr.sample_type to request information - * in the overflow packets. 
- */ -enum perf_counter_sample_format { - PERF_SAMPLE_IP = 1U << 0, - PERF_SAMPLE_TID = 1U << 1, - PERF_SAMPLE_TIME = 1U << 2, - PERF_SAMPLE_ADDR = 1U << 3, - PERF_SAMPLE_READ = 1U << 4, - PERF_SAMPLE_CALLCHAIN = 1U << 5, - PERF_SAMPLE_ID = 1U << 6, - PERF_SAMPLE_CPU = 1U << 7, - PERF_SAMPLE_PERIOD = 1U << 8, - PERF_SAMPLE_STREAM_ID = 1U << 9, - PERF_SAMPLE_RAW = 1U << 10, - - PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ -}; - -/* - * The format of the data returned by read() on a perf counter fd, - * as specified by attr.read_format: - * - * struct read_format { - * { u64 value; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 id; } && PERF_FORMAT_ID - * } && !PERF_FORMAT_GROUP - * - * { u64 nr; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 value; - * { u64 id; } && PERF_FORMAT_ID - * } cntr[nr]; - * } && PERF_FORMAT_GROUP - * }; - */ -enum perf_counter_read_format { - PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, - PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, - PERF_FORMAT_ID = 1U << 2, - PERF_FORMAT_GROUP = 1U << 3, - - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ -}; - -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ - -/* - * Hardware event to monitor via a performance monitoring counter: - */ -struct perf_counter_attr { - - /* - * Major type: hardware/software/tracepoint/etc. - */ - __u32 type; - - /* - * Size of the attr structure, for fwd/bwd compat. - */ - __u32 size; - - /* - * Type specific configuration information. - */ - __u64 config; - - union { - __u64 sample_period; - __u64 sample_freq; - }; - - __u64 sample_type; - __u64 read_format; - - __u64 disabled : 1, /* off by default */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only group on PMU */ - exclude_user : 1, /* don't count user */ - exclude_kernel : 1, /* ditto kernel */ - exclude_hv : 1, /* ditto hypervisor */ - exclude_idle : 1, /* don't count when idle */ - mmap : 1, /* include mmap data */ - comm : 1, /* include comm data */ - freq : 1, /* use freq, not period */ - inherit_stat : 1, /* per task counts */ - enable_on_exec : 1, /* next exec enables */ - task : 1, /* trace fork/exit */ - watermark : 1, /* wakeup_watermark */ - - __reserved_1 : 49; - - union { - __u32 wakeup_events; /* wakeup every n events */ - __u32 wakeup_watermark; /* bytes before wakeup */ - }; - __u32 __reserved_2; - - __u64 __reserved_3; -}; - -/* - * Ioctls that can be done on a perf counter fd: - */ -#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) -#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) -#define PERF_COUNTER_IOC_RESET _IO ('$', 3) -#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) -#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_COUNTER_IOC_SET_FILTER _IOW('$', 6, char *) - -enum perf_counter_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, -}; - -/* - * Structure of the page that can be mapped via mmap - */ -struct perf_counter_mmap_page { - __u32 version; /* version number of this structure */ - __u32 compat_version; /* lowest version this is compat with */ - - /* - * Bits needed to read the hw counters in user-space. 
- * - * u32 seq; - * s64 count; - * - * do { - * seq = pc->lock; - * - * barrier() - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; - * - * barrier(); - * } while (pc->lock != seq); - * - * NOTE: for obvious reason this only works on self-monitoring - * processes. - */ - __u32 lock; /* seqlock for synchronization */ - __u32 index; /* hardware counter identifier */ - __s64 offset; /* add to hardware counter value */ - __u64 time_enabled; /* time counter active */ - __u64 time_running; /* time counter on cpu */ - - /* - * Hole for extension of the self monitor capabilities - */ - - __u64 __reserved[123]; /* align to 1k */ - - /* - * Control data for the mmap() data buffer. - * - * User-space reading the @data_head value should issue an rmb(), on - * SMP capable platforms, after reading this value -- see - * perf_counter_wakeup(). - * - * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data. In this case - * the kernel will not over-write unread data. - */ - __u64 data_head; /* head in the data section */ - __u64 data_tail; /* user-space written tail */ -}; - -#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) -#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (2 << 0) -#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) - -struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; -}; - -enum perf_event_type { - - /* - * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: - * - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * u64 addr; - * u64 len; - * u64 pgoff; - * char filename[]; - * }; - */ - PERF_EVENT_MMAP = 1, - - /* - * struct { - * struct perf_event_header header; - * u64 id; - * u64 lost; - * }; - */ - PERF_EVENT_LOST = 2, - - /* - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * char comm[]; - * }; - */ - PERF_EVENT_COMM = 3, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, ppid; - * u32 tid, ptid; - * u64 time; - * }; - */ - PERF_EVENT_EXIT = 4, - - /* - * struct { - * struct perf_event_header header; - * u64 time; - * u64 id; - * u64 stream_id; - * }; - */ - PERF_EVENT_THROTTLE = 5, - PERF_EVENT_UNTHROTTLE = 6, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, ppid; - * u32 tid, ptid; - * u64 time; - * }; - */ - PERF_EVENT_FORK = 7, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, tid; - * - * struct read_format values; - * }; - */ - PERF_EVENT_READ = 8, - - /* - * struct { - * struct perf_event_header header; - * - * { u64 ip; } && PERF_SAMPLE_IP - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 addr; } && PERF_SAMPLE_ADDR - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU - * { u64 period; } && PERF_SAMPLE_PERIOD - * - * { struct read_format values; } && PERF_SAMPLE_READ - * - * { u64 nr, - * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN - * - * # - * # The RAW record below is opaque data wrt the ABI - * # - * # That is, the ABI doesn't make any promises wrt to - * # the stability of its content, it may vary depending - * # on event, hardware, kernel version and phase of - * # the moon. - * # - * # In other words, PERF_SAMPLE_RAW contents are not an ABI. 
- * # - * - * { u32 size; - * char data[size];}&& PERF_SAMPLE_RAW - * }; - */ - PERF_EVENT_SAMPLE = 9, - - PERF_EVENT_MAX, /* non-ABI */ -}; - -enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, - - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, - - PERF_CONTEXT_MAX = (__u64)-4095, -}; - -#define PERF_FLAG_FD_NO_GROUP (1U << 0) -#define PERF_FLAG_FD_OUTPUT (1U << 1) - -/* - * In case some app still references the old symbols: - */ - -#define __NR_perf_counter_open __NR_perf_event_open - -#define PR_TASK_PERF_COUNTERS_DISABLE PR_TASK_PERF_EVENTS_DISABLE -#define PR_TASK_PERF_COUNTERS_ENABLE PR_TASK_PERF_EVENTS_ENABLE - -#endif /* _LINUX_PERF_COUNTER_H */ -- cgit v1.2.3 From 27f37e4bfed803be338dcc78845d4a67eefb40a0 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 25 Sep 2009 09:39:26 +0200 Subject: regulator: add driver for MAX8660/8661 Tested with a MX25-based custom board. Signed-off-by: Wolfram Sang Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/Kconfig | 7 + drivers/regulator/Makefile | 1 + drivers/regulator/max8660.c | 510 ++++++++++++++++++++++++++++++++++++++ include/linux/regulator/max8660.h | 57 +++++ 4 files changed, 575 insertions(+) create mode 100644 drivers/regulator/max8660.c create mode 100644 include/linux/regulator/max8660.h (limited to 'include') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 7cfdd65bebb4..9e0aa14dc6af 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -69,6 +69,13 @@ config REGULATOR_MAX1586 regulator via I2C bus. The provided regulator is suitable for PXA27x chips to control VCC_CORE and VCC_USIM voltages. +config REGULATOR_MAX8660 + tristate "Maxim 8660/8661 voltage regulator" + depends on I2C + help + This driver controls a Maxim 8660/8661 voltage output + regulator via I2C bus. + config REGULATOR_TWL4030 bool "TI TWL4030/TWL5030/TWL6030/TPS695x0 PMIC" depends on TWL4030_CORE diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 9ae3cc44e668..12285e41beec 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o +obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-dcdc.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-isink.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-ldo.o diff --git a/drivers/regulator/max8660.c b/drivers/regulator/max8660.c new file mode 100644 index 000000000000..acc2fb7b6087 --- /dev/null +++ b/drivers/regulator/max8660.c @@ -0,0 +1,510 @@ +/* + * max8660.c -- Voltage regulation for the Maxim 8660/8661 + * + * based on max1586.c and wm8400-regulator.c + * + * Copyright (C) 2009 Wolfram Sang, Pengutronix e.K. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place, Suite 330, Boston, MA 02111-1307 USA + * + * Some info: + * + * Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8660-MAX8661.pdf + * + * This chip is a bit nasty because it is a write-only device. Thus, the driver + * uses shadow registers to keep track of its values. The main problem appears + * to be the initialization: When Linux boots up, we cannot know if the chip is + * in the default state or not, so we would have to pass such information in + * platform_data. As this adds a bit of complexity to the driver, this is left + * out for now until it is really needed. + * + * [A|S|M]DTV1 registers are currently not used, but [A|S|M]DTV2. + * + * If the driver is feature complete, it might be worth to check if one set of + * functions for V3-V7 is sufficient. For maximum flexibility during + * development, they are separated for now. + * + */ + +#include +#include +#include +#include +#include +#include + +#define MAX8660_DCDC_MIN_UV 725000 +#define MAX8660_DCDC_MAX_UV 1800000 +#define MAX8660_DCDC_STEP 25000 +#define MAX8660_DCDC_MAX_SEL 0x2b + +#define MAX8660_LDO5_MIN_UV 1700000 +#define MAX8660_LDO5_MAX_UV 2000000 +#define MAX8660_LDO5_STEP 25000 +#define MAX8660_LDO5_MAX_SEL 0x0c + +#define MAX8660_LDO67_MIN_UV 1800000 +#define MAX8660_LDO67_MAX_UV 3300000 +#define MAX8660_LDO67_STEP 100000 +#define MAX8660_LDO67_MAX_SEL 0x0f + +enum { + MAX8660_OVER1, + MAX8660_OVER2, + MAX8660_VCC1, + MAX8660_ADTV1, + MAX8660_ADTV2, + MAX8660_SDTV1, + MAX8660_SDTV2, + MAX8660_MDTV1, + MAX8660_MDTV2, + MAX8660_L12VCR, + MAX8660_FPWM, + MAX8660_N_REGS, /* not a real register */ +}; + +struct max8660 { + struct i2c_client *client; + u8 shadow_regs[MAX8660_N_REGS]; /* as chip is write only */ + struct regulator_dev *rdev[]; +}; + +static int max8660_write(struct max8660 *max8660, u8 reg, u8 mask, u8 val) +{ + static const u8 max8660_addresses[MAX8660_N_REGS] = + { 0x10, 0x12, 0x20, 0x23, 0x24, 0x29, 0x2a, 0x32, 0x33, 0x39, 0x80 }; + + int ret; + u8 reg_val = (max8660->shadow_regs[reg] & mask) | val; + dev_vdbg(&max8660->client->dev, "Writing reg %02x with %02x\n", + max8660_addresses[reg], reg_val); + + ret = i2c_smbus_write_byte_data(max8660->client, + max8660_addresses[reg], reg_val); + if (ret == 0) + max8660->shadow_regs[reg] = reg_val; + + return ret; +} + + +/* + * DCDC functions + */ + +static int max8660_dcdc_is_enabled(struct regulator_dev *rdev) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 val = max8660->shadow_regs[MAX8660_OVER1]; + u8 mask = (rdev_get_id(rdev) == MAX8660_V3) ? 1 : 4; + return !!(val & mask); +} + +static int max8660_dcdc_enable(struct regulator_dev *rdev) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 bit = (rdev_get_id(rdev) == MAX8660_V3) ? 1 : 4; + return max8660_write(max8660, MAX8660_OVER1, 0xff, bit); +} + +static int max8660_dcdc_disable(struct regulator_dev *rdev) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 mask = (rdev_get_id(rdev) == MAX8660_V3) ? 
~1 : ~4; + return max8660_write(max8660, MAX8660_OVER1, mask, 0); +} + +static int max8660_dcdc_list(struct regulator_dev *rdev, unsigned selector) +{ + if (selector > MAX8660_DCDC_MAX_SEL) + return -EINVAL; + return MAX8660_DCDC_MIN_UV + selector * MAX8660_DCDC_STEP; +} + +static int max8660_dcdc_get(struct regulator_dev *rdev) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 reg = (rdev_get_id(rdev) == MAX8660_V3) ? MAX8660_ADTV2 : MAX8660_SDTV2; + u8 selector = max8660->shadow_regs[reg]; + return MAX8660_DCDC_MIN_UV + selector * MAX8660_DCDC_STEP; +} + +static int max8660_dcdc_set(struct regulator_dev *rdev, int min_uV, int max_uV) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 reg, selector, bits; + int ret; + + if (min_uV < MAX8660_DCDC_MIN_UV || min_uV > MAX8660_DCDC_MAX_UV) + return -EINVAL; + if (max_uV < MAX8660_DCDC_MIN_UV || max_uV > MAX8660_DCDC_MAX_UV) + return -EINVAL; + + selector = (min_uV - (MAX8660_DCDC_MIN_UV - MAX8660_DCDC_STEP + 1)) + / MAX8660_DCDC_STEP; + + ret = max8660_dcdc_list(rdev, selector); + if (ret < 0 || ret > max_uV) + return -EINVAL; + + reg = (rdev_get_id(rdev) == MAX8660_V3) ? MAX8660_ADTV2 : MAX8660_SDTV2; + ret = max8660_write(max8660, reg, 0, selector); + if (ret) + return ret; + + /* Select target voltage register and activate regulation */ + bits = (rdev_get_id(rdev) == MAX8660_V3) ? 0x03 : 0x30; + return max8660_write(max8660, MAX8660_VCC1, 0xff, bits); +} + +static struct regulator_ops max8660_dcdc_ops = { + .is_enabled = max8660_dcdc_is_enabled, + .list_voltage = max8660_dcdc_list, + .set_voltage = max8660_dcdc_set, + .get_voltage = max8660_dcdc_get, +}; + + +/* + * LDO5 functions + */ + +static int max8660_ldo5_list(struct regulator_dev *rdev, unsigned selector) +{ + if (selector > MAX8660_LDO5_MAX_SEL) + return -EINVAL; + return MAX8660_LDO5_MIN_UV + selector * MAX8660_LDO5_STEP; +} + +static int max8660_ldo5_get(struct regulator_dev *rdev) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 selector = max8660->shadow_regs[MAX8660_MDTV2]; + + return MAX8660_LDO5_MIN_UV + selector * MAX8660_LDO5_STEP; +} + +static int max8660_ldo5_set(struct regulator_dev *rdev, int min_uV, int max_uV) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 selector; + int ret; + + if (min_uV < MAX8660_LDO5_MIN_UV || min_uV > MAX8660_LDO5_MAX_UV) + return -EINVAL; + if (max_uV < MAX8660_LDO5_MIN_UV || max_uV > MAX8660_LDO5_MAX_UV) + return -EINVAL; + + selector = (min_uV - (MAX8660_LDO5_MIN_UV - MAX8660_LDO5_STEP + 1)) + / MAX8660_LDO5_STEP; + ret = max8660_ldo5_list(rdev, selector); + if (ret < 0 || ret > max_uV) + return -EINVAL; + + ret = max8660_write(max8660, MAX8660_MDTV2, 0, selector); + if (ret) + return ret; + + /* Select target voltage register and activate regulation */ + return max8660_write(max8660, MAX8660_VCC1, 0xff, 0xc0); +} + +static struct regulator_ops max8660_ldo5_ops = { + .list_voltage = max8660_ldo5_list, + .set_voltage = max8660_ldo5_set, + .get_voltage = max8660_ldo5_get, +}; + + +/* + * LDO67 functions + */ + +static int max8660_ldo67_is_enabled(struct regulator_dev *rdev) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 val = max8660->shadow_regs[MAX8660_OVER2]; + u8 mask = (rdev_get_id(rdev) == MAX8660_V6) ? 2 : 4; + return !!(val & mask); +} + +static int max8660_ldo67_enable(struct regulator_dev *rdev) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 bit = (rdev_get_id(rdev) == MAX8660_V6) ? 
2 : 4; + return max8660_write(max8660, MAX8660_OVER2, 0xff, bit); +} + +static int max8660_ldo67_disable(struct regulator_dev *rdev) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 mask = (rdev_get_id(rdev) == MAX8660_V6) ? ~2 : ~4; + return max8660_write(max8660, MAX8660_OVER2, mask, 0); +} + +static int max8660_ldo67_list(struct regulator_dev *rdev, unsigned selector) +{ + if (selector > MAX8660_LDO67_MAX_SEL) + return -EINVAL; + return MAX8660_LDO67_MIN_UV + selector * MAX8660_LDO67_STEP; +} + +static int max8660_ldo67_get(struct regulator_dev *rdev) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 shift = (rdev_get_id(rdev) == MAX8660_V6) ? 0 : 4; + u8 selector = (max8660->shadow_regs[MAX8660_L12VCR] >> shift) & 0xf; + + return MAX8660_LDO67_MIN_UV + selector * MAX8660_LDO67_STEP; +} + +static int max8660_ldo67_set(struct regulator_dev *rdev, int min_uV, int max_uV) +{ + struct max8660 *max8660 = rdev_get_drvdata(rdev); + u8 selector; + int ret; + + if (min_uV < MAX8660_LDO67_MIN_UV || min_uV > MAX8660_LDO67_MAX_UV) + return -EINVAL; + if (max_uV < MAX8660_LDO67_MIN_UV || max_uV > MAX8660_LDO67_MAX_UV) + return -EINVAL; + + selector = (min_uV - (MAX8660_LDO67_MIN_UV - MAX8660_LDO67_STEP + 1)) + / MAX8660_LDO67_STEP; + + ret = max8660_ldo67_list(rdev, selector); + if (ret < 0 || ret > max_uV) + return -EINVAL; + + if (rdev_get_id(rdev) == MAX8660_V6) + return max8660_write(max8660, MAX8660_L12VCR, 0xf0, selector); + else + return max8660_write(max8660, MAX8660_L12VCR, 0x0f, selector << 4); +} + +static struct regulator_ops max8660_ldo67_ops = { + .is_enabled = max8660_ldo67_is_enabled, + .enable = max8660_ldo67_enable, + .disable = max8660_ldo67_disable, + .list_voltage = max8660_ldo67_list, + .get_voltage = max8660_ldo67_get, + .set_voltage = max8660_ldo67_set, +}; + +static struct regulator_desc max8660_reg[] = { + { + .name = "V3(DCDC)", + .id = MAX8660_V3, + .ops = &max8660_dcdc_ops, + .type = REGULATOR_VOLTAGE, + .n_voltages = MAX8660_DCDC_MAX_SEL + 1, + .owner = THIS_MODULE, + }, + { + .name = "V4(DCDC)", + .id = MAX8660_V4, + .ops = &max8660_dcdc_ops, + .type = REGULATOR_VOLTAGE, + .n_voltages = MAX8660_DCDC_MAX_SEL + 1, + .owner = THIS_MODULE, + }, + { + .name = "V5(LDO)", + .id = MAX8660_V5, + .ops = &max8660_ldo5_ops, + .type = REGULATOR_VOLTAGE, + .n_voltages = MAX8660_LDO5_MAX_SEL + 1, + .owner = THIS_MODULE, + }, + { + .name = "V6(LDO)", + .id = MAX8660_V6, + .ops = &max8660_ldo67_ops, + .type = REGULATOR_VOLTAGE, + .n_voltages = MAX8660_LDO67_MAX_SEL + 1, + .owner = THIS_MODULE, + }, + { + .name = "V7(LDO)", + .id = MAX8660_V7, + .ops = &max8660_ldo67_ops, + .type = REGULATOR_VOLTAGE, + .n_voltages = MAX8660_LDO67_MAX_SEL + 1, + .owner = THIS_MODULE, + }, +}; + +static int max8660_probe(struct i2c_client *client, + const struct i2c_device_id *i2c_id) +{ + struct regulator_dev **rdev; + struct max8660_platform_data *pdata = client->dev.platform_data; + struct max8660 *max8660; + int boot_on, i, id, ret = -EINVAL; + + if (pdata->num_subdevs > MAX8660_V_END) { + dev_err(&client->dev, "Too much regulators found!\n"); + goto out; + } + + max8660 = kzalloc(sizeof(struct max8660) + + sizeof(struct regulator_dev *) * MAX8660_V_END, + GFP_KERNEL); + if (!max8660) { + ret = -ENOMEM; + goto out; + } + + max8660->client = client; + rdev = max8660->rdev; + + if (pdata->en34_is_high) { + /* Simulate always on */ + max8660->shadow_regs[MAX8660_OVER1] = 5; + } else { + /* Otherwise devices can be toggled via software */ + max8660_dcdc_ops.enable = 
max8660_dcdc_enable; + max8660_dcdc_ops.disable = max8660_dcdc_disable; + } + + /* + * First, set up shadow registers to prevent glitches. As some + * registers are shared between regulators, everything must be properly + * set up for all regulators in advance. + */ + max8660->shadow_regs[MAX8660_ADTV1] = + max8660->shadow_regs[MAX8660_ADTV2] = + max8660->shadow_regs[MAX8660_SDTV1] = + max8660->shadow_regs[MAX8660_SDTV2] = 0x1b; + max8660->shadow_regs[MAX8660_MDTV1] = + max8660->shadow_regs[MAX8660_MDTV2] = 0x04; + + for (i = 0; i < pdata->num_subdevs; i++) { + + if (!pdata->subdevs[i].platform_data) + goto err_free; + + boot_on = pdata->subdevs[i].platform_data->constraints.boot_on; + + switch (pdata->subdevs[i].id) { + case MAX8660_V3: + if (boot_on) + max8660->shadow_regs[MAX8660_OVER1] |= 1; + break; + + case MAX8660_V4: + if (boot_on) + max8660->shadow_regs[MAX8660_OVER1] |= 4; + break; + + case MAX8660_V5: + break; + + case MAX8660_V6: + if (boot_on) + max8660->shadow_regs[MAX8660_OVER2] |= 2; + break; + + case MAX8660_V7: + if (!strcmp(i2c_id->name, "max8661")) { + dev_err(&client->dev, "Regulator not on this chip!\n"); + goto err_free; + } + + if (boot_on) + max8660->shadow_regs[MAX8660_OVER2] |= 4; + break; + + default: + dev_err(&client->dev, "invalid regulator %s\n", + pdata->subdevs[i].name); + goto err_free; + } + } + + /* Finally register devices */ + for (i = 0; i < pdata->num_subdevs; i++) { + + id = pdata->subdevs[i].id; + + rdev[i] = regulator_register(&max8660_reg[id], &client->dev, + pdata->subdevs[i].platform_data, + max8660); + if (IS_ERR(rdev[i])) { + ret = PTR_ERR(rdev[i]); + dev_err(&client->dev, "failed to register %s\n", + max8660_reg[id].name); + goto err_unregister; + } + } + + i2c_set_clientdata(client, rdev); + dev_info(&client->dev, "Maxim 8660/8661 regulator driver loaded\n"); + return 0; + +err_unregister: + while (--i >= 0) + regulator_unregister(rdev[i]); +err_free: + kfree(max8660); +out: + return ret; +} + +static int max8660_remove(struct i2c_client *client) +{ + struct regulator_dev **rdev = i2c_get_clientdata(client); + int i; + + for (i = 0; i < MAX8660_V_END; i++) + if (rdev[i]) + regulator_unregister(rdev[i]); + kfree(rdev); + i2c_set_clientdata(client, NULL); + + return 0; +} + +static const struct i2c_device_id max8660_id[] = { + { "max8660", 0 }, + { "max8661", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, max8660_id); + +static struct i2c_driver max8660_driver = { + .probe = max8660_probe, + .remove = max8660_remove, + .driver = { + .name = "max8660", + }, + .id_table = max8660_id, +}; + +static int __init max8660_init(void) +{ + return i2c_add_driver(&max8660_driver); +} +subsys_initcall(max8660_init); + +static void __exit max8660_exit(void) +{ + i2c_del_driver(&max8660_driver); +} +module_exit(max8660_exit); + +/* Module information */ +MODULE_DESCRIPTION("MAXIM 8660/8661 voltage regulator driver"); +MODULE_AUTHOR("Wolfram Sang"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/regulator/max8660.h b/include/linux/regulator/max8660.h new file mode 100644 index 000000000000..9936763621c7 --- /dev/null +++ b/include/linux/regulator/max8660.h @@ -0,0 +1,57 @@ +/* + * max8660.h -- Voltage regulation for the Maxim 8660/8661 + * + * Copyright (C) 2009 Wolfram Sang, Pengutronix e.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __LINUX_REGULATOR_MAX8660_H
+#define __LINUX_REGULATOR_MAX8660_H
+
+#include
+
+enum {
+	MAX8660_V3,
+	MAX8660_V4,
+	MAX8660_V5,
+	MAX8660_V6,
+	MAX8660_V7,
+	MAX8660_V_END,
+};
+
+/**
+ * max8660_subdev_data - regulator subdev data
+ * @id: regulator id
+ * @name: regulator name
+ * @platform_data: regulator init data
+ */
+struct max8660_subdev_data {
+	int id;
+	char *name;
+	struct regulator_init_data *platform_data;
+};
+
+/**
+ * max8660_platform_data - platform data for max8660
+ * @num_subdevs: number of regulators used
+ * @subdevs: pointer to regulators used
+ * @en34_is_high: if EN34 is driven high, regulators cannot be en-/disabled.
+ */
+struct max8660_platform_data {
+	int num_subdevs;
+	struct max8660_subdev_data *subdevs;
+	unsigned en34_is_high:1;
+};
+#endif
-- cgit v1.2.3

From e24a04c44cf312e88b50006a91ad7ffc1c0d97a5 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Tue, 22 Sep 2009 08:47:11 -0700
Subject: regulator: Implement WM831x BuckWise DC-DC convertor DVS support

The BuckWise DC-DC convertors in WM831x devices support switching to a
second output voltage using the logic level on one of the device pins.
This is intended to allow rapid voltage switching for uses like cpufreq,
replacing the I2C or SPI write used to configure the voltage of the
regulator with a much faster GPIO status change.

This is implemented by keeping the DVS voltage configured as the maximum
voltage permitted for the regulator. If a request is made for the maximum
voltage then the GPIO is used to switch to the DVS voltage, otherwise the
normal ON voltage is updated and used. This follows the idiom used by most
cpufreq drivers, which drop the minimum voltage as the core frequency is
dropped but use a constant maximum - raising the voltage should normally
be fast, but lowering it may be slower.

Configuration of the DVS MFP on the device should be done externally, for
example via OTP.

Support is present in the hardware for monitoring the status of the
transition using a second GPIO. This is not currently implemented - the
driver assumes that the device is configured to transition immediately -
but platform data for it is already provided, to reduce merge issues once
support is added.
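As an illustration of the idiom described above, the voltage-selection decision
can be reduced to a few lines of user-space C. This sketch is not part of the
patch: the names are invented, and only the 600000uV base, 12500uV step and +8
selector offset are taken from the hunks below.

#include <stdio.h>

/* selector maths borrowed from the hunks below:
 * selector 8 is 600000uV, one step is 12500uV */
static int uv_to_vsel(int uv)
{
	return (uv - 600000) / 12500 + 8;
}

struct buckv_state {
	int on_vsel;	/* selector held in the ON register  */
	int dvs_vsel;	/* selector held in the DVS register */
	int gpio;	/* DVS pin level: 0 selects ON, 1 selects DVS */
};

static void request_voltage(struct buckv_state *s, int min_uv, int max_uv)
{
	int want = uv_to_vsel(min_uv);

	if (want == s->on_vsel)
		s->gpio = 0;		/* fast path: just flip the pin */
	else if (want == s->dvs_vsel)
		s->gpio = 1;		/* fast path: just flip the pin */
	else {
		s->on_vsel = want;	/* slow path: rewrite the ON register */
		s->gpio = 0;
		s->dvs_vsel = uv_to_vsel(max_uv);  /* park the maximum in DVS */
	}
}

int main(void)
{
	/* start at 0.7V with 1.2V parked in the DVS register */
	struct buckv_state s = { .on_vsel = 16, .dvs_vsel = 56, .gpio = 0 };

	request_voltage(&s, 700000, 1200000);	/* stays on the ON value */
	request_voltage(&s, 1200000, 1200000);	/* GPIO jump to the parked maximum */
	printf("on=%d dvs=%d gpio=%d\n", s.on_vsel, s.dvs_vsel, s.gpio);
	return 0;
}

In the driver itself the register updates go through wm831x_set_bits() and the
pin through gpio_set_value(); the sketch keeps only the selection logic.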
Signed-off-by: Mark Brown Acked-by: Samuel Ortiz Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 207 +++++++++++++++++++++++++++++++++++---- include/linux/mfd/wm831x/pdata.h | 17 ++++ 2 files changed, 206 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 2eefc1a0cf08..0a6577577e8d 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -39,6 +41,7 @@ #define WM831X_DCDC_CONTROL_2 1 #define WM831X_DCDC_ON_CONFIG 2 #define WM831X_DCDC_SLEEP_CONTROL 3 +#define WM831X_DCDC_DVS_CONTROL 4 /* * Shared @@ -50,6 +53,10 @@ struct wm831x_dcdc { int base; struct wm831x *wm831x; struct regulator_dev *regulator; + int dvs_gpio; + int dvs_gpio_state; + int on_vsel; + int dvs_vsel; }; static int wm831x_dcdc_is_enabled(struct regulator_dev *rdev) @@ -240,11 +247,9 @@ static int wm831x_buckv_list_voltage(struct regulator_dev *rdev, return -EINVAL; } -static int wm831x_buckv_set_voltage_int(struct regulator_dev *rdev, int reg, - int min_uV, int max_uV) +static int wm831x_buckv_select_min_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV) { - struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev); - struct wm831x *wm831x = dcdc->wm831x; u16 vsel; if (min_uV < 600000) @@ -257,39 +262,126 @@ static int wm831x_buckv_set_voltage_int(struct regulator_dev *rdev, int reg, if (wm831x_buckv_list_voltage(rdev, vsel) > max_uV) return -EINVAL; - return wm831x_set_bits(wm831x, reg, WM831X_DC1_ON_VSEL_MASK, vsel); + return vsel; +} + +static int wm831x_buckv_select_max_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV) +{ + u16 vsel; + + if (max_uV < 600000 || max_uV > 1800000) + return -EINVAL; + + vsel = ((max_uV - 600000) / 12500) + 8; + + if (wm831x_buckv_list_voltage(rdev, vsel) < min_uV || + wm831x_buckv_list_voltage(rdev, vsel) < max_uV) + return -EINVAL; + + return vsel; +} + +static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state) +{ + struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev); + + if (state == dcdc->dvs_gpio_state) + return 0; + + dcdc->dvs_gpio_state = state; + gpio_set_value(dcdc->dvs_gpio, state); + + /* Should wait for DVS state change to be asserted if we have + * a GPIO for it, for now assume the device is configured + * for the fastest possible transition. 
+ */ + + return 0; } static int wm831x_buckv_set_voltage(struct regulator_dev *rdev, - int min_uV, int max_uV) + int min_uV, int max_uV) { struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev); - u16 reg = dcdc->base + WM831X_DCDC_ON_CONFIG; + struct wm831x *wm831x = dcdc->wm831x; + int on_reg = dcdc->base + WM831X_DCDC_ON_CONFIG; + int dvs_reg = dcdc->base + WM831X_DCDC_DVS_CONTROL; + int vsel, ret; + + vsel = wm831x_buckv_select_min_voltage(rdev, min_uV, max_uV); + if (vsel < 0) + return vsel; + + /* If this value is already set then do a GPIO update if we can */ + if (dcdc->dvs_gpio && dcdc->on_vsel == vsel) + return wm831x_buckv_set_dvs(rdev, 0); + + if (dcdc->dvs_gpio && dcdc->dvs_vsel == vsel) + return wm831x_buckv_set_dvs(rdev, 1); + + /* Always set the ON status to the minimum voltage */ + ret = wm831x_set_bits(wm831x, on_reg, WM831X_DC1_ON_VSEL_MASK, vsel); + if (ret < 0) + return ret; + dcdc->on_vsel = vsel; + + if (!dcdc->dvs_gpio) + return ret; + + /* Kick the voltage transition now */ + ret = wm831x_buckv_set_dvs(rdev, 0); + if (ret < 0) + return ret; + + /* Set the high voltage as the DVS voltage. This is optimised + * for CPUfreq usage, most processors will keep the maximum + * voltage constant and lower the minimum with the frequency. */ + vsel = wm831x_buckv_select_max_voltage(rdev, min_uV, max_uV); + if (vsel < 0) { + /* This should never happen - at worst the same vsel + * should be chosen */ + WARN_ON(vsel < 0); + return 0; + } + + /* Don't bother if it's the same VSEL we're already using */ + if (vsel == dcdc->on_vsel) + return 0; - return wm831x_buckv_set_voltage_int(rdev, reg, min_uV, max_uV); + ret = wm831x_set_bits(wm831x, dvs_reg, WM831X_DC1_DVS_VSEL_MASK, vsel); + if (ret == 0) + dcdc->dvs_vsel = vsel; + else + dev_warn(wm831x->dev, "Failed to set DCDC DVS VSEL: %d\n", + ret); + + return 0; } static int wm831x_buckv_set_suspend_voltage(struct regulator_dev *rdev, - int uV) + int uV) { struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev); + struct wm831x *wm831x = dcdc->wm831x; u16 reg = dcdc->base + WM831X_DCDC_SLEEP_CONTROL; + int vsel; + + vsel = wm831x_buckv_select_min_voltage(rdev, uV, uV); + if (vsel < 0) + return vsel; - return wm831x_buckv_set_voltage_int(rdev, reg, uV, uV); + return wm831x_set_bits(wm831x, reg, WM831X_DC1_SLP_VSEL_MASK, vsel); } static int wm831x_buckv_get_voltage(struct regulator_dev *rdev) { struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev); - struct wm831x *wm831x = dcdc->wm831x; - u16 reg = dcdc->base + WM831X_DCDC_ON_CONFIG; - int val; - val = wm831x_reg_read(wm831x, reg); - if (val < 0) - return val; - - return wm831x_buckv_list_voltage(rdev, val & WM831X_DC1_ON_VSEL_MASK); + if (dcdc->dvs_gpio && dcdc->dvs_gpio_state) + return wm831x_buckv_list_voltage(rdev, dcdc->dvs_vsel); + else + return wm831x_buckv_list_voltage(rdev, dcdc->on_vsel); } /* Current limit options */ @@ -346,6 +438,64 @@ static struct regulator_ops wm831x_buckv_ops = { .set_suspend_mode = wm831x_dcdc_set_suspend_mode, }; +/* + * Set up DVS control. We just log errors since we can still run + * (with reduced performance) if we fail. 
+ */ +static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc, + struct wm831x_buckv_pdata *pdata) +{ + struct wm831x *wm831x = dcdc->wm831x; + int ret; + u16 ctrl; + + if (!pdata || !pdata->dvs_gpio) + return; + + switch (pdata->dvs_control_src) { + case 1: + ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT; + break; + case 2: + ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT; + break; + default: + dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n", + pdata->dvs_control_src, dcdc->name); + return; + } + + ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL, + WM831X_DC1_DVS_SRC_MASK, ctrl); + if (ret < 0) { + dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n", + dcdc->name, ret); + return; + } + + ret = gpio_request(pdata->dvs_gpio, "DCDC DVS"); + if (ret < 0) { + dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n", + dcdc->name, ret); + return; + } + + /* gpiolib won't let us read the GPIO status so pick the higher + * of the two existing voltages so we take it as platform data. + */ + dcdc->dvs_gpio_state = pdata->dvs_init_state; + + ret = gpio_direction_output(pdata->dvs_gpio, dcdc->dvs_gpio_state); + if (ret < 0) { + dev_err(wm831x->dev, "Failed to enable %s DVS GPIO: %d\n", + dcdc->name, ret); + gpio_free(pdata->dvs_gpio); + return; + } + + dcdc->dvs_gpio = pdata->dvs_gpio; +} + static __devinit int wm831x_buckv_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); @@ -384,6 +534,23 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev) dcdc->desc.ops = &wm831x_buckv_ops; dcdc->desc.owner = THIS_MODULE; + ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG); + if (ret < 0) { + dev_err(wm831x->dev, "Failed to read ON VSEL: %d\n", ret); + goto err; + } + dcdc->on_vsel = ret & WM831X_DC1_ON_VSEL_MASK; + + ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG); + if (ret < 0) { + dev_err(wm831x->dev, "Failed to read DVS VSEL: %d\n", ret); + goto err; + } + dcdc->dvs_vsel = ret & WM831X_DC1_DVS_VSEL_MASK; + + if (pdata->dcdc[id]) + wm831x_buckv_dvs_init(dcdc, pdata->dcdc[id]->driver_data); + dcdc->regulator = regulator_register(&dcdc->desc, &pdev->dev, pdata->dcdc[id], dcdc); if (IS_ERR(dcdc->regulator)) { @@ -422,6 +589,8 @@ err_uv: err_regulator: regulator_unregister(dcdc->regulator); err: + if (dcdc->dvs_gpio) + gpio_free(dcdc->dvs_gpio); kfree(dcdc); return ret; } @@ -434,6 +603,8 @@ static __devexit int wm831x_buckv_remove(struct platform_device *pdev) wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "HC"), dcdc); wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), dcdc); regulator_unregister(dcdc->regulator); + if (dcdc->dvs_gpio) + gpio_free(dcdc->dvs_gpio); kfree(dcdc); return 0; diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h index 415c228743d5..fd322aca33ba 100644 --- a/include/linux/mfd/wm831x/pdata.h +++ b/include/linux/mfd/wm831x/pdata.h @@ -41,6 +41,23 @@ struct wm831x_battery_pdata { int timeout; /** Charge cycle timeout, in minutes */ }; +/** + * Configuration for the WM831x DC-DC BuckWise convertors. This + * should be passed as driver_data in the regulator_init_data. + * + * Currently all the configuration is for the fast DVS switching + * support of the devices. This allows MFPs on the device to be + * configured as an input to switch between two output voltages, + * allowing voltage transitions without the expense of an access over + * I2C or SPI buses. 
+ */ +struct wm831x_buckv_pdata { + int dvs_gpio; /** CPU GPIO to use for DVS switching */ + int dvs_control_src; /** Hardware DVS source to use (1 or 2) */ + int dvs_init_state; /** DVS state to expect on startup */ + int dvs_state_gpio; /** CPU GPIO to use for monitoring status */ +}; + /* Sources for status LED configuration. Values are register values * plus 1 to allow for a zero default for preserve. */ -- cgit v1.2.3 From 638f85c54f4fed0f8f1fbc23745a8f334112e892 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 22 Oct 2009 16:31:33 +0100 Subject: regulator: Handle regulators without suspend mode configuration Since some regulators in the system may not support suspend mode configuration we need to allow some regulators to have a missing suspend mode configuration. Do this by requiring that disabled regulators are explicitly flagged and then skip over regulators that have no state specified. Try to avoid surprises by warning the if we could set the state but no configuration is provided. This also ensures that an all zeros configuration generates a warning rather than silently disabling the regulator. Reported-by: Joonyoung Shim Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 25 ++++++++++++++++++++++--- include/linux/regulator/machine.h | 6 +++++- 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7d0c0d7d90ca..2dab0d9e11f5 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -581,10 +581,29 @@ static int suspend_set_state(struct regulator_dev *rdev, struct regulator_state *rstate) { int ret = 0; + bool can_set_state; - /* enable & disable are mandatory for suspend control */ - if (!rdev->desc->ops->set_suspend_enable || - !rdev->desc->ops->set_suspend_disable) { + can_set_state = rdev->desc->ops->set_suspend_enable && + rdev->desc->ops->set_suspend_disable; + + /* If we have no suspend mode configration don't set anything; + * only warn if the driver actually makes the suspend mode + * configurable. + */ + if (!rstate->enabled && !rstate->disabled) { + if (can_set_state) + printk(KERN_WARNING "%s: No configuration for %s\n", + __func__, rdev_get_name(rdev)); + return 0; + } + + if (rstate->enabled && rstate->disabled) { + printk(KERN_ERR "%s: invalid configuration for %s\n", + __func__, rdev_get_name(rdev)); + return -EINVAL; + } + + if (!can_set_state) { printk(KERN_ERR "%s: no way to set suspend state\n", __func__); return -EINVAL; diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 87f5f176d4ef..234a8476cba8 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -43,16 +43,20 @@ struct regulator; /** * struct regulator_state - regulator state during low power system states * - * This describes a regulators state during a system wide low power state. + * This describes a regulators state during a system wide low power + * state. One of enabled or disabled must be set for the + * configuration to be applied. * * @uV: Operating voltage during suspend. * @mode: Operating mode during suspend. * @enabled: Enabled during suspend. + * @disabled: Disabled during suspend. 
*/ struct regulator_state { int uV; /* suspend voltage */ unsigned int mode; /* suspend regulator operating mode */ int enabled; /* is regulator enabled in this suspend state */ + int disabled; /* is the regulator disbled in this suspend state */ }; /** -- cgit v1.2.3 From b56daf13eb77ee24f48f0bb34c2492f46a432ec4 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 11 Nov 2009 14:16:10 +0000 Subject: regulator: consumer.h - fix build when consumer.h is #included first. consumer.h requires device.h for stand alone build. Signed-off-by: Liam Girdwood --- include/linux/regulator/consumer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 490c5b37b6d7..030d92255c7a 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -35,6 +35,8 @@ #ifndef __LINUX_REGULATOR_CONSUMER_H_ #define __LINUX_REGULATOR_CONSUMER_H_ +#include + /* * Regulator operating modes. * -- cgit v1.2.3 From d4cc6a2eee98faebf2c7d3ebc4b35541c1d47d21 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Mon, 7 Dec 2009 15:08:13 +0100 Subject: leds: Add LED class driver for regulator driven LEDs. This driver provides an interface for controlling LEDs (or vibrators) connected to PMICs for which there is a regulator framework driver. This driver can be used, for instance, to control vibrator on all Motorola EZX phones using the pcap-regulator driver services. Signed-off-by: Antonio Ospite Reviewed-by: Mark Brown Signed-off-by: Richard Purdie --- drivers/leds/Kconfig | 6 + drivers/leds/Makefile | 1 + drivers/leds/leds-regulator.c | 242 +++++++++++++++++++++++++++++++++++++++++ include/linux/leds-regulator.h | 46 ++++++++ 4 files changed, 295 insertions(+) create mode 100644 drivers/leds/leds-regulator.c create mode 100644 include/linux/leds-regulator.h (limited to 'include') diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index f12a99676628..8a0e1ec95e4a 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -229,6 +229,12 @@ config LEDS_PWM help This option enables support for pwm driven LEDs +config LEDS_REGULATOR + tristate "REGULATOR driven LED support" + depends on LEDS_CLASS && REGULATOR + help + This option enables support for regulator driven LEDs. + config LEDS_BD2802 tristate "LED driver for BD2802 RGB LED" depends on LEDS_CLASS && I2C diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 176f0c674751..9e63869d7c0d 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_LEDS_DA903X) += leds-da903x.o obj-$(CONFIG_LEDS_WM831X_STATUS) += leds-wm831x-status.o obj-$(CONFIG_LEDS_WM8350) += leds-wm8350.o obj-$(CONFIG_LEDS_PWM) += leds-pwm.o +obj-$(CONFIG_LEDS_REGULATOR) += leds-regulator.o obj-$(CONFIG_LEDS_INTEL_SS4200) += leds-ss4200.o obj-$(CONFIG_LEDS_LT3593) += leds-lt3593.o obj-$(CONFIG_LEDS_ADP5520) += leds-adp5520.o diff --git a/drivers/leds/leds-regulator.c b/drivers/leds/leds-regulator.c new file mode 100644 index 000000000000..7f00de3ef922 --- /dev/null +++ b/drivers/leds/leds-regulator.c @@ -0,0 +1,242 @@ +/* + * leds-regulator.c - LED class driver for regulator driven LEDs. + * + * Copyright (C) 2009 Antonio Ospite + * + * Inspired by leds-wm8350 driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +#define to_regulator_led(led_cdev) \ + container_of(led_cdev, struct regulator_led, cdev) + +struct regulator_led { + struct led_classdev cdev; + enum led_brightness value; + int enabled; + struct mutex mutex; + struct work_struct work; + + struct regulator *vcc; +}; + +static inline int led_regulator_get_max_brightness(struct regulator *supply) +{ + int ret; + int voltage = regulator_list_voltage(supply, 0); + + if (voltage <= 0) + return 1; + + /* even if regulator can't change voltages, + * we still assume it can change status + * and the LED can be turned on and off. + */ + ret = regulator_set_voltage(supply, voltage, voltage); + if (ret < 0) + return 1; + + return regulator_count_voltages(supply); +} + +static int led_regulator_get_voltage(struct regulator *supply, + enum led_brightness brightness) +{ + if (brightness == 0) + return -EINVAL; + + return regulator_list_voltage(supply, brightness - 1); +} + + +static void regulator_led_enable(struct regulator_led *led) +{ + int ret; + + if (led->enabled) + return; + + ret = regulator_enable(led->vcc); + if (ret != 0) { + dev_err(led->cdev.dev, "Failed to enable vcc: %d\n", ret); + return; + } + + led->enabled = 1; +} + +static void regulator_led_disable(struct regulator_led *led) +{ + int ret; + + if (!led->enabled) + return; + + ret = regulator_disable(led->vcc); + if (ret != 0) { + dev_err(led->cdev.dev, "Failed to disable vcc: %d\n", ret); + return; + } + + led->enabled = 0; +} + +static void regulator_led_set_value(struct regulator_led *led) +{ + int voltage; + int ret; + + mutex_lock(&led->mutex); + + if (led->value == LED_OFF) { + regulator_led_disable(led); + goto out; + } + + if (led->cdev.max_brightness > 1) { + voltage = led_regulator_get_voltage(led->vcc, led->value); + dev_dbg(led->cdev.dev, "brightness: %d voltage: %d\n", + led->value, voltage); + + ret = regulator_set_voltage(led->vcc, voltage, voltage); + if (ret != 0) + dev_err(led->cdev.dev, "Failed to set voltage %d: %d\n", + voltage, ret); + } + + regulator_led_enable(led); + +out: + mutex_unlock(&led->mutex); +} + +static void led_work(struct work_struct *work) +{ + struct regulator_led *led; + + led = container_of(work, struct regulator_led, work); + regulator_led_set_value(led); +} + +static void regulator_led_brightness_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct regulator_led *led = to_regulator_led(led_cdev); + + led->value = value; + schedule_work(&led->work); +} + +static int __devinit regulator_led_probe(struct platform_device *pdev) +{ + struct led_regulator_platform_data *pdata = pdev->dev.platform_data; + struct regulator_led *led; + struct regulator *vcc; + int ret = 0; + + if (pdata == NULL) { + dev_err(&pdev->dev, "no platform data\n"); + return -ENODEV; + } + + vcc = regulator_get_exclusive(&pdev->dev, "vled"); + if (IS_ERR(vcc)) { + dev_err(&pdev->dev, "Cannot get vcc for %s\n", pdata->name); + return PTR_ERR(vcc); + } + + led = kzalloc(sizeof(*led), GFP_KERNEL); + if (led == NULL) { + ret = -ENOMEM; + goto err_vcc; + } + + led->cdev.max_brightness = led_regulator_get_max_brightness(vcc); + if (pdata->brightness > led->cdev.max_brightness) { + dev_err(&pdev->dev, "Invalid default brightness %d\n", + pdata->brightness); + ret = -EINVAL; + goto err_led; + } + led->value = pdata->brightness; + + led->cdev.brightness_set = regulator_led_brightness_set; + led->cdev.name = pdata->name; + led->cdev.flags |= LED_CORE_SUSPENDRESUME; + led->vcc = 
vcc; + + mutex_init(&led->mutex); + INIT_WORK(&led->work, led_work); + + platform_set_drvdata(pdev, led); + + ret = led_classdev_register(&pdev->dev, &led->cdev); + if (ret < 0) { + cancel_work_sync(&led->work); + goto err_led; + } + + /* to expose the default value to userspace */ + led->cdev.brightness = led->value; + + /* Set the default led status */ + regulator_led_set_value(led); + + return 0; + +err_led: + kfree(led); +err_vcc: + regulator_put(vcc); + return ret; +} + +static int __devexit regulator_led_remove(struct platform_device *pdev) +{ + struct regulator_led *led = platform_get_drvdata(pdev); + + led_classdev_unregister(&led->cdev); + cancel_work_sync(&led->work); + regulator_led_disable(led); + regulator_put(led->vcc); + kfree(led); + return 0; +} + +static struct platform_driver regulator_led_driver = { + .driver = { + .name = "leds-regulator", + .owner = THIS_MODULE, + }, + .probe = regulator_led_probe, + .remove = __devexit_p(regulator_led_remove), +}; + +static int __init regulator_led_init(void) +{ + return platform_driver_register(®ulator_led_driver); +} +module_init(regulator_led_init); + +static void __exit regulator_led_exit(void) +{ + platform_driver_unregister(®ulator_led_driver); +} +module_exit(regulator_led_exit); + +MODULE_AUTHOR("Antonio Ospite "); +MODULE_DESCRIPTION("Regulator driven LED driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:leds-regulator"); diff --git a/include/linux/leds-regulator.h b/include/linux/leds-regulator.h new file mode 100644 index 000000000000..5a8eb389aab8 --- /dev/null +++ b/include/linux/leds-regulator.h @@ -0,0 +1,46 @@ +/* + * leds-regulator.h - platform data structure for regulator driven LEDs. + * + * Copyright (C) 2009 Antonio Ospite + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#ifndef __LINUX_LEDS_REGULATOR_H +#define __LINUX_LEDS_REGULATOR_H + +/* + * Use "vled" as supply id when declaring the regulator consumer: + * + * static struct regulator_consumer_supply pcap_regulator_VVIB_consumers [] = { + * { .dev_name = "leds-regulator.0", supply = "vled" }, + * }; + * + * If you have several regulator driven LEDs, you can append a numerical id to + * .dev_name as done above, and use the same id when declaring the platform + * device: + * + * static struct led_regulator_platform_data a780_vibrator_data = { + * .name = "a780::vibrator", + * }; + * + * static struct platform_device a780_vibrator = { + * .name = "leds-regulator", + * .id = 0, + * .dev = { + * .platform_data = &a780_vibrator_data, + * }, + * }; + */ + +#include + +struct led_regulator_platform_data { + char *name; /* LED name as expected by LED class */ + enum led_brightness brightness; /* initial brightness value */ +}; + +#endif /* __LINUX_LEDS_REGULATOR_H */ -- cgit v1.2.3 From 9695fff8f84d7ab849139750036e443b85804edd Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sat, 28 Nov 2009 12:55:51 +0100 Subject: leds: leds-pca9532.h- indent with tabs, not spaces Signed-off-by: Antonio Ospite Signed-off-by: Richard Purdie --- include/linux/leds-pca9532.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h index 96eea90f01a8..f158eb1149aa 100644 --- a/include/linux/leds-pca9532.h +++ b/include/linux/leds-pca9532.h @@ -32,7 +32,7 @@ struct pca9532_led { struct i2c_client *client; char *name; struct led_classdev ldev; - struct work_struct work; + struct work_struct work; enum pca9532_type type; enum pca9532_state state; }; -- cgit v1.2.3 From 1998111582f5d726ca4dbf9d68935d9e7c994374 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sun, 29 Nov 2009 13:12:21 +0100 Subject: leds: leds-lp3944.h - remove unneeded includes These were needed in the first version of the driver because we used to expose workqueue and led class details in the header file, now we don't. Signed-off-by: Antonio Ospite Signed-off-by: Richard Purdie --- include/linux/leds-lp3944.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/leds-lp3944.h b/include/linux/leds-lp3944.h index afc9f9fd70f5..2618aa9063bc 100644 --- a/include/linux/leds-lp3944.h +++ b/include/linux/leds-lp3944.h @@ -12,9 +12,6 @@ #ifndef __LINUX_LEDS_LP3944_H #define __LINUX_LEDS_LP3944_H -#include -#include - #define LP3944_LED0 0 #define LP3944_LED1 1 #define LP3944_LED2 2 -- cgit v1.2.3 From cfc3899fcd0b3b990b29d3d33f75f4edf715e7d1 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 10 Nov 2009 17:20:40 +0000 Subject: backlight: Pass device through notify callback in the pwm driver Add the device to the notify callback's arguments in the PWM backlight driver. This brings the notify callback into line with the other callbacks defined by this driver. 
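For board code the change amounts to adding a struct device argument to the
callback. The following is a hedged sketch of an adapted machine file, not
taken from any real board: the GPIO number and values are invented, and only
the callback prototype and the platform data fields visible in the hunks below
come from the patch.

#include <linux/device.h>
#include <linux/gpio.h>
#include <linux/pwm_backlight.h>

#define EXAMPLE_BL_EN_GPIO	42	/* invented backlight-enable line */

/* old prototype was:  static int example_bl_notify(int brightness); */
static int example_bl_notify(struct device *dev, int brightness)
{
	/* the device pointer now lets the callback reach per-device state */
	dev_dbg(dev, "backlight brightness -> %d\n", brightness);
	gpio_set_value(EXAMPLE_BL_EN_GPIO, brightness != 0);
	return brightness;	/* the driver uses the returned value */
}

static struct platform_pwm_backlight_data example_bl_data = {
	.pwm_id		= 0,
	.dft_brightness	= 128,
	.pwm_period_ns	= 78770,
	.notify		= example_bl_notify,
	/* remaining fields (init, exit, ...) omitted for brevity */
};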
Signed-off-by: Ben Dooks Signed-off-by: Simtec Linux Team Signed-off-by: Richard Purdie --- drivers/video/backlight/pwm_bl.c | 9 ++++++--- include/linux/pwm_backlight.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index df9e0b32cf39..9d2ec2a1cce8 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -22,8 +22,10 @@ struct pwm_bl_data { struct pwm_device *pwm; + struct device *dev; unsigned int period; - int (*notify)(int brightness); + int (*notify)(struct device *, + int brightness); }; static int pwm_backlight_update_status(struct backlight_device *bl) @@ -39,7 +41,7 @@ static int pwm_backlight_update_status(struct backlight_device *bl) brightness = 0; if (pb->notify) - brightness = pb->notify(brightness); + brightness = pb->notify(pb->dev, brightness); if (brightness == 0) { pwm_config(pb->pwm, 0, pb->period); @@ -88,6 +90,7 @@ static int pwm_backlight_probe(struct platform_device *pdev) pb->period = data->pwm_period_ns; pb->notify = data->notify; + pb->dev = &pdev->dev; pb->pwm = pwm_request(data->pwm_id, "backlight"); if (IS_ERR(pb->pwm)) { @@ -146,7 +149,7 @@ static int pwm_backlight_suspend(struct platform_device *pdev, struct pwm_bl_data *pb = dev_get_drvdata(&bl->dev); if (pb->notify) - pb->notify(0); + pb->notify(pb->dev, 0); pwm_config(pb->pwm, 0, pb->period); pwm_disable(pb->pwm); return 0; diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index 7a9754c96775..01b3d759f1fc 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -10,7 +10,7 @@ struct platform_pwm_backlight_data { unsigned int dft_brightness; unsigned int pwm_period_ns; int (*init)(struct device *dev); - int (*notify)(int brightness); + int (*notify)(struct device *dev, int brightness); void (*exit)(struct device *dev); }; -- cgit v1.2.3 From 733421516b42c44b9e21f1793c430cc801ef8324 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Dec 2009 13:16:27 +0100 Subject: sched: Move TASK_STATE_TO_CHAR_STR near the TASK_state bits So that we don't keep forgetting about it. Signed-off-by: Peter Zijlstra LKML-Reference: <20091217121829.815779372@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 973b2b89f86d..c28ed1b1d7c2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -193,6 +193,8 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #define TASK_WAKEKILL 128 #define TASK_WAKING 256 +#define TASK_STATE_TO_CHAR_STR "RSDTtZX" + /* Convenience macros for the sake of set_task_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) @@ -2595,8 +2597,6 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ -#define TASK_STATE_TO_CHAR_STR "RSDTtZX" - #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 44d90df6b757c59651ddd55f1a84f28132b50d29 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Dec 2009 13:16:28 +0100 Subject: sched: Add missing state chars to TASK_STATE_TO_CHAR_STR We grew 3 new task states since the last time someone touched it. 
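The string is indexed by shifting the state bitmask one character per bit, so
every new state bit needs a matching character. A small user-space illustration
of the mapping, mirroring the get_task_state() loop shown in the next patch
(the helper below is not kernel code):

#include <stdio.h>

#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"

/* bit N of the state word maps to character N+1; state 0 is 'R' (running) */
static char state_char(unsigned int state)
{
	const char *p = TASK_STATE_TO_CHAR_STR;

	while (state) {
		p++;
		state >>= 1;
	}
	return *p;
}

int main(void)
{
	printf("%c\n", state_char(0));	 /* running             -> R */
	printf("%c\n", state_char(1));	 /* interruptible sleep -> S */
	printf("%c\n", state_char(256)); /* TASK_WAKING (bit 8) -> W */
	return 0;
}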
Signed-off-by: Peter Zijlstra LKML-Reference: <20091217121829.892737686@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index c28ed1b1d7c2..94858df38a87 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -193,7 +193,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #define TASK_WAKEKILL 128 #define TASK_WAKING 256 -#define TASK_STATE_TO_CHAR_STR "RSDTtZX" +#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW" /* Convenience macros for the sake of set_task_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) -- cgit v1.2.3 From e1781538cf5c870ab696e9b8f0a5c498d3900f2f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Dec 2009 13:16:30 +0100 Subject: sched: Assert task state bits at build time Since everybody is lazy and prone to forgetting things, make the compiler help us a bit. Signed-off-by: Peter Zijlstra LKML-Reference: <20091217121830.060186433@chello.nl> Signed-off-by: Ingo Molnar --- fs/proc/array.c | 18 ++++++++++-------- include/linux/sched.h | 4 ++++ 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/proc/array.c b/fs/proc/array.c index 96361e8fa3a8..f560325c444f 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -134,14 +134,14 @@ static inline void task_name(struct seq_file *m, struct task_struct *p) * simple bit tests. */ static const char *task_state_array[] = { - "R (running)", /* 0 */ - "S (sleeping)", /* 1 */ - "D (disk sleep)", /* 2 */ - "T (stopped)", /* 4 */ - "t (tracing stop)", /* 8 */ - "Z (zombie)", /* 16 */ - "X (dead)", /* 32 */ - "x (dead)", /* 64 */ + "R (running)", /* 0 */ + "S (sleeping)", /* 1 */ + "D (disk sleep)", /* 2 */ + "T (stopped)", /* 4 */ + "t (tracing stop)", /* 8 */ + "Z (zombie)", /* 16 */ + "X (dead)", /* 32 */ + "x (dead)", /* 64 */ "K (wakekill)", /* 128 */ "W (waking)", /* 256 */ }; @@ -151,6 +151,8 @@ static inline const char *get_task_state(struct task_struct *tsk) unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; const char **p = &task_state_array[0]; + BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); + while (state) { p++; state >>= 1; diff --git a/include/linux/sched.h b/include/linux/sched.h index 94858df38a87..37543876ddf5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -192,9 +192,13 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #define TASK_DEAD 64 #define TASK_WAKEKILL 128 #define TASK_WAKING 256 +#define TASK_STATE_MAX 512 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW" +extern char ___assert_task_state[1 - 2*!!( + sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; + /* Convenience macros for the sake of set_task_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) -- cgit v1.2.3 From e24c745272072fd2abe55209f1949b7b7ee602a7 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Mon, 14 Dec 2009 14:20:22 -0800 Subject: spi: controller driver for Designware SPI core Driver for the Designware SPI core, it supports multipul interfaces like PCI/APB etc. User can use "dw_apb_ssi_db.pdf" from Synopsys as HW datasheet. 
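At its core the new driver is a polled FIFO handshake: only push to the data
register while the TX FIFO reports room, and only pop while the RX FIFO reports
data. The sketch below is illustration only, not driver code; the status-bit
names come from the patch, while their bit positions and the fake loopback
registers are assumptions made so the example runs in user space.

#include <stdio.h>

/* Status bits as named in the patch; the positions are assumed here. */
#define SR_TF_NOT_FULL	(1 << 1)	/* transmit FIFO has room */
#define SR_RF_NOT_EMPT	(1 << 3)	/* receive FIFO has data  */

/* Fake one-entry loopback "FIFO" so the sketch runs in user space. */
static unsigned short fifo;
static int fifo_full;

static unsigned int read_sr(void)
{
	return fifo_full ? SR_RF_NOT_EMPT : SR_TF_NOT_FULL;
}

static void write_dr(unsigned short v) { fifo = v; fifo_full = 1; }
static unsigned short read_dr(void) { fifo_full = 0; return fifo; }

/* Poll-mode transfer: never push unless TX has room, never pop unless RX
 * has data -- the same handshake as the u8/u16 reader and writer helpers. */
static void poll_transfer(const unsigned char *tx, unsigned char *rx, int len)
{
	int sent = 0, got = 0;

	while (got < len) {
		if (sent < len && (read_sr() & SR_TF_NOT_FULL))
			write_dr(tx[sent++]);
		if (read_sr() & SR_RF_NOT_EMPT)
			rx[got++] = (unsigned char)read_dr();
	}
}

int main(void)
{
	unsigned char out[3] = { 0x01, 0x02, 0x03 }, in[3] = { 0 };

	poll_transfer(out, in, 3);
	printf("%02x %02x %02x\n", in[0], in[1], in[2]);
	return 0;
}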
[randy.dunlap@oracle.com: fix build] [akpm@linux-foundation.org: build fix] Signed-off-by: Feng Tang Cc: David Brownell Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Grant Likely --- drivers/spi/Kconfig | 10 + drivers/spi/Makefile | 2 + drivers/spi/dw_spi.c | 944 +++++++++++++++++++++++++++++++++++++++++++++ drivers/spi/dw_spi_pci.c | 169 ++++++++ include/linux/spi/dw_spi.h | 212 ++++++++++ 5 files changed, 1337 insertions(+) create mode 100644 drivers/spi/dw_spi.c create mode 100644 drivers/spi/dw_spi_pci.c create mode 100644 include/linux/spi/dw_spi.h (limited to 'include') diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 07e5453f7b18..d7c1741c4c5b 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -292,6 +292,16 @@ config SPI_NUC900 # Add new SPI master controllers in alphabetical order above this line # +config SPI_DESIGNWARE + bool "DesignWare SPI controller core support" + depends on SPI_MASTER + help + general driver for SPI controller core from DesignWare + +config SPI_DW_PCI + tristate "PCI interface driver for DW SPI core" + depends on SPI_DESIGNWARE && PCI + # # There are lots of SPI device types, with sensors and memory # being probably the most widely used ones. diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index ed8c1675b52f..a909e39f7e7c 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -16,6 +16,8 @@ obj-$(CONFIG_SPI_BFIN) += spi_bfin5xx.o obj-$(CONFIG_SPI_BITBANG) += spi_bitbang.o obj-$(CONFIG_SPI_AU1550) += au1550_spi.o obj-$(CONFIG_SPI_BUTTERFLY) += spi_butterfly.o +obj-$(CONFIG_SPI_DESIGNWARE) += dw_spi.o +obj-$(CONFIG_SPI_DW_PCI) += dw_spi_pci.o obj-$(CONFIG_SPI_GPIO) += spi_gpio.o obj-$(CONFIG_SPI_IMX) += spi_imx.o obj-$(CONFIG_SPI_LM70_LLP) += spi_lm70llp.o diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c new file mode 100644 index 000000000000..31620fae77be --- /dev/null +++ b/drivers/spi/dw_spi.c @@ -0,0 +1,944 @@ +/* + * dw_spi.c - Designware SPI core controller driver (refer pxa2xx_spi.c) + * + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_DEBUG_FS +#include +#endif + +#define START_STATE ((void *)0) +#define RUNNING_STATE ((void *)1) +#define DONE_STATE ((void *)2) +#define ERROR_STATE ((void *)-1) + +#define QUEUE_RUNNING 0 +#define QUEUE_STOPPED 1 + +#define MRST_SPI_DEASSERT 0 +#define MRST_SPI_ASSERT 1 + +/* Slave spi_dev related */ +struct chip_data { + u16 cr0; + u8 cs; /* chip select pin */ + u8 n_bytes; /* current is a 1/2/4 byte op */ + u8 tmode; /* TR/TO/RO/EEPROM */ + u8 type; /* SPI/SSP/MicroWire */ + + u8 poll_mode; /* 1 means use poll mode */ + + u32 dma_width; + u32 rx_threshold; + u32 tx_threshold; + u8 enable_dma; + u8 bits_per_word; + u16 clk_div; /* baud rate divider */ + u32 speed_hz; /* baud rate */ + int (*write)(struct dw_spi *dws); + int (*read)(struct dw_spi *dws); + void (*cs_control)(u32 command); +}; + +#ifdef CONFIG_DEBUG_FS +static int spi_show_regs_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +#define SPI_REGS_BUFSIZE 1024 +static ssize_t spi_show_regs(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct dw_spi *dws; + char *buf; + u32 len = 0; + ssize_t ret; + + dws = file->private_data; + + buf = kzalloc(SPI_REGS_BUFSIZE, GFP_KERNEL); + if (!buf) + return 0; + + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "MRST SPI0 registers:\n"); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "=================================\n"); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "CTRL0: \t\t0x%08x\n", dw_readl(dws, ctrl0)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "CTRL1: \t\t0x%08x\n", dw_readl(dws, ctrl1)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "SSIENR: \t0x%08x\n", dw_readl(dws, ssienr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "SER: \t\t0x%08x\n", dw_readl(dws, ser)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "BAUDR: \t\t0x%08x\n", dw_readl(dws, baudr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "TXFTLR: \t0x%08x\n", dw_readl(dws, txfltr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "RXFTLR: \t0x%08x\n", dw_readl(dws, rxfltr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "TXFLR: \t\t0x%08x\n", dw_readl(dws, txflr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "RXFLR: \t\t0x%08x\n", dw_readl(dws, rxflr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "SR: \t\t0x%08x\n", dw_readl(dws, sr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "IMR: \t\t0x%08x\n", dw_readl(dws, imr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "ISR: \t\t0x%08x\n", dw_readl(dws, isr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "DMACR: \t\t0x%08x\n", dw_readl(dws, dmacr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "DMATDLR: \t0x%08x\n", dw_readl(dws, dmatdlr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "DMARDLR: \t0x%08x\n", dw_readl(dws, dmardlr)); + len += snprintf(buf + len, SPI_REGS_BUFSIZE - len, + "=================================\n"); + + ret = simple_read_from_buffer(user_buf, count, ppos, buf, len); + kfree(buf); + return ret; +} + +static const struct file_operations mrst_spi_regs_ops = { + .owner = THIS_MODULE, + .open = spi_show_regs_open, + .read = spi_show_regs, +}; + +static int mrst_spi_debugfs_init(struct dw_spi *dws) +{ + dws->debugfs = debugfs_create_dir("mrst_spi", NULL); + if (!dws->debugfs) + return -ENOMEM; + + 
debugfs_create_file("registers", S_IFREG | S_IRUGO, + dws->debugfs, (void *)dws, &mrst_spi_regs_ops); + return 0; +} + +static void mrst_spi_debugfs_remove(struct dw_spi *dws) +{ + if (dws->debugfs) + debugfs_remove_recursive(dws->debugfs); +} + +#else +static inline int mrst_spi_debugfs_init(struct dw_spi *dws) +{ +} + +static inline void mrst_spi_debugfs_remove(struct dw_spi *dws) +{ +} +#endif /* CONFIG_DEBUG_FS */ + +static void wait_till_not_busy(struct dw_spi *dws) +{ + unsigned long end = jiffies + usecs_to_jiffies(1000); + + while (time_before(jiffies, end)) { + if (!(dw_readw(dws, sr) & SR_BUSY)) + return; + } + dev_err(&dws->master->dev, + "DW SPI: Stutus keeps busy for 1000us after a read/write!\n"); +} + +static void flush(struct dw_spi *dws) +{ + while (dw_readw(dws, sr) & SR_RF_NOT_EMPT) + dw_readw(dws, dr); + + wait_till_not_busy(dws); +} + +static void null_cs_control(u32 command) +{ +} + +static int null_writer(struct dw_spi *dws) +{ + u8 n_bytes = dws->n_bytes; + + if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) + || (dws->tx == dws->tx_end)) + return 0; + dw_writew(dws, dr, 0); + dws->tx += n_bytes; + + wait_till_not_busy(dws); + return 1; +} + +static int null_reader(struct dw_spi *dws) +{ + u8 n_bytes = dws->n_bytes; + + while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) + && (dws->rx < dws->rx_end)) { + dw_readw(dws, dr); + dws->rx += n_bytes; + } + wait_till_not_busy(dws); + return dws->rx == dws->rx_end; +} + +static int u8_writer(struct dw_spi *dws) +{ + if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) + || (dws->tx == dws->tx_end)) + return 0; + + dw_writew(dws, dr, *(u8 *)(dws->tx)); + ++dws->tx; + + wait_till_not_busy(dws); + return 1; +} + +static int u8_reader(struct dw_spi *dws) +{ + while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) + && (dws->rx < dws->rx_end)) { + *(u8 *)(dws->rx) = dw_readw(dws, dr); + ++dws->rx; + } + + wait_till_not_busy(dws); + return dws->rx == dws->rx_end; +} + +static int u16_writer(struct dw_spi *dws) +{ + if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) + || (dws->tx == dws->tx_end)) + return 0; + + dw_writew(dws, dr, *(u16 *)(dws->tx)); + dws->tx += 2; + + wait_till_not_busy(dws); + return 1; +} + +static int u16_reader(struct dw_spi *dws) +{ + u16 temp; + + while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) + && (dws->rx < dws->rx_end)) { + temp = dw_readw(dws, dr); + *(u16 *)(dws->rx) = temp; + dws->rx += 2; + } + + wait_till_not_busy(dws); + return dws->rx == dws->rx_end; +} + +static void *next_transfer(struct dw_spi *dws) +{ + struct spi_message *msg = dws->cur_msg; + struct spi_transfer *trans = dws->cur_transfer; + + /* Move to next transfer */ + if (trans->transfer_list.next != &msg->transfers) { + dws->cur_transfer = + list_entry(trans->transfer_list.next, + struct spi_transfer, + transfer_list); + return RUNNING_STATE; + } else + return DONE_STATE; +} + +/* + * Note: first step is the protocol driver prepares + * a dma-capable memory, and this func just need translate + * the virt addr to physical + */ +static int map_dma_buffers(struct dw_spi *dws) +{ + if (!dws->cur_msg->is_dma_mapped || !dws->dma_inited + || !dws->cur_chip->enable_dma) + return 0; + + if (dws->cur_transfer->tx_dma) + dws->tx_dma = dws->cur_transfer->tx_dma; + + if (dws->cur_transfer->rx_dma) + dws->rx_dma = dws->cur_transfer->rx_dma; + + return 1; +} + +/* Caller already set message->status; dma and pio irqs are blocked */ +static void giveback(struct dw_spi *dws) +{ + struct spi_transfer *last_transfer; + unsigned long flags; + struct spi_message *msg; + + spin_lock_irqsave(&dws->lock, 
flags); + msg = dws->cur_msg; + dws->cur_msg = NULL; + dws->cur_transfer = NULL; + dws->prev_chip = dws->cur_chip; + dws->cur_chip = NULL; + dws->dma_mapped = 0; + queue_work(dws->workqueue, &dws->pump_messages); + spin_unlock_irqrestore(&dws->lock, flags); + + last_transfer = list_entry(msg->transfers.prev, + struct spi_transfer, + transfer_list); + + if (!last_transfer->cs_change) + dws->cs_control(MRST_SPI_DEASSERT); + + msg->state = NULL; + if (msg->complete) + msg->complete(msg->context); +} + +static void int_error_stop(struct dw_spi *dws, const char *msg) +{ + /* Stop and reset hw */ + flush(dws); + spi_enable_chip(dws, 0); + + dev_err(&dws->master->dev, "%s\n", msg); + dws->cur_msg->state = ERROR_STATE; + tasklet_schedule(&dws->pump_transfers); +} + +static void transfer_complete(struct dw_spi *dws) +{ + /* Update total byte transfered return count actual bytes read */ + dws->cur_msg->actual_length += dws->len; + + /* Move to next transfer */ + dws->cur_msg->state = next_transfer(dws); + + /* Handle end of message */ + if (dws->cur_msg->state == DONE_STATE) { + dws->cur_msg->status = 0; + giveback(dws); + } else + tasklet_schedule(&dws->pump_transfers); +} + +static irqreturn_t interrupt_transfer(struct dw_spi *dws) +{ + u16 irq_status, irq_mask = 0x3f; + + irq_status = dw_readw(dws, isr) & irq_mask; + /* Error handling */ + if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) { + dw_readw(dws, txoicr); + dw_readw(dws, rxoicr); + dw_readw(dws, rxuicr); + int_error_stop(dws, "interrupt_transfer: fifo overrun"); + return IRQ_HANDLED; + } + + /* INT comes from tx */ + if (dws->tx && (irq_status & SPI_INT_TXEI)) { + while (dws->tx < dws->tx_end) + dws->write(dws); + + if (dws->tx == dws->tx_end) { + spi_mask_intr(dws, SPI_INT_TXEI); + transfer_complete(dws); + } + } + + /* INT comes from rx */ + if (dws->rx && (irq_status & SPI_INT_RXFI)) { + if (dws->read(dws)) + transfer_complete(dws); + } + return IRQ_HANDLED; +} + +static irqreturn_t dw_spi_irq(int irq, void *dev_id) +{ + struct dw_spi *dws = dev_id; + + if (!dws->cur_msg) { + spi_mask_intr(dws, SPI_INT_TXEI); + /* Never fail */ + return IRQ_HANDLED; + } + + return dws->transfer_handler(dws); +} + +/* Must be called inside pump_transfers() */ +static void poll_transfer(struct dw_spi *dws) +{ + if (dws->tx) { + while (dws->write(dws)) + dws->read(dws); + } + + dws->read(dws); + transfer_complete(dws); +} + +static void dma_transfer(struct dw_spi *dws, int cs_change) +{ +} + +static void pump_transfers(unsigned long data) +{ + struct dw_spi *dws = (struct dw_spi *)data; + struct spi_message *message = NULL; + struct spi_transfer *transfer = NULL; + struct spi_transfer *previous = NULL; + struct spi_device *spi = NULL; + struct chip_data *chip = NULL; + u8 bits = 0; + u8 imask = 0; + u8 cs_change = 0; + u16 clk_div = 0; + u32 speed = 0; + u32 cr0 = 0; + + /* Get current state information */ + message = dws->cur_msg; + transfer = dws->cur_transfer; + chip = dws->cur_chip; + spi = message->spi; + + if (message->state == ERROR_STATE) { + message->status = -EIO; + goto early_exit; + } + + /* Handle end of message */ + if (message->state == DONE_STATE) { + message->status = 0; + goto early_exit; + } + + /* Delay if requested at end of transfer*/ + if (message->state == RUNNING_STATE) { + previous = list_entry(transfer->transfer_list.prev, + struct spi_transfer, + transfer_list); + if (previous->delay_usecs) + udelay(previous->delay_usecs); + } + + dws->n_bytes = chip->n_bytes; + dws->dma_width = chip->dma_width; + dws->cs_control 
= chip->cs_control; + + dws->rx_dma = transfer->rx_dma; + dws->tx_dma = transfer->tx_dma; + dws->tx = (void *)transfer->tx_buf; + dws->tx_end = dws->tx + transfer->len; + dws->rx = transfer->rx_buf; + dws->rx_end = dws->rx + transfer->len; + dws->write = dws->tx ? chip->write : null_writer; + dws->read = dws->rx ? chip->read : null_reader; + dws->cs_change = transfer->cs_change; + dws->len = dws->cur_transfer->len; + if (chip != dws->prev_chip) + cs_change = 1; + + cr0 = chip->cr0; + + /* Handle per transfer options for bpw and speed */ + if (transfer->speed_hz) { + speed = chip->speed_hz; + + if (transfer->speed_hz != speed) { + speed = transfer->speed_hz; + if (speed > dws->max_freq) { + printk(KERN_ERR "MRST SPI0: unsupported" + "freq: %dHz\n", speed); + message->status = -EIO; + goto early_exit; + } + + /* clk_div doesn't support odd number */ + clk_div = dws->max_freq / speed; + clk_div = (clk_div >> 1) << 1; + + chip->speed_hz = speed; + chip->clk_div = clk_div; + } + } + if (transfer->bits_per_word) { + bits = transfer->bits_per_word; + + switch (bits) { + case 8: + dws->n_bytes = 1; + dws->dma_width = 1; + dws->read = (dws->read != null_reader) ? + u8_reader : null_reader; + dws->write = (dws->write != null_writer) ? + u8_writer : null_writer; + break; + case 16: + dws->n_bytes = 2; + dws->dma_width = 2; + dws->read = (dws->read != null_reader) ? + u16_reader : null_reader; + dws->write = (dws->write != null_writer) ? + u16_writer : null_writer; + break; + default: + printk(KERN_ERR "MRST SPI0: unsupported bits:" + "%db\n", bits); + message->status = -EIO; + goto early_exit; + } + + cr0 = (bits - 1) + | (chip->type << SPI_FRF_OFFSET) + | (spi->mode << SPI_MODE_OFFSET) + | (chip->tmode << SPI_TMOD_OFFSET); + } + message->state = RUNNING_STATE; + + /* Check if current transfer is a DMA transaction */ + dws->dma_mapped = map_dma_buffers(dws); + + if (!dws->dma_mapped && !chip->poll_mode) { + if (dws->rx) + imask |= SPI_INT_RXFI; + if (dws->tx) + imask |= SPI_INT_TXEI; + dws->transfer_handler = interrupt_transfer; + } + + /* + * Reprogram registers only if + * 1. chip select changes + * 2. clk_div is changed + * 3. control value changes + */ + if (dw_readw(dws, ctrl0) != cr0 || cs_change || clk_div) { + spi_enable_chip(dws, 0); + + if (dw_readw(dws, ctrl0) != cr0) + dw_writew(dws, ctrl0, cr0); + + /* Set the interrupt mask, for poll mode just diable all int */ + spi_mask_intr(dws, 0xff); + if (!chip->poll_mode) + spi_umask_intr(dws, imask); + + spi_set_clk(dws, clk_div ? 
clk_div : chip->clk_div); + spi_chip_sel(dws, spi->chip_select); + spi_enable_chip(dws, 1); + + if (cs_change) + dws->prev_chip = chip; + } + + if (dws->dma_mapped) + dma_transfer(dws, cs_change); + + if (chip->poll_mode) + poll_transfer(dws); + + return; + +early_exit: + giveback(dws); + return; +} + +static void pump_messages(struct work_struct *work) +{ + struct dw_spi *dws = + container_of(work, struct dw_spi, pump_messages); + unsigned long flags; + + /* Lock queue and check for queue work */ + spin_lock_irqsave(&dws->lock, flags); + if (list_empty(&dws->queue) || dws->run == QUEUE_STOPPED) { + dws->busy = 0; + spin_unlock_irqrestore(&dws->lock, flags); + return; + } + + /* Make sure we are not already running a message */ + if (dws->cur_msg) { + spin_unlock_irqrestore(&dws->lock, flags); + return; + } + + /* Extract head of queue */ + dws->cur_msg = list_entry(dws->queue.next, struct spi_message, queue); + list_del_init(&dws->cur_msg->queue); + + /* Initial message state*/ + dws->cur_msg->state = START_STATE; + dws->cur_transfer = list_entry(dws->cur_msg->transfers.next, + struct spi_transfer, + transfer_list); + dws->cur_chip = spi_get_ctldata(dws->cur_msg->spi); + + /* Mark as busy and launch transfers */ + tasklet_schedule(&dws->pump_transfers); + + dws->busy = 1; + spin_unlock_irqrestore(&dws->lock, flags); +} + +/* spi_device use this to queue in their spi_msg */ +static int dw_spi_transfer(struct spi_device *spi, struct spi_message *msg) +{ + struct dw_spi *dws = spi_master_get_devdata(spi->master); + unsigned long flags; + + spin_lock_irqsave(&dws->lock, flags); + + if (dws->run == QUEUE_STOPPED) { + spin_unlock_irqrestore(&dws->lock, flags); + return -ESHUTDOWN; + } + + msg->actual_length = 0; + msg->status = -EINPROGRESS; + msg->state = START_STATE; + + list_add_tail(&msg->queue, &dws->queue); + + if (dws->run == QUEUE_RUNNING && !dws->busy) { + + if (dws->cur_transfer || dws->cur_msg) + queue_work(dws->workqueue, + &dws->pump_messages); + else { + /* If no other data transaction in air, just go */ + spin_unlock_irqrestore(&dws->lock, flags); + pump_messages(&dws->pump_messages); + return 0; + } + } + + spin_unlock_irqrestore(&dws->lock, flags); + return 0; +} + +/* This may be called twice for each spi dev */ +static int dw_spi_setup(struct spi_device *spi) +{ + struct dw_spi_chip *chip_info = NULL; + struct chip_data *chip; + + if (spi->bits_per_word != 8 && spi->bits_per_word != 16) + return -EINVAL; + + /* Only alloc on first setup */ + chip = spi_get_ctldata(spi); + if (!chip) { + chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->cs_control = null_cs_control; + chip->enable_dma = 0; + } + + /* + * Protocol drivers may change the chip settings, so... 
+ * if chip_info exists, use it + */ + chip_info = spi->controller_data; + + /* chip_info doesn't always exist */ + if (chip_info) { + if (chip_info->cs_control) + chip->cs_control = chip_info->cs_control; + + chip->poll_mode = chip_info->poll_mode; + chip->type = chip_info->type; + + chip->rx_threshold = 0; + chip->tx_threshold = 0; + + chip->enable_dma = chip_info->enable_dma; + } + + if (spi->bits_per_word <= 8) { + chip->n_bytes = 1; + chip->dma_width = 1; + chip->read = u8_reader; + chip->write = u8_writer; + } else if (spi->bits_per_word <= 16) { + chip->n_bytes = 2; + chip->dma_width = 2; + chip->read = u16_reader; + chip->write = u16_writer; + } else { + /* Never take >16b case for MRST SPIC */ + dev_err(&spi->dev, "invalid wordsize\n"); + return -EINVAL; + } + chip->bits_per_word = spi->bits_per_word; + + chip->speed_hz = spi->max_speed_hz; + if (chip->speed_hz) + chip->clk_div = 25000000 / chip->speed_hz; + else + chip->clk_div = 8; /* default value */ + + chip->tmode = 0; /* Tx & Rx */ + /* Default SPI mode is SCPOL = 0, SCPH = 0 */ + chip->cr0 = (chip->bits_per_word - 1) + | (chip->type << SPI_FRF_OFFSET) + | (spi->mode << SPI_MODE_OFFSET) + | (chip->tmode << SPI_TMOD_OFFSET); + + spi_set_ctldata(spi, chip); + return 0; +} + +static void dw_spi_cleanup(struct spi_device *spi) +{ + struct chip_data *chip = spi_get_ctldata(spi); + kfree(chip); +} + +static int __init init_queue(struct dw_spi *dws) +{ + INIT_LIST_HEAD(&dws->queue); + spin_lock_init(&dws->lock); + + dws->run = QUEUE_STOPPED; + dws->busy = 0; + + tasklet_init(&dws->pump_transfers, + pump_transfers, (unsigned long)dws); + + INIT_WORK(&dws->pump_messages, pump_messages); + dws->workqueue = create_singlethread_workqueue( + dev_name(dws->master->dev.parent)); + if (dws->workqueue == NULL) + return -EBUSY; + + return 0; +} + +static int start_queue(struct dw_spi *dws) +{ + unsigned long flags; + + spin_lock_irqsave(&dws->lock, flags); + + if (dws->run == QUEUE_RUNNING || dws->busy) { + spin_unlock_irqrestore(&dws->lock, flags); + return -EBUSY; + } + + dws->run = QUEUE_RUNNING; + dws->cur_msg = NULL; + dws->cur_transfer = NULL; + dws->cur_chip = NULL; + dws->prev_chip = NULL; + spin_unlock_irqrestore(&dws->lock, flags); + + queue_work(dws->workqueue, &dws->pump_messages); + + return 0; +} + +static int stop_queue(struct dw_spi *dws) +{ + unsigned long flags; + unsigned limit = 50; + int status = 0; + + spin_lock_irqsave(&dws->lock, flags); + dws->run = QUEUE_STOPPED; + while (!list_empty(&dws->queue) && dws->busy && limit--) { + spin_unlock_irqrestore(&dws->lock, flags); + msleep(10); + spin_lock_irqsave(&dws->lock, flags); + } + + if (!list_empty(&dws->queue) || dws->busy) + status = -EBUSY; + spin_unlock_irqrestore(&dws->lock, flags); + + return status; +} + +static int destroy_queue(struct dw_spi *dws) +{ + int status; + + status = stop_queue(dws); + if (status != 0) + return status; + destroy_workqueue(dws->workqueue); + return 0; +} + +/* Restart the controller, disable all interrupts, clean rx fifo */ +static void spi_hw_init(struct dw_spi *dws) +{ + spi_enable_chip(dws, 0); + spi_mask_intr(dws, 0xff); + spi_enable_chip(dws, 1); + flush(dws); +} + +int __devinit dw_spi_add_host(struct dw_spi *dws) +{ + struct spi_master *master; + int ret; + + BUG_ON(dws == NULL); + + master = spi_alloc_master(dws->parent_dev, 0); + if (!master) { + ret = -ENOMEM; + goto exit; + } + + dws->master = master; + dws->type = SSI_MOTO_SPI; + dws->prev_chip = NULL; + dws->dma_inited = 0; + dws->dma_addr = (dma_addr_t)(dws->paddr + 0x60); 
+ + ret = request_irq(dws->irq, dw_spi_irq, 0, + "dw_spi", dws); + if (ret < 0) { + dev_err(&master->dev, "can not get IRQ\n"); + goto err_free_master; + } + + master->mode_bits = SPI_CPOL | SPI_CPHA; + master->bus_num = dws->bus_num; + master->num_chipselect = dws->num_cs; + master->cleanup = dw_spi_cleanup; + master->setup = dw_spi_setup; + master->transfer = dw_spi_transfer; + + dws->dma_inited = 0; + + /* Basic HW init */ + spi_hw_init(dws); + + /* Initial and start queue */ + ret = init_queue(dws); + if (ret) { + dev_err(&master->dev, "problem initializing queue\n"); + goto err_diable_hw; + } + ret = start_queue(dws); + if (ret) { + dev_err(&master->dev, "problem starting queue\n"); + goto err_diable_hw; + } + + spi_master_set_devdata(master, dws); + ret = spi_register_master(master); + if (ret) { + dev_err(&master->dev, "problem registering spi master\n"); + goto err_queue_alloc; + } + + mrst_spi_debugfs_init(dws); + return 0; + +err_queue_alloc: + destroy_queue(dws); +err_diable_hw: + spi_enable_chip(dws, 0); + free_irq(dws->irq, dws); +err_free_master: + spi_master_put(master); +exit: + return ret; +} +EXPORT_SYMBOL(dw_spi_add_host); + +void __devexit dw_spi_remove_host(struct dw_spi *dws) +{ + int status = 0; + + if (!dws) + return; + mrst_spi_debugfs_remove(dws); + + /* Remove the queue */ + status = destroy_queue(dws); + if (status != 0) + dev_err(&dws->master->dev, "dw_spi_remove: workqueue will not " + "complete, message memory not freed\n"); + + spi_enable_chip(dws, 0); + /* Disable clk */ + spi_set_clk(dws, 0); + free_irq(dws->irq, dws); + + /* Disconnect from the SPI framework */ + spi_unregister_master(dws->master); +} + +int dw_spi_suspend_host(struct dw_spi *dws) +{ + int ret = 0; + + ret = stop_queue(dws); + if (ret) + return ret; + spi_enable_chip(dws, 0); + spi_set_clk(dws, 0); + return ret; +} +EXPORT_SYMBOL(dw_spi_suspend_host); + +int dw_spi_resume_host(struct dw_spi *dws) +{ + int ret; + + spi_hw_init(dws); + ret = start_queue(dws); + if (ret) + dev_err(&dws->master->dev, "fail to start queue (%d)\n", ret); + return ret; +} +EXPORT_SYMBOL(dw_spi_resume_host); + +MODULE_AUTHOR("Feng Tang "); +MODULE_DESCRIPTION("Driver for DesignWare SPI controller core"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/spi/dw_spi_pci.c b/drivers/spi/dw_spi_pci.c new file mode 100644 index 000000000000..34ba69161734 --- /dev/null +++ b/drivers/spi/dw_spi_pci.c @@ -0,0 +1,169 @@ +/* + * mrst_spi_pci.c - PCI interface driver for DW SPI Core + * + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include +#include +#include +#include + +#define DRIVER_NAME "dw_spi_pci" + +struct dw_spi_pci { + struct pci_dev *pdev; + struct dw_spi dws; +}; + +static int __devinit spi_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct dw_spi_pci *dwpci; + struct dw_spi *dws; + int pci_bar = 0; + int ret; + + printk(KERN_INFO "DW: found PCI SPI controller(ID: %04x:%04x)\n", + pdev->vendor, pdev->device); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + dwpci = kzalloc(sizeof(struct dw_spi_pci), GFP_KERNEL); + if (!dwpci) { + ret = -ENOMEM; + goto err_disable; + } + + dwpci->pdev = pdev; + dws = &dwpci->dws; + + /* Get basic io resource and map it */ + dws->paddr = pci_resource_start(pdev, pci_bar); + dws->iolen = pci_resource_len(pdev, pci_bar); + + ret = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev)); + if (ret) + goto err_kfree; + + dws->regs = ioremap_nocache((unsigned long)dws->paddr, + pci_resource_len(pdev, pci_bar)); + if (!dws->regs) { + ret = -ENOMEM; + goto err_release_reg; + } + + dws->parent_dev = &pdev->dev; + dws->bus_num = 0; + dws->num_cs = 4; + dws->max_freq = 25000000; /* for Moorestwon */ + dws->irq = pdev->irq; + + ret = dw_spi_add_host(dws); + if (ret) + goto err_unmap; + + /* PCI hook and SPI hook use the same drv data */ + pci_set_drvdata(pdev, dwpci); + return 0; + +err_unmap: + iounmap(dws->regs); +err_release_reg: + pci_release_region(pdev, pci_bar); +err_kfree: + kfree(dwpci); +err_disable: + pci_disable_device(pdev); + return ret; +} + +static void __devexit spi_pci_remove(struct pci_dev *pdev) +{ + struct dw_spi_pci *dwpci = pci_get_drvdata(pdev); + + pci_set_drvdata(pdev, NULL); + iounmap(dwpci->dws.regs); + pci_release_region(pdev, 0); + kfree(dwpci); + pci_disable_device(pdev); +} + +#ifdef CONFIG_PM +static int spi_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct dw_spi_pci *dwpci = pci_get_drvdata(pdev); + int ret; + + ret = dw_spi_suspend_host(&dwpci->dws); + if (ret) + return ret; + pci_save_state(pdev); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + return ret; +} + +static int spi_resume(struct pci_dev *pdev) +{ + struct dw_spi_pci *dwpci = pci_get_drvdata(pdev); + int ret; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + ret = pci_enable_device(pdev); + if (ret) + return ret; + return dw_spi_resume_host(&dwpci->dws); +} +#else +#define spi_suspend NULL +#define spi_resume NULL +#endif + +static const struct pci_device_id pci_ids[] __devinitdata = { + /* Intel Moorestown platform SPI controller 0 */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0800) }, + {}, +}; + +static struct pci_driver dw_spi_driver = { + .name = DRIVER_NAME, + .id_table = pci_ids, + .probe = spi_pci_probe, + .remove = __devexit_p(spi_pci_remove), + .suspend = spi_suspend, + .resume = spi_resume, +}; + +static int __init mrst_spi_init(void) +{ + return pci_register_driver(&dw_spi_driver); +} + +static void __exit mrst_spi_exit(void) +{ + pci_unregister_driver(&dw_spi_driver); +} + +module_init(mrst_spi_init); +module_exit(mrst_spi_exit); + +MODULE_AUTHOR("Feng Tang "); +MODULE_DESCRIPTION("PCI interface driver for DW SPI Core"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h new file mode 100644 index 000000000000..51b3e771a9a3 --- /dev/null +++ b/include/linux/spi/dw_spi.h @@ -0,0 +1,212 @@ +#ifndef DW_SPI_HEADER_H +#define DW_SPI_HEADER_H +#include + +/* Bit fields in CTRLR0 */ +#define SPI_DFS_OFFSET 0 + +#define 
SPI_FRF_OFFSET 4 +#define SPI_FRF_SPI 0x0 +#define SPI_FRF_SSP 0x1 +#define SPI_FRF_MICROWIRE 0x2 +#define SPI_FRF_RESV 0x3 + +#define SPI_MODE_OFFSET 6 +#define SPI_SCPH_OFFSET 6 +#define SPI_SCOL_OFFSET 7 +#define SPI_TMOD_OFFSET 8 +#define SPI_TMOD_TR 0x0 /* xmit & recv */ +#define SPI_TMOD_TO 0x1 /* xmit only */ +#define SPI_TMOD_RO 0x2 /* recv only */ +#define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */ + +#define SPI_SLVOE_OFFSET 10 +#define SPI_SRL_OFFSET 11 +#define SPI_CFS_OFFSET 12 + +/* Bit fields in SR, 7 bits */ +#define SR_MASK 0x7f /* cover 7 bits */ +#define SR_BUSY (1 << 0) +#define SR_TF_NOT_FULL (1 << 1) +#define SR_TF_EMPT (1 << 2) +#define SR_RF_NOT_EMPT (1 << 3) +#define SR_RF_FULL (1 << 4) +#define SR_TX_ERR (1 << 5) +#define SR_DCOL (1 << 6) + +/* Bit fields in ISR, IMR, RISR, 7 bits */ +#define SPI_INT_TXEI (1 << 0) +#define SPI_INT_TXOI (1 << 1) +#define SPI_INT_RXUI (1 << 2) +#define SPI_INT_RXOI (1 << 3) +#define SPI_INT_RXFI (1 << 4) +#define SPI_INT_MSTI (1 << 5) + +/* TX RX interrupt level threshhold, max can be 256 */ +#define SPI_INT_THRESHOLD 32 + +enum dw_ssi_type { + SSI_MOTO_SPI = 0, + SSI_TI_SSP, + SSI_NS_MICROWIRE, +}; + +struct dw_spi_reg { + u32 ctrl0; + u32 ctrl1; + u32 ssienr; + u32 mwcr; + u32 ser; + u32 baudr; + u32 txfltr; + u32 rxfltr; + u32 txflr; + u32 rxflr; + u32 sr; + u32 imr; + u32 isr; + u32 risr; + u32 txoicr; + u32 rxoicr; + u32 rxuicr; + u32 msticr; + u32 icr; + u32 dmacr; + u32 dmatdlr; + u32 dmardlr; + u32 idr; + u32 version; + u32 dr; /* Currently oper as 32 bits, + though only low 16 bits matters */ +} __packed; + +struct dw_spi { + struct spi_master *master; + struct spi_device *cur_dev; + struct device *parent_dev; + enum dw_ssi_type type; + + void __iomem *regs; + unsigned long paddr; + u32 iolen; + int irq; + u32 max_freq; /* max bus freq supported */ + + u16 bus_num; + u16 num_cs; /* supported slave numbers */ + + /* Driver message queue */ + struct workqueue_struct *workqueue; + struct work_struct pump_messages; + spinlock_t lock; + struct list_head queue; + int busy; + int run; + + /* Message Transfer pump */ + struct tasklet_struct pump_transfers; + + /* Current message transfer state info */ + struct spi_message *cur_msg; + struct spi_transfer *cur_transfer; + struct chip_data *cur_chip; + struct chip_data *prev_chip; + size_t len; + void *tx; + void *tx_end; + void *rx; + void *rx_end; + int dma_mapped; + dma_addr_t rx_dma; + dma_addr_t tx_dma; + size_t rx_map_len; + size_t tx_map_len; + u8 n_bytes; /* current is a 1/2 bytes op */ + u8 max_bits_per_word; /* maxim is 16b */ + u32 dma_width; + int cs_change; + int (*write)(struct dw_spi *dws); + int (*read)(struct dw_spi *dws); + irqreturn_t (*transfer_handler)(struct dw_spi *dws); + void (*cs_control)(u32 command); + + /* Dma info */ + int dma_inited; + struct dma_chan *txchan; + struct dma_chan *rxchan; + int txdma_done; + int rxdma_done; + u64 tx_param; + u64 rx_param; + struct device *dma_dev; + dma_addr_t dma_addr; + + /* Bus interface info */ + void *priv; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs; +#endif +}; + +#define dw_readl(dw, name) \ + __raw_readl(&(((struct dw_spi_reg *)dw->regs)->name)) +#define dw_writel(dw, name, val) \ + __raw_writel((val), &(((struct dw_spi_reg *)dw->regs)->name)) +#define dw_readw(dw, name) \ + __raw_readw(&(((struct dw_spi_reg *)dw->regs)->name)) +#define dw_writew(dw, name, val) \ + __raw_writew((val), &(((struct dw_spi_reg *)dw->regs)->name)) + +static inline void spi_enable_chip(struct dw_spi *dws, int enable) +{ + 
dw_writel(dws, ssienr, (enable ? 1 : 0)); +} + +static inline void spi_set_clk(struct dw_spi *dws, u16 div) +{ + dw_writel(dws, baudr, div); +} + +static inline void spi_chip_sel(struct dw_spi *dws, u16 cs) +{ + if (cs > dws->num_cs) + return; + dw_writel(dws, ser, 1 << cs); +} + +/* Disable IRQ bits */ +static inline void spi_mask_intr(struct dw_spi *dws, u32 mask) +{ + u32 new_mask; + + new_mask = dw_readl(dws, imr) & ~mask; + dw_writel(dws, imr, new_mask); +} + +/* Enable IRQ bits */ +static inline void spi_umask_intr(struct dw_spi *dws, u32 mask) +{ + u32 new_mask; + + new_mask = dw_readl(dws, imr) | mask; + dw_writel(dws, imr, new_mask); +} + +/* + * Each SPI slave device to work with dw_api controller should + * has such a structure claiming its working mode (PIO/DMA etc), + * which can be save in the "controller_data" member of the + * struct spi_device + */ +struct dw_spi_chip { + u8 poll_mode; /* 0 for contoller polling mode */ + u8 type; /* SPI/SSP/Micrwire */ + u8 enable_dma; + void (*cs_control)(u32 command); +}; + +extern int dw_spi_add_host(struct dw_spi *dws); +extern void dw_spi_remove_host(struct dw_spi *dws); +extern int dw_spi_suspend_host(struct dw_spi *dws); +extern int dw_spi_resume_host(struct dw_spi *dws); +#endif /* DW_SPI_HEADER_H */ -- cgit v1.2.3 From 9afa2fb6c13501e5b3536d15344fce4e5442c469 Mon Sep 17 00:00:00 2001 From: Erez Zadok Date: Wed, 2 Dec 2009 19:51:54 -0500 Subject: fsstack/ecryptfs: remove unused get_nlinks param to fsstack_copy_attr_all This get_nlinks parameter was never used by the only mainline user, ecryptfs; and it has never been used by unionfs or wrapfs either. Acked-by: Dustin Kirkland Acked-by: Tyler Hicks Signed-off-by: Erez Zadok Signed-off-by: Al Viro --- fs/ecryptfs/dentry.c | 2 +- fs/ecryptfs/inode.c | 6 +++--- fs/ecryptfs/main.c | 2 +- fs/stack.c | 17 +++-------------- include/linux/fs_stack.h | 4 +--- 5 files changed, 9 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 2dda5ade75bc..8f006a0d6076 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *lower_inode = ecryptfs_inode_to_lower(dentry->d_inode); - fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL); + fsstack_copy_attr_all(dentry->d_inode, lower_inode); } out: return rc; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 056fed62d0de..429ca0b3ba08 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -626,9 +626,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, lower_new_dir_dentry->d_inode, lower_new_dentry); if (rc) goto out_lock; - fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); + fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); if (new_dir != old_dir) - fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL); + fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); out_lock: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); dput(lower_new_dentry->d_parent); @@ -967,7 +967,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) rc = notify_change(lower_dentry, ia); mutex_unlock(&lower_dentry->d_inode->i_mutex); out: - fsstack_copy_attr_all(inode, lower_inode, NULL); + fsstack_copy_attr_all(inode, lower_inode); return rc; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 101fe4c7b1ee..567bc4b9f70a 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ 
-189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, init_special_inode(inode, lower_inode->i_mode, lower_inode->i_rdev); dentry->d_op = &ecryptfs_dops; - fsstack_copy_attr_all(inode, lower_inode, NULL); + fsstack_copy_attr_all(inode, lower_inode); /* This size will be overwritten for real files w/ headers and * other metadata */ fsstack_copy_inode_size(inode, lower_inode); diff --git a/fs/stack.c b/fs/stack.c index 67716f6a1a4a..0e20e43ad740 100644 --- a/fs/stack.c +++ b/fs/stack.c @@ -14,11 +14,8 @@ void fsstack_copy_inode_size(struct inode *dst, const struct inode *src) } EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); -/* copy all attributes; get_nlinks is optional way to override the i_nlink - * copying - */ -void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, - int (*get_nlinks)(struct inode *)) +/* copy all attributes */ +void fsstack_copy_attr_all(struct inode *dest, const struct inode *src) { dest->i_mode = src->i_mode; dest->i_uid = src->i_uid; @@ -29,14 +26,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, dest->i_ctime = src->i_ctime; dest->i_blkbits = src->i_blkbits; dest->i_flags = src->i_flags; - - /* - * Update the nlinks AFTER updating the above fields, because the - * get_links callback may depend on them. - */ - if (!get_nlinks) - dest->i_nlink = src->i_nlink; - else - dest->i_nlink = (*get_nlinks)(dest); + dest->i_nlink = src->i_nlink; } EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h index bb516ceeefc9..aa60311900dd 100644 --- a/include/linux/fs_stack.h +++ b/include/linux/fs_stack.h @@ -8,9 +8,7 @@ #include /* externs for fs/stack.c */ -extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, - int (*get_nlinks)(struct inode *)); - +extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src); extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src); /* inlines */ -- cgit v1.2.3 From 1b8ab8159ef8f818f870a1d2e3b6953d80eefd3f Mon Sep 17 00:00:00 2001 From: Erez Zadok Date: Thu, 3 Dec 2009 21:56:09 -0500 Subject: VFS/fsstack: handle 32-bit smp + preempt + large files in fsstack_copy_inode_size Copy the inode size and blocks from one inode to another correctly on 32-bit systems with CONFIG_SMP, CONFIG_PREEMPT, or CONFIG_LBDAF. Use proper inode spinlocks only when i_size/i_blocks cannot fit in one 32-bit word. Signed-off-by: Hugh Dickins Signed-off-by: Erez Zadok Signed-off-by: Al Viro --- fs/stack.c | 54 +++++++++++++++++++++++++++++++++++++++++++++--- include/linux/fs_stack.h | 2 +- 2 files changed, 52 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/stack.c b/fs/stack.c index 0e20e43ad740..4a6f7f440658 100644 --- a/fs/stack.c +++ b/fs/stack.c @@ -7,10 +7,58 @@ * This function cannot be inlined since i_size_{read,write} is rather * heavy-weight on 32-bit systems */ -void fsstack_copy_inode_size(struct inode *dst, const struct inode *src) +void fsstack_copy_inode_size(struct inode *dst, struct inode *src) { - i_size_write(dst, i_size_read((struct inode *)src)); - dst->i_blocks = src->i_blocks; + loff_t i_size; + blkcnt_t i_blocks; + + /* + * i_size_read() includes its own seqlocking and protection from + * preemption (see include/linux/fs.h): we need nothing extra for + * that here, and prefer to avoid nesting locks than attempt to keep + * i_size and i_blocks in sync together. 
+ */ + i_size = i_size_read(src); + + /* + * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to + * keep the two halves of i_blocks in sync despite SMP or PREEMPT - + * though stat's generic_fillattr() doesn't bother, and we won't be + * applying quotas (where i_blocks does become important) at the + * upper level. + * + * We don't actually know what locking is used at the lower level; + * but if it's a filesystem that supports quotas, it will be using + * i_lock as in inode_add_bytes(). tmpfs uses other locking, and + * its 32-bit is (just) able to exceed 2TB i_size with the aid of + * holes; but its i_blocks cannot carry into the upper long without + * almost 2TB swap - let's ignore that case. + */ + if (sizeof(i_blocks) > sizeof(long)) + spin_lock(&src->i_lock); + i_blocks = src->i_blocks; + if (sizeof(i_blocks) > sizeof(long)) + spin_unlock(&src->i_lock); + + /* + * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for + * fsstack_copy_inode_size() to hold some lock around + * i_size_write(), otherwise i_size_read() may spin forever (see + * include/linux/fs.h). We don't necessarily hold i_mutex when this + * is called, so take i_lock for that case. + * + * And if CONFIG_LBADF (on 32-bit), continue our effort to keep the + * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock + * for that case too, and do both at once by combining the tests. + * + * There is none of this locking overhead in the 64-bit case. + */ + if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long)) + spin_lock(&dst->i_lock); + i_size_write(dst, i_size); + dst->i_blocks = i_blocks; + if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long)) + spin_unlock(&dst->i_lock); } EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h index aa60311900dd..da317c7163ab 100644 --- a/include/linux/fs_stack.h +++ b/include/linux/fs_stack.h @@ -9,7 +9,7 @@ /* externs for fs/stack.c */ extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src); -extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src); +extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src); /* inlines */ static inline void fsstack_copy_attr_atime(struct inode *dest, -- cgit v1.2.3 From 76b7e0058d09f8104387980a690001681c04cc0a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Dec 2009 14:24:20 +0100 Subject: fix up O_SYNC comments Proper Posix O_SYNC handling only made it into 2.6.33, not 2.6.32. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- arch/alpha/include/asm/fcntl.h | 2 +- arch/mips/include/asm/fcntl.h | 2 +- arch/sparc/include/asm/fcntl.h | 2 +- include/asm-generic/fcntl.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h index 21b1117a0c61..70145cbb21cb 100644 --- a/arch/alpha/include/asm/fcntl.h +++ b/arch/alpha/include/asm/fcntl.h @@ -16,7 +16,7 @@ #define O_NOATIME 04000000 #define O_CLOEXEC 010000000 /* set close_on_exec */ /* - * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using + * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using * the O_SYNC flag. We continue to use the existing numerical value * for O_DSYNC semantics now, but using the correct symbolic name for it. * This new value is used to request true Posix O_SYNC semantics. 
It is diff --git a/arch/mips/include/asm/fcntl.h b/arch/mips/include/asm/fcntl.h index 7c6681aa2ab8..e482fe90fe88 100644 --- a/arch/mips/include/asm/fcntl.h +++ b/arch/mips/include/asm/fcntl.h @@ -19,7 +19,7 @@ #define FASYNC 0x1000 /* fcntl, for BSD compatibility */ #define O_LARGEFILE 0x2000 /* allow large file opens */ /* - * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using + * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using * the O_SYNC flag. We continue to use the existing numerical value * for O_DSYNC semantics now, but using the correct symbolic name for it. * This new value is used to request true Posix O_SYNC semantics. It is diff --git a/arch/sparc/include/asm/fcntl.h b/arch/sparc/include/asm/fcntl.h index 3b9cfb39175e..38f37b333cc7 100644 --- a/arch/sparc/include/asm/fcntl.h +++ b/arch/sparc/include/asm/fcntl.h @@ -19,7 +19,7 @@ #define O_NOATIME 0x200000 #define O_CLOEXEC 0x400000 /* - * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using + * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using * the O_SYNC flag. We continue to use the existing numerical value * for O_DSYNC semantics now, but using the correct symbolic name for it. * This new value is used to request true Posix O_SYNC semantics. It is diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h index 681ddf3e844c..fcd268ce0674 100644 --- a/include/asm-generic/fcntl.h +++ b/include/asm-generic/fcntl.h @@ -51,7 +51,7 @@ #endif /* - * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using + * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using * the O_SYNC flag. We continue to use the existing numerical value * for O_DSYNC semantics now, but using the correct symbolic name for it. * This new value is used to request true Posix O_SYNC semantics. It is -- cgit v1.2.3 From 7a0ad10c367ab57c899d340372f37880cbe6ab52 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Dec 2009 14:24:40 +0100 Subject: fold do_sync_file_range into sys_sync_file_range We recently go rid of all callers of do_sync_file_range as they're better served with vfs_fsync or the filemap_write_and_wait. Now that do_sync_file_range is down to a single caller fold it into it so that people don't start using it again accidentally. While at it also switch it from using __filemap_fdatawrite_range(..., WB_SYNC_ALL) to the more clear filemap_fdatawrite_range(). 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/sync.c | 59 +++++++++++++++++++++--------------------------------- include/linux/fs.h | 4 ---- 2 files changed, 23 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/fs/sync.c b/fs/sync.c index 36752a683481..418727a2a239 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, { int ret; struct file *file; + struct address_space *mapping; loff_t endbyte; /* inclusive */ int fput_needed; umode_t i_mode; @@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, !S_ISLNK(i_mode)) goto out_put; - ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); + mapping = file->f_mapping; + if (!mapping) { + ret = -EINVAL; + goto out_put; + } + + ret = 0; + if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { + ret = filemap_fdatawait_range(mapping, offset, endbyte); + if (ret < 0) + goto out_put; + } + + if (flags & SYNC_FILE_RANGE_WRITE) { + ret = filemap_fdatawrite_range(mapping, offset, endbyte); + if (ret < 0) + goto out_put; + } + + if (flags & SYNC_FILE_RANGE_WAIT_AFTER) + ret = filemap_fdatawait_range(mapping, offset, endbyte); + out_put: fput_light(file, fput_needed); out: @@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags, } SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); #endif - -/* - * `endbyte' is inclusive - */ -int do_sync_mapping_range(struct address_space *mapping, loff_t offset, - loff_t endbyte, unsigned int flags) -{ - int ret; - - if (!mapping) { - ret = -EINVAL; - goto out; - } - - ret = 0; - if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { - ret = filemap_fdatawait_range(mapping, offset, endbyte); - if (ret < 0) - goto out; - } - - if (flags & SYNC_FILE_RANGE_WRITE) { - ret = __filemap_fdatawrite_range(mapping, offset, endbyte, - WB_SYNC_ALL); - if (ret < 0) - goto out; - } - - if (flags & SYNC_FILE_RANGE_WAIT_AFTER) { - ret = filemap_fdatawait_range(mapping, offset, endbyte); - } -out: - return ret; -} -EXPORT_SYMBOL_GPL(do_sync_mapping_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index 66bc0a54b284..77a975089d9a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1095,10 +1095,6 @@ struct file_lock { extern void send_sigio(struct fown_struct *fown, int fd, int band); -/* fs/sync.c */ -extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, - loff_t endbyte, unsigned int flags); - #ifdef CONFIG_FILE_LOCKING extern int fcntl_getlk(struct file *, struct flock __user *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, -- cgit v1.2.3 From eaff8079d4f1016a12e34ab323737314f24127dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Dec 2009 14:25:01 +0100 Subject: kill I_LOCK After I_SYNC was split from I_LOCK the leftover is always used together with I_NEW and thus superflous. 
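A hedged sketch of the lookup pattern these flag semantics serve, built on the generic iget_locked()/unlock_new_inode() helpers: iget_locked() hands back an inode with I_NEW set while the caller fills it in, and unlock_new_inode() clears I_NEW and wakes any concurrent lookups sleeping on that bit. "examplefs" and its read_inode helper are made up for illustration.

#include <linux/fs.h>
#include <linux/err.h>

/* hypothetical disk read; a real filesystem would parse its on-disk inode */
static int examplefs_read_inode(struct inode *inode)
{
	inode->i_mode = S_IFREG | 0644;	/* placeholder contents */
	inode->i_nlink = 1;
	return 0;
}

struct inode *examplefs_iget(struct super_block *sb, unsigned long ino)
{
	struct inode *inode;
	int err;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;			/* already cached and initialised */

	err = examplefs_read_inode(inode);	/* runs with I_NEW still set */
	if (err) {
		iget_failed(inode);		/* marks it bad and releases I_NEW waiters */
		return ERR_PTR(err);
	}

	unlock_new_inode(inode);		/* clears I_NEW, wakes waiters */
	return inode;
}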
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/gfs2/inode.c | 2 +- fs/inode.c | 26 +++++++++++++------------- fs/jfs/jfs_txnmgr.c | 2 +- fs/ntfs/inode.c | 6 +++--- fs/ubifs/file.c | 2 +- fs/xfs/linux-2.6/xfs_iops.c | 2 +- fs/xfs/xfs_iget.c | 4 ++-- include/linux/fs.h | 36 ++++++++++++++++-------------------- include/linux/writeback.h | 3 +-- 9 files changed, 39 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 3ff32fa793da..6e220f4eee7d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb, * directory entry when gfs2_inode_lookup() is invoked. Part of the code * segment inside gfs2_inode_lookup code needs to get moved around. * - * Clean up I_LOCK and I_NEW as well. + * Clears I_NEW as well. **/ void gfs2_set_iop(struct inode *inode) diff --git a/fs/inode.c b/fs/inode.c index 06c1f02de611..03dfeb2e3928 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode) * Prevent speculative execution through spin_unlock(&inode_lock); */ smp_mb(); - wake_up_bit(&inode->i_state, __I_LOCK); + wake_up_bit(&inode->i_state, __I_NEW); } /** @@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode) } #endif /* - * This is special! We do not need the spinlock when clearing I_LOCK, + * This is special! We do not need the spinlock when clearing I_NEW, * because we're guaranteed that nobody else tries to do anything about * the state of the inode when it is locked, as we just created it (so - * there can be no old holders that haven't tested I_LOCK). + * there can be no old holders that haven't tested I_NEW). * However we must emit the memory barrier so that other CPUs reliably - * see the clearing of I_LOCK after the other inode initialisation has + * see the clearing of I_NEW after the other inode initialisation has * completed. */ smp_mb(); - WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); - inode->i_state &= ~(I_LOCK|I_NEW); + WARN_ON(!(inode->i_state & I_NEW)); + inode->i_state &= ~I_NEW; wake_up_inode(inode); } EXPORT_SYMBOL(unlock_new_inode); @@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb, goto set_failed; __inode_add_to_lists(sb, head, inode); - inode->i_state = I_LOCK|I_NEW; + inode->i_state = I_NEW; spin_unlock(&inode_lock); /* Return the locked inode with I_NEW set, the @@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb, if (!old) { inode->i_ino = ino; __inode_add_to_lists(sb, head, inode); - inode->i_state = I_LOCK|I_NEW; + inode->i_state = I_NEW; spin_unlock(&inode_lock); /* Return the locked inode with I_NEW set, the @@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode) ino_t ino = inode->i_ino; struct hlist_head *head = inode_hashtable + hash(sb, ino); - inode->i_state |= I_LOCK|I_NEW; + inode->i_state |= I_NEW; while (1) { struct hlist_node *node; struct inode *old = NULL; @@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, struct super_block *sb = inode->i_sb; struct hlist_head *head = inode_hashtable + hash(sb, hashval); - inode->i_state |= I_LOCK|I_NEW; + inode->i_state |= I_NEW; while (1) { struct hlist_node *node; @@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait); * until the deletion _might_ have completed. Callers are responsible * to recheck inode state. 
* - * It doesn't matter if I_LOCK is not set initially, a call to + * It doesn't matter if I_NEW is not set initially, a call to * wake_up_inode() after removing from the hash list will DTRT. * * This is called with inode_lock held. @@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait); static void __wait_on_freeing_inode(struct inode *inode) { wait_queue_head_t *wq; - DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK); - wq = bit_waitqueue(&inode->i_state, __I_LOCK); + DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); + wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); spin_unlock(&inode_lock); schedule(); diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index f26e4d03ada5..d945ea76b445 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1292,7 +1292,7 @@ int txCommit(tid_t tid, /* transaction identifier */ */ /* * I believe this code is no longer needed. Splitting I_LOCK - * into two bits, I_LOCK and I_SYNC should prevent this + * into two bits, I_NEW and I_SYNC should prevent this * deadlock as well. But since I don't have a JFS testload * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. * Joern diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 9938034762cc..dc2505abb6d7 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -530,7 +530,7 @@ err_corrupt_attr: * the ntfs inode. * * Q: What locks are held when the function is called? - * A: i_state has I_LOCK set, hence the inode is locked, also + * A: i_state has I_NEW set, hence the inode is locked, also * i_count is set to 1, so it is not going to go away * i_flags is set to 0 and we have no business touching it. Only an ioctl() * is allowed to write to them. We should of course be honouring them but @@ -1207,7 +1207,7 @@ err_out: * necessary fields in @vi as well as initializing the ntfs inode. * * Q: What locks are held when the function is called? - * A: i_state has I_LOCK set, hence the inode is locked, also + * A: i_state has I_NEW set, hence the inode is locked, also * i_count is set to 1, so it is not going to go away * * Return 0 on success and -errno on error. In the error case, the inode will @@ -1474,7 +1474,7 @@ err_out: * normal directory inodes. * * Q: What locks are held when the function is called? - * A: i_state has I_LOCK set, hence the inode is locked, also + * A: i_state has I_NEW set, hence the inode is locked, also * i_count is set to 1, so it is not going to go away * * Return 0 on success and -errno on error. In the error case, the inode will diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 39849f887e72..16a6444330ec 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -45,7 +45,7 @@ * * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> - * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not + * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not * set as well. However, UBIFS disables readahead. 
*/ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 1d5b298ba8b2..225946012d0b 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -794,7 +794,7 @@ xfs_setup_inode( struct inode *inode = &ip->i_vnode; inode->i_ino = ip->i_ino; - inode->i_state = I_NEW|I_LOCK; + inode->i_state = I_NEW; inode_add_to_lists(ip->i_mount->m_super, inode); inode->i_mode = ip->i_d.di_mode; diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 0de36c2a46f1..fa402a6bbbcf 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -91,7 +91,7 @@ xfs_inode_alloc( ip->i_new_size = 0; /* prevent anyone from using this yet */ - VFS_I(ip)->i_state = I_NEW|I_LOCK; + VFS_I(ip)->i_state = I_NEW; return ip; } @@ -217,7 +217,7 @@ xfs_iget_cache_hit( trace_xfs_iget_reclaim(ip); goto out_error; } - inode->i_state = I_LOCK|I_NEW; + inode->i_state = I_NEW; } else { /* If the VFS inode is being torn down, pause and try again. */ if (!igrab(inode)) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 77a975089d9a..cca191933ff6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1587,7 +1587,7 @@ struct super_operations { * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at * various stages of removing an inode. * - * Two bits are used for locking and completion notification, I_LOCK and I_SYNC. + * Two bits are used for locking and completion notification, I_NEW and I_SYNC. * * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on * fdatasync(). i_atime is the usual cause. @@ -1596,8 +1596,14 @@ struct super_operations { * don't have to write inode on fdatasync() when only * mtime has changed in it. * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. - * I_NEW get_new_inode() sets i_state to I_LOCK|I_NEW. Both - * are cleared by unlock_new_inode(), called from iget(). + * I_NEW Serves as both a mutex and completion notification. + * New inodes set I_NEW. If two processes both create + * the same inode, one of them will release its inode and + * wait for I_NEW to be released before returning. + * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can + * also cause waiting on I_NEW, without I_NEW actually + * being set. find_inode() uses this to prevent returning + * nearly-dead inodes. * I_WILL_FREE Must be set when calling write_inode_now() if i_count * is zero. I_FREEING must be set when I_WILL_FREE is * cleared. @@ -1611,20 +1617,11 @@ struct super_operations { * prohibited for many purposes. iget() must wait for * the inode to be completely released, then create it * anew. Other functions will just ignore such inodes, - * if appropriate. I_LOCK is used for waiting. + * if appropriate. I_NEW is used for waiting. * - * I_LOCK Serves as both a mutex and completion notification. - * New inodes set I_LOCK. If two processes both create - * the same inode, one of them will release its inode and - * wait for I_LOCK to be released before returning. - * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can - * also cause waiting on I_LOCK, without I_LOCK actually - * being set. find_inode() uses this to prevent returning - * nearly-dead inodes. - * I_SYNC Similar to I_LOCK, but limited in scope to writeback - * of inode dirty data. Having a separate lock for this - * purpose reduces latency and prevents some filesystem- - * specific deadlocks. + * I_SYNC Synchonized write of dirty inode data. The bits is + * set during data writeback, and cleared with a wakeup + * on the bit address once it is done. 
* * Q: What is the difference between I_WILL_FREE and I_FREEING? * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on @@ -1633,13 +1630,12 @@ struct super_operations { #define I_DIRTY_SYNC 1 #define I_DIRTY_DATASYNC 2 #define I_DIRTY_PAGES 4 -#define I_NEW 8 +#define __I_NEW 3 +#define I_NEW (1 << __I_NEW) #define I_WILL_FREE 16 #define I_FREEING 32 #define I_CLEAR 64 -#define __I_LOCK 7 -#define I_LOCK (1 << __I_LOCK) -#define __I_SYNC 8 +#define __I_SYNC 7 #define I_SYNC (1 << __I_SYNC) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 705f01fe413a..c18c008f4bbf 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -79,8 +79,7 @@ void wakeup_flusher_threads(long nr_pages); static inline void wait_on_inode(struct inode *inode) { might_sleep(); - wait_on_bit(&inode->i_state, __I_LOCK, inode_wait, - TASK_UNINTERRUPTIBLE); + wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); } static inline void inode_sync_wait(struct inode *inode) { -- cgit v1.2.3 From a2770d86b33024f71df269fde2de096df89d6a48 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 17 Dec 2009 12:51:05 -0800 Subject: Revert "fix mismerge with Trond's stuff (create_mnt_ns() export is gone now)" This reverts commit e9496ff46a20a8592fdc7bdaaf41b45eb808d310. Quoth Al: "it's dependent on a lot of other stuff not currently in mainline and badly broken with current fs/namespace.c. Sorry, badly out-of-order cherry-pick from old queue. PS: there's a large pending series reworking the refcounting and lifetime rules for vfsmounts that will, among other things, allow to rip a subtree away _without_ dissolving connections in it, to be garbage-collected when all active references are gone. It's considerably saner wrt "is the subtree busy" logics, but it's nowhere near being ready for merge at the moment; this changeset is one of the things becoming possible with that sucker, but it certainly shouldn't have been picked during this cycle. My apologies..." 
Noticed-by: Eric Paris Requested-by: Al Viro Signed-off-by: Linus Torvalds --- fs/namespace.c | 3 ++- fs/nfs/super.c | 8 ++++++++ include/linux/mnt_namespace.h | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index faab1273281e..7d70d63ceb29 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2068,7 +2068,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, * create_mnt_ns - creates a private namespace and adds a root filesystem * @mnt: pointer to the new root filesystem mountpoint */ -static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) +struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) { struct mnt_namespace *new_ns; @@ -2080,6 +2080,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) } return new_ns; } +EXPORT_SYMBOL(create_mnt_ns); SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char __user *, type, unsigned long, flags, void __user *, data) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d5b112bcf3de..ce907efc5508 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2648,13 +2648,21 @@ out_freepage: static int nfs_follow_remote_path(struct vfsmount *root_mnt, const char *export_path, struct vfsmount *mnt_target) { + struct mnt_namespace *ns_private; struct nameidata nd; struct super_block *s; int ret; + ns_private = create_mnt_ns(root_mnt); + ret = PTR_ERR(ns_private); + if (IS_ERR(ns_private)) + goto out_mntput; + ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, export_path, LOOKUP_FOLLOW, &nd); + put_mnt_ns(ns_private); + if (ret != 0) goto out_err; diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index d9ebf1037dfa..d74785c2393a 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -23,6 +23,7 @@ struct proc_mounts { struct fs_struct; +extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt); extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); -- cgit v1.2.3 From b6e3224fb20954f155e41ec5709b2ab70b50ae2d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 17 Dec 2009 13:23:24 -0800 Subject: Revert "task_struct: make journal_info conditional" This reverts commit e4c570c4cb7a95dbfafa3d016d2739bf3fdfe319, as requested by Alexey: "I think I gave a good enough arguments to not merge it. To iterate: * patch makes impossible to start using ext3 on EXT3_FS=n kernels without reboot. * this is done only for one pointer on task_struct" None of config options which define task_struct are tristate directly or effectively." 
Requested-by: Alexey Dobriyan Acked-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 4 ---- fs/btrfs/Kconfig | 1 - fs/ext4/Kconfig | 1 - fs/gfs2/Kconfig | 1 - fs/jbd/Kconfig | 1 - fs/jbd2/Kconfig | 1 - fs/nilfs2/Kconfig | 1 - fs/reiserfs/Kconfig | 1 - include/linux/init_task.h | 8 +------- include/linux/sched.h | 2 -- 10 files changed, 1 insertion(+), 20 deletions(-) (limited to 'include') diff --git a/fs/Kconfig b/fs/Kconfig index f8fccaaad628..64d44efad7a5 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -6,10 +6,6 @@ menu "File systems" if BLOCK -config FS_JOURNAL_INFO - bool - default n - source "fs/ext2/Kconfig" source "fs/ext3/Kconfig" source "fs/ext4/Kconfig" diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 402afe0a0bfb..7bb3c020e570 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -4,7 +4,6 @@ config BTRFS_FS select LIBCRC32C select ZLIB_INFLATE select ZLIB_DEFLATE - select FS_JOURNAL_INFO help Btrfs is a new filesystem with extents, writable snapshotting, support for multiple devices and many more features. diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index e5f6774846e4..9acf7e808139 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -2,7 +2,6 @@ config EXT4_FS tristate "The Extended 4 (ext4) filesystem" select JBD2 select CRC16 - select FS_JOURNAL_INFO help This is the next generation of the ext3 filesystem. diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index b192c661caa6..4dcddf83326f 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -10,7 +10,6 @@ config GFS2_FS select SLOW_WORK select QUOTA select QUOTACTL - select FS_JOURNAL_INFO help A cluster filesystem. diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig index a8408983abd4..4e28beeed157 100644 --- a/fs/jbd/Kconfig +++ b/fs/jbd/Kconfig @@ -1,6 +1,5 @@ config JBD tristate - select FS_JOURNAL_INFO help This is a generic journalling layer for block devices. It is currently used by the ext3 file system, but it could also be diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index 0f7d1ceafdfd..f32f346f4b0a 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig @@ -1,7 +1,6 @@ config JBD2 tristate select CRC32 - select FS_JOURNAL_INFO help This is a generic journaling layer for block devices that support both 32-bit and 64-bit block numbers. It is currently used by diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig index 1225af7b2166..251da07b2a1d 100644 --- a/fs/nilfs2/Kconfig +++ b/fs/nilfs2/Kconfig @@ -2,7 +2,6 @@ config NILFS2_FS tristate "NILFS2 file system support (EXPERIMENTAL)" depends on EXPERIMENTAL select CRC32 - select FS_JOURNAL_INFO help NILFS2 is a log-structured file system (LFS) supporting continuous snapshotting. In addition to versioning capability of the entire diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig index ac7cd75c86f8..513f431038f9 100644 --- a/fs/reiserfs/Kconfig +++ b/fs/reiserfs/Kconfig @@ -1,7 +1,6 @@ config REISERFS_FS tristate "Reiserfs support" select CRC32 - select FS_JOURNAL_INFO help Stores not just filenames but the files themselves in a balanced tree. Uses journalling. diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5ed8b9c50355..abec69b63d7e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -111,12 +111,6 @@ extern struct cred init_cred; # define INIT_PERF_EVENTS(tsk) #endif -#ifdef CONFIG_FS_JOURNAL_INFO -#define INIT_JOURNAL_INFO .journal_info = NULL, -#else -#define INIT_JOURNAL_INFO -#endif - /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. 
Base=0, limit=0x1fffff (=2MB) @@ -168,6 +162,7 @@ extern struct cred init_cred; .signal = {{0}}}, \ .blocked = {{0}}, \ .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ + .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ @@ -178,7 +173,6 @@ extern struct cred init_cred; [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ - INIT_JOURNAL_INFO \ INIT_IDS \ INIT_PERF_EVENTS(tsk) \ INIT_TRACE_IRQFLAGS \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 244c287a5ac1..211ed32befbd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1446,10 +1446,8 @@ struct task_struct { gfp_t lockdep_reclaim_gfp; #endif -#ifdef CONFIG_FS_JOURNAL_INFO /* journalling filesystem info */ void *journal_info; -#endif /* stacked block device info */ struct bio *bio_list, **bio_tail; -- cgit v1.2.3 From 5d0bb2c4238e333ae18c5cd23f75e02a3dac3519 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Thu, 17 Dec 2009 15:27:11 -0800 Subject: vt: don't export vt_kmsg_redirect() to userspace Fix following warning in linux-next by guarding the function definition (both the "extern" and the inline) with #ifdef __KERNEL__. usr/include/linux/vt.h:89: userspace cannot call function or variable defined in the kernel Introduced by commit 5ada918b82399eef3afd6a71e3637697d6bd719f ("vt: introduce and use vt_kmsg_redirect() function"). Signed-off-by: Bernhard Walle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/vt.h b/include/linux/vt.h index 3fb9944e50a6..d5dd0bc408fd 100644 --- a/include/linux/vt.h +++ b/include/linux/vt.h @@ -84,6 +84,8 @@ struct vt_setactivate { #define VT_SETACTIVATE 0x560F /* Activate and set the mode of a console */ +#ifdef __KERNEL__ + #ifdef CONFIG_VT_CONSOLE extern int vt_kmsg_redirect(int new); @@ -97,6 +99,8 @@ static inline int vt_kmsg_redirect(int new) #endif +#endif /* __KERNEL__ */ + #define vt_get_kmsg_redirect() vt_kmsg_redirect(-1) #endif /* _LINUX_VT_H */ -- cgit v1.2.3 From f6151dfea21496d43dbaba32cfcd9c9f404769bc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Dec 2009 15:27:16 -0800 Subject: mm: introduce coredump parameter structure Introduce coredump parameter data structure (struct coredump_params) to simplify binfmt->core_dump() arguments. 
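For orientation, a minimal hypothetical handler shows how the consolidated structure is consumed. The function name and its trivial body are illustrative only; the fields (signr, regs, file, limit) and the dump_write() call pattern are the ones visible in the hunks that follow.

#include <linux/binfmts.h>
#include <linux/kernel.h>

/*
 * Hypothetical sketch, not part of the patch: a binfmt ->core_dump()
 * handler now receives everything through one struct coredump_params.
 */
static int example_core_dump(struct coredump_params *cprm)
{
	char note[64];
	int len;

	/* cprm->signr and cprm->regs describe the crashing context */
	len = snprintf(note, sizeof(note), "dump on signal %ld\n", cprm->signr);

	/* honour the RLIMIT_CORE value carried in cprm->limit */
	if (len > cprm->limit)
		return 0;

	/* the output file is reached through cprm->file */
	return dump_write(cprm->file, note, len);
}

Bundling the arguments this way means later additions to the dump context only touch struct coredump_params rather than every binfmt's core_dump() signature.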
Signed-off-by: Masami Hiramatsu Suggested-by: Ingo Molnar Cc: Hidehiro Kawai Cc: Oleg Nesterov Cc: Roland McGrath Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_aout.c | 13 +++++++------ fs/binfmt_elf.c | 24 +++++++++++++----------- fs/binfmt_elf_fdpic.c | 29 +++++++++++++++-------------- fs/binfmt_flat.c | 6 +++--- fs/binfmt_som.c | 2 +- fs/exec.c | 38 +++++++++++++++++++++----------------- include/linux/binfmts.h | 10 +++++++++- 7 files changed, 69 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index b639dcf7c778..346b69405363 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -32,7 +32,7 @@ static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout_library(struct file*); -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); +static int aout_core_dump(struct coredump_params *cprm); static struct linux_binfmt aout_format = { .module = THIS_MODULE, @@ -89,8 +89,9 @@ if (file->f_op->llseek) { \ * dumping of the process results in another error.. */ -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) +static int aout_core_dump(struct coredump_params *cprm) { + struct file *file = cprm->file; mm_segment_t fs; int has_dumped = 0; unsigned long dump_start, dump_size; @@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); dump.u_ar0 = offsetof(struct user, regs); - dump.signal = signr; - aout_dump_thread(regs, &dump); + dump.signal = cprm->signr; + aout_dump_thread(cprm->regs, &dump); /* If the size of the dump file exceeds the rlimit, then see what would happen if we wrote the stack, but not the data area. */ - if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit) + if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit) dump.u_dsize = 0; /* Make sure we have enough room to write the stack and data areas. */ - if ((dump.u_ssize + 1) * PAGE_SIZE > limit) + if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit) dump.u_ssize = 0; /* make sure we actually have a data and stack area to dump */ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 97b6e9efeb7f..edd90c49003c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -45,7 +45,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, * don't even try. */ #ifdef CONFIG_ELF_CORE -static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); +static int elf_core_dump(struct coredump_params *cprm); #else #define elf_core_dump NULL #endif @@ -1272,8 +1272,9 @@ static int writenote(struct memelfnote *men, struct file *file, } #undef DUMP_WRITE -#define DUMP_WRITE(addr, nr) \ - if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ +#define DUMP_WRITE(addr, nr) \ + if ((size += (nr)) > cprm->limit || \ + !dump_write(cprm->file, (addr), (nr))) \ goto end_coredump; static void fill_elf_header(struct elfhdr *elf, int segs, @@ -1901,7 +1902,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, * and then they are actually written out. If we run out of core limit * we just truncate. 
*/ -static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) +static int elf_core_dump(struct coredump_params *cprm) { int has_dumped = 0; mm_segment_t fs; @@ -1947,7 +1948,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un * notes. This also sets up the file header. */ if (!fill_note_info(elf, segs + 1, /* including notes section */ - &info, signr, regs)) + &info, cprm->signr, cprm->regs)) goto cleanup; has_dumped = 1; @@ -2009,14 +2010,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un #endif /* write out the notes section */ - if (!write_note_info(&info, file, &foffset)) + if (!write_note_info(&info, cprm->file, &foffset)) goto end_coredump; - if (elf_coredump_extra_notes_write(file, &foffset)) + if (elf_coredump_extra_notes_write(cprm->file, &foffset)) goto end_coredump; /* Align to page */ - if (!dump_seek(file, dataoff - foffset)) + if (!dump_seek(cprm->file, dataoff - foffset)) goto end_coredump; for (vma = first_vma(current, gate_vma); vma != NULL; @@ -2033,12 +2034,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un page = get_dump_page(addr); if (page) { void *kaddr = kmap(page); - stop = ((size += PAGE_SIZE) > limit) || - !dump_write(file, kaddr, PAGE_SIZE); + stop = ((size += PAGE_SIZE) > cprm->limit) || + !dump_write(cprm->file, kaddr, + PAGE_SIZE); kunmap(page); page_cache_release(page); } else - stop = !dump_seek(file, PAGE_SIZE); + stop = !dump_seek(cprm->file, PAGE_SIZE); if (stop) goto end_coredump; } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 7b055385db8e..c25256a5c5b0 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *, struct file *, struct mm_struct *); #ifdef CONFIG_ELF_CORE -static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit); +static int elf_fdpic_core_dump(struct coredump_params *cprm); #endif static struct linux_binfmt elf_fdpic_format = { @@ -1326,8 +1326,9 @@ static int writenote(struct memelfnote *men, struct file *file) #undef DUMP_WRITE #undef DUMP_SEEK -#define DUMP_WRITE(addr, nr) \ - if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ +#define DUMP_WRITE(addr, nr) \ + if ((size += (nr)) > cprm->limit || \ + !dump_write(cprm->file, (addr), (nr))) \ goto end_coredump; static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) @@ -1582,8 +1583,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size, * and then they are actually written out. If we run out of core limit * we just truncate. 
*/ -static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, - struct file *file, unsigned long limit) +static int elf_fdpic_core_dump(struct coredump_params *cprm) { #define NUM_NOTES 6 int has_dumped = 0; @@ -1642,7 +1642,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, goto cleanup; #endif - if (signr) { + if (cprm->signr) { struct core_thread *ct; struct elf_thread_status *tmp; @@ -1661,14 +1661,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, int sz; tmp = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(signr, tmp); + sz = elf_dump_thread_status(cprm->signr, tmp); thread_status_size += sz; } } /* now collect the dump for the current */ - fill_prstatus(prstatus, current, signr); - elf_core_copy_regs(&prstatus->pr_reg, regs); + fill_prstatus(prstatus, current, cprm->signr); + elf_core_copy_regs(&prstatus->pr_reg, cprm->regs); segs = current->mm->map_count; #ifdef ELF_CORE_EXTRA_PHDRS @@ -1703,7 +1703,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, /* Try to dump the FPU. */ if ((prstatus->pr_fpvalid = - elf_core_copy_task_fpregs(current, regs, fpu))) + elf_core_copy_task_fpregs(current, cprm->regs, fpu))) fill_note(notes + numnote++, "CORE", NT_PRFPREG, sizeof(*fpu), fpu); #ifdef ELF_CORE_COPY_XFPREGS @@ -1774,7 +1774,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, /* write out the notes section */ for (i = 0; i < numnote; i++) - if (!writenote(notes + i, file)) + if (!writenote(notes + i, cprm->file)) goto end_coredump; /* write out the thread status notes section */ @@ -1783,14 +1783,15 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, list_entry(t, struct elf_thread_status, list); for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], file)) + if (!writenote(&tmp->notes[i], cprm->file)) goto end_coredump; } - if (!dump_seek(file, dataoff)) + if (!dump_seek(cprm->file, dataoff)) goto end_coredump; - if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0) + if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit, + mm_flags) < 0) goto end_coredump; #ifdef ELF_CORE_WRITE_EXTRA_DATA diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index a2796651e756..d4a00ea1054c 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p); #endif static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs); -static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); +static int flat_core_dump(struct coredump_params *cprm); static struct linux_binfmt flat_format = { .module = THIS_MODULE, @@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = { * Currently only a stub-function. */ -static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) +static int flat_core_dump(struct coredump_params *cprm) { printk("Process %s:%d received signr %d and should have core dumped\n", - current->comm, current->pid, (int) signr); + current->comm, current->pid, (int) cprm->signr); return(1); } diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index eff74b9c9e77..2a9b5330cc5e 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -43,7 +43,7 @@ static int load_som_library(struct file *); * don't even try. 
*/ #if 0 -static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit); +static int som_core_dump(struct coredump_params *cprm); #else #define som_core_dump NULL #endif diff --git a/fs/exec.c b/fs/exec.c index 77db9a97a773..632b02e34ec7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1763,17 +1763,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; struct inode * inode; - struct file * file; const struct cred *old_cred; struct cred *cred; int retval = 0; int flag = 0; int ispipe = 0; - unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; char **helper_argv = NULL; int helper_argc = 0; int dump_count = 0; static atomic_t core_dump_count = ATOMIC_INIT(0); + struct coredump_params cprm = { + .signr = signr, + .regs = regs, + .limit = current->signal->rlim[RLIMIT_CORE].rlim_cur, + }; audit_core_dumps(signr); @@ -1829,15 +1832,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) ispipe = format_corename(corename, signr); unlock_kernel(); - if ((!ispipe) && (core_limit < binfmt->min_coredump)) + if ((!ispipe) && (cprm.limit < binfmt->min_coredump)) goto fail_unlock; if (ispipe) { - if (core_limit == 0) { + if (cprm.limit == 0) { /* * Normally core limits are irrelevant to pipes, since * we're not writing to the file system, but we use - * core_limit of 0 here as a speacial value. Any + * cprm.limit of 0 here as a speacial value. Any * non-zero limit gets set to RLIM_INFINITY below, but * a limit of 0 skips the dump. This is a consistent * way to catch recursive crashes. We can still crash @@ -1870,25 +1873,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) goto fail_dropcount; } - core_limit = RLIM_INFINITY; + cprm.limit = RLIM_INFINITY; /* SIGPIPE can happen, but it's just never processed */ if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, - &file)) { + &cprm.file)) { printk(KERN_INFO "Core dump to %s pipe failed\n", corename); goto fail_dropcount; } } else - file = filp_open(corename, + cprm.file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); - if (IS_ERR(file)) + if (IS_ERR(cprm.file)) goto fail_dropcount; - inode = file->f_path.dentry->d_inode; + inode = cprm.file->f_path.dentry->d_inode; if (inode->i_nlink > 1) goto close_fail; /* multiple links - don't dump */ - if (!ispipe && d_unhashed(file->f_path.dentry)) + if (!ispipe && d_unhashed(cprm.file->f_path.dentry)) goto close_fail; /* AK: actually i see no reason to not allow this for named pipes etc., @@ -1901,21 +1904,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) */ if (inode->i_uid != current_fsuid()) goto close_fail; - if (!file->f_op) + if (!cprm.file->f_op) goto close_fail; - if (!file->f_op->write) + if (!cprm.file->f_op->write) goto close_fail; - if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0) + if (!ispipe && + do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0) goto close_fail; - retval = binfmt->core_dump(signr, regs, file, core_limit); + retval = binfmt->core_dump(&cprm); if (retval) current->signal->group_exit_code |= 0x80; close_fail: if (ispipe && core_pipe_limit) - wait_for_dump_helpers(file); - filp_close(file, NULL); + wait_for_dump_helpers(cprm.file); + filp_close(cprm.file, NULL); fail_dropcount: if (dump_count) atomic_dec(&core_dump_count); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index aece486ac734..cd4349bdc34e 100644 --- 
a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -68,6 +68,14 @@ struct linux_binprm{ #define BINPRM_MAX_RECURSION 4 +/* Function parameter for binfmt->coredump */ +struct coredump_params { + long signr; + struct pt_regs *regs; + struct file *file; + unsigned long limit; +}; + /* * This structure defines the functions that are used to load the binary formats that * linux accepts. @@ -77,7 +85,7 @@ struct linux_binfmt { struct module *module; int (*load_binary)(struct linux_binprm *, struct pt_regs * regs); int (*load_shlib)(struct file *); - int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); + int (*core_dump)(struct coredump_params *cprm); unsigned long min_coredump; /* minimal dump size */ int hasvdso; }; -- cgit v1.2.3 From ed8b67040965e4fe695db333d5914e18ea5f146f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Dec 2009 22:17:09 +0000 Subject: drm: convert drm_ioctl to unlocked_ioctl drm_ioctl is called with the Big Kernel Lock held, which shows up very high in statistics on vfs_ioctl. Moving the lock into the drm_ioctl function itself makes sure we blame the right subsystem and it gets us one step closer to eliminating the locked version of fops->ioctl. Since drm_ioctl does not require the lock itself, we only need to hold it while calling the specific handler. The 32 bit conversion handlers do not interact with any other code, so they don't need the BKL here either and can just call drm_ioctl. As a bonus, this cleans up all the other users of drm_ioctl which now no longer have to find the inode or call lock_kernel. [airlied: squashed the non-driver bits of the second patch in here, this provides the flag for drivers to use to select unlocked ioctls - but doesn't modify any drivers]. Signed-off-by: Arnd Bergmann Cc: David Airlie Cc: dri-devel@lists.sourceforge.net Cc: Frederic Weisbecker Cc: Thomas Gleixner Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drv.c | 13 +++-- drivers/gpu/drm/drm_ioc32.c | 89 +++++++++++---------------------- drivers/gpu/drm/i810/i810_dma.c | 2 +- drivers/gpu/drm/i810/i810_drv.c | 2 +- drivers/gpu/drm/i830/i830_dma.c | 2 +- drivers/gpu/drm/i830/i830_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_ioc32.c | 23 ++++----- drivers/gpu/drm/mga/mga_drv.c | 2 +- drivers/gpu/drm/mga/mga_ioc32.c | 13 ++--- drivers/gpu/drm/nouveau/nouveau_drv.c | 2 +- drivers/gpu/drm/nouveau/nouveau_ioc32.c | 4 +- drivers/gpu/drm/r128/r128_drv.c | 2 +- drivers/gpu/drm/r128/r128_ioc32.c | 16 ++---- drivers/gpu/drm/radeon/radeon_drv.c | 4 +- drivers/gpu/drm/radeon/radeon_ioc32.c | 38 +++++--------- drivers/gpu/drm/savage/savage_drv.c | 2 +- drivers/gpu/drm/sis/sis_drv.c | 2 +- drivers/gpu/drm/tdfx/tdfx_drv.c | 2 +- drivers/gpu/drm/via/via_drv.c | 2 +- include/drm/drmP.h | 5 +- 21 files changed, 89 insertions(+), 140 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index ff2f1042cb44..766c46875a20 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -434,11 +434,11 @@ static int drm_version(struct drm_device *dev, void *data, * Looks up the ioctl function in the ::ioctls table, checking for root * previleges if so required, and dispatches to the respective function. 
*/ -int drm_ioctl(struct inode *inode, struct file *filp, +long drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct drm_file *file_priv = filp->private_data; - struct drm_device *dev = file_priv->minor->dev; + struct drm_device *dev; struct drm_ioctl_desc *ioctl; drm_ioctl_t *func; unsigned int nr = DRM_IOCTL_NR(cmd); @@ -446,6 +446,7 @@ int drm_ioctl(struct inode *inode, struct file *filp, char stack_kdata[128]; char *kdata = NULL; + dev = file_priv->minor->dev; atomic_inc(&dev->ioctl_count); atomic_inc(&dev->counts[_DRM_STAT_IOCTLS]); ++file_priv->ioctl_count; @@ -501,7 +502,13 @@ int drm_ioctl(struct inode *inode, struct file *filp, goto err_i1; } } - retcode = func(dev, kdata, file_priv); + if (ioctl->flags & DRM_UNLOCKED) + retcode = func(dev, kdata, file_priv); + else { + lock_kernel(); + retcode = func(dev, kdata, file_priv); + unlock_kernel(); + } if (cmd & IOC_OUT) { if (copy_to_user((void __user *)arg, kdata, diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index 282d9fdf9f4e..d61d185cf040 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -104,7 +104,7 @@ static int compat_drm_version(struct file *file, unsigned int cmd, &version->desc)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, + err = drm_ioctl(file, DRM_IOCTL_VERSION, (unsigned long)version); if (err) return err; @@ -145,8 +145,7 @@ static int compat_drm_getunique(struct file *file, unsigned int cmd, &u->unique)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_GET_UNIQUE, (unsigned long)u); + err = drm_ioctl(file, DRM_IOCTL_GET_UNIQUE, (unsigned long)u); if (err) return err; @@ -174,8 +173,7 @@ static int compat_drm_setunique(struct file *file, unsigned int cmd, &u->unique)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_SET_UNIQUE, (unsigned long)u); + return drm_ioctl(file, DRM_IOCTL_SET_UNIQUE, (unsigned long)u); } typedef struct drm_map32 { @@ -205,8 +203,7 @@ static int compat_drm_getmap(struct file *file, unsigned int cmd, if (__put_user(idx, &map->offset)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_GET_MAP, (unsigned long)map); + err = drm_ioctl(file, DRM_IOCTL_GET_MAP, (unsigned long)map); if (err) return err; @@ -246,8 +243,7 @@ static int compat_drm_addmap(struct file *file, unsigned int cmd, || __put_user(m32.flags, &map->flags)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_ADD_MAP, (unsigned long)map); + err = drm_ioctl(file, DRM_IOCTL_ADD_MAP, (unsigned long)map); if (err) return err; @@ -284,8 +280,7 @@ static int compat_drm_rmmap(struct file *file, unsigned int cmd, if (__put_user((void *)(unsigned long)handle, &map->handle)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RM_MAP, (unsigned long)map); + return drm_ioctl(file, DRM_IOCTL_RM_MAP, (unsigned long)map); } typedef struct drm_client32 { @@ -314,8 +309,7 @@ static int compat_drm_getclient(struct file *file, unsigned int cmd, if (__put_user(idx, &client->idx)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_GET_CLIENT, (unsigned long)client); + err = drm_ioctl(file, DRM_IOCTL_GET_CLIENT, (unsigned long)client); if (err) return err; @@ -351,8 +345,7 @@ static int compat_drm_getstats(struct file *file, unsigned int cmd, if (!access_ok(VERIFY_WRITE, stats, sizeof(*stats))) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - 
DRM_IOCTL_GET_STATS, (unsigned long)stats); + err = drm_ioctl(file, DRM_IOCTL_GET_STATS, (unsigned long)stats); if (err) return err; @@ -395,8 +388,7 @@ static int compat_drm_addbufs(struct file *file, unsigned int cmd, || __put_user(agp_start, &buf->agp_start)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_ADD_BUFS, (unsigned long)buf); + err = drm_ioctl(file, DRM_IOCTL_ADD_BUFS, (unsigned long)buf); if (err) return err; @@ -427,8 +419,7 @@ static int compat_drm_markbufs(struct file *file, unsigned int cmd, || __put_user(b32.high_mark, &buf->high_mark)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_MARK_BUFS, (unsigned long)buf); + return drm_ioctl(file, DRM_IOCTL_MARK_BUFS, (unsigned long)buf); } typedef struct drm_buf_info32 { @@ -469,8 +460,7 @@ static int compat_drm_infobufs(struct file *file, unsigned int cmd, || __put_user(list, &request->list)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_INFO_BUFS, (unsigned long)request); + err = drm_ioctl(file, DRM_IOCTL_INFO_BUFS, (unsigned long)request); if (err) return err; @@ -531,8 +521,7 @@ static int compat_drm_mapbufs(struct file *file, unsigned int cmd, || __put_user(list, &request->list)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_MAP_BUFS, (unsigned long)request); + err = drm_ioctl(file, DRM_IOCTL_MAP_BUFS, (unsigned long)request); if (err) return err; @@ -578,8 +567,7 @@ static int compat_drm_freebufs(struct file *file, unsigned int cmd, &request->list)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_FREE_BUFS, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_FREE_BUFS, (unsigned long)request); } typedef struct drm_ctx_priv_map32 { @@ -605,8 +593,7 @@ static int compat_drm_setsareactx(struct file *file, unsigned int cmd, &request->handle)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_SET_SAREA_CTX, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_SET_SAREA_CTX, (unsigned long)request); } static int compat_drm_getsareactx(struct file *file, unsigned int cmd, @@ -628,8 +615,7 @@ static int compat_drm_getsareactx(struct file *file, unsigned int cmd, if (__put_user(ctx_id, &request->ctx_id)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_GET_SAREA_CTX, (unsigned long)request); + err = drm_ioctl(file, DRM_IOCTL_GET_SAREA_CTX, (unsigned long)request); if (err) return err; @@ -664,8 +650,7 @@ static int compat_drm_resctx(struct file *file, unsigned int cmd, &res->contexts)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RES_CTX, (unsigned long)res); + err = drm_ioctl(file, DRM_IOCTL_RES_CTX, (unsigned long)res); if (err) return err; @@ -718,8 +703,7 @@ static int compat_drm_dma(struct file *file, unsigned int cmd, &d->request_sizes)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_DMA, (unsigned long)d); + err = drm_ioctl(file, DRM_IOCTL_DMA, (unsigned long)d); if (err) return err; @@ -751,8 +735,7 @@ static int compat_drm_agp_enable(struct file *file, unsigned int cmd, if (put_user(m32.mode, &mode->mode)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_AGP_ENABLE, (unsigned long)mode); + return drm_ioctl(file, DRM_IOCTL_AGP_ENABLE, (unsigned long)mode); } typedef struct drm_agp_info32 { @@ -781,8 +764,7 @@ static int compat_drm_agp_info(struct file *file, 
unsigned int cmd, if (!access_ok(VERIFY_WRITE, info, sizeof(*info))) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_AGP_INFO, (unsigned long)info); + err = drm_ioctl(file, DRM_IOCTL_AGP_INFO, (unsigned long)info); if (err) return err; @@ -827,16 +809,14 @@ static int compat_drm_agp_alloc(struct file *file, unsigned int cmd, || __put_user(req32.type, &request->type)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_AGP_ALLOC, (unsigned long)request); + err = drm_ioctl(file, DRM_IOCTL_AGP_ALLOC, (unsigned long)request); if (err) return err; if (__get_user(req32.handle, &request->handle) || __get_user(req32.physical, &request->physical) || copy_to_user(argp, &req32, sizeof(req32))) { - drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_AGP_FREE, (unsigned long)request); + drm_ioctl(file, DRM_IOCTL_AGP_FREE, (unsigned long)request); return -EFAULT; } @@ -856,8 +836,7 @@ static int compat_drm_agp_free(struct file *file, unsigned int cmd, || __put_user(handle, &request->handle)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_AGP_FREE, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_AGP_FREE, (unsigned long)request); } typedef struct drm_agp_binding32 { @@ -881,8 +860,7 @@ static int compat_drm_agp_bind(struct file *file, unsigned int cmd, || __put_user(req32.offset, &request->offset)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_AGP_BIND, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_AGP_BIND, (unsigned long)request); } static int compat_drm_agp_unbind(struct file *file, unsigned int cmd, @@ -898,8 +876,7 @@ static int compat_drm_agp_unbind(struct file *file, unsigned int cmd, || __put_user(handle, &request->handle)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_AGP_UNBIND, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_AGP_UNBIND, (unsigned long)request); } #endif /* __OS_HAS_AGP */ @@ -923,8 +900,7 @@ static int compat_drm_sg_alloc(struct file *file, unsigned int cmd, || __put_user(x, &request->size)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_SG_ALLOC, (unsigned long)request); + err = drm_ioctl(file, DRM_IOCTL_SG_ALLOC, (unsigned long)request); if (err) return err; @@ -950,8 +926,7 @@ static int compat_drm_sg_free(struct file *file, unsigned int cmd, || __put_user(x << PAGE_SHIFT, &request->handle)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_SG_FREE, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_SG_FREE, (unsigned long)request); } #if defined(CONFIG_X86) || defined(CONFIG_IA64) @@ -981,8 +956,7 @@ static int compat_drm_update_draw(struct file *file, unsigned int cmd, __put_user(update32.data, &request->data)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_UPDATE_DRAW, (unsigned long)request); + err = drm_ioctl(file, DRM_IOCTL_UPDATE_DRAW, (unsigned long)request); return err; } #endif @@ -1023,8 +997,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, || __put_user(req32.request.signal, &request->request.signal)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_WAIT_VBLANK, (unsigned long)request); + err = drm_ioctl(file, DRM_IOCTL_WAIT_VBLANK, (unsigned long)request); if (err) return err; @@ -1094,16 +1067,14 @@ long drm_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) * 
than always failing. */ if (nr >= ARRAY_SIZE(drm_compat_ioctls)) - return drm_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); + return drm_ioctl(filp, cmd, arg); fn = drm_compat_ioctls[nr]; - lock_kernel(); /* XXX for now */ if (fn != NULL) ret = (*fn) (filp, cmd, arg); else - ret = drm_ioctl(filp->f_path.dentry->d_inode, filp, cmd, arg); - unlock_kernel(); + ret = drm_ioctl(filp, cmd, arg); return ret; } diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index 7d1d88cdf2dc..de32d22a8c39 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -115,7 +115,7 @@ static int i810_mmap_buffers(struct file *filp, struct vm_area_struct *vma) static const struct file_operations i810_buffer_fops = { .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = i810_mmap_buffers, .fasync = drm_fasync, }; diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c index fabb9a817966..c1e02752e023 100644 --- a/drivers/gpu/drm/i810/i810_drv.c +++ b/drivers/gpu/drm/i810/i810_drv.c @@ -59,7 +59,7 @@ static struct drm_driver driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = drm_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/drivers/gpu/drm/i830/i830_dma.c b/drivers/gpu/drm/i830/i830_dma.c index 877bf6cb14a4..06bd732e6463 100644 --- a/drivers/gpu/drm/i830/i830_dma.c +++ b/drivers/gpu/drm/i830/i830_dma.c @@ -117,7 +117,7 @@ static int i830_mmap_buffers(struct file *filp, struct vm_area_struct *vma) static const struct file_operations i830_buffer_fops = { .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = i830_mmap_buffers, .fasync = drm_fasync, }; diff --git a/drivers/gpu/drm/i830/i830_drv.c b/drivers/gpu/drm/i830/i830_drv.c index 389597e4a623..44f990bed8f4 100644 --- a/drivers/gpu/drm/i830/i830_drv.c +++ b/drivers/gpu/drm/i830/i830_drv.c @@ -70,7 +70,7 @@ static struct drm_driver driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = drm_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2fa217862058..24286ca168fc 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -329,7 +329,7 @@ static struct drm_driver driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = drm_gem_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/drivers/gpu/drm/i915/i915_ioc32.c b/drivers/gpu/drm/i915/i915_ioc32.c index 1fe68a251b75..13b028994b2b 100644 --- a/drivers/gpu/drm/i915/i915_ioc32.c +++ b/drivers/gpu/drm/i915/i915_ioc32.c @@ -66,8 +66,7 @@ static int compat_i915_batchbuffer(struct file *file, unsigned int cmd, &batchbuffer->cliprects)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_I915_BATCHBUFFER, + return drm_ioctl(file, DRM_IOCTL_I915_BATCHBUFFER, (unsigned long)batchbuffer); } @@ -102,8 +101,8 @@ static int compat_i915_cmdbuffer(struct file *file, unsigned int cmd, &cmdbuffer->cliprects)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_I915_CMDBUFFER, (unsigned long)cmdbuffer); + return drm_ioctl(file, DRM_IOCTL_I915_CMDBUFFER, + (unsigned long)cmdbuffer); } typedef struct drm_i915_irq_emit32 { @@ -125,8 
+124,8 @@ static int compat_i915_irq_emit(struct file *file, unsigned int cmd, &request->irq_seq)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_I915_IRQ_EMIT, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_I915_IRQ_EMIT, + (unsigned long)request); } typedef struct drm_i915_getparam32 { int param; @@ -149,8 +148,8 @@ static int compat_i915_getparam(struct file *file, unsigned int cmd, &request->value)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_I915_GETPARAM, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_I915_GETPARAM, + (unsigned long)request); } typedef struct drm_i915_mem_alloc32 { @@ -178,8 +177,8 @@ static int compat_i915_alloc(struct file *file, unsigned int cmd, &request->region_offset)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_I915_ALLOC, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_I915_ALLOC, + (unsigned long)request); } drm_ioctl_compat_t *i915_compat_ioctls[] = { @@ -211,12 +210,10 @@ long i915_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(i915_compat_ioctls)) fn = i915_compat_ioctls[nr - DRM_COMMAND_BASE]; - lock_kernel(); /* XXX for now */ if (fn != NULL) ret = (*fn) (filp, cmd, arg); else - ret = drm_ioctl(filp->f_path.dentry->d_inode, filp, cmd, arg); - unlock_kernel(); + ret = drm_ioctl(filp, cmd, arg); return ret; } diff --git a/drivers/gpu/drm/mga/mga_drv.c b/drivers/gpu/drm/mga/mga_drv.c index 97ee566ef749..ddfe16197b59 100644 --- a/drivers/gpu/drm/mga/mga_drv.c +++ b/drivers/gpu/drm/mga/mga_drv.c @@ -68,7 +68,7 @@ static struct drm_driver driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = drm_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/drivers/gpu/drm/mga/mga_ioc32.c b/drivers/gpu/drm/mga/mga_ioc32.c index 30d00478ddee..c1f877b7bac1 100644 --- a/drivers/gpu/drm/mga/mga_ioc32.c +++ b/drivers/gpu/drm/mga/mga_ioc32.c @@ -100,8 +100,7 @@ static int compat_mga_init(struct file *file, unsigned int cmd, if (err) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_MGA_INIT, (unsigned long)init); + return drm_ioctl(file, DRM_IOCTL_MGA_INIT, (unsigned long)init); } typedef struct drm_mga_getparam32 { @@ -125,8 +124,7 @@ static int compat_mga_getparam(struct file *file, unsigned int cmd, &getparam->value)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_MGA_GETPARAM, (unsigned long)getparam); + return drm_ioctl(file, DRM_IOCTL_MGA_GETPARAM, (unsigned long)getparam); } typedef struct drm_mga_drm_bootstrap32 { @@ -166,8 +164,7 @@ static int compat_mga_dma_bootstrap(struct file *file, unsigned int cmd, || __put_user(dma_bootstrap32.agp_size, &dma_bootstrap->agp_size)) return -EFAULT; - err = drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_MGA_DMA_BOOTSTRAP, + err = drm_ioctl(file, DRM_IOCTL_MGA_DMA_BOOTSTRAP, (unsigned long)dma_bootstrap); if (err) return err; @@ -220,12 +217,10 @@ long mga_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(mga_compat_ioctls)) fn = mga_compat_ioctls[nr - DRM_COMMAND_BASE]; - lock_kernel(); /* XXX for now */ if (fn != NULL) ret = (*fn) (filp, cmd, arg); else - ret = drm_ioctl(filp->f_path.dentry->d_inode, filp, cmd, arg); - unlock_kernel(); + ret = drm_ioctl(filp, cmd, arg); return ret; } diff 
--git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 35249c35118f..3f943c01e995 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -341,7 +341,7 @@ static struct drm_driver driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = nouveau_ttm_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/drivers/gpu/drm/nouveau/nouveau_ioc32.c b/drivers/gpu/drm/nouveau/nouveau_ioc32.c index a2c30f4611ba..475ba810bba3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ioc32.c +++ b/drivers/gpu/drm/nouveau/nouveau_ioc32.c @@ -61,12 +61,10 @@ long nouveau_compat_ioctl(struct file *filp, unsigned int cmd, if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(mga_compat_ioctls)) fn = nouveau_compat_ioctls[nr - DRM_COMMAND_BASE]; #endif - lock_kernel(); /* XXX for now */ if (fn != NULL) ret = (*fn)(filp, cmd, arg); else - ret = drm_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); - unlock_kernel(); + ret = drm_ioctl(filp, cmd, arg); return ret; } diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c index 601f4c0e5da5..b806fdcc7170 100644 --- a/drivers/gpu/drm/r128/r128_drv.c +++ b/drivers/gpu/drm/r128/r128_drv.c @@ -64,7 +64,7 @@ static struct drm_driver driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = drm_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/drivers/gpu/drm/r128/r128_ioc32.c b/drivers/gpu/drm/r128/r128_ioc32.c index d3cb676eee84..51c99fc4dd38 100644 --- a/drivers/gpu/drm/r128/r128_ioc32.c +++ b/drivers/gpu/drm/r128/r128_ioc32.c @@ -95,8 +95,7 @@ static int compat_r128_init(struct file *file, unsigned int cmd, &init->agp_textures_offset)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_R128_INIT, (unsigned long)init); + return drm_ioctl(file, DRM_IOCTL_R128_INIT, (unsigned long)init); } typedef struct drm_r128_depth32 { @@ -129,8 +128,7 @@ static int compat_r128_depth(struct file *file, unsigned int cmd, &depth->mask)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_R128_DEPTH, (unsigned long)depth); + return drm_ioctl(file, DRM_IOCTL_R128_DEPTH, (unsigned long)depth); } @@ -153,8 +151,7 @@ static int compat_r128_stipple(struct file *file, unsigned int cmd, &stipple->mask)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_R128_STIPPLE, (unsigned long)stipple); + return drm_ioctl(file, DRM_IOCTL_R128_STIPPLE, (unsigned long)stipple); } typedef struct drm_r128_getparam32 { @@ -178,8 +175,7 @@ static int compat_r128_getparam(struct file *file, unsigned int cmd, &getparam->value)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_R128_GETPARAM, (unsigned long)getparam); + return drm_ioctl(file, DRM_IOCTL_R128_GETPARAM, (unsigned long)getparam); } drm_ioctl_compat_t *r128_compat_ioctls[] = { @@ -210,12 +206,10 @@ long r128_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(r128_compat_ioctls)) fn = r128_compat_ioctls[nr - DRM_COMMAND_BASE]; - lock_kernel(); /* XXX for now */ if (fn != NULL) ret = (*fn) (filp, cmd, arg); else - ret = drm_ioctl(filp->f_path.dentry->d_inode, filp, cmd, arg); - unlock_kernel(); + ret = drm_ioctl(filp, cmd, arg); return ret; } diff --git a/drivers/gpu/drm/radeon/radeon_drv.c 
b/drivers/gpu/drm/radeon/radeon_drv.c index dbd56ef82f9c..8ba3de7994d4 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -196,7 +196,7 @@ static struct drm_driver driver_old = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = drm_mmap, .poll = drm_poll, .fasync = drm_fasync, @@ -284,7 +284,7 @@ static struct drm_driver kms_driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = radeon_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/drivers/gpu/drm/radeon/radeon_ioc32.c b/drivers/gpu/drm/radeon/radeon_ioc32.c index a1bf11de308a..48b7cea31e08 100644 --- a/drivers/gpu/drm/radeon/radeon_ioc32.c +++ b/drivers/gpu/drm/radeon/radeon_ioc32.c @@ -92,8 +92,7 @@ static int compat_radeon_cp_init(struct file *file, unsigned int cmd, &init->gart_textures_offset)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RADEON_CP_INIT, (unsigned long)init); + return drm_ioctl(file, DRM_IOCTL_RADEON_CP_INIT, (unsigned long)init); } typedef struct drm_radeon_clear32 { @@ -125,8 +124,7 @@ static int compat_radeon_cp_clear(struct file *file, unsigned int cmd, &clr->depth_boxes)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RADEON_CLEAR, (unsigned long)clr); + return drm_ioctl(file, DRM_IOCTL_RADEON_CLEAR, (unsigned long)clr); } typedef struct drm_radeon_stipple32 { @@ -149,8 +147,7 @@ static int compat_radeon_cp_stipple(struct file *file, unsigned int cmd, &request->mask)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RADEON_STIPPLE, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_RADEON_STIPPLE, (unsigned long)request); } typedef struct drm_radeon_tex_image32 { @@ -204,8 +201,7 @@ static int compat_radeon_cp_texture(struct file *file, unsigned int cmd, &image->data)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RADEON_TEXTURE, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_RADEON_TEXTURE, (unsigned long)request); } typedef struct drm_radeon_vertex2_32 { @@ -238,8 +234,7 @@ static int compat_radeon_cp_vertex2(struct file *file, unsigned int cmd, &request->prim)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RADEON_VERTEX2, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_RADEON_VERTEX2, (unsigned long)request); } typedef struct drm_radeon_cmd_buffer32 { @@ -268,8 +263,7 @@ static int compat_radeon_cp_cmdbuf(struct file *file, unsigned int cmd, &request->boxes)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RADEON_CMDBUF, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_RADEON_CMDBUF, (unsigned long)request); } typedef struct drm_radeon_getparam32 { @@ -293,8 +287,7 @@ static int compat_radeon_cp_getparam(struct file *file, unsigned int cmd, &request->value)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RADEON_GETPARAM, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_RADEON_GETPARAM, (unsigned long)request); } typedef struct drm_radeon_mem_alloc32 { @@ -322,8 +315,7 @@ static int compat_radeon_mem_alloc(struct file *file, unsigned int cmd, &request->region_offset)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RADEON_ALLOC, (unsigned long)request); + return 
drm_ioctl(file, DRM_IOCTL_RADEON_ALLOC, (unsigned long)request); } typedef struct drm_radeon_irq_emit32 { @@ -345,8 +337,7 @@ static int compat_radeon_irq_emit(struct file *file, unsigned int cmd, &request->irq_seq)) return -EFAULT; - return drm_ioctl(file->f_path.dentry->d_inode, file, - DRM_IOCTL_RADEON_IRQ_EMIT, (unsigned long)request); + return drm_ioctl(file, DRM_IOCTL_RADEON_IRQ_EMIT, (unsigned long)request); } /* The two 64-bit arches where alignof(u64)==4 in 32-bit code */ @@ -372,8 +363,7 @@ static int compat_radeon_cp_setparam(struct file *file, unsigned int cmd, &request->value)) return -EFAULT; - return drm_ioctl(file->f_dentry->d_inode, file, - DRM_IOCTL_RADEON_SETPARAM, (unsigned long) request); + return drm_ioctl(file, DRM_IOCTL_RADEON_SETPARAM, (unsigned long) request); } #else #define compat_radeon_cp_setparam NULL @@ -413,12 +403,10 @@ long radeon_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(radeon_compat_ioctls)) fn = radeon_compat_ioctls[nr - DRM_COMMAND_BASE]; - lock_kernel(); /* XXX for now */ if (fn != NULL) ret = (*fn) (filp, cmd, arg); else - ret = drm_ioctl(filp->f_path.dentry->d_inode, filp, cmd, arg); - unlock_kernel(); + ret = drm_ioctl(filp, cmd, arg); return ret; } @@ -431,9 +419,7 @@ long radeon_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long if (nr < DRM_COMMAND_BASE) return drm_compat_ioctl(filp, cmd, arg); - lock_kernel(); /* XXX for now */ - ret = drm_ioctl(filp->f_path.dentry->d_inode, filp, cmd, arg); - unlock_kernel(); + ret = drm_ioctl(filp, cmd, arg); return ret; } diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c index eee52aa92a7c..021de44c15ab 100644 --- a/drivers/gpu/drm/savage/savage_drv.c +++ b/drivers/gpu/drm/savage/savage_drv.c @@ -50,7 +50,7 @@ static struct drm_driver driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = drm_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index e725cc0b1155..4fd1f067d380 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -80,7 +80,7 @@ static struct drm_driver driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = drm_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c index 012ff2e356b2..ec5a43e65722 100644 --- a/drivers/gpu/drm/tdfx/tdfx_drv.c +++ b/drivers/gpu/drm/tdfx/tdfx_drv.c @@ -48,7 +48,7 @@ static struct drm_driver driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = drm_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/drivers/gpu/drm/via/via_drv.c b/drivers/gpu/drm/via/via_drv.c index bc2f51843005..7a1b210401e0 100644 --- a/drivers/gpu/drm/via/via_drv.c +++ b/drivers/gpu/drm/via/via_drv.c @@ -58,7 +58,7 @@ static struct drm_driver driver = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release, - .ioctl = drm_ioctl, + .unlocked_ioctl = drm_ioctl, .mmap = drm_mmap, .poll = drm_poll, .fasync = drm_fasync, diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 19ef8ebdc662..71dafb69cfeb 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -296,6 +296,7 @@ typedef int drm_ioctl_compat_t(struct file *filp, unsigned int cmd, #define 
DRM_MASTER 0x2 #define DRM_ROOT_ONLY 0x4 #define DRM_CONTROL_ALLOW 0x8 +#define DRM_UNLOCKED 0x10 struct drm_ioctl_desc { unsigned int cmd; @@ -1128,8 +1129,8 @@ static inline int drm_mtrr_del(int handle, unsigned long offset, /* Driver support (drm_drv.h) */ extern int drm_init(struct drm_driver *driver); extern void drm_exit(struct drm_driver *driver); -extern int drm_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); +extern long drm_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); extern long drm_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); extern int drm_lastclose(struct drm_device *dev); -- cgit v1.2.3 From 925cc71e512a29e2594bcc17dc58d0a0e9c4d524 Mon Sep 17 00:00:00 2001 From: Robert Jennings Date: Thu, 17 Dec 2009 14:44:38 +0000 Subject: mm: Add notifier in pageblock isolation for balloon drivers Memory balloon drivers can allocate a large amount of memory which is not movable but could be freed to accomodate memory hotplug remove. Prior to calling the memory hotplug notifier chain the memory in the pageblock is isolated. Currently, if the migrate type is not MIGRATE_MOVABLE the isolation will not proceed, causing the memory removal for that page range to fail. Rather than failing pageblock isolation if the migrateteype is not MIGRATE_MOVABLE, this patch checks if all of the pages in the pageblock, and not on the LRU, are owned by a registered balloon driver (or other entity) using a notifier chain. If all of the non-movable pages are owned by a balloon, they can be freed later through the memory notifier chain and the range can still be isolated in set_migratetype_isolate(). Signed-off-by: Robert Jennings Cc: Mel Gorman Cc: Ingo Molnar Cc: Brian King Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Gerald Schaefer Cc: KAMEZAWA Hiroyuki Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- drivers/base/memory.c | 19 +++++++++++++++++ include/linux/memory.h | 27 ++++++++++++++++++++++++ mm/page_alloc.c | 57 +++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 96 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c4c8f2e1dd15..d7d77d4a402c 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -63,6 +63,20 @@ void unregister_memory_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_memory_notifier); +static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain); + +int register_memory_isolate_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&memory_isolate_chain, nb); +} +EXPORT_SYMBOL(register_memory_isolate_notifier); + +void unregister_memory_isolate_notifier(struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&memory_isolate_chain, nb); +} +EXPORT_SYMBOL(unregister_memory_isolate_notifier); + /* * register_memory - Setup a sysfs device for a memory block */ @@ -157,6 +171,11 @@ int memory_notify(unsigned long val, void *v) return blocking_notifier_call_chain(&memory_chain, val, v); } +int memory_isolate_notify(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&memory_isolate_chain, val, v); +} + /* * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is * OK to have direct references to sparsemem variables in here. 
diff --git a/include/linux/memory.h b/include/linux/memory.h index 37fa19b34ef5..1adfe779eb99 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -50,6 +50,19 @@ struct memory_notify { int status_change_nid; }; +/* + * During pageblock isolation, count the number of pages within the + * range [start_pfn, start_pfn + nr_pages) which are owned by code + * in the notifier chain. + */ +#define MEM_ISOLATE_COUNT (1<<0) + +struct memory_isolate_notify { + unsigned long start_pfn; /* Start of range to check */ + unsigned int nr_pages; /* # pages in range to check */ + unsigned int pages_found; /* # pages owned found by callbacks */ +}; + struct notifier_block; struct mem_section; @@ -76,14 +89,28 @@ static inline int memory_notify(unsigned long val, void *v) { return 0; } +static inline int register_memory_isolate_notifier(struct notifier_block *nb) +{ + return 0; +} +static inline void unregister_memory_isolate_notifier(struct notifier_block *nb) +{ +} +static inline int memory_isolate_notify(unsigned long val, void *v) +{ + return 0; +} #else extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); +extern int register_memory_isolate_notifier(struct notifier_block *nb); +extern void unregister_memory_isolate_notifier(struct notifier_block *nb); extern int register_new_memory(int, struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); +extern int memory_isolate_notify(unsigned long val, void *v); extern struct memory_block *find_memory_block(struct mem_section *); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION< #include #include +#include #include #include @@ -5008,23 +5009,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, int set_migratetype_isolate(struct page *page) { struct zone *zone; - unsigned long flags; + struct page *curr_page; + unsigned long flags, pfn, iter; + unsigned long immobile = 0; + struct memory_isolate_notify arg; + int notifier_ret; int ret = -EBUSY; int zone_idx; zone = page_zone(page); zone_idx = zone_idx(zone); + spin_lock_irqsave(&zone->lock, flags); + if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE || + zone_idx == ZONE_MOVABLE) { + ret = 0; + goto out; + } + + pfn = page_to_pfn(page); + arg.start_pfn = pfn; + arg.nr_pages = pageblock_nr_pages; + arg.pages_found = 0; + /* - * In future, more migrate types will be able to be isolation target. + * It may be possible to isolate a pageblock even if the + * migratetype is not MIGRATE_MOVABLE. The memory isolation + * notifier chain is used by balloon drivers to return the + * number of pages in a range that are held by the balloon + * driver to shrink memory. If all the pages are accounted for + * by balloons, are free, or on the LRU, isolation can continue. + * Later, for example, when memory hotplug notifier runs, these + * pages reported as "can be isolated" should be isolated(freed) + * by the balloon driver through the memory notifier chain. 
*/ - if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE && - zone_idx != ZONE_MOVABLE) + notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg); + notifier_ret = notifier_to_errno(notifier_ret); + if (notifier_ret || !arg.pages_found) goto out; - set_pageblock_migratetype(page, MIGRATE_ISOLATE); - move_freepages_block(zone, page, MIGRATE_ISOLATE); - ret = 0; + + for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) { + if (!pfn_valid_within(pfn)) + continue; + + curr_page = pfn_to_page(iter); + if (!page_count(curr_page) || PageLRU(curr_page)) + continue; + + immobile++; + } + + if (arg.pages_found == immobile) + ret = 0; + out: + if (!ret) { + set_pageblock_migratetype(page, MIGRATE_ISOLATE); + move_freepages_block(zone, page, MIGRATE_ISOLATE); + } + spin_unlock_irqrestore(&zone->lock, flags); if (!ret) drain_all_pages(); -- cgit v1.2.3 From 8c0414cd524e9f1c483ffb3ff1c2d860f5c567c8 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 3 Dec 2009 12:46:51 -0800 Subject: fiemap: Add new extent flag FIEMAP_EXTENT_SHARED Some filesystems may allow multiple files to point to a particular extent. This patch adds flag FIEMAP_EXTENT_SHARED to denote extents that are shared with other inodes. Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker --- include/linux/fiemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h index 934e22d65801..d830747f5c0b 100644 --- a/include/linux/fiemap.h +++ b/include/linux/fiemap.h @@ -62,5 +62,7 @@ struct fiemap { #define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively * support extents. Result * merged for efficiency. */ +#define FIEMAP_EXTENT_SHARED 0x00002000 /* Space shared with other + * files. */ #endif /* _LINUX_FIEMAP_H */ -- cgit v1.2.3 From 622e99bf0d54c4517cb0524540cd77257db8621a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 18 Dec 2009 17:43:20 +0100 Subject: [S390] rename NT_PRXSTATUS to NT_S390_HIGHREGS The elf notes number for the upper register halves is s390 specific. Change the name of the elf notes to include S390. 
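[Editor's aside, not part of the patch: the note type value itself (0x300) is unchanged by the rename, so existing s390 core dumps keep working; only code referring to the old macro name needs the header update. A hedged user-space sketch of locating this note in a PT_NOTE segment is shown below, assuming the usual 4-byte note padding; only the constant and its value come from the patch.]

#include <elf.h>
#include <stddef.h>

#ifndef NT_S390_HIGH_GPRS
#define NT_S390_HIGH_GPRS 0x300	/* value from the hunk below */
#endif

/* Return a pointer to the high-gpr note descriptor, or NULL if absent. */
static const void *find_high_gprs(const void *seg, size_t len, size_t *desc_len)
{
	const unsigned char *p = seg, *end = p + len;

	while (p + sizeof(Elf64_Nhdr) <= end) {
		const Elf64_Nhdr *nh = (const Elf64_Nhdr *)p;
		const unsigned char *name = p + sizeof(*nh);
		const unsigned char *desc = name + ((nh->n_namesz + 3) & ~3u);

		if (nh->n_type == NT_S390_HIGH_GPRS) {
			*desc_len = nh->n_descsz;
			return desc;	/* upper register halves */
		}
		p = desc + ((nh->n_descsz + 3) & ~3u);
	}
	return NULL;
}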
Signed-off-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 2 +- include/linux/elf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 653c6a178740..13815d39f7dd 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -959,7 +959,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_fpregs_set, }, [REGSET_GENERAL_EXTENDED] = { - .core_note_type = NT_PRXSTATUS, + .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), diff --git a/include/linux/elf.h b/include/linux/elf.h index 90a4ed0ea0e5..0cc4d55151b7 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -361,7 +361,7 @@ typedef struct elf64_shdr { #define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ -#define NT_PRXSTATUS 0x300 /* s390 upper register halves */ +#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ /* Note header in a PT_NOTE section */ -- cgit v1.2.3 From 9a418af5df03ad133cd8c8f6742b75e542db6392 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 17 Dec 2009 13:55:48 +0100 Subject: mac80211: fix peer HT capabilities I noticed yesterday, because Jeff had noticed a speed regression, cf. bug http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2138 that the SM PS settings for peers were wrong. Instead of overwriting the SM PS settings with the local bits, we need to keep the remote bits. The bug was part of the original HT code from over two years ago, but unfortunately nobody noticed that it makes no sense -- we shouldn't be overwriting the peer's setting with our own but rather keep it intact when masking the peer capabilities with our own. While fixing that, I noticed that the masking of capabilities is completely useless for most of the bits, so also fix those other bits. Finally, I also noticed that PSMP_SUPPORT no longer exists in the final 802.11n version, so also remove that. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/rt2x00/rt2800lib.c | 3 +-- include/linux/ieee80211.h | 2 +- net/mac80211/ht.c | 25 ++++++++++++++++++++++--- 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 6bf6c0f12f35..27bf887f1453 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -2080,8 +2080,7 @@ int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_TX_STBC | - IEEE80211_HT_CAP_RX_STBC | - IEEE80211_HT_CAP_PSMP_SUPPORT; + IEEE80211_HT_CAP_RX_STBC; spec->ht.ampdu_factor = 3; spec->ht.ampdu_density = 4; spec->ht.mcs.tx_params = diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d9724a28c0c2..163c840437d6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -832,7 +832,7 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_CAP_DELAY_BA 0x0400 #define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 #define IEEE80211_HT_CAP_DSSSCCK40 0x1000 -#define IEEE80211_HT_CAP_PSMP_SUPPORT 0x2000 +#define IEEE80211_HT_CAP_RESERVED 0x2000 #define IEEE80211_HT_CAP_40MHZ_INTOLERANT 0x4000 #define IEEE80211_HT_CAP_LSIG_TXOP_PROT 0x8000 diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 3787455fb696..d7dcee680728 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -34,9 +34,28 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, ht_cap->ht_supported = true; - ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & sband->ht_cap.cap; - ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS; - ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS; + /* + * The bits listed in this expression should be + * the same for the peer and us, if the station + * advertises more then we can't use those thus + * we mask them out. + */ + ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & + (sband->ht_cap.cap | + ~(IEEE80211_HT_CAP_LDPC_CODING | + IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40)); + /* + * The STBC bits are asymmetric -- if we don't have + * TX then mask out the peer's RX and vice versa. + */ + if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)) + ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC; + if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)) + ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC; ampdu_info = ht_cap_ie->ampdu_params_info; ht_cap->ampdu_factor = -- cgit v1.2.3 From 78f1699659963fff97975df44db6d5dbe7218e55 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:09 -0700 Subject: ACPI: processor: call _PDC early We discovered that at least one machine (HP Envy), methods in the DSDT attempt to call external methods defined in a dynamically loaded SSDT. Unfortunately, the DSDT methods we are trying to call are part of the EC initialization, which happens very early, and the the dynamic SSDT is only loaded when a processor _PDC method runs much later. This results in namespace lookup errors for the (as of yet) undefined methods. Since Windows doesn't have any issues with this machine, we take it as a hint that they must be evaluating _PDC much earlier than we are. Thus, the proper thing for Linux to do should be to match the Windows implementation more closely. Provide a mechanism to call _PDC before we enable the EC. Doing so loads the dynamic tables, and allows the EC to be enabled correctly. 
The ACPI processor driver will still evaluate _PDC in its .add() method to cover the hotplug case. Resolves: http://bugzilla.kernel.org/show_bug.cgi?id=14824 Cc: ming.m.lin@intel.com Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- drivers/acpi/Makefile | 1 + drivers/acpi/bus.c | 2 + drivers/acpi/internal.h | 1 + drivers/acpi/processor_core.c | 69 --------------------------- drivers/acpi/processor_pdc.c | 108 ++++++++++++++++++++++++++++++++++++++++++ include/acpi/processor.h | 3 ++ 6 files changed, 115 insertions(+), 69 deletions(-) create mode 100644 drivers/acpi/processor_pdc.c (limited to 'include') diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index c7b10b4298e9..66cc3f36a954 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -32,6 +32,7 @@ acpi-$(CONFIG_ACPI_SLEEP) += proc.o # acpi-y += bus.o glue.o acpi-y += scan.o +acpi-y += processor_pdc.o acpi-y += ec.o acpi-$(CONFIG_ACPI_DOCK) += dock.o acpi-y += pci_root.o pci_link.o pci_irq.o pci_bind.o diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 65f7e335f122..0bdf24a6fd01 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -888,6 +888,8 @@ static int __init acpi_bus_init(void) goto error1; } + acpi_early_processor_set_pdc(); + /* * Maybe EC region is required at bus_scan/acpi_get_devices. So it * is necessary to enable it as early as possible. diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 074cf8682d52..cb28e0502acc 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -43,6 +43,7 @@ int acpi_power_transition(struct acpi_device *device, int state); extern int acpi_power_nocheck; int acpi_wakeup_device_init(void); +void acpi_early_processor_set_pdc(void); /* -------------------------------------------------------------------------- Embedded Controller diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 41731236f9a1..a19a4ff962ea 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -124,29 +124,6 @@ static const struct file_operations acpi_processor_info_fops = { DEFINE_PER_CPU(struct acpi_processor *, processors); struct acpi_processor_errata errata __read_mostly; -static int set_no_mwait(const struct dmi_system_id *id) -{ - printk(KERN_NOTICE PREFIX "%s detected - " - "disabling mwait for CPU C-states\n", id->ident); - idle_nomwait = 1; - return 0; -} - -static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { - { - set_no_mwait, "IFL91 board", { - DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), - DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), - DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, - { - set_no_mwait, "Extensa 5220", { - DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), - DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL}, - {}, -}; /* -------------------------------------------------------------------------- Errata Handling @@ -276,45 +253,6 @@ static int acpi_processor_errata(struct acpi_processor *pr) return result; } -/* -------------------------------------------------------------------------- - Common ACPI processor functions - -------------------------------------------------------------------------- */ - -/* - * _PDC is required for a BIOS-OS handshake for most of the newer - * ACPI processor features. 
- */ -static int acpi_processor_set_pdc(struct acpi_processor *pr) -{ - struct acpi_object_list *pdc_in = pr->pdc; - acpi_status status = AE_OK; - - - if (!pdc_in) - return status; - if (idle_nomwait) { - /* - * If mwait is disabled for CPU C-states, the C2C3_FFH access - * mode will be disabled in the parameter of _PDC object. - * Of course C1_FFH access mode will also be disabled. - */ - union acpi_object *obj; - u32 *buffer = NULL; - - obj = pdc_in->pointer; - buffer = (u32 *)(obj->buffer.pointer); - buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH); - - } - status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL); - - if (ACPI_FAILURE(status)) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Could not evaluate _PDC, using legacy perf. control...\n")); - - return status; -} - /* -------------------------------------------------------------------------- FS Interface (/proc) -------------------------------------------------------------------------- */ @@ -825,9 +763,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) } /* _PDC call should be done before doing anything else (if reqd.). */ - arch_acpi_processor_init_pdc(pr); acpi_processor_set_pdc(pr); - arch_acpi_processor_cleanup_pdc(pr); #ifdef CONFIG_CPU_FREQ acpi_processor_ppc_has_changed(pr, 0); @@ -1145,11 +1081,6 @@ static int __init acpi_processor_init(void) if (!acpi_processor_dir) return -ENOMEM; #endif - /* - * Check whether the system is DMI table. If yes, OSPM - * should not use mwait for CPU-states. - */ - dmi_check_system(processor_idle_dmi_table); result = cpuidle_register_driver(&acpi_idle_driver); if (result < 0) goto out_proc; diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c new file mode 100644 index 000000000000..b416c32dda04 --- /dev/null +++ b/drivers/acpi/processor_pdc.c @@ -0,0 +1,108 @@ +#include + +#include +#include + +#include "internal.h" + +#define PREFIX "ACPI: " +#define _COMPONENT ACPI_PROCESSOR_COMPONENT +ACPI_MODULE_NAME("processor_pdc"); + +static int set_no_mwait(const struct dmi_system_id *id) +{ + printk(KERN_NOTICE PREFIX "%s detected - " + "disabling mwait for CPU C-states\n", id->ident); + idle_nomwait = 1; + return 0; +} + +static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { + { + set_no_mwait, "IFL91 board", { + DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), + DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), + DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, + { + set_no_mwait, "Extensa 5220", { + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), + DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL}, + {}, +}; + +/* + * _PDC is required for a BIOS-OS handshake for most of the newer + * ACPI processor features. + */ +static int acpi_processor_eval_pdc(struct acpi_processor *pr) +{ + struct acpi_object_list *pdc_in = pr->pdc; + acpi_status status = AE_OK; + + if (!pdc_in) + return status; + if (idle_nomwait) { + /* + * If mwait is disabled for CPU C-states, the C2C3_FFH access + * mode will be disabled in the parameter of _PDC object. + * Of course C1_FFH access mode will also be disabled. 
+ */ + union acpi_object *obj; + u32 *buffer = NULL; + + obj = pdc_in->pointer; + buffer = (u32 *)(obj->buffer.pointer); + buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH); + + } + status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL); + + if (ACPI_FAILURE(status)) + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Could not evaluate _PDC, using legacy perf. control.\n")); + + return status; +} + +void acpi_processor_set_pdc(struct acpi_processor *pr) +{ + arch_acpi_processor_init_pdc(pr); + acpi_processor_eval_pdc(pr); + arch_acpi_processor_cleanup_pdc(pr); +} +EXPORT_SYMBOL_GPL(acpi_processor_set_pdc); + +static acpi_status +early_init_pdc(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + struct acpi_processor pr; + + pr.handle = handle; + + /* x86 implementation looks at pr.id to determine some + * CPU capabilites. We can just hard code to 0 since we're + * assuming the CPUs in the system are homogenous and all + * have the same capabilities. + */ + pr.id = 0; + + acpi_processor_set_pdc(&pr); + + return AE_OK; +} + +void acpi_early_processor_set_pdc(void) +{ + /* + * Check whether the system is DMI table. If yes, OSPM + * should not use mwait for CPU-states. + */ + dmi_check_system(processor_idle_dmi_table); + + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, + early_init_pdc, NULL, NULL, NULL); +} diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 29245c6b5c0e..a1b748a7a766 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -325,6 +325,9 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) #endif /* CONFIG_CPU_FREQ */ +/* in processor_pdc.c */ +void acpi_processor_set_pdc(struct acpi_processor *pr); + /* in processor_throttling.c */ int acpi_processor_tstate_has_changed(struct acpi_processor *pr); int acpi_processor_get_throttling_info(struct acpi_processor *pr); -- cgit v1.2.3 From 6c5807d7bc7d051fce00863ffb98d36325501eb2 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:29 -0700 Subject: ACPI: processor: finish unifying arch_acpi_processor_init_pdc() The only thing arch-specific about calling _PDC is what bits get set in the input obj_list buffer. There's no need for several levels of indirection to twiddle those bits. Additionally, since we're just messing around with a buffer, we can simplify the interface; no need to pass around the entire struct acpi_processor * just to get at the buffer. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/include/asm/acpi.h | 4 ++++ arch/ia64/kernel/acpi-processor.c | 18 ------------------ arch/x86/include/asm/acpi.h | 19 +++++++++++++++++++ arch/x86/kernel/acpi/processor.c | 34 ---------------------------------- drivers/acpi/processor_pdc.c | 6 +++--- include/acpi/processor.h | 1 - 6 files changed, 26 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index 3b7888294648..7ae58892ba8d 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -133,6 +133,10 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; #endif static inline bool arch_has_acpi_pdc(void) { return true; } +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; +} #define acpi_unlazy_tlb(x) diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c index ebe23f58bd6e..7ba5accebf66 100644 --- a/arch/ia64/kernel/acpi-processor.c +++ b/arch/ia64/kernel/acpi-processor.c @@ -14,24 +14,6 @@ #include #include -static void init_intel_pdc(struct acpi_processor *pr) -{ - u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - - buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; - - return; -} - -/* Initialize _PDC data based on the CPU vendor */ -void arch_acpi_processor_init_pdc(struct acpi_processor *pr) -{ - init_intel_pdc(pr); - return; -} - -EXPORT_SYMBOL(arch_acpi_processor_init_pdc); - void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) { if (pr->pdc) { diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index d787e6e92bd1..56f462cf22d2 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -149,6 +149,25 @@ static inline bool arch_has_acpi_pdc(void) c->x86_vendor == X86_VENDOR_CENTAUR); } +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + + if (cpu_has(c, X86_FEATURE_ACPI)) + buf[2] |= ACPI_PDC_T_FFH; + + /* + * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + */ + if (!cpu_has(c, X86_FEATURE_MWAIT)) + buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); +} + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index d722ca8cb4c7..0f57307f8224 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -12,40 +12,6 @@ #include #include -static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) -{ - u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - - buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; - - if (cpu_has(c, X86_FEATURE_EST)) - buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; - - if (cpu_has(c, X86_FEATURE_ACPI)) - buf[2] |= ACPI_PDC_T_FFH; - - /* - * If mwait/monitor is unsupported, C2/C3_FFH will be disabled - */ - if (!cpu_has(c, X86_FEATURE_MWAIT)) - buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); - - return; -} - - -/* Initialize _PDC data based on the CPU vendor */ -void arch_acpi_processor_init_pdc(struct acpi_processor *pr) -{ - struct cpuinfo_x86 *c = &cpu_data(pr->id); - - init_intel_pdc(pr, c); - - return; -} - -EXPORT_SYMBOL(arch_acpi_processor_init_pdc); - void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) { if (pr->pdc) { diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index ccda7c95d073..48df08ebcec4 100644 --- 
a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -40,6 +40,9 @@ static void acpi_set_pdc_bits(u32 *buf) /* Enable coordination with firmware's _TSD info */ buf[2] = ACPI_PDC_SMP_T_SWCOORD; + + /* Twiddle arch-specific bits needed for _PDC */ + arch_acpi_set_pdc_bits(buf); } static void acpi_processor_init_pdc(struct acpi_processor *pr) @@ -81,9 +84,6 @@ static void acpi_processor_init_pdc(struct acpi_processor *pr) obj_list->pointer = obj; pr->pdc = obj_list; - /* Now let the arch do the bit-twiddling to buf[] */ - arch_acpi_processor_init_pdc(pr); - return; } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index a1b748a7a766..50edd734aec6 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -257,7 +257,6 @@ int acpi_processor_notify_smm(struct module *calling_module); DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; -void arch_acpi_processor_init_pdc(struct acpi_processor *pr); void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr); #ifdef ARCH_HAS_POWER_INIT -- cgit v1.2.3 From 47817254b8637b56730aec26eed2c337d3938bb5 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:34 -0700 Subject: ACPI: processor: unify arch_acpi_processor_cleanup_pdc The x86 and ia64 implementations of the function in $subject are exactly the same. Also, since the arch-specific implementations of setting _PDC have been completely hollowed out, remove the empty shells. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/kernel/Makefile | 4 ---- arch/ia64/kernel/acpi-processor.c | 27 --------------------------- arch/x86/kernel/acpi/Makefile | 2 +- arch/x86/kernel/acpi/processor.c | 25 ------------------------- drivers/acpi/processor_pdc.c | 21 ++++++++++++++++++++- include/acpi/processor.h | 2 -- 6 files changed, 21 insertions(+), 60 deletions(-) delete mode 100644 arch/ia64/kernel/acpi-processor.c delete mode 100644 arch/x86/kernel/acpi/processor.c (limited to 'include') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 2a75e937ae8d..e1236349c99f 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -18,10 +18,6 @@ obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o -ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += acpi-processor.o -endif - obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c deleted file mode 100644 index 7ba5accebf66..000000000000 --- a/arch/ia64/kernel/acpi-processor.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * arch/ia64/kernel/acpi-processor.c - * - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi - * - Added _PDC for platforms with Intel CPUs - */ - -#include -#include -#include -#include - -#include -#include - -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) -{ - if (pr->pdc) { - kfree(pr->pdc->pointer->buffer.pointer); - kfree(pr->pdc->pointer); - kfree(pr->pdc); - pr->pdc = NULL; - } -} - -EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index fd5ca97a2ad5..6f35260bb3ef 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o 
wakeup_rm.o wakeup_$(BITS).o ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += cstate.o processor.o +obj-y += cstate.o endif $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c deleted file mode 100644 index 0f57307f8224..000000000000 --- a/arch/x86/kernel/acpi/processor.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi - * - Added _PDC for platforms with Intel CPUs - */ - -#include -#include -#include -#include - -#include -#include - -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) -{ - if (pr->pdc) { - kfree(pr->pdc->pointer->buffer.pointer); - kfree(pr->pdc->pointer); - kfree(pr->pdc); - pr->pdc = NULL; - } -} - -EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 48df08ebcec4..e786e2ce1882 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -1,3 +1,12 @@ +/* + * Copyright (C) 2005 Intel Corporation + * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. + * + * Alex Chiang + * - Unified x86/ia64 implementations + * Venkatesh Pallipadi + * - Added _PDC for platforms with Intel CPUs + */ #include #include @@ -121,6 +130,16 @@ static int acpi_processor_eval_pdc(struct acpi_processor *pr) return status; } +static void acpi_processor_cleanup_pdc(struct acpi_processor *pr) +{ + if (pr->pdc) { + kfree(pr->pdc->pointer->buffer.pointer); + kfree(pr->pdc->pointer); + kfree(pr->pdc); + pr->pdc = NULL; + } +} + void acpi_processor_set_pdc(struct acpi_processor *pr) { if (arch_has_acpi_pdc() == false) @@ -128,7 +147,7 @@ void acpi_processor_set_pdc(struct acpi_processor *pr) acpi_processor_init_pdc(pr); acpi_processor_eval_pdc(pr); - arch_acpi_processor_cleanup_pdc(pr); + acpi_processor_cleanup_pdc(pr); } EXPORT_SYMBOL_GPL(acpi_processor_set_pdc); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 50edd734aec6..0873cd57510c 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -257,8 +257,6 @@ int acpi_processor_notify_smm(struct module *calling_module); DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr); - #ifdef ARCH_HAS_POWER_INIT void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, unsigned int cpu); -- cgit v1.2.3 From 43bab25ced218385f7e6a076c2459ea008cfd2e1 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:23:16 -0700 Subject: ACPI: processor: change acpi_processor_set_pdc() interface When calling _PDC, we really only need the handle to the processor to call the method; we don't look at any other parts of the struct acpi_processor * given to us. In the early path, when we walk the namespace, we are given the handle directly, so just pass it through to acpi_processor_set_pdc() without stuffing it into a wasteful struct acpi_processor allocated on the stack each time This saves 2834 bytes of stack. Update the interface accordingly. 
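[Editor's recap, not part of the patch; the real call sites are in the diff that follows. With the new prototype a namespace-walk callback can forward its handle argument directly, with no struct acpi_processor on the stack. The function names below are invented for illustration; acpi_walk_namespace() and acpi_processor_set_pdc(handle) are the APIs the series itself uses.]

#include <linux/acpi.h>
#include <acpi/processor.h>

static acpi_status my_early_pdc_cb(acpi_handle handle, u32 lvl,
				   void *context, void **rv)
{
	/* Pass the handle straight through; nothing else is needed. */
	acpi_processor_set_pdc(handle);
	return AE_OK;
}

static void my_early_pdc_walk(void)
{
	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
			    ACPI_UINT32_MAX, my_early_pdc_cb,
			    NULL, NULL, NULL);
}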
Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- drivers/acpi/processor_core.c | 2 +- drivers/acpi/processor_pdc.c | 18 +++--------------- include/acpi/processor.h | 2 +- 3 files changed, 5 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index a19a4ff962ea..9863c98c81ba 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -763,7 +763,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) } /* _PDC call should be done before doing anything else (if reqd.). */ - acpi_processor_set_pdc(pr); + acpi_processor_set_pdc(pr->handle); #ifdef CONFIG_CPU_FREQ acpi_processor_ppc_has_changed(pr, 0); diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index deeba22c932c..30e4dc0cdf30 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -125,7 +125,7 @@ acpi_processor_eval_pdc(acpi_handle handle, struct acpi_object_list *pdc_in) return status; } -void acpi_processor_set_pdc(struct acpi_processor *pr) +void acpi_processor_set_pdc(acpi_handle handle) { struct acpi_object_list *obj_list; @@ -136,7 +136,7 @@ void acpi_processor_set_pdc(struct acpi_processor *pr) if (!obj_list) return; - acpi_processor_eval_pdc(pr->handle, obj_list); + acpi_processor_eval_pdc(handle, obj_list); kfree(obj_list->pointer->buffer.pointer); kfree(obj_list->pointer); @@ -147,19 +147,7 @@ EXPORT_SYMBOL_GPL(acpi_processor_set_pdc); static acpi_status early_init_pdc(acpi_handle handle, u32 lvl, void *context, void **rv) { - struct acpi_processor pr; - - pr.handle = handle; - - /* x86 implementation looks at pr.id to determine some - * CPU capabilites. We can just hard code to 0 since we're - * assuming the CPUs in the system are homogenous and all - * have the same capabilities. - */ - pr.id = 0; - - acpi_processor_set_pdc(&pr); - + acpi_processor_set_pdc(handle); return AE_OK; } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 0873cd57510c..7cc433d69932 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -323,7 +323,7 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) #endif /* CONFIG_CPU_FREQ */ /* in processor_pdc.c */ -void acpi_processor_set_pdc(struct acpi_processor *pr); +void acpi_processor_set_pdc(acpi_handle handle); /* in processor_throttling.c */ int acpi_processor_tstate_has_changed(struct acpi_processor *pr); -- cgit v1.2.3 From e59897fe443b5b0a71e135ef4020d1937c9f8901 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:23:21 -0700 Subject: ACPI: processor: remove _PDC object list from struct acpi_processor When we call _PDC, we get a handle to the processor, allocate the object list buffer as needed, and free it immediately after calling _PDC. There's no need to drag around this object list with us everywhere else, so let's just get rid of it. 
Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- include/acpi/processor.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 7cc433d69932..0ea5ef4eb6a9 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -224,8 +224,6 @@ struct acpi_processor { struct acpi_processor_throttling throttling; struct acpi_processor_limit limit; struct thermal_cooling_device *cdev; - /* the _PDC objects for this processor, if any */ - struct acpi_object_list *pdc; }; struct acpi_processor_errata { -- cgit v1.2.3 From 482928d59db668b8d82a48717f78986d8cea72e9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 Dec 2009 10:10:39 -0500 Subject: Fix f_flags/f_mode in case of lookup_instantiate_filp() from open(pathname, 3) Just set f_flags when shoving struct file into nameidata; don't postpone that until __dentry_open(). do_filp_open() has correct value; lookup_instantiate_filp() doesn't - we lose the difference between O_RDWR and 3 by that point. We still set .intent.open.flags, so no fs code needs to be changed. Signed-off-by: Al Viro --- fs/internal.h | 7 +++++++ fs/namei.c | 6 ++++-- fs/open.c | 13 ++++++------- include/linux/namei.h | 2 -- 4 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/internal.h b/fs/internal.h index f67cd141d9a8..e96a1667d749 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -85,3 +85,10 @@ extern struct file *get_empty_filp(void); * super.c */ extern int do_remount_sb(struct super_block *, int, void *, int); + +/* + * open.c + */ +struct nameidata; +extern struct file *nameidata_to_filp(struct nameidata *); +extern void release_open_intent(struct nameidata *); diff --git a/fs/namei.c b/fs/namei.c index dad4b80257db..d517f73aa36b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1640,6 +1640,7 @@ struct file *do_filp_open(int dfd, const char *pathname, if (filp == NULL) return ERR_PTR(-ENFILE); nd.intent.open.file = filp; + filp->f_flags = open_flag; nd.intent.open.flags = flag; nd.intent.open.create_mode = 0; error = do_path_lookup(dfd, pathname, @@ -1685,6 +1686,7 @@ struct file *do_filp_open(int dfd, const char *pathname, if (filp == NULL) goto exit_parent; nd.intent.open.file = filp; + filp->f_flags = open_flag; nd.intent.open.flags = flag; nd.intent.open.create_mode = mode; dir = nd.path.dentry; @@ -1725,7 +1727,7 @@ do_last: mnt_drop_write(nd.path.mnt); goto exit; } - filp = nameidata_to_filp(&nd, open_flag); + filp = nameidata_to_filp(&nd); mnt_drop_write(nd.path.mnt); if (nd.root.mnt) path_put(&nd.root); @@ -1789,7 +1791,7 @@ ok: mnt_drop_write(nd.path.mnt); goto exit; } - filp = nameidata_to_filp(&nd, open_flag); + filp = nameidata_to_filp(&nd); if (!IS_ERR(filp)) { error = ima_path_check(&filp->f_path, filp->f_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); diff --git a/fs/open.c b/fs/open.c index ca69241796bd..6daee28f6e8f 100644 --- a/fs/open.c +++ b/fs/open.c @@ -821,15 +821,14 @@ static inline int __get_file_write_access(struct inode *inode, } static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct file *f, + struct file *f, int (*open)(struct inode *, struct file *), const struct cred *cred) { struct inode *inode; int error; - f->f_flags = flags; - f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK | + f->f_mode = (__force fmode_t)((f->f_flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { @@ -930,7 +929,6 @@ 
struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry if (IS_ERR(dentry)) goto out_err; nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), - nd->intent.open.flags - 1, nd->intent.open.file, open, cred); out: @@ -949,7 +947,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp); * * Note that this function destroys the original nameidata */ -struct file *nameidata_to_filp(struct nameidata *nd, int flags) +struct file *nameidata_to_filp(struct nameidata *nd) { const struct cred *cred = current_cred(); struct file *filp; @@ -958,7 +956,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags) filp = nd->intent.open.file; /* Has the filesystem initialised the file for us? */ if (filp->f_path.dentry == NULL) - filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, + filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp, NULL, cred); else path_put(&nd->path); @@ -997,7 +995,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, return ERR_PTR(error); } - return __dentry_open(dentry, mnt, flags, f, NULL, cred); + f->f_flags = flags; + return __dentry_open(dentry, mnt, f, NULL, cred); } EXPORT_SYMBOL(dentry_open); diff --git a/include/linux/namei.h b/include/linux/namei.h index 028946750289..05b441d93642 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -72,8 +72,6 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, int (*open)(struct inode *, struct file *)); -extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); -extern void release_open_intent(struct nameidata *); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); -- cgit v1.2.3 From 5300990c0370e804e49d9a59d928c5d53fb73487 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 Dec 2009 10:15:07 -0500 Subject: Sanitize f_flags helpers * pull ACC_MODE to fs.h; we have several copies all over the place * nightmarish expression calculating f_mode by f_flags deserves a helper too (OPEN_FMODE(flags)) Signed-off-by: Al Viro --- fs/anon_inodes.c | 10 +--------- fs/namei.c | 2 -- fs/open.c | 2 +- include/linux/fs.h | 3 +++ kernel/auditsc.c | 1 - security/tomoyo/file.c | 1 - 6 files changed, 5 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 598237e97221..9f0bf13291e5 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -89,19 +89,11 @@ struct file *anon_inode_getfile(const char *name, struct qstr this; struct path path; struct file *file; - fmode_t mode; int error; if (IS_ERR(anon_inode_inode)) return ERR_PTR(-ENODEV); - switch (flags & O_ACCMODE) { - case O_RDONLY: mode = FMODE_READ; break; - case O_WRONLY: mode = FMODE_WRITE; break; - case O_RDWR: mode = FMODE_READ | FMODE_WRITE; break; - default: return ERR_PTR(-EINVAL); - } - if (fops->owner && !try_module_get(fops->owner)) return ERR_PTR(-ENOENT); @@ -129,7 +121,7 @@ struct file *anon_inode_getfile(const char *name, d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; - file = alloc_file(&path, mode, fops); + file = alloc_file(&path, OPEN_FMODE(flags), fops); if (!file) goto err_dput; file->f_mapping = anon_inode_inode->i_mapping; diff --git a/fs/namei.c b/fs/namei.c index d517f73aa36b..68921d9b5302 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -37,8 +37,6 @@ #include "internal.h" -#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) - /* [Feb-1997 T. 
Schoebel-Theuer] * Fundamental changes in the pathname lookup mechanisms (namei) * were necessary because of omirr. The reason is that omirr needs diff --git a/fs/open.c b/fs/open.c index 6daee28f6e8f..040cef72bc00 100644 --- a/fs/open.c +++ b/fs/open.c @@ -828,7 +828,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, struct inode *inode; int error; - f->f_mode = (__force fmode_t)((f->f_flags+1) & O_ACCMODE) | FMODE_LSEEK | + f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { diff --git a/include/linux/fs.h b/include/linux/fs.h index cca191933ff6..9e13b533aaef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2464,5 +2464,8 @@ int proc_nr_files(struct ctl_table *table, int write, int __init get_filesystem_list(char *buf); +#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) +#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE)) + #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 267e484f0198..fc0f928167e7 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -250,7 +250,6 @@ struct audit_context { #endif }; -#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) static inline int open_arg(int flags, int mask) { int n = ACC_MODE(flags); diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c index 8346938809b1..9a6c58881c0a 100644 --- a/security/tomoyo/file.c +++ b/security/tomoyo/file.c @@ -12,7 +12,6 @@ #include "common.h" #include "tomoyo.h" #include "realpath.h" -#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) /* * tomoyo_globally_readable_file_entry is a structure which is used for holding -- cgit v1.2.3 From 95ebc3a7930d5965b00bbedbf36bfd3eb9124d65 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 28 Oct 2009 01:46:33 +0100 Subject: Remove obsolete comment in fs.h This question was determined to be a bug which was fixed in commit 4a3b0a49. Signed-off-by: Andreas Gruenbacher Cc: Jan Blunck Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9e13b533aaef..7e3012e0ac06 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1624,8 +1624,6 @@ struct super_operations { * on the bit address once it is done. * * Q: What is the difference between I_WILL_FREE and I_FREEING? - * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on - * I_CLEAR? If not, why? */ #define I_DIRTY_SYNC 1 #define I_DIRTY_DATASYNC 2 -- cgit v1.2.3 From 45465487897a1c6d508b14b904dc5777f7ec7e04 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:26 -0800 Subject: kfifo: move struct kfifo in place This is a new generic kernel FIFO implementation. The current kernel fifo API is not very widely used, because it has to many constrains. Only 17 files in the current 2.6.31-rc5 used it. FIFO's are like list's a very basic thing and a kfifo API which handles the most use case would save a lot of development time and memory resources. I think this are the reasons why kfifo is not in use: - The API is to simple, important functions are missing - A fifo can be only allocated dynamically - There is a requirement of a spinlock whether you need it or not - There is no support for data records inside a fifo So I decided to extend the kfifo in a more generic way without blowing up the API to much. 
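[Editor's aside before the benefit list and the driver conversions that follow: a minimal sketch of the in-place style this patch introduces. The struct and field names are invented; the calls (kfifo_alloc() on an embedded struct kfifo, kfifo_put()/kfifo_get() with a caller buffer, kfifo_free() on teardown) mirror the conversions shown below, where kfifo_alloc() at this point in the series still takes an optional spinlock.]

#include <linux/kfifo.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_dev {
	struct kfifo events;		/* in place, no separate allocation */
	spinlock_t events_lock;
};

static int my_dev_init(struct my_dev *dev)
{
	spin_lock_init(&dev->events_lock);
	/* 128 bytes of FIFO storage, protected by events_lock */
	return kfifo_alloc(&dev->events, 128, GFP_KERNEL, &dev->events_lock);
}

static void my_dev_push(struct my_dev *dev, int ev)
{
	kfifo_put(&dev->events, (unsigned char *)&ev, sizeof(ev));
}

static int my_dev_pop(struct my_dev *dev, int *ev)
{
	return kfifo_get(&dev->events, (unsigned char *)ev,
			 sizeof(*ev)) == sizeof(*ev);
}

static void my_dev_exit(struct my_dev *dev)
{
	kfifo_free(&dev->events);
}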
The new API has the following benefits: - Generic usage: For kernel internal use and/or device driver. - Provide an API for the most use case. - Slim API: The whole API provides 25 functions. - Linux style habit. - DECLARE_KFIFO, DEFINE_KFIFO and INIT_KFIFO Macros - Direct copy_to_user from the fifo and copy_from_user into the fifo. - The kfifo itself is an in place member of the using data structure, this save an indirection access and does not waste the kernel allocator. - Lockless access: if only one reader and one writer is active on the fifo, which is the common use case, no additional locking is necessary. - Remove spinlock - give the user the freedom of choice what kind of locking to use if one is required. - Ability to handle records. Three type of records are supported: - Variable length records between 0-255 bytes, with a record size field of 1 bytes. - Variable length records between 0-65535 bytes, with a record size field of 2 bytes. - Fixed size records, which no record size field. - Preserve memory resource. - Performance! - Easy to use! This patch: Since most users want to have the kfifo as part of another object, reorganize the code to allow including struct kfifo in another data structure. This requires changing the kfifo_alloc and kfifo_init prototypes so that we pass an existing kfifo pointer into them. This patch changes the implementation and all existing users. [akpm@linux-foundation.org: fix warning] Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/nozomi.c | 21 +++++----- drivers/char/sonypi.c | 40 +++++++++--------- drivers/infiniband/hw/cxgb3/cxio_hal.h | 9 ++-- drivers/infiniband/hw/cxgb3/cxio_resource.c | 60 +++++++++++++------------- drivers/media/video/meye.c | 48 ++++++++++----------- drivers/media/video/meye.h | 4 +- drivers/net/wireless/libertas/cmd.c | 4 +- drivers/net/wireless/libertas/dev.h | 4 +- drivers/net/wireless/libertas/main.c | 16 ++++--- drivers/platform/x86/fujitsu-laptop.c | 18 ++++---- drivers/platform/x86/sony-laptop.c | 46 ++++++++++---------- drivers/scsi/libiscsi.c | 22 ++++------ drivers/scsi/libiscsi_tcp.c | 29 +++++++------ drivers/scsi/libsrp.c | 13 +++--- drivers/usb/host/fhci-sched.c | 10 ++--- drivers/usb/host/fhci-tds.c | 35 ++++++++-------- drivers/usb/host/fhci.h | 10 ++--- drivers/usb/serial/usb-serial.c | 5 +-- include/linux/kfifo.h | 11 ++--- include/scsi/libiscsi.h | 3 +- include/scsi/libiscsi_tcp.h | 2 +- include/scsi/libsrp.h | 2 +- kernel/kfifo.c | 65 +++++++++++++++-------------- net/dccp/probe.c | 20 ++++----- 24 files changed, 238 insertions(+), 259 deletions(-) (limited to 'include') diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index d3400b20444f..0f39bec28b45 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -358,7 +358,7 @@ struct port { u8 update_flow_control; struct ctrl_ul ctrl_ul; struct ctrl_dl ctrl_dl; - struct kfifo *fifo_ul; + struct kfifo fifo_ul; void __iomem *dl_addr[2]; u32 dl_size[2]; u8 toggle_dl; @@ -685,8 +685,8 @@ static int nozomi_read_config_table(struct nozomi *dc) dump_table(dc); for (i = PORT_MDM; i < MAX_PORT; i++) { - dc->port[i].fifo_ul = - kfifo_alloc(FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL); + kfifo_alloc(&dc->port[i].fifo_ul, + FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL); memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl)); memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul)); } @@ -798,7 +798,7 @@ 
static int send_data(enum port_type index, struct nozomi *dc) struct tty_struct *tty = tty_port_tty_get(&port->port); /* Get data from tty and place in buf for now */ - size = __kfifo_get(port->fifo_ul, dc->send_buf, + size = __kfifo_get(&port->fifo_ul, dc->send_buf, ul_size < SEND_BUF_MAX ? ul_size : SEND_BUF_MAX); if (size == 0) { @@ -988,11 +988,11 @@ static int receive_flow_control(struct nozomi *dc) } else if (old_ctrl.CTS == 0 && ctrl_dl.CTS == 1) { - if (__kfifo_len(dc->port[port].fifo_ul)) { + if (__kfifo_len(&dc->port[port].fifo_ul)) { DBG1("Enable interrupt (0x%04X) on port: %d", enable_ier, port); DBG1("Data in buffer [%d], enable transmit! ", - __kfifo_len(dc->port[port].fifo_ul)); + __kfifo_len(&dc->port[port].fifo_ul)); enable_transmit_ul(port, dc); } else { DBG1("No data in buffer..."); @@ -1536,8 +1536,7 @@ static void __devexit nozomi_card_exit(struct pci_dev *pdev) free_irq(pdev->irq, dc); for (i = 0; i < MAX_PORT; i++) - if (dc->port[i].fifo_ul) - kfifo_free(dc->port[i].fifo_ul); + kfifo_free(&dc->port[i].fifo_ul); kfree(dc->send_buf); @@ -1673,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer, goto exit; } - rval = __kfifo_put(port->fifo_ul, (unsigned char *)buffer, count); + rval = __kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count); /* notify card */ if (unlikely(dc == NULL)) { @@ -1721,7 +1720,7 @@ static int ntty_write_room(struct tty_struct *tty) if (!port->port.count) goto exit; - room = port->fifo_ul->size - __kfifo_len(port->fifo_ul); + room = port->fifo_ul.size - __kfifo_len(&port->fifo_ul); exit: mutex_unlock(&port->tty_sem); @@ -1878,7 +1877,7 @@ static s32 ntty_chars_in_buffer(struct tty_struct *tty) goto exit_in_buffer; } - rval = __kfifo_len(port->fifo_ul); + rval = __kfifo_len(&port->fifo_ul); exit_in_buffer: return rval; diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 8c262aaf7c26..9e6efb1f029f 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -487,7 +487,7 @@ static struct sonypi_device { int camera_power; int bluetooth_power; struct mutex lock; - struct kfifo *fifo; + struct kfifo fifo; spinlock_t fifo_lock; wait_queue_head_t fifo_proc_list; struct fasync_struct *fifo_async; @@ -496,7 +496,7 @@ static struct sonypi_device { struct input_dev *input_jog_dev; struct input_dev *input_key_dev; struct work_struct input_work; - struct kfifo *input_fifo; + struct kfifo input_fifo; spinlock_t input_fifo_lock; } sonypi_device; @@ -777,7 +777,7 @@ static void input_keyrelease(struct work_struct *work) { struct sonypi_keypress kp; - while (kfifo_get(sonypi_device.input_fifo, (unsigned char *)&kp, + while (kfifo_get(&sonypi_device.input_fifo, (unsigned char *)&kp, sizeof(kp)) == sizeof(kp)) { msleep(10); input_report_key(kp.dev, kp.key, 0); @@ -827,7 +827,7 @@ static void sonypi_report_input_event(u8 event) if (kp.dev) { input_report_key(kp.dev, kp.key, 1); input_sync(kp.dev); - kfifo_put(sonypi_device.input_fifo, + kfifo_put(&sonypi_device.input_fifo, (unsigned char *)&kp, sizeof(kp)); schedule_work(&sonypi_device.input_work); } @@ -880,7 +880,7 @@ found: acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event); #endif - kfifo_put(sonypi_device.fifo, (unsigned char *)&event, sizeof(event)); + kfifo_put(&sonypi_device.fifo, (unsigned char *)&event, sizeof(event)); kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_device.fifo_proc_list); @@ -906,7 +906,7 @@ static int sonypi_misc_open(struct inode *inode, struct file *file) 
mutex_lock(&sonypi_device.lock); /* Flush input queue on first open */ if (!sonypi_device.open_count) - kfifo_reset(sonypi_device.fifo); + kfifo_reset(&sonypi_device.fifo); sonypi_device.open_count++; mutex_unlock(&sonypi_device.lock); unlock_kernel(); @@ -919,17 +919,17 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, ssize_t ret; unsigned char c; - if ((kfifo_len(sonypi_device.fifo) == 0) && + if ((kfifo_len(&sonypi_device.fifo) == 0) && (file->f_flags & O_NONBLOCK)) return -EAGAIN; ret = wait_event_interruptible(sonypi_device.fifo_proc_list, - kfifo_len(sonypi_device.fifo) != 0); + kfifo_len(&sonypi_device.fifo) != 0); if (ret) return ret; while (ret < count && - (kfifo_get(sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) { + (kfifo_get(&sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; ret++; @@ -946,7 +946,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, static unsigned int sonypi_misc_poll(struct file *file, poll_table *wait) { poll_wait(file, &sonypi_device.fifo_proc_list, wait); - if (kfifo_len(sonypi_device.fifo)) + if (kfifo_len(&sonypi_device.fifo)) return POLLIN | POLLRDNORM; return 0; } @@ -1313,11 +1313,11 @@ static int __devinit sonypi_probe(struct platform_device *dev) "http://www.linux.it/~malattia/wiki/index.php/Sony_drivers\n"); spin_lock_init(&sonypi_device.fifo_lock); - sonypi_device.fifo = kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL, + error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL, &sonypi_device.fifo_lock); - if (IS_ERR(sonypi_device.fifo)) { + if (error) { printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); - return PTR_ERR(sonypi_device.fifo); + return error; } init_waitqueue_head(&sonypi_device.fifo_proc_list); @@ -1393,12 +1393,10 @@ static int __devinit sonypi_probe(struct platform_device *dev) } spin_lock_init(&sonypi_device.input_fifo_lock); - sonypi_device.input_fifo = - kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL, - &sonypi_device.input_fifo_lock); - if (IS_ERR(sonypi_device.input_fifo)) { + error = kfifo_alloc(&sonypi_device.input_fifo, SONYPI_BUF_SIZE, + GFP_KERNEL, &sonypi_device.input_fifo_lock); + if (error) { printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); - error = PTR_ERR(sonypi_device.input_fifo); goto err_inpdev_unregister; } @@ -1423,7 +1421,7 @@ static int __devinit sonypi_probe(struct platform_device *dev) pci_disable_device(pcidev); err_put_pcidev: pci_dev_put(pcidev); - kfifo_free(sonypi_device.fifo); + kfifo_free(&sonypi_device.fifo); return error; } @@ -1438,7 +1436,7 @@ static int __devexit sonypi_remove(struct platform_device *dev) if (useinput) { input_unregister_device(sonypi_device.input_key_dev); input_unregister_device(sonypi_device.input_jog_dev); - kfifo_free(sonypi_device.input_fifo); + kfifo_free(&sonypi_device.input_fifo); } misc_deregister(&sonypi_misc_device); @@ -1451,7 +1449,7 @@ static int __devexit sonypi_remove(struct platform_device *dev) pci_dev_put(sonypi_device.dev); } - kfifo_free(sonypi_device.fifo); + kfifo_free(&sonypi_device.fifo); return 0; } diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index bfd03bf8be54..f3d440cc68f2 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -34,6 +34,7 @@ #include #include +#include #include "t3_cpl.h" #include "t3cdev.h" @@ -75,13 +76,13 @@ struct cxio_hal_ctrl_qp { }; struct cxio_hal_resource { - struct kfifo *tpt_fifo; + struct kfifo tpt_fifo; spinlock_t tpt_fifo_lock; - struct kfifo 
*qpid_fifo; + struct kfifo qpid_fifo; spinlock_t qpid_fifo_lock; - struct kfifo *cqid_fifo; + struct kfifo cqid_fifo; spinlock_t cqid_fifo_lock; - struct kfifo *pdid_fifo; + struct kfifo pdid_fifo; spinlock_t pdid_fifo_lock; }; diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index bd233c087653..65072bdfc1bf 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -39,12 +39,12 @@ #include "cxio_resource.h" #include "cxio_hal.h" -static struct kfifo *rhdl_fifo; +static struct kfifo rhdl_fifo; static spinlock_t rhdl_fifo_lock; #define RANDOM_SIZE 16 -static int __cxio_init_resource_fifo(struct kfifo **fifo, +static int __cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t *fifo_lock, u32 nr, u32 skip_low, u32 skip_high, @@ -55,12 +55,11 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo, u32 rarray[16]; spin_lock_init(fifo_lock); - *fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock); - if (IS_ERR(*fifo)) + if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL, fifo_lock)) return -ENOMEM; for (i = 0; i < skip_low + skip_high; i++) - __kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32)); + __kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)); if (random) { j = 0; random_bytes = random32(); @@ -72,33 +71,33 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo, random_bytes = random32(); } idx = (random_bytes >> (j * 2)) & 0xF; - __kfifo_put(*fifo, + __kfifo_put(fifo, (unsigned char *) &rarray[idx], sizeof(u32)); rarray[idx] = i; j++; } for (i = 0; i < RANDOM_SIZE; i++) - __kfifo_put(*fifo, + __kfifo_put(fifo, (unsigned char *) &rarray[i], sizeof(u32)); } else for (i = skip_low; i < nr - skip_high; i++) - __kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32)); + __kfifo_put(fifo, (unsigned char *) &i, sizeof(u32)); for (i = 0; i < skip_low + skip_high; i++) - kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32)); + kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)); return 0; } -static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock, +static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, u32 nr, u32 skip_low, u32 skip_high) { return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, skip_high, 0)); } -static int cxio_init_resource_fifo_random(struct kfifo **fifo, +static int cxio_init_resource_fifo_random(struct kfifo *fifo, spinlock_t * fifo_lock, u32 nr, u32 skip_low, u32 skip_high) { @@ -113,15 +112,14 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); - rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32), + if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), GFP_KERNEL, - &rdev_p->rscp->qpid_fifo_lock); - if (IS_ERR(rdev_p->rscp->qpid_fifo)) + &rdev_p->rscp->qpid_fifo_lock)) return -ENOMEM; for (i = 16; i < T3_MAX_NUM_QP; i++) if (!(i & rdev_p->qpmask)) - __kfifo_put(rdev_p->rscp->qpid_fifo, + __kfifo_put(&rdev_p->rscp->qpid_fifo, (unsigned char *) &i, sizeof(u32)); return 0; } @@ -134,7 +132,7 @@ int cxio_hal_init_rhdl_resource(u32 nr_rhdl) void cxio_hal_destroy_rhdl_resource(void) { - kfifo_free(rhdl_fifo); + kfifo_free(&rhdl_fifo); } /* nr_* must be power of 2 */ @@ -167,11 +165,11 @@ int cxio_hal_init_resource(struct cxio_rdev *rdev_p, goto pdid_err; return 0; pdid_err: - kfifo_free(rscp->cqid_fifo); + kfifo_free(&rscp->cqid_fifo); cqid_err: - kfifo_free(rscp->qpid_fifo); + 
kfifo_free(&rscp->qpid_fifo); qpid_err: - kfifo_free(rscp->tpt_fifo); + kfifo_free(&rscp->tpt_fifo); tpt_err: return -ENOMEM; } @@ -195,17 +193,17 @@ static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry) u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(rscp->tpt_fifo); + return cxio_hal_get_resource(&rscp->tpt_fifo); } void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) { - cxio_hal_put_resource(rscp->tpt_fifo, stag); + cxio_hal_put_resource(&rscp->tpt_fifo, stag); } u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) { - u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo); + u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo); PDBG("%s qpid 0x%x\n", __func__, qpid); return qpid; } @@ -213,35 +211,35 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) { PDBG("%s qpid 0x%x\n", __func__, qpid); - cxio_hal_put_resource(rscp->qpid_fifo, qpid); + cxio_hal_put_resource(&rscp->qpid_fifo, qpid); } u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(rscp->cqid_fifo); + return cxio_hal_get_resource(&rscp->cqid_fifo); } void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) { - cxio_hal_put_resource(rscp->cqid_fifo, cqid); + cxio_hal_put_resource(&rscp->cqid_fifo, cqid); } u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(rscp->pdid_fifo); + return cxio_hal_get_resource(&rscp->pdid_fifo); } void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) { - cxio_hal_put_resource(rscp->pdid_fifo, pdid); + cxio_hal_put_resource(&rscp->pdid_fifo, pdid); } void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) { - kfifo_free(rscp->tpt_fifo); - kfifo_free(rscp->cqid_fifo); - kfifo_free(rscp->qpid_fifo); - kfifo_free(rscp->pdid_fifo); + kfifo_free(&rscp->tpt_fifo); + kfifo_free(&rscp->cqid_fifo); + kfifo_free(&rscp->qpid_fifo); + kfifo_free(&rscp->pdid_fifo); kfree(rscp); } diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c index 6ffa64cd1c6d..dacbbb839b9e 100644 --- a/drivers/media/video/meye.c +++ b/drivers/media/video/meye.c @@ -800,7 +800,7 @@ again: return IRQ_HANDLED; if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) { - if (kfifo_get(meye.grabq, (unsigned char *)&reqnr, + if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr, sizeof(int)) != sizeof(int)) { mchip_free_frame(); return IRQ_HANDLED; @@ -811,7 +811,7 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put(meye.doneq, (unsigned char *)&reqnr, sizeof(int)); + kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int)); wake_up_interruptible(&meye.proc_list); } else { int size; @@ -820,7 +820,7 @@ again: mchip_free_frame(); goto again; } - if (kfifo_get(meye.grabq, (unsigned char *)&reqnr, + if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr, sizeof(int)) != sizeof(int)) { mchip_free_frame(); goto again; @@ -831,7 +831,7 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put(meye.doneq, (unsigned char *)&reqnr, sizeof(int)); + kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int)); wake_up_interruptible(&meye.proc_list); } mchip_free_frame(); @@ -859,8 +859,8 @@ static int meye_open(struct file *file) for (i = 0; i < MEYE_MAX_BUFNBRS; i++) meye.grab_buffer[i].state = 
MEYE_BUF_UNUSED; - kfifo_reset(meye.grabq); - kfifo_reset(meye.doneq); + kfifo_reset(&meye.grabq); + kfifo_reset(&meye.doneq); return 0; } @@ -933,7 +933,7 @@ static int meyeioc_qbuf_capt(int *nb) mchip_cont_compression_start(); meye.grab_buffer[*nb].state = MEYE_BUF_USING; - kfifo_put(meye.grabq, (unsigned char *)nb, sizeof(int)); + kfifo_put(&meye.grabq, (unsigned char *)nb, sizeof(int)); mutex_unlock(&meye.lock); return 0; @@ -965,7 +965,7 @@ static int meyeioc_sync(struct file *file, void *fh, int *i) /* fall through */ case MEYE_BUF_DONE: meye.grab_buffer[*i].state = MEYE_BUF_UNUSED; - kfifo_get(meye.doneq, (unsigned char *)&unused, sizeof(int)); + kfifo_get(&meye.doneq, (unsigned char *)&unused, sizeof(int)); } *i = meye.grab_buffer[*i].size; mutex_unlock(&meye.lock); @@ -1452,7 +1452,7 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) buf->flags |= V4L2_BUF_FLAG_QUEUED; buf->flags &= ~V4L2_BUF_FLAG_DONE; meye.grab_buffer[buf->index].state = MEYE_BUF_USING; - kfifo_put(meye.grabq, (unsigned char *)&buf->index, sizeof(int)); + kfifo_put(&meye.grabq, (unsigned char *)&buf->index, sizeof(int)); mutex_unlock(&meye.lock); return 0; @@ -1467,18 +1467,18 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) mutex_lock(&meye.lock); - if (kfifo_len(meye.doneq) == 0 && file->f_flags & O_NONBLOCK) { + if (kfifo_len(&meye.doneq) == 0 && file->f_flags & O_NONBLOCK) { mutex_unlock(&meye.lock); return -EAGAIN; } if (wait_event_interruptible(meye.proc_list, - kfifo_len(meye.doneq) != 0) < 0) { + kfifo_len(&meye.doneq) != 0) < 0) { mutex_unlock(&meye.lock); return -EINTR; } - if (!kfifo_get(meye.doneq, (unsigned char *)&reqnr, + if (!kfifo_get(&meye.doneq, (unsigned char *)&reqnr, sizeof(int))) { mutex_unlock(&meye.lock); return -EBUSY; @@ -1529,8 +1529,8 @@ static int vidioc_streamoff(struct file *file, void *fh, enum v4l2_buf_type i) { mutex_lock(&meye.lock); mchip_hic_stop(); - kfifo_reset(meye.grabq); - kfifo_reset(meye.doneq); + kfifo_reset(&meye.grabq); + kfifo_reset(&meye.doneq); for (i = 0; i < MEYE_MAX_BUFNBRS; i++) meye.grab_buffer[i].state = MEYE_BUF_UNUSED; @@ -1572,7 +1572,7 @@ static unsigned int meye_poll(struct file *file, poll_table *wait) mutex_lock(&meye.lock); poll_wait(file, &meye.proc_list, wait); - if (kfifo_len(meye.doneq)) + if (kfifo_len(&meye.doneq)) res = POLLIN | POLLRDNORM; mutex_unlock(&meye.lock); return res; @@ -1745,16 +1745,14 @@ static int __devinit meye_probe(struct pci_dev *pcidev, } spin_lock_init(&meye.grabq_lock); - meye.grabq = kfifo_alloc(sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, - &meye.grabq_lock); - if (IS_ERR(meye.grabq)) { + if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, + &meye.grabq_lock)) { printk(KERN_ERR "meye: fifo allocation failed\n"); goto outkfifoalloc1; } spin_lock_init(&meye.doneq_lock); - meye.doneq = kfifo_alloc(sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, - &meye.doneq_lock); - if (IS_ERR(meye.doneq)) { + if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, + &meye.doneq_lock)) { printk(KERN_ERR "meye: fifo allocation failed\n"); goto outkfifoalloc2; } @@ -1868,9 +1866,9 @@ outregions: outenabledev: sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERA, 0); outsonypienable: - kfifo_free(meye.doneq); + kfifo_free(&meye.doneq); outkfifoalloc2: - kfifo_free(meye.grabq); + kfifo_free(&meye.grabq); outkfifoalloc1: vfree(meye.grab_temp); outvmalloc: @@ -1901,8 +1899,8 @@ static void __devexit meye_remove(struct pci_dev *pcidev) 
sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERA, 0); - kfifo_free(meye.doneq); - kfifo_free(meye.grabq); + kfifo_free(&meye.doneq); + kfifo_free(&meye.grabq); vfree(meye.grab_temp); diff --git a/drivers/media/video/meye.h b/drivers/media/video/meye.h index 5f70a106ba2b..1321ad5d6597 100644 --- a/drivers/media/video/meye.h +++ b/drivers/media/video/meye.h @@ -303,9 +303,9 @@ struct meye { struct meye_grab_buffer grab_buffer[MEYE_MAX_BUFNBRS]; int vma_use_count[MEYE_MAX_BUFNBRS]; /* mmap count */ struct mutex lock; /* mutex for open/mmap... */ - struct kfifo *grabq; /* queue for buffers to be grabbed */ + struct kfifo grabq; /* queue for buffers to be grabbed */ spinlock_t grabq_lock; /* lock protecting the queue */ - struct kfifo *doneq; /* queue for grabbed buffers */ + struct kfifo doneq; /* queue for grabbed buffers */ spinlock_t doneq_lock; /* lock protecting the queue */ wait_queue_head_t proc_list; /* wait queue */ struct video_device *video_dev; /* video device parameters */ diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index b9b371bfa30f..ffed17f4f506 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -1365,7 +1365,7 @@ static void lbs_send_confirmsleep(struct lbs_private *priv) priv->dnld_sent = DNLD_RES_RECEIVED; /* If nothing to do, go back to sleep (?) */ - if (!__kfifo_len(priv->event_fifo) && !priv->resp_len[priv->resp_idx]) + if (!__kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx]) priv->psstate = PS_STATE_SLEEP; spin_unlock_irqrestore(&priv->driver_lock, flags); @@ -1439,7 +1439,7 @@ void lbs_ps_confirm_sleep(struct lbs_private *priv) } /* Pending events or command responses? */ - if (__kfifo_len(priv->event_fifo) || priv->resp_len[priv->resp_idx]) { + if (__kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) { allowed = 0; lbs_deb_host("pending events or command responses\n"); } diff --git a/drivers/net/wireless/libertas/dev.h b/drivers/net/wireless/libertas/dev.h index 6a8d2b291d8c..05bb298dfae9 100644 --- a/drivers/net/wireless/libertas/dev.h +++ b/drivers/net/wireless/libertas/dev.h @@ -10,7 +10,7 @@ #include "scan.h" #include "assoc.h" - +#include /** sleep_params */ struct sleep_params { @@ -120,7 +120,7 @@ struct lbs_private { u32 resp_len[2]; /* Events sent from hardware to driver */ - struct kfifo *event_fifo; + struct kfifo event_fifo; /** thread to service interrupts */ struct task_struct *main_thread; diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index db38a5a719fa..403909287414 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -459,7 +459,7 @@ static int lbs_thread(void *data) else if (!list_empty(&priv->cmdpendingq) && !(priv->wakeup_dev_required)) shouldsleep = 0; /* We have a command to send */ - else if (__kfifo_len(priv->event_fifo)) + else if (__kfifo_len(&priv->event_fifo)) shouldsleep = 0; /* We have an event to process */ else shouldsleep = 1; /* No command */ @@ -511,9 +511,9 @@ static int lbs_thread(void *data) /* Process hardware events, e.g. 
card removed, link lost */ spin_lock_irq(&priv->driver_lock); - while (__kfifo_len(priv->event_fifo)) { + while (__kfifo_len(&priv->event_fifo)) { u32 event; - __kfifo_get(priv->event_fifo, (unsigned char *) &event, + __kfifo_get(&priv->event_fifo, (unsigned char *) &event, sizeof(event)); spin_unlock_irq(&priv->driver_lock); lbs_process_event(priv, event); @@ -883,10 +883,9 @@ static int lbs_init_adapter(struct lbs_private *priv) priv->resp_len[0] = priv->resp_len[1] = 0; /* Create the event FIFO */ - priv->event_fifo = kfifo_alloc(sizeof(u32) * 16, GFP_KERNEL, NULL); - if (IS_ERR(priv->event_fifo)) { + ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL, NULL); + if (ret) { lbs_pr_err("Out of memory allocating event FIFO buffer\n"); - ret = -ENOMEM; goto out; } @@ -901,8 +900,7 @@ static void lbs_free_adapter(struct lbs_private *priv) lbs_deb_enter(LBS_DEB_MAIN); lbs_free_cmd_buffer(priv); - if (priv->event_fifo) - kfifo_free(priv->event_fifo); + kfifo_free(&priv->event_fifo); del_timer(&priv->command_timer); del_timer(&priv->auto_deepsleep_timer); kfree(priv->networks); @@ -1177,7 +1175,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event) if (priv->psstate == PS_STATE_SLEEP) priv->psstate = PS_STATE_AWAKE; - __kfifo_put(priv->event_fifo, (unsigned char *) &event, sizeof(u32)); + __kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32)); wake_up_interruptible(&priv->waitq); diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index bcd4ba8be7db..f999fba0e25e 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -164,7 +164,7 @@ struct fujitsu_hotkey_t { struct input_dev *input; char phys[32]; struct platform_device *pf_device; - struct kfifo *fifo; + struct kfifo fifo; spinlock_t fifo_lock; int rfkill_supported; int rfkill_state; @@ -824,12 +824,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) /* kfifo */ spin_lock_init(&fujitsu_hotkey->fifo_lock); - fujitsu_hotkey->fifo = - kfifo_alloc(RINGBUFFERSIZE * sizeof(int), GFP_KERNEL, - &fujitsu_hotkey->fifo_lock); - if (IS_ERR(fujitsu_hotkey->fifo)) { + error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int), + GFP_KERNEL, &fujitsu_hotkey->fifo_lock); + if (error) { printk(KERN_ERR "kfifo_alloc failed\n"); - error = PTR_ERR(fujitsu_hotkey->fifo); goto err_stop; } @@ -934,7 +932,7 @@ err_unregister_input_dev: err_free_input_dev: input_free_device(input); err_free_fifo: - kfifo_free(fujitsu_hotkey->fifo); + kfifo_free(&fujitsu_hotkey->fifo); err_stop: return result; } @@ -956,7 +954,7 @@ static int acpi_fujitsu_hotkey_remove(struct acpi_device *device, int type) input_free_device(input); - kfifo_free(fujitsu_hotkey->fifo); + kfifo_free(&fujitsu_hotkey->fifo); fujitsu_hotkey->acpi_handle = NULL; @@ -1008,7 +1006,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) vdbg_printk(FUJLAPTOP_DBG_TRACE, "Push keycode into ringbuffer [%d]\n", keycode); - status = kfifo_put(fujitsu_hotkey->fifo, + status = kfifo_put(&fujitsu_hotkey->fifo, (unsigned char *)&keycode, sizeof(keycode)); if (status != sizeof(keycode)) { @@ -1022,7 +1020,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) } else if (keycode == 0) { while ((status = kfifo_get - (fujitsu_hotkey->fifo, (unsigned char *) + (&fujitsu_hotkey->fifo, (unsigned char *) &keycode_r, sizeof (keycode_r))) == sizeof(keycode_r)) { diff --git a/drivers/platform/x86/sony-laptop.c 
b/drivers/platform/x86/sony-laptop.c index 7a2cc8a5c975..04625a048e74 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -142,7 +142,7 @@ struct sony_laptop_input_s { atomic_t users; struct input_dev *jog_dev; struct input_dev *key_dev; - struct kfifo *fifo; + struct kfifo fifo; spinlock_t fifo_lock; struct workqueue_struct *wq; }; @@ -300,7 +300,7 @@ static void do_sony_laptop_release_key(struct work_struct *work) { struct sony_laptop_keypress kp; - while (kfifo_get(sony_laptop_input.fifo, (unsigned char *)&kp, + while (kfifo_get(&sony_laptop_input.fifo, (unsigned char *)&kp, sizeof(kp)) == sizeof(kp)) { msleep(10); input_report_key(kp.dev, kp.key, 0); @@ -362,7 +362,7 @@ static void sony_laptop_report_input_event(u8 event) /* we emit the scancode so we can always remap the key */ input_event(kp.dev, EV_MSC, MSC_SCAN, event); input_sync(kp.dev); - kfifo_put(sony_laptop_input.fifo, + kfifo_put(&sony_laptop_input.fifo, (unsigned char *)&kp, sizeof(kp)); if (!work_pending(&sony_laptop_release_key_work)) @@ -385,12 +385,11 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device) /* kfifo */ spin_lock_init(&sony_laptop_input.fifo_lock); - sony_laptop_input.fifo = - kfifo_alloc(SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, + error = + kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, &sony_laptop_input.fifo_lock); - if (IS_ERR(sony_laptop_input.fifo)) { + if (error) { printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n"); - error = PTR_ERR(sony_laptop_input.fifo); goto err_dec_users; } @@ -474,7 +473,7 @@ err_destroy_wq: destroy_workqueue(sony_laptop_input.wq); err_free_kfifo: - kfifo_free(sony_laptop_input.fifo); + kfifo_free(&sony_laptop_input.fifo); err_dec_users: atomic_dec(&sony_laptop_input.users); @@ -500,7 +499,7 @@ static void sony_laptop_remove_input(void) } destroy_workqueue(sony_laptop_input.wq); - kfifo_free(sony_laptop_input.fifo); + kfifo_free(&sony_laptop_input.fifo); } /*********** Platform Device ***********/ @@ -2079,7 +2078,7 @@ static struct attribute_group spic_attribute_group = { struct sonypi_compat_s { struct fasync_struct *fifo_async; - struct kfifo *fifo; + struct kfifo fifo; spinlock_t fifo_lock; wait_queue_head_t fifo_proc_list; atomic_t open_count; @@ -2104,12 +2103,12 @@ static int sonypi_misc_open(struct inode *inode, struct file *file) /* Flush input queue on first open */ unsigned long flags; - spin_lock_irqsave(sonypi_compat.fifo->lock, flags); + spin_lock_irqsave(&sonypi_compat.fifo_lock, flags); if (atomic_inc_return(&sonypi_compat.open_count) == 1) - __kfifo_reset(sonypi_compat.fifo); + __kfifo_reset(&sonypi_compat.fifo); - spin_unlock_irqrestore(sonypi_compat.fifo->lock, flags); + spin_unlock_irqrestore(&sonypi_compat.fifo_lock, flags); return 0; } @@ -2120,17 +2119,17 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, ssize_t ret; unsigned char c; - if ((kfifo_len(sonypi_compat.fifo) == 0) && + if ((kfifo_len(&sonypi_compat.fifo) == 0) && (file->f_flags & O_NONBLOCK)) return -EAGAIN; ret = wait_event_interruptible(sonypi_compat.fifo_proc_list, - kfifo_len(sonypi_compat.fifo) != 0); + kfifo_len(&sonypi_compat.fifo) != 0); if (ret) return ret; while (ret < count && - (kfifo_get(sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) { + (kfifo_get(&sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; ret++; @@ -2147,7 +2146,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, static unsigned int sonypi_misc_poll(struct file 
*file, poll_table *wait) { poll_wait(file, &sonypi_compat.fifo_proc_list, wait); - if (kfifo_len(sonypi_compat.fifo)) + if (kfifo_len(&sonypi_compat.fifo)) return POLLIN | POLLRDNORM; return 0; } @@ -2309,7 +2308,7 @@ static struct miscdevice sonypi_misc_device = { static void sonypi_compat_report_event(u8 event) { - kfifo_put(sonypi_compat.fifo, (unsigned char *)&event, sizeof(event)); + kfifo_put(&sonypi_compat.fifo, (unsigned char *)&event, sizeof(event)); kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_compat.fifo_proc_list); } @@ -2319,11 +2318,12 @@ static int sonypi_compat_init(void) int error; spin_lock_init(&sonypi_compat.fifo_lock); - sonypi_compat.fifo = kfifo_alloc(SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, + error = + kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, &sonypi_compat.fifo_lock); - if (IS_ERR(sonypi_compat.fifo)) { + if (error) { printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n"); - return PTR_ERR(sonypi_compat.fifo); + return error; } init_waitqueue_head(&sonypi_compat.fifo_proc_list); @@ -2342,14 +2342,14 @@ static int sonypi_compat_init(void) return 0; err_free_kfifo: - kfifo_free(sonypi_compat.fifo); + kfifo_free(&sonypi_compat.fifo); return error; } static void sonypi_compat_exit(void) { misc_deregister(&sonypi_misc_device); - kfifo_free(sonypi_compat.fifo); + kfifo_free(&sonypi_compat.fifo); } #else static int sonypi_compat_init(void) { return 0; } diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index b7689f3d05f5..cf0aa7e90be9 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task) if (conn->login_task == task) return; - __kfifo_put(session->cmdpool.queue, (void*)&task, sizeof(void*)); + __kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*)); if (sc) { task->sc = NULL; @@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE); BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED); - if (!__kfifo_get(session->cmdpool.queue, + if (!__kfifo_get(&session->cmdpool.queue, (void*)&task, sizeof(void*))) return NULL; } @@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn, { struct iscsi_task *task; - if (!__kfifo_get(conn->session->cmdpool.queue, + if (!__kfifo_get(&conn->session->cmdpool.queue, (void *) &task, sizeof(void *))) return NULL; @@ -2461,12 +2461,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size) if (q->pool == NULL) return -ENOMEM; - q->queue = kfifo_init((void*)q->pool, max * sizeof(void*), - GFP_KERNEL, NULL); - if (IS_ERR(q->queue)) { - q->queue = NULL; - goto enomem; - } + kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*), NULL); for (i = 0; i < max; i++) { q->pool[i] = kzalloc(item_size, GFP_KERNEL); @@ -2474,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size) q->max = i; goto enomem; } - __kfifo_put(q->queue, (void*)&q->pool[i], sizeof(void*)); + __kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*)); } if (items) { @@ -2497,7 +2492,6 @@ void iscsi_pool_free(struct iscsi_pool *q) for (i = 0; i < q->max; i++) kfree(q->pool[i]); kfree(q->pool); - kfree(q->queue); } EXPORT_SYMBOL_GPL(iscsi_pool_free); @@ -2825,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, /* allocate login_task used for the login/text sequences */ spin_lock_bh(&session->lock); - if 
(!__kfifo_get(session->cmdpool.queue, + if (!__kfifo_get(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*))) { spin_unlock_bh(&session->lock); @@ -2845,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, return cls_conn; login_task_data_alloc_fail: - __kfifo_put(session->cmdpool.queue, (void*)&conn->login_task, + __kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); login_task_alloc_fail: iscsi_destroy_conn(cls_conn); @@ -2908,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); kfree(conn->persistent_address); - __kfifo_put(session->cmdpool.queue, (void*)&conn->login_task, + __kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); if (session->leadconn == conn) session->leadconn = NULL; diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index ca25ee5190b0..a83ee56a185e 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task) return; /* flush task's r2t queues */ - while (__kfifo_get(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { - __kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t, + while (__kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { + __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n"); } r2t = tcp_task->r2t; if (r2t != NULL) { - __kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t, + __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); tcp_task->r2t = NULL; } @@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) return 0; } - rc = __kfifo_get(tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); + rc = __kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); if (!rc) { iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. 
" "Target has sent more R2Ts than it " @@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) if (r2t->data_length == 0) { iscsi_conn_printk(KERN_ERR, conn, "invalid R2T with zero data len\n"); - __kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t, + __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) "invalid R2T with data len %u at offset %u " "and total length %d\n", r2t->data_length, r2t->data_offset, scsi_out(task->sc)->length); - __kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t, + __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) r2t->sent = 0; tcp_task->exp_datasn = r2tsn + 1; - __kfifo_put(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); + __kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); conn->r2t_pdus_cnt++; iscsi_requeue_task(task); @@ -951,7 +951,7 @@ int iscsi_tcp_task_init(struct iscsi_task *task) return conn->session->tt->init_pdu(task, 0, task->data_count); } - BUG_ON(__kfifo_len(tcp_task->r2tqueue)); + BUG_ON(__kfifo_len(&tcp_task->r2tqueue)); tcp_task->exp_datasn = 0; /* Prepare PDU, optionally w/ immediate data */ @@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) if (r2t->data_length <= r2t->sent) { ISCSI_DBG_TCP(task->conn, " done with r2t %p\n", r2t); - __kfifo_put(tcp_task->r2tpool.queue, + __kfifo_put(&tcp_task->r2tpool.queue, (void *)&tcp_task->r2t, sizeof(void *)); tcp_task->r2t = r2t = NULL; @@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) } if (r2t == NULL) { - __kfifo_get(tcp_task->r2tqueue, + __kfifo_get(&tcp_task->r2tqueue, (void *)&tcp_task->r2t, sizeof(void *)); r2t = tcp_task->r2t; } @@ -1127,9 +1127,8 @@ int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session) } /* R2T xmit queue */ - tcp_task->r2tqueue = kfifo_alloc( - session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL); - if (tcp_task->r2tqueue == ERR_PTR(-ENOMEM)) { + if (kfifo_alloc(&tcp_task->r2tqueue, + session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL)) { iscsi_pool_free(&tcp_task->r2tpool); goto r2t_alloc_fail; } @@ -1142,7 +1141,7 @@ r2t_alloc_fail: struct iscsi_task *task = session->cmds[i]; struct iscsi_tcp_task *tcp_task = task->dd_data; - kfifo_free(tcp_task->r2tqueue); + kfifo_free(&tcp_task->r2tqueue); iscsi_pool_free(&tcp_task->r2tpool); } return -ENOMEM; @@ -1157,7 +1156,7 @@ void iscsi_tcp_r2tpool_free(struct iscsi_session *session) struct iscsi_task *task = session->cmds[i]; struct iscsi_tcp_task *tcp_task = task->dd_data; - kfifo_free(tcp_task->r2tqueue); + kfifo_free(&tcp_task->r2tqueue); iscsi_pool_free(&tcp_task->r2tpool); } } diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c index 9ad38e81e343..b1b5e51ca8e3 100644 --- a/drivers/scsi/libsrp.c +++ b/drivers/scsi/libsrp.c @@ -58,19 +58,16 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max, goto free_pool; spin_lock_init(&q->lock); - q->queue = kfifo_init((void *) q->pool, max * sizeof(void *), - GFP_KERNEL, &q->lock); - if (IS_ERR(q->queue)) - goto free_item; + kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *), + &q->lock); for (i = 0, iue = q->items; i < max; i++) { - __kfifo_put(q->queue, (void *) &iue, sizeof(void *)); + __kfifo_put(&q->queue, (void *) &iue, 
sizeof(void *)); iue->sbuf = ring[i]; iue++; } return 0; -free_item: kfree(q->items); free_pool: kfree(q->pool); @@ -167,7 +164,7 @@ struct iu_entry *srp_iu_get(struct srp_target *target) { struct iu_entry *iue = NULL; - kfifo_get(target->iu_queue.queue, (void *) &iue, sizeof(void *)); + kfifo_get(&target->iu_queue.queue, (void *) &iue, sizeof(void *)); if (!iue) return iue; iue->target = target; @@ -179,7 +176,7 @@ EXPORT_SYMBOL_GPL(srp_iu_get); void srp_iu_put(struct iu_entry *iue) { - kfifo_put(iue->target->iu_queue.queue, (void *) &iue, sizeof(void *)); + kfifo_put(&iue->target->iu_queue.queue, (void *) &iue, sizeof(void *)); } EXPORT_SYMBOL_GPL(srp_iu_put); diff --git a/drivers/usb/host/fhci-sched.c b/drivers/usb/host/fhci-sched.c index 00a29855d0c4..ff43747a614f 100644 --- a/drivers/usb/host/fhci-sched.c +++ b/drivers/usb/host/fhci-sched.c @@ -37,7 +37,7 @@ static void recycle_frame(struct fhci_usb *usb, struct packet *pkt) pkt->info = 0; pkt->priv_data = NULL; - cq_put(usb->ep0->empty_frame_Q, pkt); + cq_put(&usb->ep0->empty_frame_Q, pkt); } /* confirm submitted packet */ @@ -57,7 +57,7 @@ void fhci_transaction_confirm(struct fhci_usb *usb, struct packet *pkt) if ((td->data + td->actual_len) && trans_len) memcpy(td->data + td->actual_len, pkt->data, trans_len); - cq_put(usb->ep0->dummy_packets_Q, pkt->data); + cq_put(&usb->ep0->dummy_packets_Q, pkt->data); } recycle_frame(usb, pkt); @@ -213,7 +213,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td) } /* update frame object fields before transmitting */ - pkt = cq_get(usb->ep0->empty_frame_Q); + pkt = cq_get(&usb->ep0->empty_frame_Q); if (!pkt) { fhci_dbg(usb->fhci, "there is no empty frame\n"); return -1; @@ -222,7 +222,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td) pkt->info = 0; if (data == NULL) { - data = cq_get(usb->ep0->dummy_packets_Q); + data = cq_get(&usb->ep0->dummy_packets_Q); BUG_ON(!data); pkt->info = PKT_DUMMY_PACKET; } @@ -246,7 +246,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td) list_del_init(&td->frame_lh); td->status = USB_TD_OK; if (pkt->info & PKT_DUMMY_PACKET) - cq_put(usb->ep0->dummy_packets_Q, pkt->data); + cq_put(&usb->ep0->dummy_packets_Q, pkt->data); recycle_frame(usb, pkt); usb->actual_frame->total_bytes -= (len + PROTOCOL_OVERHEAD); fhci_err(usb->fhci, "host transaction failed\n"); diff --git a/drivers/usb/host/fhci-tds.c b/drivers/usb/host/fhci-tds.c index b40332290319..d224ab467a40 100644 --- a/drivers/usb/host/fhci-tds.c +++ b/drivers/usb/host/fhci-tds.c @@ -106,33 +106,33 @@ void fhci_ep0_free(struct fhci_usb *usb) cpm_muram_free(cpm_muram_offset(ep->td_base)); if (ep->conf_frame_Q) { - size = cq_howmany(ep->conf_frame_Q); + size = cq_howmany(&ep->conf_frame_Q); for (; size; size--) { - struct packet *pkt = cq_get(ep->conf_frame_Q); + struct packet *pkt = cq_get(&ep->conf_frame_Q); kfree(pkt); } - cq_delete(ep->conf_frame_Q); + cq_delete(&ep->conf_frame_Q); } if (ep->empty_frame_Q) { - size = cq_howmany(ep->empty_frame_Q); + size = cq_howmany(&ep->empty_frame_Q); for (; size; size--) { - struct packet *pkt = cq_get(ep->empty_frame_Q); + struct packet *pkt = cq_get(&ep->empty_frame_Q); kfree(pkt); } - cq_delete(ep->empty_frame_Q); + cq_delete(&ep->empty_frame_Q); } if (ep->dummy_packets_Q) { - size = cq_howmany(ep->dummy_packets_Q); + size = cq_howmany(&ep->dummy_packets_Q); for (; size; size--) { - u8 *buff = cq_get(ep->dummy_packets_Q); + u8 *buff = cq_get(&ep->dummy_packets_Q); kfree(buff); } - 
cq_delete(ep->dummy_packets_Q); + cq_delete(&ep->dummy_packets_Q); } kfree(ep); @@ -175,10 +175,9 @@ u32 fhci_create_ep(struct fhci_usb *usb, enum fhci_mem_alloc data_mem, ep->td_base = cpm_muram_addr(ep_offset); /* zero all queue pointers */ - ep->conf_frame_Q = cq_new(ring_len + 2); - ep->empty_frame_Q = cq_new(ring_len + 2); - ep->dummy_packets_Q = cq_new(ring_len + 2); - if (!ep->conf_frame_Q || !ep->empty_frame_Q || !ep->dummy_packets_Q) { + if (cq_new(&ep->conf_frame_Q, ring_len + 2) || + cq_new(&ep->empty_frame_Q, ring_len + 2) || + cq_new(&ep->dummy_packets_Q, ring_len + 2)) { err_for = "frame_queues"; goto err; } @@ -199,8 +198,8 @@ u32 fhci_create_ep(struct fhci_usb *usb, enum fhci_mem_alloc data_mem, err_for = "buffer"; goto err; } - cq_put(ep->empty_frame_Q, pkt); - cq_put(ep->dummy_packets_Q, buff); + cq_put(&ep->empty_frame_Q, pkt); + cq_put(&ep->dummy_packets_Q, buff); } /* we put the endpoint parameter RAM right behind the TD ring */ @@ -319,7 +318,7 @@ static void fhci_td_transaction_confirm(struct fhci_usb *usb) if ((buf == DUMMY2_BD_BUFFER) && !(td_status & ~TD_W)) continue; - pkt = cq_get(ep->conf_frame_Q); + pkt = cq_get(&ep->conf_frame_Q); if (!pkt) fhci_err(usb->fhci, "no frame to confirm\n"); @@ -460,9 +459,9 @@ u32 fhci_host_transaction(struct fhci_usb *usb, out_be16(&td->length, pkt->len); /* put the frame to the confirmation queue */ - cq_put(ep->conf_frame_Q, pkt); + cq_put(&ep->conf_frame_Q, pkt); - if (cq_howmany(ep->conf_frame_Q) == 1) + if (cq_howmany(&ep->conf_frame_Q) == 1) out_8(&usb->fhci->regs->usb_comm, USB_CMD_STR_FIFO); return 0; diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h index 7116284ed21a..2277428ef5d3 100644 --- a/drivers/usb/host/fhci.h +++ b/drivers/usb/host/fhci.h @@ -423,9 +423,9 @@ struct endpoint { struct usb_td __iomem *td_base; /* first TD in the ring */ struct usb_td __iomem *conf_td; /* next TD for confirm after transac */ struct usb_td __iomem *empty_td;/* next TD for new transaction req. 
*/ - struct kfifo *empty_frame_Q; /* Empty frames list to use */ - struct kfifo *conf_frame_Q; /* frames passed to TDs,waiting for tx */ - struct kfifo *dummy_packets_Q;/* dummy packets for the CRC overun */ + struct kfifo empty_frame_Q; /* Empty frames list to use */ + struct kfifo conf_frame_Q; /* frames passed to TDs,waiting for tx */ + struct kfifo dummy_packets_Q;/* dummy packets for the CRC overun */ bool already_pushed_dummy_bd; }; @@ -493,9 +493,9 @@ static inline struct usb_hcd *fhci_to_hcd(struct fhci_hcd *fhci) } /* fifo of pointers */ -static inline struct kfifo *cq_new(int size) +static inline int cq_new(struct kfifo *fifo, int size) { - return kfifo_alloc(size * sizeof(void *), GFP_KERNEL, NULL); + return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL, NULL); } static inline void cq_delete(struct kfifo *kfifo) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 4543f359be75..44b72d47fac2 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -939,9 +939,8 @@ int usb_serial_probe(struct usb_interface *interface, dev_err(&interface->dev, "No free urbs available\n"); goto probe_error; } - port->write_fifo = kfifo_alloc(PAGE_SIZE, GFP_KERNEL, - &port->lock); - if (IS_ERR(port->write_fifo)) + if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL, + &port->lock)) goto probe_error; buffer_size = le16_to_cpu(endpoint->wMaxPacketSize); port->bulk_out_size = buffer_size; diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index ad6bdf5a5970..c3f8d82efd34 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -1,6 +1,7 @@ /* - * A simple kernel FIFO implementation. + * A generic kernel FIFO implementation. * + * Copyright (C) 2009 Stefani Seibold * Copyright (C) 2004 Stelian Pop * * This program is free software; you can redistribute it and/or modify @@ -32,10 +33,10 @@ struct kfifo { spinlock_t *lock; /* protects concurrent modifications */ }; -extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - gfp_t gfp_mask, spinlock_t *lock); -extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, - spinlock_t *lock); +extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, + unsigned int size, spinlock_t *lock); +extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, + gfp_t gfp_mask, spinlock_t *lock); extern void kfifo_free(struct kfifo *fifo); extern unsigned int __kfifo_put(struct kfifo *fifo, const unsigned char *buffer, unsigned int len); diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 7394e3bc8f4b..ff92b46f5153 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -231,7 +232,7 @@ struct iscsi_conn { }; struct iscsi_pool { - struct kfifo *queue; /* FIFO Queue */ + struct kfifo queue; /* FIFO Queue */ void **pool; /* Pool of elements */ int max; /* Max number of elements */ }; diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h index 9e3182e659db..741ae7ed4394 100644 --- a/include/scsi/libiscsi_tcp.h +++ b/include/scsi/libiscsi_tcp.h @@ -80,7 +80,7 @@ struct iscsi_tcp_task { int data_offset; struct iscsi_r2t_info *r2t; /* in progress solict R2T */ struct iscsi_pool r2tpool; - struct kfifo *r2tqueue; + struct kfifo r2tqueue; void *dd_data; }; diff --git a/include/scsi/libsrp.h b/include/scsi/libsrp.h index ba615e4c1d7c..07e3adde21d9 100644 --- a/include/scsi/libsrp.h +++ b/include/scsi/libsrp.h @@ -21,7 
+21,7 @@ struct srp_buf { struct srp_queue { void *pool; void *items; - struct kfifo *queue; + struct kfifo queue; spinlock_t lock; }; diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 3765ff3c1bbe..8da6bb9782bb 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -1,6 +1,7 @@ /* - * A simple kernel FIFO implementation. + * A generic kernel FIFO implementation. * + * Copyright (C) 2009 Stefani Seibold * Copyright (C) 2004 Stelian Pop * * This program is free software; you can redistribute it and/or modify @@ -26,49 +27,51 @@ #include #include +static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, + unsigned int size, spinlock_t *lock) +{ + fifo->buffer = buffer; + fifo->size = size; + fifo->lock = lock; + + kfifo_reset(fifo); +} + /** - * kfifo_init - allocates a new FIFO using a preallocated buffer + * kfifo_init - initialize a FIFO using a preallocated buffer + * @fifo: the fifo to assign the buffer * @buffer: the preallocated buffer to be used. * @size: the size of the internal buffer, this have to be a power of 2. - * @gfp_mask: get_free_pages mask, passed to kmalloc() * @lock: the lock to be used to protect the fifo buffer * - * Do NOT pass the kfifo to kfifo_free() after use! Simply free the - * &struct kfifo with kfree(). */ -struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - gfp_t gfp_mask, spinlock_t *lock) +void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size, + spinlock_t *lock) { - struct kfifo *fifo; - /* size must be a power of 2 */ BUG_ON(!is_power_of_2(size)); - fifo = kmalloc(sizeof(struct kfifo), gfp_mask); - if (!fifo) - return ERR_PTR(-ENOMEM); - - fifo->buffer = buffer; - fifo->size = size; - fifo->in = fifo->out = 0; - fifo->lock = lock; - - return fifo; + _kfifo_init(fifo, buffer, size, lock); } EXPORT_SYMBOL(kfifo_init); /** - * kfifo_alloc - allocates a new FIFO and its internal buffer - * @size: the size of the internal buffer to be allocated. + * kfifo_alloc - allocates a new FIFO internal buffer + * @fifo: the fifo to assign then new buffer + * @size: the size of the buffer to be allocated, this have to be a power of 2. * @gfp_mask: get_free_pages mask, passed to kmalloc() * @lock: the lock to be used to protect the fifo buffer * + * This function dynamically allocates a new fifo internal buffer + * * The size will be rounded-up to a power of 2. + * The buffer will be release with kfifo_free(). + * Return 0 if no error, otherwise the an error code */ -struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) +int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask, + spinlock_t *lock) { unsigned char *buffer; - struct kfifo *ret; /* * round up to the next power of 2, since our 'let the indices @@ -80,26 +83,24 @@ struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) } buffer = kmalloc(size, gfp_mask); - if (!buffer) - return ERR_PTR(-ENOMEM); - - ret = kfifo_init(buffer, size, gfp_mask, lock); + if (!buffer) { + _kfifo_init(fifo, 0, 0, NULL); + return -ENOMEM; + } - if (IS_ERR(ret)) - kfree(buffer); + _kfifo_init(fifo, buffer, size, lock); - return ret; + return 0; } EXPORT_SYMBOL(kfifo_alloc); /** - * kfifo_free - frees the FIFO + * kfifo_free - frees the FIFO internal buffer * @fifo: the fifo to be freed. 
*/ void kfifo_free(struct kfifo *fifo) { kfree(fifo->buffer); - kfree(fifo); } EXPORT_SYMBOL(kfifo_free); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index dc328425fa20..6230ceb0823e 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -43,7 +43,7 @@ static int bufsize = 64 * 1024; static const char procname[] = "dccpprobe"; static struct { - struct kfifo *fifo; + struct kfifo fifo; spinlock_t lock; wait_queue_head_t wait; struct timespec tstart; @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put(dccpw.fifo, tbuf, len); + kfifo_put(&dccpw.fifo, tbuf, len); wake_up(&dccpw.wait); } @@ -109,7 +109,7 @@ static struct jprobe dccp_send_probe = { static int dccpprobe_open(struct inode *inode, struct file *file) { - kfifo_reset(dccpw.fifo); + kfifo_reset(&dccpw.fifo); getnstimeofday(&dccpw.tstart); return 0; } @@ -131,11 +131,11 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, return -ENOMEM; error = wait_event_interruptible(dccpw.wait, - __kfifo_len(dccpw.fifo) != 0); + __kfifo_len(&dccpw.fifo) != 0); if (error) goto out_free; - cnt = kfifo_get(dccpw.fifo, tbuf, len); + cnt = kfifo_get(&dccpw.fifo, tbuf, len); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: @@ -156,10 +156,8 @@ static __init int dccpprobe_init(void) init_waitqueue_head(&dccpw.wait); spin_lock_init(&dccpw.lock); - dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock); - if (IS_ERR(dccpw.fifo)) - return PTR_ERR(dccpw.fifo); - + if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock)) + return ret; if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; @@ -172,14 +170,14 @@ static __init int dccpprobe_init(void) err1: proc_net_remove(&init_net, procname); err0: - kfifo_free(dccpw.fifo); + kfifo_free(&dccpw.fifo); return ret; } module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { - kfifo_free(dccpw.fifo); + kfifo_free(&dccpw.fifo); proc_net_remove(&init_net, procname); unregister_jprobe(&dccp_send_probe); -- cgit v1.2.3 From c1e13f25674ed564948ecb7dfe5f83e578892896 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:27 -0800 Subject: kfifo: move out spinlock Move the pointer to the spinlock out of struct kfifo. Most users in tree do not actually use a spinlock, so the few exceptions now have to call kfifo_{get,put}_locked, which takes an extra argument to a spinlock. 
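For illustration only, here is a minimal sketch (not taken from any patch in this series) of how a driver might use the reworked API. The struct and function names such as my_dev and my_dev_init() are invented for the example; the kfifo calls themselves follow the signatures introduced by this patch, i.e. kfifo_alloc() without a lock argument and kfifo_put_locked()/kfifo_get_locked() taking the caller's spinlock explicitly.

	/* Hypothetical caller of the reworked kfifo API; not part of this series. */
	#include <linux/kfifo.h>
	#include <linux/spinlock.h>
	#include <linux/gfp.h>
	#include <linux/types.h>

	struct my_dev {
		struct kfifo events;	/* fifo is embedded, no longer a pointer */
		spinlock_t events_lock;	/* the lock now lives in the caller */
	};

	static int my_dev_init(struct my_dev *dev)
	{
		spin_lock_init(&dev->events_lock);
		/* no spinlock argument; the size is rounded up to a power of 2 */
		return kfifo_alloc(&dev->events, 16 * sizeof(u32), GFP_KERNEL);
	}

	static bool my_dev_queue_event(struct my_dev *dev, u32 event)
	{
		/* the caller's lock is passed explicitly to the locked helper */
		return kfifo_put_locked(&dev->events, (unsigned char *)&event,
					sizeof(event), &dev->events_lock)
				== sizeof(event);
	}

	static bool my_dev_pop_event(struct my_dev *dev, u32 *event)
	{
		return kfifo_get_locked(&dev->events, (unsigned char *)event,
					sizeof(*event), &dev->events_lock)
				== sizeof(*event);
	}

	static void my_dev_exit(struct my_dev *dev)
	{
		kfifo_free(&dev->events);	/* frees only the internal buffer */
	}

Callers that already hold their own lock, or that need no locking at all (the common case the changelog above refers to), keep using __kfifo_put()/__kfifo_get() directly, as several of the drivers converted in the diff below do.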
Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/nozomi.c | 2 +- drivers/char/sonypi.c | 21 ++++---- drivers/infiniband/hw/cxgb3/cxio_resource.c | 36 +++++++------ drivers/media/video/meye.c | 35 +++++++------ drivers/net/wireless/libertas/main.c | 2 +- drivers/platform/x86/fujitsu-laptop.c | 18 ++++--- drivers/platform/x86/sony-laptop.c | 22 ++++---- drivers/scsi/libiscsi.c | 2 +- drivers/scsi/libiscsi_tcp.c | 2 +- drivers/scsi/libsrp.c | 9 ++-- drivers/usb/host/fhci.h | 2 +- drivers/usb/serial/generic.c | 4 +- drivers/usb/serial/usb-serial.c | 3 +- include/linux/kfifo.h | 80 +++++++++++++---------------- kernel/kfifo.c | 17 +++--- net/dccp/probe.c | 6 +-- 16 files changed, 131 insertions(+), 130 deletions(-) (limited to 'include') diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index 0f39bec28b45..935b30d80adf 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -686,7 +686,7 @@ static int nozomi_read_config_table(struct nozomi *dc) for (i = PORT_MDM; i < MAX_PORT; i++) { kfifo_alloc(&dc->port[i].fifo_ul, - FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL); + FIFO_BUFFER_SIZE_UL, GFP_ATOMIC); memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl)); memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul)); } diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 9e6efb1f029f..dbcb3bd192c7 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -777,8 +777,9 @@ static void input_keyrelease(struct work_struct *work) { struct sonypi_keypress kp; - while (kfifo_get(&sonypi_device.input_fifo, (unsigned char *)&kp, - sizeof(kp)) == sizeof(kp)) { + while (kfifo_get_locked(&sonypi_device.input_fifo, (unsigned char *)&kp, + sizeof(kp), &sonypi_device.input_fifo_lock) + == sizeof(kp)) { msleep(10); input_report_key(kp.dev, kp.key, 0); input_sync(kp.dev); @@ -827,8 +828,9 @@ static void sonypi_report_input_event(u8 event) if (kp.dev) { input_report_key(kp.dev, kp.key, 1); input_sync(kp.dev); - kfifo_put(&sonypi_device.input_fifo, - (unsigned char *)&kp, sizeof(kp)); + kfifo_put_locked(&sonypi_device.input_fifo, + (unsigned char *)&kp, sizeof(kp), + &sonypi_device.input_fifo_lock); schedule_work(&sonypi_device.input_work); } } @@ -880,7 +882,8 @@ found: acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event); #endif - kfifo_put(&sonypi_device.fifo, (unsigned char *)&event, sizeof(event)); + kfifo_put_locked(&sonypi_device.fifo, (unsigned char *)&event, + sizeof(event), &sonypi_device.fifo_lock); kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_device.fifo_proc_list); @@ -929,7 +932,8 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, return ret; while (ret < count && - (kfifo_get(&sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) { + (kfifo_get_locked(&sonypi_device.fifo, &c, sizeof(c), + &sonypi_device.fifo_lock) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; ret++; @@ -1313,8 +1317,7 @@ static int __devinit sonypi_probe(struct platform_device *dev) "http://www.linux.it/~malattia/wiki/index.php/Sony_drivers\n"); spin_lock_init(&sonypi_device.fifo_lock); - error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL, - &sonypi_device.fifo_lock); + error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL); if (error) { printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); return error; @@ -1394,7 +1397,7 @@ static int 
__devinit sonypi_probe(struct platform_device *dev) spin_lock_init(&sonypi_device.input_fifo_lock); error = kfifo_alloc(&sonypi_device.input_fifo, SONYPI_BUF_SIZE, - GFP_KERNEL, &sonypi_device.input_fifo_lock); + GFP_KERNEL); if (error) { printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); goto err_inpdev_unregister; diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index 65072bdfc1bf..98f24e6d906e 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -55,7 +55,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, u32 rarray[16]; spin_lock_init(fifo_lock); - if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL, fifo_lock)) + if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) return -ENOMEM; for (i = 0; i < skip_low + skip_high; i++) @@ -86,7 +86,8 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, __kfifo_put(fifo, (unsigned char *) &i, sizeof(u32)); for (i = 0; i < skip_low + skip_high; i++) - kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)); + kfifo_get_locked(fifo, (unsigned char *) &entry, + sizeof(u32), fifo_lock); return 0; } @@ -113,8 +114,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), - GFP_KERNEL, - &rdev_p->rscp->qpid_fifo_lock)) + GFP_KERNEL)) return -ENOMEM; for (i = 16; i < T3_MAX_NUM_QP; i++) @@ -177,33 +177,37 @@ tpt_err: /* * returns 0 if no resource available */ -static u32 cxio_hal_get_resource(struct kfifo *fifo) +static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) { u32 entry; - if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32))) + if (kfifo_get_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) return entry; else return 0; /* fifo emptry */ } -static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry) +static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, + u32 entry) { - BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0); + BUG_ON( + kfifo_put_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) + == 0); } u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(&rscp->tpt_fifo); + return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock); } void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) { - cxio_hal_put_resource(&rscp->tpt_fifo, stag); + cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag); } u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) { - u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo); + u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo, + &rscp->qpid_fifo_lock); PDBG("%s qpid 0x%x\n", __func__, qpid); return qpid; } @@ -211,27 +215,27 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) { PDBG("%s qpid 0x%x\n", __func__, qpid); - cxio_hal_put_resource(&rscp->qpid_fifo, qpid); + cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid); } u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(&rscp->cqid_fifo); + return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock); } void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) { - cxio_hal_put_resource(&rscp->cqid_fifo, cqid); + cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid); } u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) { - 
return cxio_hal_get_resource(&rscp->pdid_fifo); + return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock); } void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) { - cxio_hal_put_resource(&rscp->pdid_fifo, pdid); + cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid); } void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c index dacbbb839b9e..38bcedfd9fec 100644 --- a/drivers/media/video/meye.c +++ b/drivers/media/video/meye.c @@ -800,8 +800,8 @@ again: return IRQ_HANDLED; if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) { - if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr, - sizeof(int)) != sizeof(int)) { + if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr, + sizeof(int), &meye.grabq_lock) != sizeof(int)) { mchip_free_frame(); return IRQ_HANDLED; } @@ -811,7 +811,8 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int)); + kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr, + sizeof(int), &meye.doneq_lock); wake_up_interruptible(&meye.proc_list); } else { int size; @@ -820,8 +821,8 @@ again: mchip_free_frame(); goto again; } - if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr, - sizeof(int)) != sizeof(int)) { + if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr, + sizeof(int), &meye.grabq_lock) != sizeof(int)) { mchip_free_frame(); goto again; } @@ -831,7 +832,8 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int)); + kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr, + sizeof(int), &meye.doneq_lock); wake_up_interruptible(&meye.proc_list); } mchip_free_frame(); @@ -933,7 +935,8 @@ static int meyeioc_qbuf_capt(int *nb) mchip_cont_compression_start(); meye.grab_buffer[*nb].state = MEYE_BUF_USING; - kfifo_put(&meye.grabq, (unsigned char *)nb, sizeof(int)); + kfifo_put_locked(&meye.grabq, (unsigned char *)nb, sizeof(int), + &meye.grabq_lock); mutex_unlock(&meye.lock); return 0; @@ -965,7 +968,8 @@ static int meyeioc_sync(struct file *file, void *fh, int *i) /* fall through */ case MEYE_BUF_DONE: meye.grab_buffer[*i].state = MEYE_BUF_UNUSED; - kfifo_get(&meye.doneq, (unsigned char *)&unused, sizeof(int)); + kfifo_get_locked(&meye.doneq, (unsigned char *)&unused, + sizeof(int), &meye.doneq_lock); } *i = meye.grab_buffer[*i].size; mutex_unlock(&meye.lock); @@ -1452,7 +1456,8 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) buf->flags |= V4L2_BUF_FLAG_QUEUED; buf->flags &= ~V4L2_BUF_FLAG_DONE; meye.grab_buffer[buf->index].state = MEYE_BUF_USING; - kfifo_put(&meye.grabq, (unsigned char *)&buf->index, sizeof(int)); + kfifo_put_locked(&meye.grabq, (unsigned char *)&buf->index, + sizeof(int), &meye.grabq_lock); mutex_unlock(&meye.lock); return 0; @@ -1478,8 +1483,8 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) return -EINTR; } - if (!kfifo_get(&meye.doneq, (unsigned char *)&reqnr, - sizeof(int))) { + if (!kfifo_get_locked(&meye.doneq, (unsigned char *)&reqnr, + sizeof(int), &meye.doneq_lock)) { mutex_unlock(&meye.lock); return -EBUSY; } @@ -1745,14 +1750,14 @@ static int __devinit meye_probe(struct pci_dev *pcidev, } spin_lock_init(&meye.grabq_lock); - if 
(kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, - &meye.grabq_lock)) { + if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS, + GFP_KERNEL)) { printk(KERN_ERR "meye: fifo allocation failed\n"); goto outkfifoalloc1; } spin_lock_init(&meye.doneq_lock); - if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, - &meye.doneq_lock)) { + if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS, + GFP_KERNEL)) { printk(KERN_ERR "meye: fifo allocation failed\n"); goto outkfifoalloc2; } diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 403909287414..2cc7ecd8d123 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -883,7 +883,7 @@ static int lbs_init_adapter(struct lbs_private *priv) priv->resp_len[0] = priv->resp_len[1] = 0; /* Create the event FIFO */ - ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL, NULL); + ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL); if (ret) { lbs_pr_err("Out of memory allocating event FIFO buffer\n"); goto out; diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index f999fba0e25e..13dc7bedcfce 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -825,7 +825,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) /* kfifo */ spin_lock_init(&fujitsu_hotkey->fifo_lock); error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int), - GFP_KERNEL, &fujitsu_hotkey->fifo_lock); + GFP_KERNEL); if (error) { printk(KERN_ERR "kfifo_alloc failed\n"); goto err_stop; @@ -1006,9 +1006,10 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) vdbg_printk(FUJLAPTOP_DBG_TRACE, "Push keycode into ringbuffer [%d]\n", keycode); - status = kfifo_put(&fujitsu_hotkey->fifo, + status = kfifo_put_locked(&fujitsu_hotkey->fifo, (unsigned char *)&keycode, - sizeof(keycode)); + sizeof(keycode), + &fujitsu_hotkey->fifo_lock); if (status != sizeof(keycode)) { vdbg_printk(FUJLAPTOP_DBG_WARN, "Could not push keycode [0x%x]\n", @@ -1019,11 +1020,12 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) } } else if (keycode == 0) { while ((status = - kfifo_get - (&fujitsu_hotkey->fifo, (unsigned char *) - &keycode_r, - sizeof - (keycode_r))) == sizeof(keycode_r)) { + kfifo_get_locked( + &fujitsu_hotkey->fifo, + (unsigned char *) &keycode_r, + sizeof(keycode_r), + &fujitsu_hotkey->fifo_lock)) + == sizeof(keycode_r)) { input_report_key(input, keycode_r, 0); input_sync(input); vdbg_printk(FUJLAPTOP_DBG_TRACE, diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 04625a048e74..1538a0a3c0af 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -300,8 +300,9 @@ static void do_sony_laptop_release_key(struct work_struct *work) { struct sony_laptop_keypress kp; - while (kfifo_get(&sony_laptop_input.fifo, (unsigned char *)&kp, - sizeof(kp)) == sizeof(kp)) { + while (kfifo_get_locked(&sony_laptop_input.fifo, (unsigned char *)&kp, + sizeof(kp), &sony_laptop_input.fifo_lock) + == sizeof(kp)) { msleep(10); input_report_key(kp.dev, kp.key, 0); input_sync(kp.dev); @@ -362,8 +363,9 @@ static void sony_laptop_report_input_event(u8 event) /* we emit the scancode so we can always remap the key */ input_event(kp.dev, EV_MSC, MSC_SCAN, event); input_sync(kp.dev); - kfifo_put(&sony_laptop_input.fifo, - (unsigned char *)&kp, 
sizeof(kp)); + kfifo_put_locked(&sony_laptop_input.fifo, + (unsigned char *)&kp, sizeof(kp), + &sony_laptop_input.fifo_lock); if (!work_pending(&sony_laptop_release_key_work)) queue_work(sony_laptop_input.wq, @@ -386,8 +388,7 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device) /* kfifo */ spin_lock_init(&sony_laptop_input.fifo_lock); error = - kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, - &sony_laptop_input.fifo_lock); + kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL); if (error) { printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n"); goto err_dec_users; @@ -2129,7 +2130,8 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, return ret; while (ret < count && - (kfifo_get(&sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) { + (kfifo_get_locked(&sonypi_compat.fifo, &c, sizeof(c), + &sonypi_compat.fifo_lock) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; ret++; @@ -2308,7 +2310,8 @@ static struct miscdevice sonypi_misc_device = { static void sonypi_compat_report_event(u8 event) { - kfifo_put(&sonypi_compat.fifo, (unsigned char *)&event, sizeof(event)); + kfifo_put_locked(&sonypi_compat.fifo, (unsigned char *)&event, + sizeof(event), &sonypi_compat.fifo_lock); kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_compat.fifo_proc_list); } @@ -2319,8 +2322,7 @@ static int sonypi_compat_init(void) spin_lock_init(&sonypi_compat.fifo_lock); error = - kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, - &sonypi_compat.fifo_lock); + kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL); if (error) { printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n"); return error; diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index cf0aa7e90be9..1bccbc1e588e 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2461,7 +2461,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size) if (q->pool == NULL) return -ENOMEM; - kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*), NULL); + kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*)); for (i = 0; i < max; i++) { q->pool[i] = kzalloc(item_size, GFP_KERNEL); diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index a83ee56a185e..41643c860d26 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -1128,7 +1128,7 @@ int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session) /* R2T xmit queue */ if (kfifo_alloc(&tcp_task->r2tqueue, - session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL)) { + session->max_r2t * 4 * sizeof(void*), GFP_KERNEL)) { iscsi_pool_free(&tcp_task->r2tpool); goto r2t_alloc_fail; } diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c index b1b5e51ca8e3..db1b41c55fd3 100644 --- a/drivers/scsi/libsrp.c +++ b/drivers/scsi/libsrp.c @@ -58,8 +58,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max, goto free_pool; spin_lock_init(&q->lock); - kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *), - &q->lock); + kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *)); for (i = 0, iue = q->items; i < max; i++) { __kfifo_put(&q->queue, (void *) &iue, sizeof(void *)); @@ -164,7 +163,8 @@ struct iu_entry *srp_iu_get(struct srp_target *target) { struct iu_entry *iue = NULL; - kfifo_get(&target->iu_queue.queue, (void *) &iue, sizeof(void *)); + kfifo_get_locked(&target->iu_queue.queue, (void *) &iue, + sizeof(void *), &target->iu_queue.lock); if (!iue) return iue; 
iue->target = target; @@ -176,7 +176,8 @@ EXPORT_SYMBOL_GPL(srp_iu_get); void srp_iu_put(struct iu_entry *iue) { - kfifo_put(&iue->target->iu_queue.queue, (void *) &iue, sizeof(void *)); + kfifo_put_locked(&iue->target->iu_queue.queue, (void *) &iue, + sizeof(void *), &iue->target->iu_queue.lock); } EXPORT_SYMBOL_GPL(srp_iu_put); diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h index 2277428ef5d3..a76da201183b 100644 --- a/drivers/usb/host/fhci.h +++ b/drivers/usb/host/fhci.h @@ -495,7 +495,7 @@ static inline struct usb_hcd *fhci_to_hcd(struct fhci_hcd *fhci) /* fifo of pointers */ static inline int cq_new(struct kfifo *fifo, int size) { - return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL, NULL); + return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL); } static inline void cq_delete(struct kfifo *kfifo) diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index bbe005cefcfb..61eef18218be 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port) return 0; data = port->write_urb->transfer_buffer; - count = kfifo_get(port->write_fifo, data, port->bulk_out_size); + count = kfifo_get_locked(port->write_fifo, data, port->bulk_out_size, &port->lock); usb_serial_debug_data(debug, &port->dev, __func__, count, data); /* set up our urb */ @@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty, return usb_serial_multi_urb_write(tty, port, buf, count); - count = kfifo_put(port->write_fifo, buf, count); + count = kfifo_put_locked(port->write_fifo, buf, count, &port->lock); result = usb_serial_generic_write_start(port); if (result >= 0) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 44b72d47fac2..636a4f23445e 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -939,8 +939,7 @@ int usb_serial_probe(struct usb_interface *interface, dev_err(&interface->dev, "No free urbs available\n"); goto probe_error; } - if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL, - &port->lock)) + if (kfifo_alloc(port->write_fifo, PAGE_SIZE, GFP_KERNEL)) goto probe_error; buffer_size = le16_to_cpu(endpoint->wMaxPacketSize); port->bulk_out_size = buffer_size; diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c3f8d82efd34..e0f5c9d4197d 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -30,13 +30,12 @@ struct kfifo { unsigned int size; /* the size of the allocated buffer */ unsigned int in; /* data is added at offset (in % size) */ unsigned int out; /* data is extracted from off. (out % size) */ - spinlock_t *lock; /* protects concurrent modifications */ }; extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, - unsigned int size, spinlock_t *lock); + unsigned int size); extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, - gfp_t gfp_mask, spinlock_t *lock); + gfp_t gfp_mask); extern void kfifo_free(struct kfifo *fifo); extern unsigned int __kfifo_put(struct kfifo *fifo, const unsigned char *buffer, unsigned int len); @@ -58,58 +57,67 @@ static inline void __kfifo_reset(struct kfifo *fifo) */ static inline void kfifo_reset(struct kfifo *fifo) { - unsigned long flags; - - spin_lock_irqsave(fifo->lock, flags); - __kfifo_reset(fifo); +} + +/** + * __kfifo_len - returns the number of bytes available in the FIFO + * @fifo: the fifo to be used. 
+ */ +static inline unsigned int __kfifo_len(struct kfifo *fifo) +{ + register unsigned int out; - spin_unlock_irqrestore(fifo->lock, flags); + out = fifo->out; + smp_rmb(); + return fifo->in - out; } /** - * kfifo_put - puts some data into the FIFO + * kfifo_put_locked - puts some data into the FIFO using a spinlock for locking * @fifo: the fifo to be used. - * @buffer: the data to be added. - * @len: the length of the data to be added. + * @from: the data to be added. + * @n: the length of the data to be added. + * @lock: pointer to the spinlock to use for locking. * - * This function copies at most @len bytes from the @buffer into + * This function copies at most @len bytes from the @from buffer into * the FIFO depending on the free space, and returns the number of * bytes copied. */ -static inline unsigned int kfifo_put(struct kfifo *fifo, - const unsigned char *buffer, unsigned int len) +static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, + const unsigned char *from, unsigned int n, spinlock_t *lock) { unsigned long flags; unsigned int ret; - spin_lock_irqsave(fifo->lock, flags); + spin_lock_irqsave(lock, flags); - ret = __kfifo_put(fifo, buffer, len); + ret = __kfifo_put(fifo, from, n); - spin_unlock_irqrestore(fifo->lock, flags); + spin_unlock_irqrestore(lock, flags); return ret; } /** - * kfifo_get - gets some data from the FIFO + * kfifo_get_locked - gets some data from the FIFO using a spinlock for locking * @fifo: the fifo to be used. - * @buffer: where the data must be copied. - * @len: the size of the destination buffer. + * @to: where the data must be copied. + * @n: the size of the destination buffer. + * @lock: pointer to the spinlock to use for locking. * * This function copies at most @len bytes from the FIFO into the - * @buffer and returns the number of copied bytes. + * @to buffer and returns the number of copied bytes. */ -static inline unsigned int kfifo_get(struct kfifo *fifo, - unsigned char *buffer, unsigned int len) +static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo, + unsigned char *to, unsigned int n, spinlock_t *lock) { unsigned long flags; unsigned int ret; - spin_lock_irqsave(fifo->lock, flags); + spin_lock_irqsave(lock, flags); - ret = __kfifo_get(fifo, buffer, len); + ret = __kfifo_get(fifo, to, n); /* * optimization: if the FIFO is empty, set the indices to 0 @@ -118,36 +126,18 @@ static inline unsigned int kfifo_get(struct kfifo *fifo, if (fifo->in == fifo->out) fifo->in = fifo->out = 0; - spin_unlock_irqrestore(fifo->lock, flags); + spin_unlock_irqrestore(lock, flags); return ret; } -/** - * __kfifo_len - returns the number of bytes available in the FIFO, no locking version - * @fifo: the fifo to be used. - */ -static inline unsigned int __kfifo_len(struct kfifo *fifo) -{ - return fifo->in - fifo->out; -} - /** * kfifo_len - returns the number of bytes available in the FIFO * @fifo: the fifo to be used. 
*/ static inline unsigned int kfifo_len(struct kfifo *fifo) { - unsigned long flags; - unsigned int ret; - - spin_lock_irqsave(fifo->lock, flags); - - ret = __kfifo_len(fifo); - - spin_unlock_irqrestore(fifo->lock, flags); - - return ret; + return __kfifo_len(fifo); } #endif diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 8da6bb9782bb..4950bdbe3477 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -28,11 +28,10 @@ #include static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, - unsigned int size, spinlock_t *lock) + unsigned int size) { fifo->buffer = buffer; fifo->size = size; - fifo->lock = lock; kfifo_reset(fifo); } @@ -42,16 +41,14 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, * @fifo: the fifo to assign the buffer * @buffer: the preallocated buffer to be used. * @size: the size of the internal buffer, this have to be a power of 2. - * @lock: the lock to be used to protect the fifo buffer * */ -void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size, - spinlock_t *lock) +void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size) { /* size must be a power of 2 */ BUG_ON(!is_power_of_2(size)); - _kfifo_init(fifo, buffer, size, lock); + _kfifo_init(fifo, buffer, size); } EXPORT_SYMBOL(kfifo_init); @@ -60,7 +57,6 @@ EXPORT_SYMBOL(kfifo_init); * @fifo: the fifo to assign then new buffer * @size: the size of the buffer to be allocated, this have to be a power of 2. * @gfp_mask: get_free_pages mask, passed to kmalloc() - * @lock: the lock to be used to protect the fifo buffer * * This function dynamically allocates a new fifo internal buffer * @@ -68,8 +64,7 @@ EXPORT_SYMBOL(kfifo_init); * The buffer will be release with kfifo_free(). * Return 0 if no error, otherwise the an error code */ -int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask, - spinlock_t *lock) +int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask) { unsigned char *buffer; @@ -84,11 +79,11 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask, buffer = kmalloc(size, gfp_mask); if (!buffer) { - _kfifo_init(fifo, 0, 0, NULL); + _kfifo_init(fifo, 0, 0); return -ENOMEM; } - _kfifo_init(fifo, buffer, size, lock); + _kfifo_init(fifo, buffer, size); return 0; } diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 6230ceb0823e..c6b50351aa78 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put(&dccpw.fifo, tbuf, len); + kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); wake_up(&dccpw.wait); } @@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, if (error) goto out_free; - cnt = kfifo_get(&dccpw.fifo, tbuf, len); + cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); error = copy_to_user(buf, tbuf, cnt) ? 
-EFAULT : 0; out_free: @@ -156,7 +156,7 @@ static __init int dccpprobe_init(void) init_waitqueue_head(&dccpw.wait); spin_lock_init(&dccpw.lock); - if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock)) + if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL)) return ret; if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; -- cgit v1.2.3 From e64c026dd09b73faf20707711402fc5ed55a8e70 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:28 -0800 Subject: kfifo: cleanup namespace change name of __kfifo_* functions to kfifo_*, because the prefix __kfifo should be reserved for internal functions only. Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/nozomi.c | 12 +++++------ drivers/infiniband/hw/cxgb3/cxio_resource.c | 10 ++++----- drivers/net/wireless/libertas/cmd.c | 4 ++-- drivers/net/wireless/libertas/main.c | 8 ++++---- drivers/platform/x86/sony-laptop.c | 2 +- drivers/scsi/libiscsi.c | 14 ++++++------- drivers/scsi/libiscsi_tcp.c | 20 +++++++++--------- drivers/scsi/libsrp.c | 2 +- drivers/usb/host/fhci.h | 6 +++--- drivers/usb/serial/generic.c | 4 ++-- include/linux/kfifo.h | 32 +++++++---------------------- kernel/kfifo.c | 12 +++++------ net/dccp/probe.c | 2 +- 13 files changed, 55 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index 935b30d80adf..61f5bfe74f38 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc) struct tty_struct *tty = tty_port_tty_get(&port->port); /* Get data from tty and place in buf for now */ - size = __kfifo_get(&port->fifo_ul, dc->send_buf, + size = kfifo_get(&port->fifo_ul, dc->send_buf, ul_size < SEND_BUF_MAX ? ul_size : SEND_BUF_MAX); if (size == 0) { @@ -988,11 +988,11 @@ static int receive_flow_control(struct nozomi *dc) } else if (old_ctrl.CTS == 0 && ctrl_dl.CTS == 1) { - if (__kfifo_len(&dc->port[port].fifo_ul)) { + if (kfifo_len(&dc->port[port].fifo_ul)) { DBG1("Enable interrupt (0x%04X) on port: %d", enable_ier, port); DBG1("Data in buffer [%d], enable transmit! 
", - __kfifo_len(&dc->port[port].fifo_ul)); + kfifo_len(&dc->port[port].fifo_ul)); enable_transmit_ul(port, dc); } else { DBG1("No data in buffer..."); @@ -1672,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer, goto exit; } - rval = __kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count); + rval = kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count); /* notify card */ if (unlikely(dc == NULL)) { @@ -1720,7 +1720,7 @@ static int ntty_write_room(struct tty_struct *tty) if (!port->port.count) goto exit; - room = port->fifo_ul.size - __kfifo_len(&port->fifo_ul); + room = port->fifo_ul.size - kfifo_len(&port->fifo_ul); exit: mutex_unlock(&port->tty_sem); @@ -1877,7 +1877,7 @@ static s32 ntty_chars_in_buffer(struct tty_struct *tty) goto exit_in_buffer; } - rval = __kfifo_len(&port->fifo_ul); + rval = kfifo_len(&port->fifo_ul); exit_in_buffer: return rval; diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index 98f24e6d906e..d7d18fb02c93 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -59,7 +59,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, return -ENOMEM; for (i = 0; i < skip_low + skip_high; i++) - __kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)); + kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)); if (random) { j = 0; random_bytes = random32(); @@ -71,19 +71,19 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, random_bytes = random32(); } idx = (random_bytes >> (j * 2)) & 0xF; - __kfifo_put(fifo, + kfifo_put(fifo, (unsigned char *) &rarray[idx], sizeof(u32)); rarray[idx] = i; j++; } for (i = 0; i < RANDOM_SIZE; i++) - __kfifo_put(fifo, + kfifo_put(fifo, (unsigned char *) &rarray[i], sizeof(u32)); } else for (i = skip_low; i < nr - skip_high; i++) - __kfifo_put(fifo, (unsigned char *) &i, sizeof(u32)); + kfifo_put(fifo, (unsigned char *) &i, sizeof(u32)); for (i = 0; i < skip_low + skip_high; i++) kfifo_get_locked(fifo, (unsigned char *) &entry, @@ -119,7 +119,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) for (i = 16; i < T3_MAX_NUM_QP; i++) if (!(i & rdev_p->qpmask)) - __kfifo_put(&rdev_p->rscp->qpid_fifo, + kfifo_put(&rdev_p->rscp->qpid_fifo, (unsigned char *) &i, sizeof(u32)); return 0; } diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index ffed17f4f506..42611bea76a3 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -1365,7 +1365,7 @@ static void lbs_send_confirmsleep(struct lbs_private *priv) priv->dnld_sent = DNLD_RES_RECEIVED; /* If nothing to do, go back to sleep (?) */ - if (!__kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx]) + if (!kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx]) priv->psstate = PS_STATE_SLEEP; spin_unlock_irqrestore(&priv->driver_lock, flags); @@ -1439,7 +1439,7 @@ void lbs_ps_confirm_sleep(struct lbs_private *priv) } /* Pending events or command responses? 
*/ - if (__kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) { + if (kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) { allowed = 0; lbs_deb_host("pending events or command responses\n"); } diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 2cc7ecd8d123..0622104f0a03 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -459,7 +459,7 @@ static int lbs_thread(void *data) else if (!list_empty(&priv->cmdpendingq) && !(priv->wakeup_dev_required)) shouldsleep = 0; /* We have a command to send */ - else if (__kfifo_len(&priv->event_fifo)) + else if (kfifo_len(&priv->event_fifo)) shouldsleep = 0; /* We have an event to process */ else shouldsleep = 1; /* No command */ @@ -511,9 +511,9 @@ static int lbs_thread(void *data) /* Process hardware events, e.g. card removed, link lost */ spin_lock_irq(&priv->driver_lock); - while (__kfifo_len(&priv->event_fifo)) { + while (kfifo_len(&priv->event_fifo)) { u32 event; - __kfifo_get(&priv->event_fifo, (unsigned char *) &event, + kfifo_get(&priv->event_fifo, (unsigned char *) &event, sizeof(event)); spin_unlock_irq(&priv->driver_lock); lbs_process_event(priv, event); @@ -1175,7 +1175,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event) if (priv->psstate == PS_STATE_SLEEP) priv->psstate = PS_STATE_AWAKE; - __kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32)); + kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32)); wake_up_interruptible(&priv->waitq); diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 1538a0a3c0af..36e5dc6fc953 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -2107,7 +2107,7 @@ static int sonypi_misc_open(struct inode *inode, struct file *file) spin_lock_irqsave(&sonypi_compat.fifo_lock, flags); if (atomic_inc_return(&sonypi_compat.open_count) == 1) - __kfifo_reset(&sonypi_compat.fifo); + kfifo_reset(&sonypi_compat.fifo); spin_unlock_irqrestore(&sonypi_compat.fifo_lock, flags); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 1bccbc1e588e..5f0c46f43ee1 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task) if (conn->login_task == task) return; - __kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*)); + kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*)); if (sc) { task->sc = NULL; @@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE); BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED); - if (!__kfifo_get(&session->cmdpool.queue, + if (!kfifo_get(&session->cmdpool.queue, (void*)&task, sizeof(void*))) return NULL; } @@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn, { struct iscsi_task *task; - if (!__kfifo_get(&conn->session->cmdpool.queue, + if (!kfifo_get(&conn->session->cmdpool.queue, (void *) &task, sizeof(void *))) return NULL; @@ -2469,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size) q->max = i; goto enomem; } - __kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*)); + kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*)); } if (items) { @@ -2819,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, /* allocate login_task used for the login/text sequences */ 
spin_lock_bh(&session->lock); - if (!__kfifo_get(&session->cmdpool.queue, + if (!kfifo_get(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*))) { spin_unlock_bh(&session->lock); @@ -2839,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, return cls_conn; login_task_data_alloc_fail: - __kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, + kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); login_task_alloc_fail: iscsi_destroy_conn(cls_conn); @@ -2902,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); kfree(conn->persistent_address); - __kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, + kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); if (session->leadconn == conn) session->leadconn = NULL; diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 41643c860d26..c0be926637b1 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task) return; /* flush task's r2t queues */ - while (__kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { - __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + while (kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { + kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n"); } r2t = tcp_task->r2t; if (r2t != NULL) { - __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); tcp_task->r2t = NULL; } @@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) return 0; } - rc = __kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); + rc = kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); if (!rc) { iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. 
" "Target has sent more R2Ts than it " @@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) if (r2t->data_length == 0) { iscsi_conn_printk(KERN_ERR, conn, "invalid R2T with zero data len\n"); - __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) "invalid R2T with data len %u at offset %u " "and total length %d\n", r2t->data_length, r2t->data_offset, scsi_out(task->sc)->length); - __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) r2t->sent = 0; tcp_task->exp_datasn = r2tsn + 1; - __kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); + kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); conn->r2t_pdus_cnt++; iscsi_requeue_task(task); @@ -951,7 +951,7 @@ int iscsi_tcp_task_init(struct iscsi_task *task) return conn->session->tt->init_pdu(task, 0, task->data_count); } - BUG_ON(__kfifo_len(&tcp_task->r2tqueue)); + BUG_ON(kfifo_len(&tcp_task->r2tqueue)); tcp_task->exp_datasn = 0; /* Prepare PDU, optionally w/ immediate data */ @@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) if (r2t->data_length <= r2t->sent) { ISCSI_DBG_TCP(task->conn, " done with r2t %p\n", r2t); - __kfifo_put(&tcp_task->r2tpool.queue, + kfifo_put(&tcp_task->r2tpool.queue, (void *)&tcp_task->r2t, sizeof(void *)); tcp_task->r2t = r2t = NULL; @@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) } if (r2t == NULL) { - __kfifo_get(&tcp_task->r2tqueue, + kfifo_get(&tcp_task->r2tqueue, (void *)&tcp_task->r2t, sizeof(void *)); r2t = tcp_task->r2t; } diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c index db1b41c55fd3..975e448cfcb9 100644 --- a/drivers/scsi/libsrp.c +++ b/drivers/scsi/libsrp.c @@ -61,7 +61,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max, kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *)); for (i = 0, iue = q->items; i < max; i++) { - __kfifo_put(&q->queue, (void *) &iue, sizeof(void *)); + kfifo_put(&q->queue, (void *) &iue, sizeof(void *)); iue->sbuf = ring[i]; iue++; } diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h index a76da201183b..96aa787f208f 100644 --- a/drivers/usb/host/fhci.h +++ b/drivers/usb/host/fhci.h @@ -505,19 +505,19 @@ static inline void cq_delete(struct kfifo *kfifo) static inline unsigned int cq_howmany(struct kfifo *kfifo) { - return __kfifo_len(kfifo) / sizeof(void *); + return kfifo_len(kfifo) / sizeof(void *); } static inline int cq_put(struct kfifo *kfifo, void *p) { - return __kfifo_put(kfifo, (void *)&p, sizeof(p)); + return kfifo_put(kfifo, (void *)&p, sizeof(p)); } static inline void *cq_get(struct kfifo *kfifo) { void *p = NULL; - __kfifo_get(kfifo, (void *)&p, sizeof(p)); + kfifo_get(kfifo, (void *)&p, sizeof(p)); return p; } diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 61eef18218be..d0a2e464cacd 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -276,7 +276,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port) if (port->write_urb_busy) start_io = false; else { - start_io = (__kfifo_len(port->write_fifo) != 0); + 
start_io = (kfifo_len(port->write_fifo) != 0); port->write_urb_busy = start_io; } spin_unlock_irqrestore(&port->lock, flags); @@ -370,7 +370,7 @@ int usb_serial_generic_write_room(struct tty_struct *tty) (serial->type->max_in_flight_urbs - port->urbs_in_flight); } else if (serial->num_bulk_out) - room = port->write_fifo->size - __kfifo_len(port->write_fifo); + room = port->write_fifo->size - kfifo_len(port->write_fifo); spin_unlock_irqrestore(&port->lock, flags); dbg("%s - returns %d", __func__, room); diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index e0f5c9d4197d..a893acda3964 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -37,34 +37,25 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask); extern void kfifo_free(struct kfifo *fifo); -extern unsigned int __kfifo_put(struct kfifo *fifo, +extern unsigned int kfifo_put(struct kfifo *fifo, const unsigned char *buffer, unsigned int len); -extern unsigned int __kfifo_get(struct kfifo *fifo, +extern unsigned int kfifo_get(struct kfifo *fifo, unsigned char *buffer, unsigned int len); -/** - * __kfifo_reset - removes the entire FIFO contents, no locking version - * @fifo: the fifo to be emptied. - */ -static inline void __kfifo_reset(struct kfifo *fifo) -{ - fifo->in = fifo->out = 0; -} - /** * kfifo_reset - removes the entire FIFO contents * @fifo: the fifo to be emptied. */ static inline void kfifo_reset(struct kfifo *fifo) { - __kfifo_reset(fifo); + fifo->in = fifo->out = 0; } /** - * __kfifo_len - returns the number of bytes available in the FIFO + * kfifo_len - returns the number of used bytes in the FIFO * @fifo: the fifo to be used. */ -static inline unsigned int __kfifo_len(struct kfifo *fifo) +static inline unsigned int kfifo_len(struct kfifo *fifo) { register unsigned int out; @@ -92,7 +83,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, spin_lock_irqsave(lock, flags); - ret = __kfifo_put(fifo, from, n); + ret = kfifo_put(fifo, from, n); spin_unlock_irqrestore(lock, flags); @@ -117,7 +108,7 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo, spin_lock_irqsave(lock, flags); - ret = __kfifo_get(fifo, to, n); + ret = kfifo_get(fifo, to, n); /* * optimization: if the FIFO is empty, set the indices to 0 @@ -131,13 +122,4 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo, return ret; } -/** - * kfifo_len - returns the number of bytes available in the FIFO - * @fifo: the fifo to be used. - */ -static inline unsigned int kfifo_len(struct kfifo *fifo) -{ - return __kfifo_len(fifo); -} - #endif diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 4950bdbe3477..963ffde4af1a 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -100,7 +100,7 @@ void kfifo_free(struct kfifo *fifo) EXPORT_SYMBOL(kfifo_free); /** - * __kfifo_put - puts some data into the FIFO, no locking version + * kfifo_put - puts some data into the FIFO, no locking version * @fifo: the fifo to be used. * @buffer: the data to be added. * @len: the length of the data to be added. @@ -112,7 +112,7 @@ EXPORT_SYMBOL(kfifo_free); * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. 
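The single-reader/single-writer note above is what lets the now-unprefixed kfifo_put()/kfifo_get() stay lock-free. A hedged illustration with invented names (note that the next patch in the series renames these helpers once more, to kfifo_in()/kfifo_out()):

/* Illustration only: one producer context, one consumer context, no lock. */
static struct kfifo demo_fifo;	/* assume kfifo_alloc(&demo_fifo, 1024, GFP_KERNEL) == 0 */

static void demo_produce(const unsigned char *data, unsigned int len)
{
	/* returns the number of bytes copied; may be short if the fifo is full */
	unsigned int copied = kfifo_put(&demo_fifo, data, len);

	if (copied != len)
		pr_debug("demo: fifo full, dropped %u bytes\n", len - copied);
}

static void demo_consume(void)
{
	unsigned char buf[64];
	unsigned int n;

	while ((n = kfifo_get(&demo_fifo, buf, sizeof(buf))) > 0)
		; /* process n bytes of buf here */
}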
*/ -unsigned int __kfifo_put(struct kfifo *fifo, +unsigned int kfifo_put(struct kfifo *fifo, const unsigned char *buffer, unsigned int len) { unsigned int l; @@ -144,10 +144,10 @@ unsigned int __kfifo_put(struct kfifo *fifo, return len; } -EXPORT_SYMBOL(__kfifo_put); +EXPORT_SYMBOL(kfifo_put); /** - * __kfifo_get - gets some data from the FIFO, no locking version + * kfifo_get - gets some data from the FIFO, no locking version * @fifo: the fifo to be used. * @buffer: where the data must be copied. * @len: the size of the destination buffer. @@ -158,7 +158,7 @@ EXPORT_SYMBOL(__kfifo_put); * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int __kfifo_get(struct kfifo *fifo, +unsigned int kfifo_get(struct kfifo *fifo, unsigned char *buffer, unsigned int len) { unsigned int l; @@ -190,4 +190,4 @@ unsigned int __kfifo_get(struct kfifo *fifo, return len; } -EXPORT_SYMBOL(__kfifo_get); +EXPORT_SYMBOL(kfifo_get); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index c6b50351aa78..9ef36849edd7 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -131,7 +131,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, return -ENOMEM; error = wait_event_interruptible(dccpw.wait, - __kfifo_len(&dccpw.fifo) != 0); + kfifo_len(&dccpw.fifo) != 0); if (error) goto out_free; -- cgit v1.2.3 From 7acd72eb85f1c7a15e8b5eb554994949241737f1 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:28 -0800 Subject: kfifo: rename kfifo_put... into kfifo_in... and kfifo_get... into kfifo_out... rename kfifo_put... into kfifo_in... to prevent miss use of old non in kernel-tree drivers ditto for kfifo_get... -> kfifo_out... Improve the prototypes of kfifo_in and kfifo_out to make the kerneldoc annotations more readable. Add mini "howto porting to the new API" in kfifo.h Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/nozomi.c | 4 +-- drivers/char/sonypi.c | 8 +++--- drivers/infiniband/hw/cxgb3/cxio_resource.c | 16 ++++++------ drivers/media/video/meye.c | 16 ++++++------ drivers/net/wireless/libertas/main.c | 5 ++-- drivers/platform/x86/fujitsu-laptop.c | 4 +-- drivers/platform/x86/sony-laptop.c | 8 +++--- drivers/scsi/libiscsi.c | 14 +++++------ drivers/scsi/libiscsi_tcp.c | 18 ++++++------- drivers/scsi/libsrp.c | 6 ++--- drivers/usb/host/fhci.h | 4 +-- drivers/usb/serial/generic.c | 4 +-- include/linux/kfifo.h | 39 +++++++++++++++++++++-------- kernel/kfifo.c | 32 +++++++++++------------ net/dccp/probe.c | 4 +-- 15 files changed, 101 insertions(+), 81 deletions(-) (limited to 'include') diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index 61f5bfe74f38..9ef243429014 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc) struct tty_struct *tty = tty_port_tty_get(&port->port); /* Get data from tty and place in buf for now */ - size = kfifo_get(&port->fifo_ul, dc->send_buf, + size = kfifo_out(&port->fifo_ul, dc->send_buf, ul_size < SEND_BUF_MAX ? 
ul_size : SEND_BUF_MAX); if (size == 0) { @@ -1672,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer, goto exit; } - rval = kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count); + rval = kfifo_in(&port->fifo_ul, (unsigned char *)buffer, count); /* notify card */ if (unlikely(dc == NULL)) { diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index dbcb3bd192c7..0798754a607c 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -777,7 +777,7 @@ static void input_keyrelease(struct work_struct *work) { struct sonypi_keypress kp; - while (kfifo_get_locked(&sonypi_device.input_fifo, (unsigned char *)&kp, + while (kfifo_out_locked(&sonypi_device.input_fifo, (unsigned char *)&kp, sizeof(kp), &sonypi_device.input_fifo_lock) == sizeof(kp)) { msleep(10); @@ -828,7 +828,7 @@ static void sonypi_report_input_event(u8 event) if (kp.dev) { input_report_key(kp.dev, kp.key, 1); input_sync(kp.dev); - kfifo_put_locked(&sonypi_device.input_fifo, + kfifo_in_locked(&sonypi_device.input_fifo, (unsigned char *)&kp, sizeof(kp), &sonypi_device.input_fifo_lock); schedule_work(&sonypi_device.input_work); @@ -882,7 +882,7 @@ found: acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event); #endif - kfifo_put_locked(&sonypi_device.fifo, (unsigned char *)&event, + kfifo_in_locked(&sonypi_device.fifo, (unsigned char *)&event, sizeof(event), &sonypi_device.fifo_lock); kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_device.fifo_proc_list); @@ -932,7 +932,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, return ret; while (ret < count && - (kfifo_get_locked(&sonypi_device.fifo, &c, sizeof(c), + (kfifo_out_locked(&sonypi_device.fifo, &c, sizeof(c), &sonypi_device.fifo_lock) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index d7d18fb02c93..dcbf2606c438 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -59,7 +59,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, return -ENOMEM; for (i = 0; i < skip_low + skip_high; i++) - kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)); + kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); if (random) { j = 0; random_bytes = random32(); @@ -71,22 +71,22 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, random_bytes = random32(); } idx = (random_bytes >> (j * 2)) & 0xF; - kfifo_put(fifo, + kfifo_in(fifo, (unsigned char *) &rarray[idx], sizeof(u32)); rarray[idx] = i; j++; } for (i = 0; i < RANDOM_SIZE; i++) - kfifo_put(fifo, + kfifo_in(fifo, (unsigned char *) &rarray[i], sizeof(u32)); } else for (i = skip_low; i < nr - skip_high; i++) - kfifo_put(fifo, (unsigned char *) &i, sizeof(u32)); + kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); for (i = 0; i < skip_low + skip_high; i++) - kfifo_get_locked(fifo, (unsigned char *) &entry, + kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), fifo_lock); return 0; } @@ -119,7 +119,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) for (i = 16; i < T3_MAX_NUM_QP; i++) if (!(i & rdev_p->qpmask)) - kfifo_put(&rdev_p->rscp->qpid_fifo, + kfifo_in(&rdev_p->rscp->qpid_fifo, (unsigned char *) &i, sizeof(u32)); return 0; } @@ -180,7 +180,7 @@ tpt_err: static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) { u32 entry; - if (kfifo_get_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) + if 
(kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) return entry; else return 0; /* fifo emptry */ @@ -190,7 +190,7 @@ static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, u32 entry) { BUG_ON( - kfifo_put_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) + kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) == 0); } diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c index 38bcedfd9fec..884a569d60a2 100644 --- a/drivers/media/video/meye.c +++ b/drivers/media/video/meye.c @@ -800,7 +800,7 @@ again: return IRQ_HANDLED; if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) { - if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr, + if (kfifo_out_locked(&meye.grabq, (unsigned char *)&reqnr, sizeof(int), &meye.grabq_lock) != sizeof(int)) { mchip_free_frame(); return IRQ_HANDLED; @@ -811,7 +811,7 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr, + kfifo_in_locked(&meye.doneq, (unsigned char *)&reqnr, sizeof(int), &meye.doneq_lock); wake_up_interruptible(&meye.proc_list); } else { @@ -821,7 +821,7 @@ again: mchip_free_frame(); goto again; } - if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr, + if (kfifo_out_locked(&meye.grabq, (unsigned char *)&reqnr, sizeof(int), &meye.grabq_lock) != sizeof(int)) { mchip_free_frame(); goto again; @@ -832,7 +832,7 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr, + kfifo_in_locked(&meye.doneq, (unsigned char *)&reqnr, sizeof(int), &meye.doneq_lock); wake_up_interruptible(&meye.proc_list); } @@ -935,7 +935,7 @@ static int meyeioc_qbuf_capt(int *nb) mchip_cont_compression_start(); meye.grab_buffer[*nb].state = MEYE_BUF_USING; - kfifo_put_locked(&meye.grabq, (unsigned char *)nb, sizeof(int), + kfifo_in_locked(&meye.grabq, (unsigned char *)nb, sizeof(int), &meye.grabq_lock); mutex_unlock(&meye.lock); @@ -968,7 +968,7 @@ static int meyeioc_sync(struct file *file, void *fh, int *i) /* fall through */ case MEYE_BUF_DONE: meye.grab_buffer[*i].state = MEYE_BUF_UNUSED; - kfifo_get_locked(&meye.doneq, (unsigned char *)&unused, + kfifo_out_locked(&meye.doneq, (unsigned char *)&unused, sizeof(int), &meye.doneq_lock); } *i = meye.grab_buffer[*i].size; @@ -1456,7 +1456,7 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) buf->flags |= V4L2_BUF_FLAG_QUEUED; buf->flags &= ~V4L2_BUF_FLAG_DONE; meye.grab_buffer[buf->index].state = MEYE_BUF_USING; - kfifo_put_locked(&meye.grabq, (unsigned char *)&buf->index, + kfifo_in_locked(&meye.grabq, (unsigned char *)&buf->index, sizeof(int), &meye.grabq_lock); mutex_unlock(&meye.lock); @@ -1483,7 +1483,7 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) return -EINTR; } - if (!kfifo_get_locked(&meye.doneq, (unsigned char *)&reqnr, + if (!kfifo_out_locked(&meye.doneq, (unsigned char *)&reqnr, sizeof(int), &meye.doneq_lock)) { mutex_unlock(&meye.lock); return -EBUSY; diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 0622104f0a03..2bcfa745524a 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -513,7 +513,8 @@ static int lbs_thread(void *data) spin_lock_irq(&priv->driver_lock); while 
(kfifo_len(&priv->event_fifo)) { u32 event; - kfifo_get(&priv->event_fifo, (unsigned char *) &event, + + kfifo_out(&priv->event_fifo, (unsigned char *) &event, sizeof(event)); spin_unlock_irq(&priv->driver_lock); lbs_process_event(priv, event); @@ -1175,7 +1176,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event) if (priv->psstate == PS_STATE_SLEEP) priv->psstate = PS_STATE_AWAKE; - kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32)); + kfifo_in(&priv->event_fifo, (unsigned char *) &event, sizeof(u32)); wake_up_interruptible(&priv->waitq); diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 13dc7bedcfce..b66029bd75d0 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -1006,7 +1006,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) vdbg_printk(FUJLAPTOP_DBG_TRACE, "Push keycode into ringbuffer [%d]\n", keycode); - status = kfifo_put_locked(&fujitsu_hotkey->fifo, + status = kfifo_in_locked(&fujitsu_hotkey->fifo, (unsigned char *)&keycode, sizeof(keycode), &fujitsu_hotkey->fifo_lock); @@ -1020,7 +1020,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) } } else if (keycode == 0) { while ((status = - kfifo_get_locked( + kfifo_out_locked( &fujitsu_hotkey->fifo, (unsigned char *) &keycode_r, sizeof(keycode_r), diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 36e5dc6fc953..2896ca4cd9ab 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -300,7 +300,7 @@ static void do_sony_laptop_release_key(struct work_struct *work) { struct sony_laptop_keypress kp; - while (kfifo_get_locked(&sony_laptop_input.fifo, (unsigned char *)&kp, + while (kfifo_out_locked(&sony_laptop_input.fifo, (unsigned char *)&kp, sizeof(kp), &sony_laptop_input.fifo_lock) == sizeof(kp)) { msleep(10); @@ -363,7 +363,7 @@ static void sony_laptop_report_input_event(u8 event) /* we emit the scancode so we can always remap the key */ input_event(kp.dev, EV_MSC, MSC_SCAN, event); input_sync(kp.dev); - kfifo_put_locked(&sony_laptop_input.fifo, + kfifo_in_locked(&sony_laptop_input.fifo, (unsigned char *)&kp, sizeof(kp), &sony_laptop_input.fifo_lock); @@ -2130,7 +2130,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, return ret; while (ret < count && - (kfifo_get_locked(&sonypi_compat.fifo, &c, sizeof(c), + (kfifo_out_locked(&sonypi_compat.fifo, &c, sizeof(c), &sonypi_compat.fifo_lock) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; @@ -2310,7 +2310,7 @@ static struct miscdevice sonypi_misc_device = { static void sonypi_compat_report_event(u8 event) { - kfifo_put_locked(&sonypi_compat.fifo, (unsigned char *)&event, + kfifo_in_locked(&sonypi_compat.fifo, (unsigned char *)&event, sizeof(event), &sonypi_compat.fifo_lock); kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_compat.fifo_proc_list); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 5f0c46f43ee1..c28a712fd4db 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task) if (conn->login_task == task) return; - kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*)); + kfifo_in(&session->cmdpool.queue, (void*)&task, sizeof(void*)); if (sc) { task->sc = NULL; @@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, 
BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE); BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED); - if (!kfifo_get(&session->cmdpool.queue, + if (!kfifo_out(&session->cmdpool.queue, (void*)&task, sizeof(void*))) return NULL; } @@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn, { struct iscsi_task *task; - if (!kfifo_get(&conn->session->cmdpool.queue, + if (!kfifo_out(&conn->session->cmdpool.queue, (void *) &task, sizeof(void *))) return NULL; @@ -2469,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size) q->max = i; goto enomem; } - kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*)); + kfifo_in(&q->queue, (void*)&q->pool[i], sizeof(void*)); } if (items) { @@ -2819,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, /* allocate login_task used for the login/text sequences */ spin_lock_bh(&session->lock); - if (!kfifo_get(&session->cmdpool.queue, + if (!kfifo_out(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*))) { spin_unlock_bh(&session->lock); @@ -2839,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, return cls_conn; login_task_data_alloc_fail: - kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, + kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); login_task_alloc_fail: iscsi_destroy_conn(cls_conn); @@ -2902,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); kfree(conn->persistent_address); - kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, + kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); if (session->leadconn == conn) session->leadconn = NULL; diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index c0be926637b1..d51ffeca2ec9 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task) return; /* flush task's r2t queues */ - while (kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { - kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + while (kfifo_out(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { + kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n"); } r2t = tcp_task->r2t; if (r2t != NULL) { - kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); tcp_task->r2t = NULL; } @@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) return 0; } - rc = kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); + rc = kfifo_out(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); if (!rc) { iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. 
" "Target has sent more R2Ts than it " @@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) if (r2t->data_length == 0) { iscsi_conn_printk(KERN_ERR, conn, "invalid R2T with zero data len\n"); - kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) "invalid R2T with data len %u at offset %u " "and total length %d\n", r2t->data_length, r2t->data_offset, scsi_out(task->sc)->length); - kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) r2t->sent = 0; tcp_task->exp_datasn = r2tsn + 1; - kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); + kfifo_in(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); conn->r2t_pdus_cnt++; iscsi_requeue_task(task); @@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) if (r2t->data_length <= r2t->sent) { ISCSI_DBG_TCP(task->conn, " done with r2t %p\n", r2t); - kfifo_put(&tcp_task->r2tpool.queue, + kfifo_in(&tcp_task->r2tpool.queue, (void *)&tcp_task->r2t, sizeof(void *)); tcp_task->r2t = r2t = NULL; @@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) } if (r2t == NULL) { - kfifo_get(&tcp_task->r2tqueue, + kfifo_out(&tcp_task->r2tqueue, (void *)&tcp_task->r2t, sizeof(void *)); r2t = tcp_task->r2t; } diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c index 975e448cfcb9..8424b8606efb 100644 --- a/drivers/scsi/libsrp.c +++ b/drivers/scsi/libsrp.c @@ -61,7 +61,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max, kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *)); for (i = 0, iue = q->items; i < max; i++) { - kfifo_put(&q->queue, (void *) &iue, sizeof(void *)); + kfifo_in(&q->queue, (void *) &iue, sizeof(void *)); iue->sbuf = ring[i]; iue++; } @@ -163,7 +163,7 @@ struct iu_entry *srp_iu_get(struct srp_target *target) { struct iu_entry *iue = NULL; - kfifo_get_locked(&target->iu_queue.queue, (void *) &iue, + kfifo_out_locked(&target->iu_queue.queue, (void *) &iue, sizeof(void *), &target->iu_queue.lock); if (!iue) return iue; @@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(srp_iu_get); void srp_iu_put(struct iu_entry *iue) { - kfifo_put_locked(&iue->target->iu_queue.queue, (void *) &iue, + kfifo_in_locked(&iue->target->iu_queue.queue, (void *) &iue, sizeof(void *), &iue->target->iu_queue.lock); } EXPORT_SYMBOL_GPL(srp_iu_put); diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h index 96aa787f208f..72dae1c5ab38 100644 --- a/drivers/usb/host/fhci.h +++ b/drivers/usb/host/fhci.h @@ -510,14 +510,14 @@ static inline unsigned int cq_howmany(struct kfifo *kfifo) static inline int cq_put(struct kfifo *kfifo, void *p) { - return kfifo_put(kfifo, (void *)&p, sizeof(p)); + return kfifo_in(kfifo, (void *)&p, sizeof(p)); } static inline void *cq_get(struct kfifo *kfifo) { void *p = NULL; - kfifo_get(kfifo, (void *)&p, sizeof(p)); + kfifo_out(kfifo, (void *)&p, sizeof(p)); return p; } diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index d0a2e464cacd..b0f1183755c9 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct 
usb_serial_port *port) return 0; data = port->write_urb->transfer_buffer; - count = kfifo_get_locked(port->write_fifo, data, port->bulk_out_size, &port->lock); + count = kfifo_out_locked(port->write_fifo, data, port->bulk_out_size, &port->lock); usb_serial_debug_data(debug, &port->dev, __func__, count, data); /* set up our urb */ @@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty, return usb_serial_multi_urb_write(tty, port, buf, count); - count = kfifo_put_locked(port->write_fifo, buf, count, &port->lock); + count = kfifo_in_locked(port->write_fifo, buf, count, &port->lock); result = usb_serial_generic_write_start(port); if (result >= 0) diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index a893acda3964..1b59c4a0e85f 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -19,6 +19,25 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * */ + +/* + * Howto porting drivers to the new generic fifo API: + * + * - Modify the declaration of the "struct kfifo *" object into a + * in-place "struct kfifo" object + * - Init the in-place object with kfifo_alloc() or kfifo_init() + * Note: The address of the in-place "struct kfifo" object must be + * passed as the first argument to this functions + * - Replace the use of __kfifo_put into kfifo_in and __kfifo_get + * into kfifo_out + * - Replace the use of kfifo_put into kfifo_in_locked and kfifo_get + * into kfifo_out_locked + * Note: the spinlock pointer formerly passed to kfifo_init/kfifo_alloc + * must be passed now to the kfifo_in_locked and kfifo_out_locked + * as the last parameter. + * - All formerly name __kfifo_* functions has been renamed into kfifo_* + */ + #ifndef _LINUX_KFIFO_H #define _LINUX_KFIFO_H @@ -37,10 +56,10 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask); extern void kfifo_free(struct kfifo *fifo); -extern unsigned int kfifo_put(struct kfifo *fifo, - const unsigned char *buffer, unsigned int len); -extern unsigned int kfifo_get(struct kfifo *fifo, - unsigned char *buffer, unsigned int len); +extern __must_check unsigned int kfifo_in(struct kfifo *fifo, + const unsigned char *from, unsigned int len); +extern __must_check unsigned int kfifo_out(struct kfifo *fifo, + unsigned char *to, unsigned int len); /** * kfifo_reset - removes the entire FIFO contents @@ -65,7 +84,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo) } /** - * kfifo_put_locked - puts some data into the FIFO using a spinlock for locking + * kfifo_in_locked - puts some data into the FIFO using a spinlock for locking * @fifo: the fifo to be used. * @from: the data to be added. * @n: the length of the data to be added. @@ -75,7 +94,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo) * the FIFO depending on the free space, and returns the number of * bytes copied. 
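The porting recipe added to the header is easier to follow as a before/after pair. The fragment below is invented for illustration (it is not one of the drivers converted in this patch), but it mirrors the pattern every hunk above applies:

#include <linux/kfifo.h>
#include <linux/spinlock.h>

struct demo_dev {
	struct kfifo	fifo;
	spinlock_t	lock;
};

/*
 * Old convention removed by this series (shown for comparison only):
 *
 *	kfifo_alloc(&dev->fifo, 256, GFP_KERNEL, &dev->lock);
 *	kfifo_put(&dev->fifo, buf, len);	locked via the embedded lock
 *	__kfifo_put(&dev->fifo, buf, len);	lockless
 */

/* New convention after this patch: the lock travels with each call. */
static unsigned int demo_send(struct demo_dev *dev,
			      const unsigned char *buf, unsigned int len)
{
	return kfifo_in_locked(&dev->fifo, buf, len, &dev->lock);
}

static unsigned int demo_recv(struct demo_dev *dev,
			      unsigned char *buf, unsigned int len)
{
	return kfifo_out_locked(&dev->fifo, buf, len, &dev->lock);
}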
*/ -static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, +static inline __must_check unsigned int kfifo_in_locked(struct kfifo *fifo, const unsigned char *from, unsigned int n, spinlock_t *lock) { unsigned long flags; @@ -83,7 +102,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, spin_lock_irqsave(lock, flags); - ret = kfifo_put(fifo, from, n); + ret = kfifo_in(fifo, from, n); spin_unlock_irqrestore(lock, flags); @@ -91,7 +110,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, } /** - * kfifo_get_locked - gets some data from the FIFO using a spinlock for locking + * kfifo_out_locked - gets some data from the FIFO using a spinlock for locking * @fifo: the fifo to be used. * @to: where the data must be copied. * @n: the size of the destination buffer. @@ -100,7 +119,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, * This function copies at most @len bytes from the FIFO into the * @to buffer and returns the number of copied bytes. */ -static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo, +static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, unsigned char *to, unsigned int n, spinlock_t *lock) { unsigned long flags; @@ -108,7 +127,7 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo, spin_lock_irqsave(lock, flags); - ret = kfifo_get(fifo, to, n); + ret = kfifo_out(fifo, to, n); /* * optimization: if the FIFO is empty, set the indices to 0 diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 963ffde4af1a..d659442e73f2 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -100,20 +100,20 @@ void kfifo_free(struct kfifo *fifo) EXPORT_SYMBOL(kfifo_free); /** - * kfifo_put - puts some data into the FIFO, no locking version + * kfifo_in - puts some data into the FIFO * @fifo: the fifo to be used. - * @buffer: the data to be added. + * @from: the data to be added. * @len: the length of the data to be added. * - * This function copies at most @len bytes from the @buffer into + * This function copies at most @len bytes from the @from buffer into * the FIFO depending on the free space, and returns the number of * bytes copied. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_put(struct kfifo *fifo, - const unsigned char *buffer, unsigned int len) +unsigned int kfifo_in(struct kfifo *fifo, + const unsigned char *from, unsigned int len) { unsigned int l; @@ -128,10 +128,10 @@ unsigned int kfifo_put(struct kfifo *fifo, /* first put the data starting from fifo->in to buffer end */ l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); - memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); + memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), from, l); /* then put the rest (if any) at the beginning of the buffer */ - memcpy(fifo->buffer, buffer + l, len - l); + memcpy(fifo->buffer, from + l, len - l); /* * Ensure that we add the bytes to the kfifo -before- @@ -144,22 +144,22 @@ unsigned int kfifo_put(struct kfifo *fifo, return len; } -EXPORT_SYMBOL(kfifo_put); +EXPORT_SYMBOL(kfifo_in); /** - * kfifo_get - gets some data from the FIFO, no locking version + * kfifo_out - gets some data from the FIFO * @fifo: the fifo to be used. - * @buffer: where the data must be copied. + * @to: where the data must be copied. * @len: the size of the destination buffer. 
* * This function copies at most @len bytes from the FIFO into the - * @buffer and returns the number of copied bytes. + * @to buffer and returns the number of copied bytes. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_get(struct kfifo *fifo, - unsigned char *buffer, unsigned int len) +unsigned int kfifo_out(struct kfifo *fifo, + unsigned char *to, unsigned int len) { unsigned int l; @@ -174,10 +174,10 @@ unsigned int kfifo_get(struct kfifo *fifo, /* first get the data from fifo->out until the end of the buffer */ l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); - memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); + memcpy(to, fifo->buffer + (fifo->out & (fifo->size - 1)), l); /* then get the rest (if any) from the beginning of the buffer */ - memcpy(buffer + l, fifo->buffer, len - l); + memcpy(to + l, fifo->buffer, len - l); /* * Ensure that we remove the bytes from the kfifo -before- @@ -190,4 +190,4 @@ unsigned int kfifo_get(struct kfifo *fifo, return len; } -EXPORT_SYMBOL(kfifo_get); +EXPORT_SYMBOL(kfifo_out); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 9ef36849edd7..a1362dc8abb0 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); + kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); wake_up(&dccpw.wait); } @@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, if (error) goto out_free; - cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); + cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: -- cgit v1.2.3 From 9842c38e917636fa7dc6b88aff17a8f1fd7f0cc0 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:29 -0800 Subject: kfifo: fix warn_unused_result Fix the "ignoring return value of '...', declared with attribute warn_unused_result" compiler warning in several users of the new kfifo API. It removes the __must_check attribute from kfifo_in() and kfifo_in_locked() which must not necessary performed. Fix the allocation bug in the nozomi driver file, by moving out the kfifo_alloc from the interrupt handler into the probe function. Fix the kfifo_out() and kfifo_out_locked() users to handle a unexpected end of fifo. 
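In concrete terms, the pattern this fix enforces is "never assume the fifo returned as many bytes as were requested". A minimal, hypothetical consumer illustrating it (not taken from the drivers patched below):

#include <linux/kfifo.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/errno.h>

static int demo_pop_event(struct kfifo *fifo, spinlock_t *lock, u32 *event)
{
	unsigned int n;

	n = kfifo_out_locked(fifo, (unsigned char *)event, sizeof(*event), lock);
	if (n != sizeof(*event))
		return -EAGAIN;		/* short read: fifo drained under us */

	return 0;
}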
Signed-off-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/nozomi.c | 31 +++++++++++++++++++++++++---- drivers/infiniband/hw/cxgb3/cxio_resource.c | 5 +++-- drivers/media/video/meye.c | 5 +++-- drivers/net/wireless/libertas/main.c | 6 ++++-- drivers/scsi/libiscsi_tcp.c | 9 +++++++-- drivers/scsi/libsrp.c | 7 +++++-- include/linux/kfifo.h | 4 ++-- 7 files changed, 51 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index 9ef243429014..7d73cd430340 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -685,8 +685,6 @@ static int nozomi_read_config_table(struct nozomi *dc) dump_table(dc); for (i = PORT_MDM; i < MAX_PORT; i++) { - kfifo_alloc(&dc->port[i].fifo_ul, - FIFO_BUFFER_SIZE_UL, GFP_ATOMIC); memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl)); memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul)); } @@ -1433,6 +1431,16 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev, goto err_free_sbuf; } + for (i = PORT_MDM; i < MAX_PORT; i++) { + if (kfifo_alloc(&dc->port[i].fifo_ul, + FIFO_BUFFER_SIZE_UL, GFP_ATOMIC)) { + dev_err(&pdev->dev, + "Could not allocate kfifo buffer\n"); + ret = -ENOMEM; + goto err_free_kfifo; + } + } + spin_lock_init(&dc->spin_mutex); nozomi_setup_private_data(dc); @@ -1445,7 +1453,7 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev, NOZOMI_NAME, dc); if (unlikely(ret)) { dev_err(&pdev->dev, "can't request irq %d\n", pdev->irq); - goto err_free_sbuf; + goto err_free_kfifo; } DBG1("base_addr: %p", dc->base_addr); @@ -1464,13 +1472,28 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev, dc->state = NOZOMI_STATE_ENABLED; for (i = 0; i < MAX_PORT; i++) { + struct device *tty_dev; + mutex_init(&dc->port[i].tty_sem); tty_port_init(&dc->port[i].port); - tty_register_device(ntty_driver, dc->index_start + i, + tty_dev = tty_register_device(ntty_driver, dc->index_start + i, &pdev->dev); + + if (IS_ERR(tty_dev)) { + ret = PTR_ERR(tty_dev); + dev_err(&pdev->dev, "Could not allocate tty?\n"); + goto err_free_tty; + } } + return 0; +err_free_tty: + for (i = dc->index_start; i < dc->index_start + MAX_PORT; ++i) + tty_unregister_device(ntty_driver, i); +err_free_kfifo: + for (i = 0; i < MAX_PORT; i++) + kfifo_free(&dc->port[i].fifo_ul); err_free_sbuf: kfree(dc->send_buf); iounmap(dc->base_addr); diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index dcbf2606c438..31f9201b2980 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -86,8 +86,9 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); for (i = 0; i < skip_low + skip_high; i++) - kfifo_out_locked(fifo, (unsigned char *) &entry, - sizeof(u32), fifo_lock); + if (kfifo_out_locked(fifo, (unsigned char *) &entry, + sizeof(u32), fifo_lock) != sizeof(u32)) + break; return 0; } diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c index 884a569d60a2..b421858ccf90 100644 --- a/drivers/media/video/meye.c +++ b/drivers/media/video/meye.c @@ -968,8 +968,9 @@ static int meyeioc_sync(struct file *file, void *fh, int *i) /* fall through */ case MEYE_BUF_DONE: meye.grab_buffer[*i].state = MEYE_BUF_UNUSED; - kfifo_out_locked(&meye.doneq, (unsigned char *)&unused, - sizeof(int), &meye.doneq_lock); + if (kfifo_out_locked(&meye.doneq, (unsigned char *)&unused, + sizeof(int), &meye.doneq_lock) != sizeof(int)) 
+ break; } *i = meye.grab_buffer[*i].size; mutex_unlock(&meye.lock); diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 2bcfa745524a..c2975c8e2f21 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -514,8 +514,10 @@ static int lbs_thread(void *data) while (kfifo_len(&priv->event_fifo)) { u32 event; - kfifo_out(&priv->event_fifo, (unsigned char *) &event, - sizeof(event)); + if (kfifo_out(&priv->event_fifo, + (unsigned char *) &event, sizeof(event)) != + sizeof(event)) + break; spin_unlock_irq(&priv->driver_lock); lbs_process_event(priv, event); spin_lock_irq(&priv->driver_lock); diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index d51ffeca2ec9..db6856c138fc 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -990,8 +990,13 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) } if (r2t == NULL) { - kfifo_out(&tcp_task->r2tqueue, - (void *)&tcp_task->r2t, sizeof(void *)); + if (kfifo_out(&tcp_task->r2tqueue, + (void *)&tcp_task->r2t, sizeof(void *)) != + sizeof(void *)) { + WARN_ONCE(1, "unexpected fifo state"); + r2t = NULL; + } + r2t = tcp_task->r2t; } spin_unlock_bh(&session->lock); diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c index 8424b8606efb..ab19b3b4be52 100644 --- a/drivers/scsi/libsrp.c +++ b/drivers/scsi/libsrp.c @@ -163,8 +163,11 @@ struct iu_entry *srp_iu_get(struct srp_target *target) { struct iu_entry *iue = NULL; - kfifo_out_locked(&target->iu_queue.queue, (void *) &iue, - sizeof(void *), &target->iu_queue.lock); + if (kfifo_out_locked(&target->iu_queue.queue, (void *) &iue, + sizeof(void *), &target->iu_queue.lock) != sizeof(void *)) { + WARN_ONCE(1, "unexpected fifo state"); + return NULL; + } if (!iue) return iue; iue->target = target; diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 1b59c4a0e85f..5ed2565c89b6 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -56,7 +56,7 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask); extern void kfifo_free(struct kfifo *fifo); -extern __must_check unsigned int kfifo_in(struct kfifo *fifo, +extern unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from, unsigned int len); extern __must_check unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len); @@ -94,7 +94,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo) * the FIFO depending on the free space, and returns the number of * bytes copied. 
*/ -static inline __must_check unsigned int kfifo_in_locked(struct kfifo *fifo, +static inline unsigned int kfifo_in_locked(struct kfifo *fifo, const unsigned char *from, unsigned int n, spinlock_t *lock) { unsigned long flags; -- cgit v1.2.3 From 37bdfbbfaab47811fcec84dff23c4e8da1a09f9e Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:30 -0800 Subject: kfifo: add DEFINE_KFIFO and friends, add very tiny functions Add DECLARE_KFIFO - macro to declare a kfifo and the associated buffer inside a struct Add INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO Add DEFINE_KFIFO - macro to define and initialize a kfifo as a global or local object Add kfifo_size() - returns the size of the fifo in bytes Add kfifo_is_empty() - returns true if the fifo is empty Add kfifo_is_full() - returns true if the fifo is full Add kfifo_avail() - returns the number of bytes available in the FIFO Do some code cleanup Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 94 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 5ed2565c89b6..dd53eed3e2af 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -51,6 +51,60 @@ struct kfifo { unsigned int out; /* data is extracted from off. (out % size) */ }; +/* + * Macros for declaration and initialization of the kfifo datatype + */ + +/* helper macro */ +#define __kfifo_initializer(s, b) \ + (struct kfifo) { \ + .size = s, \ + .in = 0, \ + .out = 0, \ + .buffer = b \ + } + +/** + * DECLARE_KFIFO - macro to declare a kfifo and the associated buffer + * @name: name of the declared kfifo datatype + * @size: size of the fifo buffer + * + * Note: the macro can be used inside struct or union declaration + * Note: the macro creates two objects: + * A kfifo object with the given name and a buffer for the kfifo + * object named name##kfifo_buffer + */ +#define DECLARE_KFIFO(name, size) \ +union { \ + struct kfifo name; \ + unsigned char name##kfifo_buffer[size + sizeof(struct kfifo)]; \ +} + +/** + * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO + * @name: name of the declared kfifo datatype + * @size: size of the fifo buffer + */ +#define INIT_KFIFO(name) \ + name = __kfifo_initializer(sizeof(name##kfifo_buffer) - \ + sizeof(struct kfifo), name##kfifo_buffer) + +/** + * DEFINE_KFIFO - macro to define and initialize a kfifo + * @name: name of the declared kfifo datatype + * @size: size of the fifo buffer + * + * Note: the macro can be used for global and local kfifo data type variables + * Note: the macro creates two objects: + * A kfifo object with the given name and a buffer for the kfifo + * object named name##kfifo_buffer + */ +#define DEFINE_KFIFO(name, size) \ + unsigned char name##kfifo_buffer[size]; \ + struct kfifo name = __kfifo_initializer(size, name##kfifo_buffer) + +#undef __kfifo_initializer + extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size); extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, @@ -70,6 +124,15 @@ static inline void kfifo_reset(struct kfifo *fifo) fifo->in = fifo->out = 0; } +/** + * kfifo_size - returns the size of the fifo in bytes + * @fifo: the fifo to be used. 
+ */ +static inline __must_check unsigned int kfifo_size(struct kfifo *fifo) +{ + return fifo->size; +} + /** * kfifo_len - returns the number of used bytes in the FIFO * @fifo: the fifo to be used. @@ -83,6 +146,33 @@ static inline unsigned int kfifo_len(struct kfifo *fifo) return fifo->in - out; } +/** + * kfifo_is_empty - returns true if the fifo is empty + * @fifo: the fifo to be used. + */ +static inline __must_check int kfifo_is_empty(struct kfifo *fifo) +{ + return fifo->in == fifo->out; +} + +/** + * kfifo_is_full - returns true if the fifo is full + * @fifo: the fifo to be used. + */ +static inline __must_check int kfifo_is_full(struct kfifo *fifo) +{ + return kfifo_len(fifo) == kfifo_size(fifo); +} + +/** + * kfifo_avail - returns the number of bytes available in the FIFO + * @fifo: the fifo to be used. + */ +static inline __must_check unsigned int kfifo_avail(struct kfifo *fifo) +{ + return kfifo_size(fifo) - kfifo_len(fifo); +} + /** * kfifo_in_locked - puts some data into the FIFO using a spinlock for locking * @fifo: the fifo to be used. @@ -133,8 +223,8 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, * optimization: if the FIFO is empty, set the indices to 0 * so we don't wrap the next time */ - if (fifo->in == fifo->out) - fifo->in = fifo->out = 0; + if (kfifo_is_empty(fifo)) + kfifo_reset(fifo); spin_unlock_irqrestore(lock, flags); -- cgit v1.2.3 From a121f24accac1600bf5b6fb1e12eeabdfed7cb1a Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:31 -0800 Subject: kfifo: add kfifo_skip, kfifo_from_user and kfifo_to_user Add kfifo_reset_out() for save lockless discard the fifo output Add kfifo_skip() to skip a number of output bytes Add kfifo_from_user() to copy user space data into the fifo Add kfifo_to_user() to copy fifo data to user space Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 47 +++++++++++++++++ kernel/kfifo.c | 139 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 170 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index dd53eed3e2af..d3230fb08bc7 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -124,6 +124,16 @@ static inline void kfifo_reset(struct kfifo *fifo) fifo->in = fifo->out = 0; } +/** + * kfifo_reset_out - skip FIFO contents + * @fifo: the fifo to be emptied. + */ +static inline void kfifo_reset_out(struct kfifo *fifo) +{ + smp_mb(); + fifo->out = fifo->in; +} + /** * kfifo_size - returns the size of the fifo in bytes * @fifo: the fifo to be used. 
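Taken together with the helpers introduced in the previous commit (DEFINE_KFIFO, kfifo_avail, kfifo_is_empty) and kfifo_reset_out() added just above, the interface can be exercised roughly as follows. This is only an illustrative sketch, not part of any patch: the fifo name, size and function names are made up here, and the buffer size must be a power of two because the implementation masks offsets with size - 1.

	#include <linux/kfifo.h>

	/* 128-byte fifo defined at file scope; 128 is a power of two */
	DEFINE_KFIFO(example_fifo, 128);

	static unsigned int example_put(const unsigned char *buf, unsigned int len)
	{
		/* all-or-nothing: kfifo_in() would otherwise copy only what fits */
		if (kfifo_avail(&example_fifo) < len)
			return 0;	/* nothing queued, caller may retry later */
		return kfifo_in(&example_fifo, buf, len);
	}

	static unsigned int example_get(unsigned char *buf, unsigned int len)
	{
		if (kfifo_is_empty(&example_fifo))
			return 0;
		/* the return value is the number of bytes actually copied */
		return kfifo_out(&example_fifo, buf, len);
	}

	static void example_flush(void)
	{
		/* discard anything queued but not yet consumed */
		kfifo_reset_out(&example_fifo);
	}

As with the rest of the kfifo API, a single concurrent reader and a single concurrent writer need no extra locking; anything beyond that still requires the *_locked variants or caller-side locking.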
@@ -231,4 +241,41 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, return ret; } +extern void kfifo_skip(struct kfifo *fifo, unsigned int len); + +extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int n); + +extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int n); + +/** + * __kfifo_add_out internal helper function for updating the out offset + */ +static inline void __kfifo_add_out(struct kfifo *fifo, + unsigned int off) +{ + smp_mb(); + fifo->out += off; +} + +/** + * __kfifo_add_in internal helper function for updating the in offset + */ +static inline void __kfifo_add_in(struct kfifo *fifo, + unsigned int off) +{ + smp_wmb(); + fifo->in += off; +} + +/** + * __kfifo_off internal helper function for calculating the index of a + * given offeset + */ +static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off) +{ + return off & (fifo->size - 1); +} + #endif diff --git a/kernel/kfifo.c b/kernel/kfifo.c index d659442e73f2..2a78425ef67f 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -26,6 +26,7 @@ #include #include #include +#include static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size) @@ -99,6 +100,21 @@ void kfifo_free(struct kfifo *fifo) } EXPORT_SYMBOL(kfifo_free); +/** + * kfifo_skip - skip output data + * @fifo: the fifo to be used. + * @len: number of bytes to skip + */ +void kfifo_skip(struct kfifo *fifo, unsigned int len) +{ + if (len < kfifo_len(fifo)) { + __kfifo_add_out(fifo, len); + return; + } + kfifo_reset_out(fifo); +} +EXPORT_SYMBOL(kfifo_skip); + /** * kfifo_in - puts some data into the FIFO * @fifo: the fifo to be used. @@ -115,6 +131,7 @@ EXPORT_SYMBOL(kfifo_free); unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from, unsigned int len) { + unsigned int off; unsigned int l; len = min(len, fifo->size - fifo->in + fifo->out); @@ -126,21 +143,16 @@ unsigned int kfifo_in(struct kfifo *fifo, smp_mb(); + off = __kfifo_off(fifo, fifo->in); + /* first put the data starting from fifo->in to buffer end */ - l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); - memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), from, l); + l = min(len, fifo->size - off); + memcpy(fifo->buffer + off, from, l); /* then put the rest (if any) at the beginning of the buffer */ memcpy(fifo->buffer, from + l, len - l); - /* - * Ensure that we add the bytes to the kfifo -before- - * we update the fifo->in index. - */ - - smp_wmb(); - - fifo->in += len; + __kfifo_add_in(fifo, len); return len; } @@ -161,6 +173,7 @@ EXPORT_SYMBOL(kfifo_in); unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len) { + unsigned int off; unsigned int l; len = min(len, fifo->in - fifo->out); @@ -172,22 +185,116 @@ unsigned int kfifo_out(struct kfifo *fifo, smp_rmb(); + off = __kfifo_off(fifo, fifo->out); + /* first get the data from fifo->out until the end of the buffer */ - l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); - memcpy(to, fifo->buffer + (fifo->out & (fifo->size - 1)), l); + l = min(len, fifo->size - off); + memcpy(to, fifo->buffer + off, l); /* then get the rest (if any) from the beginning of the buffer */ memcpy(to + l, fifo->buffer, len - l); + __kfifo_add_out(fifo, len); + + return len; +} +EXPORT_SYMBOL(kfifo_out); + +/** + * kfifo_from_user - puts some data from user space into the FIFO + * @fifo: the fifo to be used. + * @from: pointer to the data to be added. 
+ * @len: the length of the data to be added. + * + * This function copies at most @len bytes from the @from into the + * FIFO depending and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int len) +{ + unsigned int off; + unsigned int l; + int ret; + + len = min(len, fifo->size - fifo->in + fifo->out); + /* - * Ensure that we remove the bytes from the kfifo -before- - * we update the fifo->out index. + * Ensure that we sample the fifo->out index -before- we + * start putting bytes into the kfifo. */ smp_mb(); - fifo->out += len; + off = __kfifo_off(fifo, fifo->in); + + /* first put the data starting from fifo->in to buffer end */ + l = min(len, fifo->size - off); + ret = copy_from_user(fifo->buffer + off, from, l); + + if (unlikely(ret)) + return l - ret; + + /* then put the rest (if any) at the beginning of the buffer */ + ret = copy_from_user(fifo->buffer, from + l, len - l); + + if (unlikely(ret)) + return len - ret; + + __kfifo_add_in(fifo, len); return len; } -EXPORT_SYMBOL(kfifo_out); +EXPORT_SYMBOL(kfifo_from_user); + +/** + * kfifo_to_user - gets data from the FIFO and write it to user space + * @fifo: the fifo to be used. + * @to: where the data must be copied. + * @len: the size of the destination buffer. + * + * This function copies at most @len bytes from the FIFO into the + * @to buffer and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int len) +{ + unsigned int off; + unsigned int l; + int ret; + + len = min(len, fifo->in - fifo->out); + + /* + * Ensure that we sample the fifo->in index -before- we + * start removing bytes from the kfifo. 
+ */ + + smp_rmb(); + + off = __kfifo_off(fifo, fifo->out); + + /* first get the data from fifo->out until the end of the buffer */ + l = min(len, fifo->size - off); + ret = copy_to_user(to, fifo->buffer + off, l); + + if (unlikely(ret)) + return l - ret; + + /* then get the rest (if any) from the beginning of the buffer */ + ret = copy_to_user(to + l, fifo->buffer, len - l); + + if (unlikely(ret)) + return len - ret; + + __kfifo_add_out(fifo, len); + + return len; +} +EXPORT_SYMBOL(kfifo_to_user); + -- cgit v1.2.3 From 86d4880313603810901f639ccb5c88ff13d4ad3c Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:32 -0800 Subject: kfifo: add record handling functions Add kfifo_in_rec() - puts some record data into the FIFO Add kfifo_out_rec() - gets some record data from the FIFO Add kfifo_from_user_rec() - puts some data from user space into the FIFO Add kfifo_to_user_rec() - gets data from the FIFO and write it to user space Add kfifo_peek_rec() - gets the size of the next FIFO record field Add kfifo_skip_rec() - skip the next fifo out record Add kfifo_avail_rec() - determinate the number of bytes available in a record FIFO Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 330 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/kfifo.c | 286 +++++++++++++++++++++++++++++-------------- 2 files changed, 523 insertions(+), 93 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index d3230fb08bc7..486e8ad3bb50 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -278,4 +278,334 @@ static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off) return off & (fifo->size - 1); } +/** + * __kfifo_peek_n internal helper function for determinate the length of + * the next record in the fifo + */ +static inline unsigned int __kfifo_peek_n(struct kfifo *fifo, + unsigned int recsize) +{ +#define __KFIFO_GET(fifo, off, shift) \ + ((fifo)->buffer[__kfifo_off((fifo), (fifo)->out+(off))] << (shift)) + + unsigned int l; + + l = __KFIFO_GET(fifo, 0, 0); + + if (--recsize) + l |= __KFIFO_GET(fifo, 1, 8); + + return l; +#undef __KFIFO_GET +} + +/** + * __kfifo_poke_n internal helper function for storing the length of + * the next record into the fifo + */ +static inline void __kfifo_poke_n(struct kfifo *fifo, + unsigned int recsize, unsigned int n) +{ +#define __KFIFO_PUT(fifo, off, val, shift) \ + ( \ + (fifo)->buffer[__kfifo_off((fifo), (fifo)->in+(off))] = \ + (unsigned char)((val) >> (shift)) \ + ) + + __KFIFO_PUT(fifo, 0, n, 0); + + if (--recsize) + __KFIFO_PUT(fifo, 1, n, 8); +#undef __KFIFO_PUT +} + +/** + * __kfifo_in_... 
internal functions for put date into the fifo + * do not call it directly, use kfifo_in_rec() instead + */ +extern unsigned int __kfifo_in_n(struct kfifo *fifo, + const void *from, unsigned int n, unsigned int recsize); + +extern unsigned int __kfifo_in_generic(struct kfifo *fifo, + const void *from, unsigned int n, unsigned int recsize); + +static inline unsigned int __kfifo_in_rec(struct kfifo *fifo, + const void *from, unsigned int n, unsigned int recsize) +{ + unsigned int ret; + + ret = __kfifo_in_n(fifo, from, n, recsize); + + if (likely(ret == 0)) { + if (recsize) + __kfifo_poke_n(fifo, recsize, n); + __kfifo_add_in(fifo, n + recsize); + } + return ret; +} + +/** + * kfifo_in_rec - puts some record data into the FIFO + * @fifo: the fifo to be used. + * @from: the data to be added. + * @n: the length of the data to be added. + * @recsize: size of record field + * + * This function copies @n bytes from the @from into the FIFO and returns + * the number of bytes which cannot be copied. + * A returned value greater than the @n value means that the record doesn't + * fit into the buffer. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +static inline __must_check unsigned int kfifo_in_rec(struct kfifo *fifo, + void *from, unsigned int n, unsigned int recsize) +{ + if (!__builtin_constant_p(recsize)) + return __kfifo_in_generic(fifo, from, n, recsize); + return __kfifo_in_rec(fifo, from, n, recsize); +} + +/** + * __kfifo_out_... internal functions for get date from the fifo + * do not call it directly, use kfifo_out_rec() instead + */ +extern unsigned int __kfifo_out_n(struct kfifo *fifo, + void *to, unsigned int reclen, unsigned int recsize); + +extern unsigned int __kfifo_out_generic(struct kfifo *fifo, + void *to, unsigned int n, + unsigned int recsize, unsigned int *total); + +static inline unsigned int __kfifo_out_rec(struct kfifo *fifo, + void *to, unsigned int n, unsigned int recsize, + unsigned int *total) +{ + unsigned int l; + + if (!recsize) { + l = n; + if (total) + *total = l; + } else { + l = __kfifo_peek_n(fifo, recsize); + if (total) + *total = l; + if (n < l) + return l; + } + + return __kfifo_out_n(fifo, to, l, recsize); +} + +/** + * kfifo_out_rec - gets some record data from the FIFO + * @fifo: the fifo to be used. + * @to: where the data must be copied. + * @n: the size of the destination buffer. + * @recsize: size of record field + * @total: pointer where the total number of to copied bytes should stored + * + * This function copies at most @n bytes from the FIFO to @to and returns the + * number of bytes which cannot be copied. + * A returned value greater than the @n value means that the record doesn't + * fit into the @to buffer. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +static inline __must_check unsigned int kfifo_out_rec(struct kfifo *fifo, + void *to, unsigned int n, unsigned int recsize, + unsigned int *total) + +{ + if (!__builtin_constant_p(recsize)) + return __kfifo_out_generic(fifo, to, n, recsize, total); + return __kfifo_out_rec(fifo, to, n, recsize, total); +} + +/** + * __kfifo_from_user_... internal functions for transfer from user space into + * the fifo. 
do not call it directly, use kfifo_from_user_rec() instead + */ +extern unsigned int __kfifo_from_user_n(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned int recsize); + +extern unsigned int __kfifo_from_user_generic(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned int recsize); + +static inline unsigned int __kfifo_from_user_rec(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned int recsize) +{ + unsigned int ret; + + ret = __kfifo_from_user_n(fifo, from, n, recsize); + + if (likely(ret == 0)) { + if (recsize) + __kfifo_poke_n(fifo, recsize, n); + __kfifo_add_in(fifo, n + recsize); + } + return ret; +} + +/** + * kfifo_from_user_rec - puts some data from user space into the FIFO + * @fifo: the fifo to be used. + * @from: pointer to the data to be added. + * @n: the length of the data to be added. + * @recsize: size of record field + * + * This function copies @n bytes from the @from into the + * FIFO and returns the number of bytes which cannot be copied. + * + * If the returned value is equal or less the @n value, the copy_from_user() + * functions has failed. Otherwise the record doesn't fit into the buffer. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +static inline __must_check unsigned int kfifo_from_user_rec(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned int recsize) +{ + if (!__builtin_constant_p(recsize)) + return __kfifo_from_user_generic(fifo, from, n, recsize); + return __kfifo_from_user_rec(fifo, from, n, recsize); +} + +/** + * __kfifo_to_user_... internal functions for transfer fifo data into user space + * do not call it directly, use kfifo_to_user_rec() instead + */ +extern unsigned int __kfifo_to_user_n(struct kfifo *fifo, + void __user *to, unsigned int n, unsigned int reclen, + unsigned int recsize); + +extern unsigned int __kfifo_to_user_generic(struct kfifo *fifo, + void __user *to, unsigned int n, unsigned int recsize, + unsigned int *total); + +static inline unsigned int __kfifo_to_user_rec(struct kfifo *fifo, + void __user *to, unsigned int n, + unsigned int recsize, unsigned int *total) +{ + unsigned int l; + + if (!recsize) { + l = n; + if (total) + *total = l; + } else { + l = __kfifo_peek_n(fifo, recsize); + if (total) + *total = l; + if (n < l) + return l; + } + + return __kfifo_to_user_n(fifo, to, n, l, recsize); +} + +/** + * kfifo_to_user_rec - gets data from the FIFO and write it to user space + * @fifo: the fifo to be used. + * @to: where the data must be copied. + * @n: the size of the destination buffer. + * @recsize: size of record field + * @total: pointer where the total number of to copied bytes should stored + * + * This function copies at most @n bytes from the FIFO to the @to. + * In case of an error, the function returns the number of bytes which cannot + * be copied. + * If the returned value is equal or less the @n value, the copy_to_user() + * functions has failed. Otherwise the record doesn't fit into the @to buffer. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. 
+ */ +static inline __must_check unsigned int kfifo_to_user_rec(struct kfifo *fifo, + void __user *to, unsigned int n, unsigned int recsize, + unsigned int *total) +{ + if (!__builtin_constant_p(recsize)) + return __kfifo_to_user_generic(fifo, to, n, recsize, total); + return __kfifo_to_user_rec(fifo, to, n, recsize, total); +} + +/** + * __kfifo_peek_... internal functions for peek into the next fifo record + * do not call it directly, use kfifo_peek_rec() instead + */ +extern unsigned int __kfifo_peek_generic(struct kfifo *fifo, + unsigned int recsize); + +/** + * kfifo_peek_rec - gets the size of the next FIFO record data + * @fifo: the fifo to be used. + * @recsize: size of record field + * + * This function returns the size of the next FIFO record in number of bytes + */ +static inline __must_check unsigned int kfifo_peek_rec(struct kfifo *fifo, + unsigned int recsize) +{ + if (!__builtin_constant_p(recsize)) + return __kfifo_peek_generic(fifo, recsize); + if (!recsize) + return kfifo_len(fifo); + return __kfifo_peek_n(fifo, recsize); +} + +/** + * __kfifo_skip_... internal functions for skip the next fifo record + * do not call it directly, use kfifo_skip_rec() instead + */ +extern void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize); + +static inline void __kfifo_skip_rec(struct kfifo *fifo, + unsigned int recsize) +{ + unsigned int l; + + if (recsize) { + l = __kfifo_peek_n(fifo, recsize); + + if (l + recsize <= kfifo_len(fifo)) { + __kfifo_add_out(fifo, l + recsize); + return; + } + } + kfifo_reset_out(fifo); +} + +/** + * kfifo_skip_rec - skip the next fifo out record + * @fifo: the fifo to be used. + * @recsize: size of record field + * + * This function skips the next FIFO record + */ +static inline void kfifo_skip_rec(struct kfifo *fifo, + unsigned int recsize) +{ + if (!__builtin_constant_p(recsize)) + __kfifo_skip_generic(fifo, recsize); + else + __kfifo_skip_rec(fifo, recsize); +} + +/** + * kfifo_avail_rec - returns the number of bytes available in a record FIFO + * @fifo: the fifo to be used. + * @recsize: size of record field + */ +static inline __must_check unsigned int kfifo_avail_rec(struct kfifo *fifo, + unsigned int recsize) +{ + unsigned int l = kfifo_size(fifo) - kfifo_len(fifo); + + return (l > recsize) ? l - recsize : 0; +} + #endif diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 2a78425ef67f..e92d519f93b1 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -115,27 +115,11 @@ void kfifo_skip(struct kfifo *fifo, unsigned int len) } EXPORT_SYMBOL(kfifo_skip); -/** - * kfifo_in - puts some data into the FIFO - * @fifo: the fifo to be used. - * @from: the data to be added. - * @len: the length of the data to be added. - * - * This function copies at most @len bytes from the @from buffer into - * the FIFO depending on the free space, and returns the number of - * bytes copied. - * - * Note that with only one concurrent reader and one concurrent - * writer, you don't need extra locking to use these functions. - */ -unsigned int kfifo_in(struct kfifo *fifo, - const unsigned char *from, unsigned int len) +static inline void __kfifo_in_data(struct kfifo *fifo, + const void *from, unsigned int len, unsigned int off) { - unsigned int off; unsigned int l; - len = min(len, fifo->size - fifo->in + fifo->out); - /* * Ensure that we sample the fifo->out index -before- we * start putting bytes into the kfifo. 
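The record interface declared above stores a one- or two-byte length header in front of each entry (recsize selects the header width, so records are limited to 255 or 65535 bytes). As a rough, hedged illustration only -- the fifo name, function names, sizes and error codes below are invented for the example -- a caller handling variable-length messages might look like:

	#include <linux/errno.h>
	#include <linux/kfifo.h>

	/* 256-byte fifo for variable-length records; 256 is a power of two */
	DEFINE_KFIFO(msg_fifo, 256);

	/* recsize == 1: one-byte length header, records up to 255 bytes */
	static int example_put_msg(void *msg, unsigned int len)
	{
		/* a non-zero return means the record does not fit */
		if (kfifo_in_rec(&msg_fifo, msg, len, 1))
			return -ENOSPC;
		return 0;
	}

	static int example_get_msg(void *buf, unsigned int buflen)
	{
		unsigned int copied;

		if (kfifo_is_empty(&msg_fifo))
			return 0;
		/* peek at the stored length before committing to the copy */
		if (kfifo_peek_rec(&msg_fifo, 1) > buflen)
			return -EINVAL;	/* caller's buffer is too small */
		/* zero return means success; copied holds the record length */
		if (kfifo_out_rec(&msg_fifo, buf, buflen, 1, &copied))
			return -EIO;
		return copied;
	}

Note that kfifo_in_rec() and kfifo_out_rec() signal success with a zero return; a value larger than the passed length indicates a record that does not fit the fifo or the destination buffer.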
@@ -143,7 +127,7 @@ unsigned int kfifo_in(struct kfifo *fifo, smp_mb(); - off = __kfifo_off(fifo, fifo->in); + off = __kfifo_off(fifo, fifo->in + off); /* first put the data starting from fifo->in to buffer end */ l = min(len, fifo->size - off); @@ -151,33 +135,13 @@ unsigned int kfifo_in(struct kfifo *fifo, /* then put the rest (if any) at the beginning of the buffer */ memcpy(fifo->buffer, from + l, len - l); - - __kfifo_add_in(fifo, len); - - return len; } -EXPORT_SYMBOL(kfifo_in); -/** - * kfifo_out - gets some data from the FIFO - * @fifo: the fifo to be used. - * @to: where the data must be copied. - * @len: the size of the destination buffer. - * - * This function copies at most @len bytes from the FIFO into the - * @to buffer and returns the number of copied bytes. - * - * Note that with only one concurrent reader and one concurrent - * writer, you don't need extra locking to use these functions. - */ -unsigned int kfifo_out(struct kfifo *fifo, - unsigned char *to, unsigned int len) +static inline void __kfifo_out_data(struct kfifo *fifo, + void *to, unsigned int len, unsigned int off) { - unsigned int off; unsigned int l; - len = min(len, fifo->in - fifo->out); - /* * Ensure that we sample the fifo->in index -before- we * start removing bytes from the kfifo. @@ -185,7 +149,7 @@ unsigned int kfifo_out(struct kfifo *fifo, smp_rmb(); - off = __kfifo_off(fifo, fifo->out); + off = __kfifo_off(fifo, fifo->out + off); /* first get the data from fifo->out until the end of the buffer */ l = min(len, fifo->size - off); @@ -193,34 +157,14 @@ unsigned int kfifo_out(struct kfifo *fifo, /* then get the rest (if any) from the beginning of the buffer */ memcpy(to + l, fifo->buffer, len - l); - - __kfifo_add_out(fifo, len); - - return len; } -EXPORT_SYMBOL(kfifo_out); -/** - * kfifo_from_user - puts some data from user space into the FIFO - * @fifo: the fifo to be used. - * @from: pointer to the data to be added. - * @len: the length of the data to be added. - * - * This function copies at most @len bytes from the @from into the - * FIFO depending and returns the number of copied bytes. - * - * Note that with only one concurrent reader and one concurrent - * writer, you don't need extra locking to use these functions. - */ -unsigned int kfifo_from_user(struct kfifo *fifo, - const void __user *from, unsigned int len) +static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo, + const void __user *from, unsigned int len, unsigned int off) { - unsigned int off; unsigned int l; int ret; - len = min(len, fifo->size - fifo->in + fifo->out); - /* * Ensure that we sample the fifo->out index -before- we * start putting bytes into the kfifo. @@ -228,29 +172,101 @@ unsigned int kfifo_from_user(struct kfifo *fifo, smp_mb(); - off = __kfifo_off(fifo, fifo->in); + off = __kfifo_off(fifo, fifo->in + off); /* first put the data starting from fifo->in to buffer end */ l = min(len, fifo->size - off); ret = copy_from_user(fifo->buffer + off, from, l); if (unlikely(ret)) - return l - ret; + return ret + len - l; /* then put the rest (if any) at the beginning of the buffer */ - ret = copy_from_user(fifo->buffer, from + l, len - l); + return copy_from_user(fifo->buffer, from + l, len - l); +} + +static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo, + void __user *to, unsigned int len, unsigned int off) +{ + unsigned int l; + int ret; + + /* + * Ensure that we sample the fifo->in index -before- we + * start removing bytes from the kfifo. 
+ */ + + smp_rmb(); + + off = __kfifo_off(fifo, fifo->out + off); + + /* first get the data from fifo->out until the end of the buffer */ + l = min(len, fifo->size - off); + ret = copy_to_user(to, fifo->buffer + off, l); if (unlikely(ret)) - return len - ret; + return ret + len - l; - __kfifo_add_in(fifo, len); + /* then get the rest (if any) from the beginning of the buffer */ + return copy_to_user(to + l, fifo->buffer, len - l); +} +unsigned int __kfifo_in_n(struct kfifo *fifo, + const void *from, unsigned int len, unsigned int recsize) +{ + if (kfifo_avail(fifo) < len + recsize) + return len + 1; + + __kfifo_in_data(fifo, from, len, recsize); + return 0; +} +EXPORT_SYMBOL(__kfifo_in_n); + +/** + * kfifo_in - puts some data into the FIFO + * @fifo: the fifo to be used. + * @from: the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most @len bytes from the @from buffer into + * the FIFO depending on the free space, and returns the number of + * bytes copied. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from, + unsigned int len) +{ + len = min(kfifo_avail(fifo), len); + + __kfifo_in_data(fifo, from, len, 0); + __kfifo_add_in(fifo, len); return len; } -EXPORT_SYMBOL(kfifo_from_user); +EXPORT_SYMBOL(kfifo_in); + +unsigned int __kfifo_in_generic(struct kfifo *fifo, + const void *from, unsigned int len, unsigned int recsize) +{ + return __kfifo_in_rec(fifo, from, len, recsize); +} +EXPORT_SYMBOL(__kfifo_in_generic); + +unsigned int __kfifo_out_n(struct kfifo *fifo, + void *to, unsigned int len, unsigned int recsize) +{ + if (kfifo_len(fifo) < len + recsize) + return len; + + __kfifo_out_data(fifo, to, len, recsize); + __kfifo_add_out(fifo, len + recsize); + return 0; +} +EXPORT_SYMBOL(__kfifo_out_n); /** - * kfifo_to_user - gets data from the FIFO and write it to user space + * kfifo_out - gets some data from the FIFO * @fifo: the fifo to be used. * @to: where the data must be copied. * @len: the size of the destination buffer. @@ -261,40 +277,124 @@ EXPORT_SYMBOL(kfifo_from_user); * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_to_user(struct kfifo *fifo, - void __user *to, unsigned int len) +unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len) { - unsigned int off; - unsigned int l; - int ret; + len = min(kfifo_len(fifo), len); - len = min(len, fifo->in - fifo->out); + __kfifo_out_data(fifo, to, len, 0); + __kfifo_add_out(fifo, len); - /* - * Ensure that we sample the fifo->in index -before- we - * start removing bytes from the kfifo. 
- */ + return len; +} +EXPORT_SYMBOL(kfifo_out); - smp_rmb(); +unsigned int __kfifo_out_generic(struct kfifo *fifo, + void *to, unsigned int len, unsigned int recsize, + unsigned int *total) +{ + return __kfifo_out_rec(fifo, to, len, recsize, total); +} +EXPORT_SYMBOL(__kfifo_out_generic); - off = __kfifo_off(fifo, fifo->out); +unsigned int __kfifo_from_user_n(struct kfifo *fifo, + const void __user *from, unsigned int len, unsigned int recsize) +{ + if (kfifo_avail(fifo) < len + recsize) + return len + 1; - /* first get the data from fifo->out until the end of the buffer */ - l = min(len, fifo->size - off); - ret = copy_to_user(to, fifo->buffer + off, l); + return __kfifo_from_user_data(fifo, from, len, recsize); +} +EXPORT_SYMBOL(__kfifo_from_user_n); - if (unlikely(ret)) - return l - ret; +/** + * kfifo_from_user - puts some data from user space into the FIFO + * @fifo: the fifo to be used. + * @from: pointer to the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most @len bytes from the @from into the + * FIFO depending and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int len) +{ + len = min(kfifo_avail(fifo), len); + len -= __kfifo_from_user_data(fifo, from, len, 0); + __kfifo_add_in(fifo, len); + return len; +} +EXPORT_SYMBOL(kfifo_from_user); - /* then get the rest (if any) from the beginning of the buffer */ - ret = copy_to_user(to + l, fifo->buffer, len - l); +unsigned int __kfifo_from_user_generic(struct kfifo *fifo, + const void __user *from, unsigned int len, unsigned int recsize) +{ + return __kfifo_from_user_rec(fifo, from, len, recsize); +} +EXPORT_SYMBOL(__kfifo_from_user_generic); - if (unlikely(ret)) - return len - ret; +unsigned int __kfifo_to_user_n(struct kfifo *fifo, + void __user *to, unsigned int len, unsigned int reclen, + unsigned int recsize) +{ + unsigned int ret; - __kfifo_add_out(fifo, len); + if (kfifo_len(fifo) < reclen + recsize) + return len; + ret = __kfifo_to_user_data(fifo, to, reclen, recsize); + + if (likely(ret == 0)) + __kfifo_add_out(fifo, reclen + recsize); + + return ret; +} +EXPORT_SYMBOL(__kfifo_to_user_n); + +/** + * kfifo_to_user - gets data from the FIFO and write it to user space + * @fifo: the fifo to be used. + * @to: where the data must be copied. + * @len: the size of the destination buffer. + * + * This function copies at most @len bytes from the FIFO into the + * @to buffer and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. 
+ */ +unsigned int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int len) +{ + len = min(kfifo_len(fifo), len); + len -= __kfifo_to_user_data(fifo, to, len, 0); + __kfifo_add_out(fifo, len); return len; } EXPORT_SYMBOL(kfifo_to_user); +unsigned int __kfifo_to_user_generic(struct kfifo *fifo, + void __user *to, unsigned int len, unsigned int recsize, + unsigned int *total) +{ + return __kfifo_to_user_rec(fifo, to, len, recsize, total); +} +EXPORT_SYMBOL(__kfifo_to_user_generic); + +unsigned int __kfifo_peek_generic(struct kfifo *fifo, unsigned int recsize) +{ + if (recsize == 0) + return kfifo_avail(fifo); + + return __kfifo_peek_n(fifo, recsize); +} +EXPORT_SYMBOL(__kfifo_peek_generic); + +void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize) +{ + __kfifo_skip_rec(fifo, recsize); +} +EXPORT_SYMBOL(__kfifo_skip_generic); + -- cgit v1.2.3 From cc3e1bea5d87635c519da657303690f5538bb4eb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 23 Dec 2009 06:52:08 -0500 Subject: ext4, jbd2: Add barriers for file systems with exernal journals This is a bit complicated because we are trying to optimize when we send barriers to the fs data disk. We could just throw in an extra barrier to the data disk whenever we send a barrier to the journal disk, but that's not always strictly necessary. We only need to send a barrier during a commit when there are data blocks which are must be written out due to an inode written in ordered mode, or if fsync() depends on the commit to force data blocks to disk. Finally, before we drop transactions from the beginning of the journal during a checkpoint operation, we need to guarantee that any blocks that were flushed out to the data disk are firmly on the rust platter before we drop the transaction from the journal. Thanks to Oleg Drokin for pointing out this flaw in ext3/ext4. Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 16 ++++++++++++++-- fs/jbd2/checkpoint.c | 15 +++++++++++++++ fs/jbd2/commit.c | 19 +++++++++++-------- include/linux/jbd2.h | 1 + 4 files changed, 41 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0b22497d92e1..98bd140aad01 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) return ext4_force_commit(inode->i_sb); commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; - if (jbd2_log_start_commit(journal, commit_tid)) + if (jbd2_log_start_commit(journal, commit_tid)) { + /* + * When the journal is on a different device than the + * fs data disk, we need to issue the barrier in + * writeback mode. (In ordered mode, the jbd2 layer + * will take care of issuing the barrier. In + * data=journal, all of the data blocks are written to + * the journal device.) 
+ */ + if (ext4_should_writeback_data(inode) && + (journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(inode->i_sb->s_bdev, NULL); jbd2_log_wait_commit(journal, commit_tid); - else if (journal->j_flags & JBD2_BARRIER) + } else if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); return ret; } diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index ca0f5eb62b20..886849370950 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -22,6 +22,7 @@ #include #include #include +#include #include /* @@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal) journal->j_tail_sequence = first_tid; journal->j_tail = blocknr; spin_unlock(&journal->j_state_lock); + + /* + * If there is an external journal, we need to make sure that + * any data blocks that were recently written out --- perhaps + * by jbd2_log_do_checkpoint() --- are flushed out before we + * drop the transactions from the external journal. It's + * unlikely this will be necessary, especially with a + * appropriately sized journal, but we need this to guarantee + * correctness. Fortunately jbd2_cleanup_journal_tail() + * doesn't get called all that often. + */ + if ((journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(journal->j_fs_dev, NULL); if (!(journal->j_flags & JBD2_ABORT)) jbd2_journal_update_superblock(journal, 1); return 0; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6a10238d2c63..1bc74b6f26d2 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal, ret = err; spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); + commit_transaction->t_flushed_data_blocks = 1; jinode->i_flags &= ~JI_COMMIT_RUNNING; wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } @@ -708,8 +709,17 @@ start_journal_io: } } - /* Done it all: now write the commit record asynchronously. */ + /* + * If the journal is not located on the file system device, + * then we must flush the file system device before we issue + * the commit record + */ + if (commit_transaction->t_flushed_data_blocks && + (journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(journal->j_fs_dev, NULL); + /* Done it all: now write the commit record asynchronously. */ if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { err = journal_submit_commit_record(journal, commit_transaction, @@ -720,13 +730,6 @@ start_journal_io: blkdev_issue_flush(journal->j_dev, NULL); } - /* - * This is the right place to wait for data buffers both for ASYNC - * and !ASYNC commit. If commit is ASYNC, we need to wait only after - * the commit block went to disk (which happens above). If commit is - * SYNC, we need to wait for data buffers before we start writing - * commit block, which happens below in such setting. - */ err = journal_finish_inode_data_buffers(journal, commit_transaction); if (err) { printk(KERN_WARNING diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f1011f7f3d41..638ce4554c76 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -653,6 +653,7 @@ struct transaction_s * waiting for it to finish. 
*/ unsigned int t_synchronous_commit:1; + unsigned int t_flushed_data_blocks:1; /* * For use by the filesystem to store fs-specific data -- cgit v1.2.3 From c459001fa4f71deafb62e00fa70d35f695498965 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 9 Dec 2009 03:05:30 +0300 Subject: ext3: quota macros cleanup [V2] Currently all quota block reservation macros contains hardcoded "2" aka MAXQUOTAS value. This is no good because in some places it is not obvious to understand what does this digit represent. Let's introduce new macro with self descriptive name. Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ext3/inode.c | 8 ++++---- fs/ext3/namei.c | 8 ++++---- include/linux/ext3_jbd.h | 7 +++++-- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ad14227f509e..455e6e6e5cb9 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -970,7 +970,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock, if (max_blocks > DIO_MAX_BLOCKS) max_blocks = DIO_MAX_BLOCKS; handle = ext3_journal_start(inode, DIO_CREDITS + - 2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb)); + EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; @@ -3146,8 +3146,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) /* (user+group)*(old+new) structure, inode write (sb, * inode block, ? - but truncate inode update has it) */ - handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+ - EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3); + handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ + EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3); if (IS_ERR(handle)) { error = PTR_ERR(handle); goto err_out; @@ -3239,7 +3239,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode) #ifdef CONFIG_QUOTA /* We know that structure was already allocated during vfs_dq_init so * we will be updating only the data blocks + inodes */ - ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); + ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); #endif return ret; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index aad6400c9b77..81f7348b2de3 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1699,7 +1699,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1733,7 +1733,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry, retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1769,7 +1769,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode) retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2175,7 +2175,7 @@ static int ext3_symlink (struct inode * dir, retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + - 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); diff 
--git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h index cf82d519be40..d7b5ddca99c2 100644 --- a/include/linux/ext3_jbd.h +++ b/include/linux/ext3_jbd.h @@ -44,13 +44,13 @@ #define EXT3_DATA_TRANS_BLOCKS(sb) (EXT3_SINGLEDATA_TRANS_BLOCKS + \ EXT3_XATTR_TRANS_BLOCKS - 2 + \ - 2*EXT3_QUOTA_TRANS_BLOCKS(sb)) + EXT3_MAXQUOTAS_TRANS_BLOCKS(sb)) /* Delete operations potentially hit one directory's namespace plus an * entire inode, plus arbitrary amounts of bitmap/indirection data. Be * generous. We can grow the delete transaction later if necessary. */ -#define EXT3_DELETE_TRANS_BLOCKS(sb) (2 * EXT3_DATA_TRANS_BLOCKS(sb) + 64) +#define EXT3_DELETE_TRANS_BLOCKS(sb) (EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64) /* Define an arbitrary limit for the amount of data we will anticipate * writing to any given transaction. For unbounded transactions such as @@ -86,6 +86,9 @@ #define EXT3_QUOTA_INIT_BLOCKS(sb) 0 #define EXT3_QUOTA_DEL_BLOCKS(sb) 0 #endif +#define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb)) +#define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb)) +#define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb)) int ext3_mark_iloc_dirty(handle_t *handle, -- cgit v1.2.3 From b462707e7ccad058ae151e5c5b06eb5cadcb737f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 14 Dec 2009 15:21:12 +0300 Subject: Add unlocked version of inode_add_bytes() function Quota code requires unlocked version of this function. Off course we can just copy-paste the code, but copy-pasting is always an evil. Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/stat.c | 10 ++++++++-- include/linux/fs.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/stat.c b/fs/stat.c index 075694e31d8b..c4ecd52c5737 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -401,9 +401,9 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, } #endif /* __ARCH_WANT_STAT64 */ -void inode_add_bytes(struct inode *inode, loff_t bytes) +/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ +void __inode_add_bytes(struct inode *inode, loff_t bytes) { - spin_lock(&inode->i_lock); inode->i_blocks += bytes >> 9; bytes &= 511; inode->i_bytes += bytes; @@ -411,6 +411,12 @@ void inode_add_bytes(struct inode *inode, loff_t bytes) inode->i_blocks++; inode->i_bytes -= 512; } +} + +void inode_add_bytes(struct inode *inode, loff_t bytes) +{ + spin_lock(&inode->i_lock); + __inode_add_bytes(inode, bytes); spin_unlock(&inode->i_lock); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e3012e0ac06..9147ca88f253 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2297,6 +2297,7 @@ extern const struct inode_operations page_symlink_inode_operations; extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); loff_t inode_get_bytes(struct inode *inode); -- cgit v1.2.3 From fd8fbfc1709822bd94247c5b2ab15a5f5041e103 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 14 Dec 2009 15:21:13 +0300 Subject: quota: decouple fs reserved space from quota reservation Currently inode_reservation is managed by fs itself and this reservation is transfered on dquot_transfer(). 
This means what inode_reservation must always be in sync with dquot->dq_dqb.dqb_rsvspace. Otherwise dquot_transfer() will result in incorrect quota(WARN_ON in dquot_claim_reserved_space() will be triggered) This is not easy because of complex locking order issues for example http://bugzilla.kernel.org/show_bug.cgi?id=14739 The patch introduce quota reservation field for each fs-inode (fs specific inode is used in order to prevent bloating generic vfs inode). This reservation is managed by quota code internally similar to i_blocks/i_bytes and may not be always in sync with internal fs reservation. Also perform some code rearrangement: - Unify dquot_reserve_space() and dquot_reserve_space() - Unify dquot_release_reserved_space() and dquot_free_space() - Also this patch add missing warning update to release_rsv() dquot_release_reserved_space() must call flush_warnings() as dquot_free_space() does. Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/quota/dquot.c | 213 ++++++++++++++++++++++++++++---------------------- include/linux/quota.h | 5 +- 2 files changed, 122 insertions(+), 96 deletions(-) (limited to 'include') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index cd6bb9a33c13..1cb8fa84300f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1318,6 +1318,67 @@ void vfs_dq_drop(struct inode *inode) } EXPORT_SYMBOL(vfs_dq_drop); +/* + * inode_reserved_space is managed internally by quota, and protected by + * i_lock similar to i_blocks+i_bytes. + */ +static qsize_t *inode_reserved_space(struct inode * inode) +{ + /* Filesystem must explicitly define it's own method in order to use + * quota reservation interface */ + BUG_ON(!inode->i_sb->dq_op->get_reserved_space); + return inode->i_sb->dq_op->get_reserved_space(inode); +} + +static void inode_add_rsv_space(struct inode *inode, qsize_t number) +{ + spin_lock(&inode->i_lock); + *inode_reserved_space(inode) += number; + spin_unlock(&inode->i_lock); +} + + +static void inode_claim_rsv_space(struct inode *inode, qsize_t number) +{ + spin_lock(&inode->i_lock); + *inode_reserved_space(inode) -= number; + __inode_add_bytes(inode, number); + spin_unlock(&inode->i_lock); +} + +static void inode_sub_rsv_space(struct inode *inode, qsize_t number) +{ + spin_lock(&inode->i_lock); + *inode_reserved_space(inode) -= number; + spin_unlock(&inode->i_lock); +} + +static qsize_t inode_get_rsv_space(struct inode *inode) +{ + qsize_t ret; + spin_lock(&inode->i_lock); + ret = *inode_reserved_space(inode); + spin_unlock(&inode->i_lock); + return ret; +} + +static void inode_incr_space(struct inode *inode, qsize_t number, + int reserve) +{ + if (reserve) + inode_add_rsv_space(inode, number); + else + inode_add_bytes(inode, number); +} + +static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) +{ + if (reserve) + inode_sub_rsv_space(inode, number); + else + inode_sub_bytes(inode, number); +} + /* * Following four functions update i_blocks+i_bytes fields and * quota information (together with appropriate checks) @@ -1336,6 +1397,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int cnt, ret = QUOTA_OK; char warntype[MAXQUOTAS]; + /* + * First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex + */ + if (IS_NOQUOTA(inode)) { + inode_incr_space(inode, number, reserve); + goto out; + } + + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + if (IS_NOQUOTA(inode)) { + inode_incr_space(inode, number, reserve); + goto out_unlock; + } + for (cnt = 0; 
cnt < MAXQUOTAS; cnt++) warntype[cnt] = QUOTA_NL_NOWARN; @@ -1346,7 +1422,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) == NO_QUOTA) { ret = NO_QUOTA; - goto out_unlock; + spin_unlock(&dq_data_lock); + goto out_flush_warn; } } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1357,64 +1434,32 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, else dquot_incr_space(inode->i_dquot[cnt], number); } - if (!reserve) - inode_add_bytes(inode, number); -out_unlock: + inode_incr_space(inode, number, reserve); spin_unlock(&dq_data_lock); - flush_warnings(inode->i_dquot, warntype); - return ret; -} - -int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) -{ - int cnt, ret = QUOTA_OK; - - /* - * First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex - */ - if (IS_NOQUOTA(inode)) { - inode_add_bytes(inode, number); - goto out; - } - - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - if (IS_NOQUOTA(inode)) { - inode_add_bytes(inode, number); - goto out_unlock; - } - - ret = __dquot_alloc_space(inode, number, warn, 0); - if (ret == NO_QUOTA) - goto out_unlock; + if (reserve) + goto out_flush_warn; /* Dirtify all the dquots - this can block when journalling */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); +out_flush_warn: + flush_warnings(inode->i_dquot, warntype); out_unlock: up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); out: return ret; } + +int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) +{ + return __dquot_alloc_space(inode, number, warn, 0); +} EXPORT_SYMBOL(dquot_alloc_space); int dquot_reserve_space(struct inode *inode, qsize_t number, int warn) { - int ret = QUOTA_OK; - - if (IS_NOQUOTA(inode)) - goto out; - - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - if (IS_NOQUOTA(inode)) - goto out_unlock; - - ret = __dquot_alloc_space(inode, number, warn, 1); -out_unlock: - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); -out: - return ret; + return __dquot_alloc_space(inode, number, warn, 1); } EXPORT_SYMBOL(dquot_reserve_space); @@ -1471,14 +1516,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number) int ret = QUOTA_OK; if (IS_NOQUOTA(inode)) { - inode_add_bytes(inode, number); + inode_claim_rsv_space(inode, number); goto out; } down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); if (IS_NOQUOTA(inode)) { up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - inode_add_bytes(inode, number); + inode_claim_rsv_space(inode, number); goto out; } @@ -1490,7 +1535,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number) number); } /* Update inode bytes */ - inode_add_bytes(inode, number); + inode_claim_rsv_space(inode, number); spin_unlock(&dq_data_lock); /* Dirtify all the dquots - this can block when journalling */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) @@ -1502,39 +1547,10 @@ out: } EXPORT_SYMBOL(dquot_claim_space); -/* - * Release reserved quota space - */ -void dquot_release_reserved_space(struct inode *inode, qsize_t number) -{ - int cnt; - - if (IS_NOQUOTA(inode)) - goto out; - - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - if (IS_NOQUOTA(inode)) - goto out_unlock; - - spin_lock(&dq_data_lock); - /* Release reserved dquots */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt]) - dquot_free_reserved_space(inode->i_dquot[cnt], number); - } - spin_unlock(&dq_data_lock); - -out_unlock: - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); -out: - return; 
-} -EXPORT_SYMBOL(dquot_release_reserved_space); - /* * This operation can block, but only after everything is updated */ -int dquot_free_space(struct inode *inode, qsize_t number) +int __dquot_free_space(struct inode *inode, qsize_t number, int reserve) { unsigned int cnt; char warntype[MAXQUOTAS]; @@ -1543,7 +1559,7 @@ int dquot_free_space(struct inode *inode, qsize_t number) * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) { out_sub: - inode_sub_bytes(inode, number); + inode_decr_space(inode, number, reserve); return QUOTA_OK; } @@ -1558,20 +1574,42 @@ out_sub: if (!inode->i_dquot[cnt]) continue; warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); - dquot_decr_space(inode->i_dquot[cnt], number); + if (reserve) + dquot_free_reserved_space(inode->i_dquot[cnt], number); + else + dquot_decr_space(inode->i_dquot[cnt], number); } - inode_sub_bytes(inode, number); + inode_decr_space(inode, number, reserve); spin_unlock(&dq_data_lock); + + if (reserve) + goto out_unlock; /* Dirtify all the dquots - this can block when journalling */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); +out_unlock: flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; } + +int dquot_free_space(struct inode *inode, qsize_t number) +{ + return __dquot_free_space(inode, number, 0); +} EXPORT_SYMBOL(dquot_free_space); +/* + * Release reserved quota space + */ +void dquot_release_reserved_space(struct inode *inode, qsize_t number) +{ + __dquot_free_space(inode, number, 1); + +} +EXPORT_SYMBOL(dquot_release_reserved_space); + /* * This operation can block, but only after everything is updated */ @@ -1609,19 +1647,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number) } EXPORT_SYMBOL(dquot_free_inode); -/* - * call back function, get reserved quota space from underlying fs - */ -qsize_t dquot_get_reserved_space(struct inode *inode) -{ - qsize_t reserved_space = 0; - - if (sb_any_quota_active(inode->i_sb) && - inode->i_sb->dq_op->get_reserved_space) - reserved_space = inode->i_sb->dq_op->get_reserved_space(inode); - return reserved_space; -} - /* * Transfer the number of inode and blocks from one diskquota to an other. 
* @@ -1665,7 +1690,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) } spin_lock(&dq_data_lock); cur_space = inode_get_bytes(inode); - rsv_space = dquot_get_reserved_space(inode); + rsv_space = inode_get_rsv_space(inode); space = cur_space + rsv_space; /* Build the transfer_from list and check the limits */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { diff --git a/include/linux/quota.h b/include/linux/quota.h index e70e62194243..a6861f117480 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -315,8 +315,9 @@ struct dquot_operations { int (*claim_space) (struct inode *, qsize_t); /* release rsved quota for delayed alloc */ void (*release_rsv) (struct inode *, qsize_t); - /* get reserved quota for delayed alloc */ - qsize_t (*get_reserved_space) (struct inode *); + /* get reserved quota for delayed alloc, value returned is managed by + * quota code only */ + qsize_t *(*get_reserved_space) (struct inode *); }; /* Operations handling requests from userspace */ -- cgit v1.2.3 From b8a052d01669977f224255b0f9f2737018171ddb Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 14 Dec 2009 13:00:30 -0600 Subject: ext3: Replace lock/unlock_super() with an explicit lock for the orphan list Use a separate lock to protect the orphan list, so we can stop overloading the use of lock_super(). Port of ext4 commit 3b9d4ed26680771295d904a6b83e88e620780893 by Theodore Ts'o . CC: Theodore Ts'o Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/ext3/namei.c | 20 +++++++++++--------- fs/ext3/super.c | 1 + include/linux/ext3_fs_sb.h | 1 + 3 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 81f7348b2de3..7b0e44f7d66f 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1920,7 +1920,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode) struct ext3_iloc iloc; int err = 0, rc; - lock_super(sb); + mutex_lock(&EXT3_SB(sb)->s_orphan_lock); if (!list_empty(&EXT3_I(inode)->i_orphan)) goto out_unlock; @@ -1929,9 +1929,13 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode) /* @@@ FIXME: Observation from aviro: * I think I can trigger J_ASSERT in ext3_orphan_add(). We block - * here (on lock_super()), so race with ext3_link() which might bump + * here (on s_orphan_lock), so race with ext3_link() which might bump * ->i_nlink. For, say it, character device. Not a regular file, * not a directory, not a symlink and ->i_nlink > 0. + * + * tytso, 4/25/2009: I'm not sure how that could happen; + * shouldn't the fs core protect us from these sort of + * unlink()/link() races? 
*/ J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); @@ -1968,7 +1972,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode) jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); out_unlock: - unlock_super(sb); + mutex_unlock(&EXT3_SB(sb)->s_orphan_lock); ext3_std_error(inode->i_sb, err); return err; } @@ -1986,11 +1990,9 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode) struct ext3_iloc iloc; int err = 0; - lock_super(inode->i_sb); - if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } + mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock); + if (list_empty(&ei->i_orphan)) + goto out; ino_next = NEXT_ORPHAN(inode); prev = ei->i_orphan.prev; @@ -2040,7 +2042,7 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode) out_err: ext3_std_error(inode->i_sb, err); out: - unlock_super(inode->i_sb); + mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock); return err; out_brelse: diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 806b8b780add..97dd3828384c 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1928,6 +1928,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) sb->dq_op = &ext3_quota_operations; #endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ + mutex_init(&sbi->s_orphan_lock); sb->s_root = NULL; diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index f07f34de2f0e..dd61d83026a0 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -72,6 +72,7 @@ struct ext3_sb_info { struct inode * s_journal_inode; struct journal_s * s_journal; struct list_head s_orphan; + struct mutex s_orphan_lock; unsigned long s_commit_interval; struct block_device *journal_bdev; #ifdef CONFIG_JBD_DEBUG -- cgit v1.2.3 From 96d2a495c25d525873529b736cdb63ad502b101c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 14 Dec 2009 13:01:05 -0600 Subject: ext3: Replace lock/unlock_super() with an explicit lock for resizing Use a separate lock to protect s_groups_count and the other block group descriptors which get changed via an on-line resize operation, so we can stop overloading the use of lock_super(). Port of ext4 commit 32ed5058ce90024efcd811254b4b1de0468099df by Theodore Ts'o . CC: Theodore Ts'o Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/ext3/resize.c | 35 ++++++++++++++++++----------------- fs/ext3/super.c | 1 + include/linux/ext3_fs_sb.h | 1 + 3 files changed, 20 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 5f83b6179178..54351ac7cef9 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -209,7 +209,7 @@ static int setup_new_group_blocks(struct super_block *sb, if (IS_ERR(handle)) return PTR_ERR(handle); - lock_super(sb); + mutex_lock(&sbi->s_resize_lock); if (input->group != sbi->s_groups_count) { err = -EBUSY; goto exit_journal; @@ -324,7 +324,7 @@ exit_bh: brelse(bh); exit_journal: - unlock_super(sb); + mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext3_journal_stop(handle)) && !err) err = err2; @@ -662,11 +662,12 @@ exit_free: * important part is that the new block and inode counts are in the backup * superblocks, and the location of the new group metadata in the GDT backups. * - * We do not need lock_super() for this, because these blocks are not - * otherwise touched by the filesystem code when it is mounted. 
We don't - * need to worry about last changing from sbi->s_groups_count, because the - * worst that can happen is that we do not copy the full number of backups - * at this time. The resize which changed s_groups_count will backup again. + * We do not need take the s_resize_lock for this, because these + * blocks are not otherwise touched by the filesystem code when it is + * mounted. We don't need to worry about last changing from + * sbi->s_groups_count, because the worst that can happen is that we + * do not copy the full number of backups at this time. The resize + * which changed s_groups_count will backup again. */ static void update_backups(struct super_block *sb, int blk_off, char *data, int size) @@ -825,7 +826,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) goto exit_put; } - lock_super(sb); + mutex_lock(&sbi->s_resize_lock); if (input->group != sbi->s_groups_count) { ext3_warning(sb, __func__, "multiple resizers run on filesystem!"); @@ -856,7 +857,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) /* * OK, now we've set up the new group. Time to make it active. * - * Current kernels don't lock all allocations via lock_super(), + * We do not lock all allocations via s_resize_lock * so we have to be safe wrt. concurrent accesses the group * data. So we need to be careful to set all of the relevant * group descriptor data etc. *before* we enable the group. @@ -900,12 +901,12 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) * * The precise rules we use are: * - * * Writers of s_groups_count *must* hold lock_super + * * Writers of s_groups_count *must* hold s_resize_lock * AND * * Writers must perform a smp_wmb() after updating all dependent * data and before modifying the groups count * - * * Readers must hold lock_super() over the access + * * Readers must hold s_resize_lock over the access * OR * * Readers must perform an smp_rmb() after reading the groups count * and before reading any dependent data. @@ -936,7 +937,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) ext3_journal_dirty_metadata(handle, sbi->s_sbh); exit_journal: - unlock_super(sb); + mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext3_journal_stop(handle)) && !err) err = err2; if (!err) { @@ -973,7 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, /* We don't need to worry about locking wrt other resizers just * yet: we're going to revalidate es->s_blocks_count after - * taking lock_super() below. */ + * taking the s_resize_lock below. 
*/ o_blocks_count = le32_to_cpu(es->s_blocks_count); o_groups_count = EXT3_SB(sb)->s_groups_count; @@ -1045,11 +1046,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, goto exit_put; } - lock_super(sb); + mutex_lock(&EXT3_SB(sb)->s_resize_lock); if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { ext3_warning(sb, __func__, "multiple resizers run on filesystem!"); - unlock_super(sb); + mutex_unlock(&EXT3_SB(sb)->s_resize_lock); ext3_journal_stop(handle); err = -EBUSY; goto exit_put; @@ -1059,13 +1060,13 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, EXT3_SB(sb)->s_sbh))) { ext3_warning(sb, __func__, "error %d on journal write access", err); - unlock_super(sb); + mutex_unlock(&EXT3_SB(sb)->s_resize_lock); ext3_journal_stop(handle); goto exit_put; } es->s_blocks_count = cpu_to_le32(o_blocks_count + add); ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - unlock_super(sb); + mutex_unlock(&EXT3_SB(sb)->s_resize_lock); ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, o_blocks_count + add); ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 97dd3828384c..afa2b569da10 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1929,6 +1929,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) #endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); + mutex_init(&sbi->s_resize_lock); sb->s_root = NULL; diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index dd61d83026a0..258088ab3c6b 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -73,6 +73,7 @@ struct ext3_sb_info { struct journal_s * s_journal; struct list_head s_orphan; struct mutex s_orphan_lock; + struct mutex s_resize_lock; unsigned long s_commit_interval; struct block_device *journal_bdev; #ifdef CONFIG_JBD_DEBUG -- cgit v1.2.3 From 17bd55d037a02b04d9119511cfd1a4b985d20f63 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 23 Dec 2009 07:57:07 -0500 Subject: fs-writeback: Add helper function to start writeback if idle ext4, at least, would like to start pushing on writeback if it starts to get close to ENOSPC when reserving worst-case blocks for delalloc writes. Writing out delalloc data will convert those worst-case predictions into usually smaller actual usage, freeing up space before we hit ENOSPC based on this speculation. Thanks to Jens for the suggestion for the helper function, & the naming help. I've made the helper return status on whether writeback was started even though I don't plan to use it in the ext4 patch; it seems like it would be potentially useful to test this in some cases. Signed-off-by: Eric Sandeen Acked-by: Jan Kara --- fs/fs-writeback.c | 17 +++++++++++++++++ include/linux/writeback.h | 1 + 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 49bc1b8e8f19..f6c2155e0026 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1186,6 +1186,23 @@ void writeback_inodes_sb(struct super_block *sb) } EXPORT_SYMBOL(writeback_inodes_sb); +/** + * writeback_inodes_sb_if_idle - start writeback if none underway + * @sb: the superblock + * + * Invoke writeback_inodes_sb if no writeback is currently underway. + * Returns 1 if writeback was started, 0 if not. 
+ */ +int writeback_inodes_sb_if_idle(struct super_block *sb) +{ + if (!writeback_in_progress(sb->s_bdi)) { + writeback_inodes_sb(sb); + return 1; + } else + return 0; +} +EXPORT_SYMBOL(writeback_inodes_sb_if_idle); + /** * sync_inodes_sb - sync sb inode pages * @sb: the superblock diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c18c008f4bbf..76e8903cd204 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -70,6 +70,7 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); -- cgit v1.2.3 From 119eecc831a42bd090543568932e440c6831f1bb Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Wed, 23 Dec 2009 09:10:48 +0100 Subject: Fix usb_serial_probe() problem introduced by the recent kfifo changes The USB serial code was a new user of the kfifo API, and it was missed when porting things to the new kfifo API. Please make the write_fifo in place. Here is my patch to fix the regression and full ported version. Signed-off-by: Stefani Seibold Reported-and-tested-by: Rafael J. Wysocki Cc: Greg KH Cc: Andrew Morton Cc: Alan Stern Signed-off-by: Linus Torvalds --- drivers/usb/serial/generic.c | 12 ++++++------ drivers/usb/serial/usb-serial.c | 5 ++--- include/linux/usb/serial.h | 3 ++- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index b0f1183755c9..f1ea3a33b6e6 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -276,7 +276,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port) if (port->write_urb_busy) start_io = false; else { - start_io = (kfifo_len(port->write_fifo) != 0); + start_io = (kfifo_len(&port->write_fifo) != 0); port->write_urb_busy = start_io; } spin_unlock_irqrestore(&port->lock, flags); @@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port) return 0; data = port->write_urb->transfer_buffer; - count = kfifo_out_locked(port->write_fifo, data, port->bulk_out_size, &port->lock); + count = kfifo_out_locked(&port->write_fifo, data, port->bulk_out_size, &port->lock); usb_serial_debug_data(debug, &port->dev, __func__, count, data); /* set up our urb */ @@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty, return usb_serial_multi_urb_write(tty, port, buf, count); - count = kfifo_in_locked(port->write_fifo, buf, count, &port->lock); + count = kfifo_in_locked(&port->write_fifo, buf, count, &port->lock); result = usb_serial_generic_write_start(port); if (result >= 0) @@ -370,7 +370,7 @@ int usb_serial_generic_write_room(struct tty_struct *tty) (serial->type->max_in_flight_urbs - port->urbs_in_flight); } else if (serial->num_bulk_out) - room = port->write_fifo->size - kfifo_len(port->write_fifo); + room = kfifo_avail(&port->write_fifo); spin_unlock_irqrestore(&port->lock, flags); dbg("%s - returns %d", __func__, room); @@ -391,7 +391,7 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty) chars = port->tx_bytes_flight; spin_unlock_irqrestore(&port->lock, flags); } else if (serial->num_bulk_out) - chars = kfifo_len(port->write_fifo); + chars = kfifo_len(&port->write_fifo); dbg("%s - returns %d", __func__, chars); return chars; @@ -507,7 +507,7 @@ void 
usb_serial_generic_write_bulk_callback(struct urb *urb) if (status) { dbg("%s - nonzero multi-urb write bulk status " "received: %d", __func__, status); - kfifo_reset(port->write_fifo); + kfifo_reset_out(&port->write_fifo); } else usb_serial_generic_write_start(port); } diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 636a4f23445e..33c85f7084f8 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -595,8 +595,7 @@ static void port_release(struct device *dev) usb_free_urb(port->write_urb); usb_free_urb(port->interrupt_in_urb); usb_free_urb(port->interrupt_out_urb); - if (!IS_ERR(port->write_fifo) && port->write_fifo) - kfifo_free(port->write_fifo); + kfifo_free(&port->write_fifo); kfree(port->bulk_in_buffer); kfree(port->bulk_out_buffer); kfree(port->interrupt_in_buffer); @@ -939,7 +938,7 @@ int usb_serial_probe(struct usb_interface *interface, dev_err(&interface->dev, "No free urbs available\n"); goto probe_error; } - if (kfifo_alloc(port->write_fifo, PAGE_SIZE, GFP_KERNEL)) + if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL)) goto probe_error; buffer_size = le16_to_cpu(endpoint->wMaxPacketSize); port->bulk_out_size = buffer_size; diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index acf6e457c04b..1819396ed501 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -16,6 +16,7 @@ #include #include #include +#include #define SERIAL_TTY_MAJOR 188 /* Nice legal number now */ #define SERIAL_TTY_MINORS 254 /* loads of devices :) */ @@ -94,7 +95,7 @@ struct usb_serial_port { unsigned char *bulk_out_buffer; int bulk_out_size; struct urb *write_urb; - struct kfifo *write_fifo; + struct kfifo write_fifo; int write_urb_busy; __u8 bulk_out_endpointAddress; -- cgit v1.2.3 From 9c717de946ed7f5782e6dffacf2d05859073058c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Dec 2009 09:23:33 -0800 Subject: kfifo: fix Error/broken kernel-doc notation Fix kernel-doc errors and warnings in new header file kfifo.h. Don't use kernel-doc "/**" for internal functions whose comments are not in kernel-doc format. kernel-doc section header names (like "Note:") must be unique per function. Looks like I need to document that. 
Error(include/linux/kfifo.h:76): duplicate section name 'Note' Warning(include/linux/kfifo.h:88): Excess function parameter 'size' description in 'INIT_KFIFO' Error(include/linux/kfifo.h:101): duplicate section name 'Note' Warning(include/linux/kfifo.h:257): No description found for parameter 'fifo' (many of this last type, from internal functions) Signed-off-by: Randy Dunlap Cc: Stefani Seibold Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 486e8ad3bb50..3d44e9c65a8e 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -69,8 +69,8 @@ struct kfifo { * @name: name of the declared kfifo datatype * @size: size of the fifo buffer * - * Note: the macro can be used inside struct or union declaration - * Note: the macro creates two objects: + * Note1: the macro can be used inside struct or union declaration + * Note2: the macro creates two objects: * A kfifo object with the given name and a buffer for the kfifo * object named name##kfifo_buffer */ @@ -83,7 +83,6 @@ union { \ /** * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO * @name: name of the declared kfifo datatype - * @size: size of the fifo buffer */ #define INIT_KFIFO(name) \ name = __kfifo_initializer(sizeof(name##kfifo_buffer) - \ @@ -94,8 +93,8 @@ union { \ * @name: name of the declared kfifo datatype * @size: size of the fifo buffer * - * Note: the macro can be used for global and local kfifo data type variables - * Note: the macro creates two objects: + * Note1: the macro can be used for global and local kfifo data type variables + * Note2: the macro creates two objects: * A kfifo object with the given name and a buffer for the kfifo * object named name##kfifo_buffer */ @@ -249,7 +248,7 @@ extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo, extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo, void __user *to, unsigned int n); -/** +/* * __kfifo_add_out internal helper function for updating the out offset */ static inline void __kfifo_add_out(struct kfifo *fifo, @@ -259,7 +258,7 @@ static inline void __kfifo_add_out(struct kfifo *fifo, fifo->out += off; } -/** +/* * __kfifo_add_in internal helper function for updating the in offset */ static inline void __kfifo_add_in(struct kfifo *fifo, @@ -269,7 +268,7 @@ static inline void __kfifo_add_in(struct kfifo *fifo, fifo->in += off; } -/** +/* * __kfifo_off internal helper function for calculating the index of a * given offeset */ @@ -278,7 +277,7 @@ static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off) return off & (fifo->size - 1); } -/** +/* * __kfifo_peek_n internal helper function for determinate the length of * the next record in the fifo */ @@ -299,7 +298,7 @@ static inline unsigned int __kfifo_peek_n(struct kfifo *fifo, #undef __KFIFO_GET } -/** +/* * __kfifo_poke_n internal helper function for storing the length of * the next record into the fifo */ @@ -319,7 +318,7 @@ static inline void __kfifo_poke_n(struct kfifo *fifo, #undef __KFIFO_PUT } -/** +/* * __kfifo_in_... internal functions for put date into the fifo * do not call it directly, use kfifo_in_rec() instead */ @@ -367,7 +366,7 @@ static inline __must_check unsigned int kfifo_in_rec(struct kfifo *fifo, return __kfifo_in_rec(fifo, from, n, recsize); } -/** +/* * __kfifo_out_... 
internal functions for get date from the fifo * do not call it directly, use kfifo_out_rec() instead */ @@ -425,7 +424,7 @@ static inline __must_check unsigned int kfifo_out_rec(struct kfifo *fifo, return __kfifo_out_rec(fifo, to, n, recsize, total); } -/** +/* * __kfifo_from_user_... internal functions for transfer from user space into * the fifo. do not call it directly, use kfifo_from_user_rec() instead */ @@ -474,7 +473,7 @@ static inline __must_check unsigned int kfifo_from_user_rec(struct kfifo *fifo, return __kfifo_from_user_rec(fifo, from, n, recsize); } -/** +/* * __kfifo_to_user_... internal functions for transfer fifo data into user space * do not call it directly, use kfifo_to_user_rec() instead */ @@ -533,7 +532,7 @@ static inline __must_check unsigned int kfifo_to_user_rec(struct kfifo *fifo, return __kfifo_to_user_rec(fifo, to, n, recsize, total); } -/** +/* * __kfifo_peek_... internal functions for peek into the next fifo record * do not call it directly, use kfifo_peek_rec() instead */ @@ -557,7 +556,7 @@ static inline __must_check unsigned int kfifo_peek_rec(struct kfifo *fifo, return __kfifo_peek_n(fifo, recsize); } -/** +/* * __kfifo_skip_... internal functions for skip the next fifo record * do not call it directly, use kfifo_skip_rec() instead */ -- cgit v1.2.3 From 26579ab70aa0e0ea434e6e100279d2f67c094431 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Fri, 18 Dec 2009 15:34:19 +0200 Subject: Driver core: device_attribute parameters can often be const* Most device_attributes are const, and are begging to be put in a ro section. However, the create and remove file interfaces were failing to propagate the const promise which the only functions they call offer. Signed-off-by: Phil Carmody Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/sysfs.txt | 8 ++++---- drivers/base/core.c | 6 ++++-- include/linux/device.h | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index b245d524d568..a4ac2b98c613 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt @@ -91,8 +91,8 @@ struct device_attribute { const char *buf, size_t count); }; -int device_create_file(struct device *, struct device_attribute *); -void device_remove_file(struct device *, struct device_attribute *); +int device_create_file(struct device *, const struct device_attribute *); +void device_remove_file(struct device *, const struct device_attribute *); It also defines this helper for defining device attributes: @@ -316,8 +316,8 @@ DEVICE_ATTR(_name, _mode, _show, _store); Creation/Removal: -int device_create_file(struct device *device, struct device_attribute * attr); -void device_remove_file(struct device * dev, struct device_attribute * attr); +int device_create_file(struct device *dev, const struct device_attribute * attr); +void device_remove_file(struct device *dev, const struct device_attribute * attr); - bus drivers (include/linux/device.h) diff --git a/drivers/base/core.c b/drivers/base/core.c index f1290cbd1350..2fd9e611f8a6 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -446,7 +446,8 @@ struct kset *devices_kset; * @dev: device. * @attr: device attribute descriptor. 
*/ -int device_create_file(struct device *dev, struct device_attribute *attr) +int device_create_file(struct device *dev, + const struct device_attribute *attr) { int error = 0; if (dev) @@ -459,7 +460,8 @@ int device_create_file(struct device *dev, struct device_attribute *attr) * @dev: device. * @attr: device attribute descriptor. */ -void device_remove_file(struct device *dev, struct device_attribute *attr) +void device_remove_file(struct device *dev, + const struct device_attribute *attr) { if (dev) sysfs_remove_file(&dev->kobj, &attr->attr); diff --git a/include/linux/device.h b/include/linux/device.h index 2a73d9bcbc9c..aa5b3e66a147 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -319,9 +319,9 @@ struct device_attribute { struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) extern int __must_check device_create_file(struct device *device, - struct device_attribute *entry); + const struct device_attribute *entry); extern void device_remove_file(struct device *dev, - struct device_attribute *attr); + const struct device_attribute *attr); extern int __must_check device_create_bin_file(struct device *dev, struct bin_attribute *attr); extern void device_remove_bin_file(struct device *dev, -- cgit v1.2.3 From 66ecb92be9eb579df93add22d19843e7869f168e Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Fri, 18 Dec 2009 15:34:20 +0200 Subject: Driver core: bin_attribute parameters can often be const* Many struct bin_attribute descriptors are purely read-only structures, and there's no need to change them. Therefore make the promise not to, which will let those descriptors be put in a ro section. Signed-off-by: Phil Carmody Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++-- fs/sysfs/bin.c | 6 ++++-- include/linux/device.h | 6 +++--- include/linux/sysfs.h | 9 +++++---- 4 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/base/core.c b/drivers/base/core.c index 2fd9e611f8a6..83afc8b8f27b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -472,7 +472,8 @@ void device_remove_file(struct device *dev, * @dev: device. * @attr: device binary attribute descriptor. */ -int device_create_bin_file(struct device *dev, struct bin_attribute *attr) +int device_create_bin_file(struct device *dev, + const struct bin_attribute *attr) { int error = -EINVAL; if (dev) @@ -486,7 +487,8 @@ EXPORT_SYMBOL_GPL(device_create_bin_file); * @dev: device. * @attr: device binary attribute descriptor. */ -void device_remove_bin_file(struct device *dev, struct bin_attribute *attr) +void device_remove_bin_file(struct device *dev, + const struct bin_attribute *attr) { if (dev) sysfs_remove_bin_file(&dev->kobj, attr); diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 60c702bc10ae..a0a500af24a1 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -483,7 +483,8 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd) * @attr: attribute descriptor. */ -int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) +int sysfs_create_bin_file(struct kobject *kobj, + const struct bin_attribute *attr) { BUG_ON(!kobj || !kobj->sd || !attr); @@ -497,7 +498,8 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) * @attr: attribute descriptor. 
*/ -void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) +void sysfs_remove_bin_file(struct kobject *kobj, + const struct bin_attribute *attr) { sysfs_hash_and_remove(kobj->sd, attr->attr.name); } diff --git a/include/linux/device.h b/include/linux/device.h index aa5b3e66a147..10d74ce93a46 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -319,13 +319,13 @@ struct device_attribute { struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) extern int __must_check device_create_file(struct device *device, - const struct device_attribute *entry); + const struct device_attribute *entry); extern void device_remove_file(struct device *dev, const struct device_attribute *attr); extern int __must_check device_create_bin_file(struct device *dev, - struct bin_attribute *attr); + const struct bin_attribute *attr); extern void device_remove_bin_file(struct device *dev, - struct bin_attribute *attr); + const struct bin_attribute *attr); extern int device_schedule_callback_owner(struct device *dev, void (*func)(struct device *dev), struct module *owner); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9d68fed50f11..cfa83083a2d4 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -99,8 +99,9 @@ int __must_check sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr); int __must_check sysfs_create_bin_file(struct kobject *kobj, - struct bin_attribute *attr); -void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr); + const struct bin_attribute *attr); +void sysfs_remove_bin_file(struct kobject *kobj, + const struct bin_attribute *attr); int __must_check sysfs_create_link(struct kobject *kobj, struct kobject *target, const char *name); @@ -175,13 +176,13 @@ static inline void sysfs_remove_file(struct kobject *kobj, } static inline int sysfs_create_bin_file(struct kobject *kobj, - struct bin_attribute *attr) + const struct bin_attribute *attr) { return 0; } static inline void sysfs_remove_bin_file(struct kobject *kobj, - struct bin_attribute *attr) + const struct bin_attribute *attr) { } -- cgit v1.2.3 From 099c2f21d8cf0724b85abb2c589d6276953781b7 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Fri, 18 Dec 2009 15:34:21 +0200 Subject: Driver core: driver_attribute parameters can often be const* Many struct driver_attribute descriptors are purely read-only structures, and there's no need to change them. Therefore make the promise not to, which will let those descriptors be put in a ro section. 
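For illustration only, a minimal sketch of what the const-qualified interface permits (the attribute name and show routine below are invented for this note, not taken from the patch): a descriptor that is never modified at runtime can be declared const, letting the compiler place it in a read-only section, and can still be registered through driver_create_file()/driver_remove_file() now that they take a const pointer.

static ssize_t demo_version_show(struct device_driver *drv, char *buf)
{
	return sprintf(buf, "1.0\n");
}

/* Never written after initialization, so it may live in a ro section. */
static const struct driver_attribute driver_attr_demo_version =
	__ATTR(version, 0444, demo_version_show, NULL);

static int demo_register_attr(struct device_driver *drv)
{
	/* Accepted without a cast because the parameter is const-qualified. */
	return driver_create_file(drv, &driver_attr_demo_version);
}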
Signed-off-by: Phil Carmody Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-model/driver.txt | 4 ++-- Documentation/filesystems/sysfs.txt | 4 ++-- drivers/base/driver.c | 4 ++-- include/linux/device.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/driver-model/driver.txt b/Documentation/driver-model/driver.txt index 60120fb3b961..d2cd6fb8ba9e 100644 --- a/Documentation/driver-model/driver.txt +++ b/Documentation/driver-model/driver.txt @@ -226,5 +226,5 @@ struct driver_attribute driver_attr_debug; This can then be used to add and remove the attribute from the driver's directory using: -int driver_create_file(struct device_driver *, struct driver_attribute *); -void driver_remove_file(struct device_driver *, struct driver_attribute *); +int driver_create_file(struct device_driver *, const struct driver_attribute *); +void driver_remove_file(struct device_driver *, const struct driver_attribute *); diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index a4ac2b98c613..931c806642c5 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt @@ -358,7 +358,7 @@ DRIVER_ATTR(_name, _mode, _show, _store) Creation/Removal: -int driver_create_file(struct device_driver *, struct driver_attribute *); -void driver_remove_file(struct device_driver *, struct driver_attribute *); +int driver_create_file(struct device_driver *, const struct driver_attribute *); +void driver_remove_file(struct device_driver *, const struct driver_attribute *); diff --git a/drivers/base/driver.c b/drivers/base/driver.c index f367885a7646..90c9fff09ead 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(driver_find_device); * @attr: driver attribute descriptor. */ int driver_create_file(struct device_driver *drv, - struct driver_attribute *attr) + const struct driver_attribute *attr) { int error; if (drv) @@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(driver_create_file); * @attr: driver attribute descriptor. */ void driver_remove_file(struct device_driver *drv, - struct driver_attribute *attr) + const struct driver_attribute *attr) { if (drv) sysfs_remove_file(&drv->p->kobj, &attr->attr); diff --git a/include/linux/device.h b/include/linux/device.h index 10d74ce93a46..a62799f2ab00 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -166,9 +166,9 @@ struct driver_attribute driver_attr_##_name = \ __ATTR(_name, _mode, _show, _store) extern int __must_check driver_create_file(struct device_driver *driver, - struct driver_attribute *attr); + const struct driver_attribute *attr); extern void driver_remove_file(struct device_driver *driver, - struct driver_attribute *attr); + const struct driver_attribute *attr); extern int __must_check driver_add_kobj(struct device_driver *drv, struct kobject *kobj, -- cgit v1.2.3 From 29d249ed80c80b479df78e456e6809223c648505 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 18 Dec 2009 09:59:48 -0800 Subject: Staging: dst: remove from the tree DST is dead, no one is using it and upstream has abandoned it, so remove it from the tree because it is not going anywhere. 
Acked-by: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- drivers/staging/Kconfig | 2 - drivers/staging/Makefile | 1 - drivers/staging/dst/Kconfig | 67 --- drivers/staging/dst/Makefile | 3 - drivers/staging/dst/crypto.c | 733 ----------------------------- drivers/staging/dst/dcore.c | 968 -------------------------------------- drivers/staging/dst/export.c | 660 -------------------------- drivers/staging/dst/state.c | 844 --------------------------------- drivers/staging/dst/thread_pool.c | 348 -------------- drivers/staging/dst/trans.c | 337 ------------- include/linux/dst.h | 587 ----------------------- 11 files changed, 4550 deletions(-) delete mode 100644 drivers/staging/dst/Kconfig delete mode 100644 drivers/staging/dst/Makefile delete mode 100644 drivers/staging/dst/crypto.c delete mode 100644 drivers/staging/dst/dcore.c delete mode 100644 drivers/staging/dst/export.c delete mode 100644 drivers/staging/dst/state.c delete mode 100644 drivers/staging/dst/thread_pool.c delete mode 100644 drivers/staging/dst/trans.c delete mode 100644 include/linux/dst.h (limited to 'include') diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index db0de940949e..94eb86319ff3 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -87,8 +87,6 @@ source "drivers/staging/frontier/Kconfig" source "drivers/staging/dream/Kconfig" -source "drivers/staging/dst/Kconfig" - source "drivers/staging/pohmelfs/Kconfig" source "drivers/staging/b3dfg/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 73c6a71155e0..b5e67b889f60 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -26,7 +26,6 @@ obj-$(CONFIG_RTL8192E) += rtl8192e/ obj-$(CONFIG_INPUT_MIMIO) += mimio/ obj-$(CONFIG_TRANZPORT) += frontier/ obj-$(CONFIG_DREAM) += dream/ -obj-$(CONFIG_DST) += dst/ obj-$(CONFIG_POHMELFS) += pohmelfs/ obj-$(CONFIG_B3DFG) += b3dfg/ obj-$(CONFIG_IDE_PHISON) += phison/ diff --git a/drivers/staging/dst/Kconfig b/drivers/staging/dst/Kconfig deleted file mode 100644 index 448d342ac2a2..000000000000 --- a/drivers/staging/dst/Kconfig +++ /dev/null @@ -1,67 +0,0 @@ -config DST - tristate "Distributed storage" - depends on NET && CRYPTO && SYSFS && BLK_DEV - select CONNECTOR - ---help--- - DST is a network block device storage, which can be used to organize - exported storage on the remote nodes into the local block device. - - DST works on top of any network media and protocol; it is just a matter - of configuration utility to understand the correct addresses. The most - common example is TCP over IP, which allows to pass through firewalls and - create remote backup storage in a different datacenter. DST requires - single port to be enabled on the exporting node and outgoing connections - on the local node. - - DST works with in-kernel client and server, which improves performance by - eliminating unneded data copies and by not depending on the version - of the external IO components. It requires userspace configuration utility - though. - - DST uses transaction model, when each store has to be explicitly acked - from the remote node to be considered as successfully written. There - may be lots of in-flight transactions. When remote host does not ack - the transaction it will be resent predefined number of times with specified - timeouts between them. All those parameters are configurable. 
Transactions - are marked as failed after all resends complete unsuccessfully; having - long enough resend timeout and/or large number of resends allows not to - return error to the higher (FS usually) layer in case of short network - problems or remote node outages. In case of network RAID setup this means - that storage will not degrade until transactions are marked as failed, and - thus will not force checksum recalculation and data rebuild. In case of - connection failure DST will try to reconnect to the remote node automatically. - DST sends ping commands at idle time to detect if remote node is alive. - - Because of transactional model it is possible to use zero-copy sending - without worry of data corruption (which in turn could be detected by the - strong checksums though). - - DST may fully encrypt the data channel in case of untrusted channel and implement - strong checksum of the transferred data. It is possible to configure algorithms - and crypto keys; they should match on both sides of the network channel. - Crypto processing does not introduce noticeble performance overhead, since DST - uses configurable pool of threads to perform crypto processing. - - DST utilizes memory pool model of all its transaction allocations (it is the - only additional allocation on the client) and server allocations (bio pools, - while pages are allocated from the slab). - - At startup DST performs a simple negotiation with the export node to determine - access permissions and size of the exported storage. It can be extended if - new parameters should be autonegotiated. - - DST carries block IO flags in the protocol, which allows to transparently implement - barriers and sync/flush operations. Those flags are used in the export node where - IO against the local storage is performed, which means that sync write will be sync - on the remote node too, which in turn improves data integrity and improved resistance - to errors and data corruption during power outages or storage damages. - - Homepage: http://www.ioremap.net/projects/dst - Userspace configuration utility and the latest releases: http://www.ioremap.net/archive/dst/ - -config DST_DEBUG - bool "DST debug" - depends on DST - ---help--- - This option will enable HEAVY debugging of the DST. - Turn it on ONLY if you have to debug some really obscure problem. diff --git a/drivers/staging/dst/Makefile b/drivers/staging/dst/Makefile deleted file mode 100644 index 3a8b0cf9643e..000000000000 --- a/drivers/staging/dst/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_DST) += nst.o - -nst-y := dcore.o state.o export.o thread_pool.o crypto.o trans.o diff --git a/drivers/staging/dst/crypto.c b/drivers/staging/dst/crypto.c deleted file mode 100644 index 351295c97a4b..000000000000 --- a/drivers/staging/dst/crypto.c +++ /dev/null @@ -1,733 +0,0 @@ -/* - * 2007+ Copyright (c) Evgeniy Polyakov - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include - -/* - * Tricky bastard, but IV can be more complex with time... 
- */ -static inline u64 dst_gen_iv(struct dst_trans *t) -{ - return t->gen; -} - -/* - * Crypto machinery: hash/cipher support for the given crypto controls. - */ -static struct crypto_hash *dst_init_hash(struct dst_crypto_ctl *ctl, u8 *key) -{ - int err; - struct crypto_hash *hash; - - hash = crypto_alloc_hash(ctl->hash_algo, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(hash)) { - err = PTR_ERR(hash); - dprintk("%s: failed to allocate hash '%s', err: %d.\n", - __func__, ctl->hash_algo, err); - goto err_out_exit; - } - - ctl->crypto_attached_size = crypto_hash_digestsize(hash); - - if (!ctl->hash_keysize) - return hash; - - err = crypto_hash_setkey(hash, key, ctl->hash_keysize); - if (err) { - dprintk("%s: failed to set key for hash '%s', err: %d.\n", - __func__, ctl->hash_algo, err); - goto err_out_free; - } - - return hash; - -err_out_free: - crypto_free_hash(hash); -err_out_exit: - return ERR_PTR(err); -} - -static struct crypto_ablkcipher *dst_init_cipher(struct dst_crypto_ctl *ctl, - u8 *key) -{ - int err = -EINVAL; - struct crypto_ablkcipher *cipher; - - if (!ctl->cipher_keysize) - goto err_out_exit; - - cipher = crypto_alloc_ablkcipher(ctl->cipher_algo, 0, 0); - if (IS_ERR(cipher)) { - err = PTR_ERR(cipher); - dprintk("%s: failed to allocate cipher '%s', err: %d.\n", - __func__, ctl->cipher_algo, err); - goto err_out_exit; - } - - crypto_ablkcipher_clear_flags(cipher, ~0); - - err = crypto_ablkcipher_setkey(cipher, key, ctl->cipher_keysize); - if (err) { - dprintk("%s: failed to set key for cipher '%s', err: %d.\n", - __func__, ctl->cipher_algo, err); - goto err_out_free; - } - - return cipher; - -err_out_free: - crypto_free_ablkcipher(cipher); -err_out_exit: - return ERR_PTR(err); -} - -/* - * Crypto engine has a pool of pages to encrypt data into before sending - * it over the network. This pool is freed/allocated here. - */ -static void dst_crypto_pages_free(struct dst_crypto_engine *e) -{ - unsigned int i; - - for (i = 0; i < e->page_num; ++i) - __free_page(e->pages[i]); - kfree(e->pages); -} - -static int dst_crypto_pages_alloc(struct dst_crypto_engine *e, int num) -{ - int i; - - e->pages = kmalloc(num * sizeof(struct page **), GFP_KERNEL); - if (!e->pages) - return -ENOMEM; - - for (i = 0; i < num; ++i) { - e->pages[i] = alloc_page(GFP_KERNEL); - if (!e->pages[i]) - goto err_out_free_pages; - } - - e->page_num = num; - return 0; - -err_out_free_pages: - while (--i >= 0) - __free_page(e->pages[i]); - - kfree(e->pages); - return -ENOMEM; -} - -/* - * Initialize crypto engine for given node. - * Setup cipher/hash, keys, pool of threads and private data. 
- */ -static int dst_crypto_engine_init(struct dst_crypto_engine *e, - struct dst_node *n) -{ - int err; - struct dst_crypto_ctl *ctl = &n->crypto; - - err = dst_crypto_pages_alloc(e, n->max_pages); - if (err) - goto err_out_exit; - - e->size = PAGE_SIZE; - e->data = kmalloc(e->size, GFP_KERNEL); - if (!e->data) { - err = -ENOMEM; - goto err_out_free_pages; - } - - if (ctl->hash_algo[0]) { - e->hash = dst_init_hash(ctl, n->hash_key); - if (IS_ERR(e->hash)) { - err = PTR_ERR(e->hash); - e->hash = NULL; - goto err_out_free; - } - } - - if (ctl->cipher_algo[0]) { - e->cipher = dst_init_cipher(ctl, n->cipher_key); - if (IS_ERR(e->cipher)) { - err = PTR_ERR(e->cipher); - e->cipher = NULL; - goto err_out_free_hash; - } - } - - return 0; - -err_out_free_hash: - crypto_free_hash(e->hash); -err_out_free: - kfree(e->data); -err_out_free_pages: - dst_crypto_pages_free(e); -err_out_exit: - return err; -} - -static void dst_crypto_engine_exit(struct dst_crypto_engine *e) -{ - if (e->hash) - crypto_free_hash(e->hash); - if (e->cipher) - crypto_free_ablkcipher(e->cipher); - dst_crypto_pages_free(e); - kfree(e->data); -} - -/* - * Waiting for cipher processing to be completed. - */ -struct dst_crypto_completion { - struct completion complete; - int error; -}; - -static void dst_crypto_complete(struct crypto_async_request *req, int err) -{ - struct dst_crypto_completion *c = req->data; - - if (err == -EINPROGRESS) - return; - - dprintk("%s: req: %p, err: %d.\n", __func__, req, err); - c->error = err; - complete(&c->complete); -} - -static int dst_crypto_process(struct ablkcipher_request *req, - struct scatterlist *sg_dst, struct scatterlist *sg_src, - void *iv, int enc, unsigned long timeout) -{ - struct dst_crypto_completion c; - int err; - - init_completion(&c.complete); - c.error = -EINPROGRESS; - - ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - dst_crypto_complete, &c); - - ablkcipher_request_set_crypt(req, sg_src, sg_dst, sg_src->length, iv); - - if (enc) - err = crypto_ablkcipher_encrypt(req); - else - err = crypto_ablkcipher_decrypt(req); - - switch (err) { - case -EINPROGRESS: - case -EBUSY: - err = wait_for_completion_interruptible_timeout(&c.complete, - timeout); - if (!err) - err = -ETIMEDOUT; - else - err = c.error; - break; - default: - break; - } - - return err; -} - -/* - * DST uses generic iteration approach for data crypto processing. - * Single block IO request is switched into array of scatterlists, - * which are submitted to the crypto processing iterator. - * - * Input and output iterator initialization are different, since - * in output case we can not encrypt data in-place and need a - * temporary storage, which is then being sent to the remote peer. 
- */ -static int dst_trans_iter_out(struct bio *bio, struct dst_crypto_engine *e, - int (*iterator) (struct dst_crypto_engine *e, - struct scatterlist *dst, - struct scatterlist *src)) -{ - struct bio_vec *bv; - int err, i; - - sg_init_table(e->src, bio->bi_vcnt); - sg_init_table(e->dst, bio->bi_vcnt); - - bio_for_each_segment(bv, bio, i) { - sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset); - sg_set_page(&e->dst[i], e->pages[i], bv->bv_len, bv->bv_offset); - - err = iterator(e, &e->dst[i], &e->src[i]); - if (err) - return err; - } - - return 0; -} - -static int dst_trans_iter_in(struct bio *bio, struct dst_crypto_engine *e, - int (*iterator) (struct dst_crypto_engine *e, - struct scatterlist *dst, - struct scatterlist *src)) -{ - struct bio_vec *bv; - int err, i; - - sg_init_table(e->src, bio->bi_vcnt); - sg_init_table(e->dst, bio->bi_vcnt); - - bio_for_each_segment(bv, bio, i) { - sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset); - sg_set_page(&e->dst[i], bv->bv_page, bv->bv_len, bv->bv_offset); - - err = iterator(e, &e->dst[i], &e->src[i]); - if (err) - return err; - } - - return 0; -} - -static int dst_crypt_iterator(struct dst_crypto_engine *e, - struct scatterlist *sg_dst, struct scatterlist *sg_src) -{ - struct ablkcipher_request *req = e->data; - u8 iv[32]; - - memset(iv, 0, sizeof(iv)); - - memcpy(iv, &e->iv, sizeof(e->iv)); - - return dst_crypto_process(req, sg_dst, sg_src, iv, e->enc, e->timeout); -} - -static int dst_crypt(struct dst_crypto_engine *e, struct bio *bio) -{ - struct ablkcipher_request *req = e->data; - - memset(req, 0, sizeof(struct ablkcipher_request)); - ablkcipher_request_set_tfm(req, e->cipher); - - if (e->enc) - return dst_trans_iter_out(bio, e, dst_crypt_iterator); - else - return dst_trans_iter_in(bio, e, dst_crypt_iterator); -} - -static int dst_hash_iterator(struct dst_crypto_engine *e, - struct scatterlist *sg_dst, struct scatterlist *sg_src) -{ - return crypto_hash_update(e->data, sg_src, sg_src->length); -} - -static int dst_hash(struct dst_crypto_engine *e, struct bio *bio, void *dst) -{ - struct hash_desc *desc = e->data; - int err; - - desc->tfm = e->hash; - desc->flags = 0; - - err = crypto_hash_init(desc); - if (err) - return err; - - err = dst_trans_iter_in(bio, e, dst_hash_iterator); - if (err) - return err; - - err = crypto_hash_final(desc, dst); - if (err) - return err; - - return 0; -} - -/* - * Initialize/cleanup a crypto thread. The only thing it should - * do is to allocate a pool of pages as temporary storage. - * And to setup cipher and/or hash. - */ -static void *dst_crypto_thread_init(void *data) -{ - struct dst_node *n = data; - struct dst_crypto_engine *e; - int err = -ENOMEM; - - e = kzalloc(sizeof(struct dst_crypto_engine), GFP_KERNEL); - if (!e) - goto err_out_exit; - e->src = kcalloc(2 * n->max_pages, sizeof(struct scatterlist), - GFP_KERNEL); - if (!e->src) - goto err_out_free; - - e->dst = e->src + n->max_pages; - - err = dst_crypto_engine_init(e, n); - if (err) - goto err_out_free_all; - - return e; - -err_out_free_all: - kfree(e->src); -err_out_free: - kfree(e); -err_out_exit: - return ERR_PTR(err); -} - -static void dst_crypto_thread_cleanup(void *private) -{ - struct dst_crypto_engine *e = private; - - dst_crypto_engine_exit(e); - kfree(e->src); - kfree(e); -} - -/* - * Initialize crypto engine for given node: store keys, create pool - * of threads, initialize each one. 
- * - * Each thread has unique ID, but 0 and 1 are reserved for receiving and - * accepting threads (if export node), so IDs could start from 2, but starting - * them from 10 allows easily understand what this thread is for. - */ -int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl) -{ - void *key = (ctl + 1); - int err = -ENOMEM, i; - char name[32]; - - if (ctl->hash_keysize) { - n->hash_key = kmalloc(ctl->hash_keysize, GFP_KERNEL); - if (!n->hash_key) - goto err_out_exit; - memcpy(n->hash_key, key, ctl->hash_keysize); - } - - if (ctl->cipher_keysize) { - n->cipher_key = kmalloc(ctl->cipher_keysize, GFP_KERNEL); - if (!n->cipher_key) - goto err_out_free_hash; - memcpy(n->cipher_key, key, ctl->cipher_keysize); - } - memcpy(&n->crypto, ctl, sizeof(struct dst_crypto_ctl)); - - for (i = 0; i < ctl->thread_num; ++i) { - snprintf(name, sizeof(name), "%s-crypto-%d", n->name, i); - /* Unique ids... */ - err = thread_pool_add_worker(n->pool, name, i + 10, - dst_crypto_thread_init, dst_crypto_thread_cleanup, n); - if (err) - goto err_out_free_threads; - } - - return 0; - -err_out_free_threads: - while (--i >= 0) - thread_pool_del_worker_id(n->pool, i+10); - - if (ctl->cipher_keysize) - kfree(n->cipher_key); - ctl->cipher_keysize = 0; -err_out_free_hash: - if (ctl->hash_keysize) - kfree(n->hash_key); - ctl->hash_keysize = 0; -err_out_exit: - return err; -} - -void dst_node_crypto_exit(struct dst_node *n) -{ - struct dst_crypto_ctl *ctl = &n->crypto; - - if (ctl->cipher_algo[0] || ctl->hash_algo[0]) { - kfree(n->hash_key); - kfree(n->cipher_key); - } -} - -/* - * Thrad pool setup callback. Just stores a transaction in private data. - */ -static int dst_trans_crypto_setup(void *crypto_engine, void *trans) -{ - struct dst_crypto_engine *e = crypto_engine; - - e->private = trans; - return 0; -} - -#if 0 -static void dst_dump_bio(struct bio *bio) -{ - u8 *p; - struct bio_vec *bv; - int i; - - bio_for_each_segment(bv, bio, i) { - dprintk("%s: %llu/%u: size: %u, offset: %u, data: ", - __func__, bio->bi_sector, bio->bi_size, - bv->bv_len, bv->bv_offset); - - p = kmap(bv->bv_page) + bv->bv_offset; - for (i = 0; i < bv->bv_len; ++i) - printk(KERN_DEBUG "%02x ", p[i]); - kunmap(bv->bv_page); - printk("\n"); - } -} -#endif - -/* - * Encrypt/hash data and send it to the network. - */ -static int dst_crypto_process_sending(struct dst_crypto_engine *e, - struct bio *bio, u8 *hash) -{ - int err; - - if (e->cipher) { - err = dst_crypt(e, bio); - if (err) - goto err_out_exit; - } - - if (e->hash) { - err = dst_hash(e, bio, hash); - if (err) - goto err_out_exit; - -#ifdef CONFIG_DST_DEBUG - { - unsigned int i; - - /* dst_dump_bio(bio); */ - - printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash: ", - __func__, (u64)bio->bi_sector, - bio->bi_size, bio_data_dir(bio)); - for (i = 0; i < crypto_hash_digestsize(e->hash); ++i) - printk("%02x ", hash[i]); - printk("\n"); - } -#endif - } - - return 0; - -err_out_exit: - return err; -} - -/* - * Check if received data is valid. Decipher if it is. 
- */ -static int dst_crypto_process_receiving(struct dst_crypto_engine *e, - struct bio *bio, u8 *hash, u8 *recv_hash) -{ - int err; - - if (e->hash) { - int mismatch; - - err = dst_hash(e, bio, hash); - if (err) - goto err_out_exit; - - mismatch = !!memcmp(recv_hash, hash, - crypto_hash_digestsize(e->hash)); -#ifdef CONFIG_DST_DEBUG - /* dst_dump_bio(bio); */ - - printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash mismatch: %d", - __func__, (u64)bio->bi_sector, bio->bi_size, - bio_data_dir(bio), mismatch); - if (mismatch) { - unsigned int i; - - printk(", recv/calc: "); - for (i = 0; i < crypto_hash_digestsize(e->hash); ++i) - printk("%02x/%02x ", recv_hash[i], hash[i]); - - } - printk("\n"); -#endif - err = -1; - if (mismatch) - goto err_out_exit; - } - - if (e->cipher) { - err = dst_crypt(e, bio); - if (err) - goto err_out_exit; - } - - return 0; - -err_out_exit: - return err; -} - -/* - * Thread pool callback to encrypt data and send it to the netowork. - */ -static int dst_trans_crypto_action(void *crypto_engine, void *schedule_data) -{ - struct dst_crypto_engine *e = crypto_engine; - struct dst_trans *t = schedule_data; - struct bio *bio = t->bio; - int err; - - dprintk("%s: t: %p, gen: %llu, cipher: %p, hash: %p.\n", - __func__, t, t->gen, e->cipher, e->hash); - - e->enc = t->enc; - e->iv = dst_gen_iv(t); - - if (bio_data_dir(bio) == WRITE) { - err = dst_crypto_process_sending(e, bio, t->cmd.hash); - if (err) - goto err_out_exit; - - if (e->hash) { - t->cmd.csize = crypto_hash_digestsize(e->hash); - t->cmd.size += t->cmd.csize; - } - - return dst_trans_send(t); - } else { - u8 *hash = e->data + e->size/2; - - err = dst_crypto_process_receiving(e, bio, hash, t->cmd.hash); - if (err) - goto err_out_exit; - - dst_trans_remove(t); - dst_trans_put(t); - } - - return 0; - -err_out_exit: - t->error = err; - dst_trans_put(t); - return err; -} - -/* - * Schedule crypto processing for given transaction. - */ -int dst_trans_crypto(struct dst_trans *t) -{ - struct dst_node *n = t->n; - int err; - - err = thread_pool_schedule(n->pool, - dst_trans_crypto_setup, dst_trans_crypto_action, - t, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_out_exit; - - return 0; - -err_out_exit: - dst_trans_put(t); - return err; -} - -/* - * Crypto machinery for the export node. 
- */ -static int dst_export_crypto_setup(void *crypto_engine, void *bio) -{ - struct dst_crypto_engine *e = crypto_engine; - - e->private = bio; - return 0; -} - -static int dst_export_crypto_action(void *crypto_engine, void *schedule_data) -{ - struct dst_crypto_engine *e = crypto_engine; - struct bio *bio = schedule_data; - struct dst_export_priv *p = bio->bi_private; - int err; - - dprintk("%s: e: %p, data: %p, bio: %llu/%u, dir: %lu.\n", - __func__, e, e->data, (u64)bio->bi_sector, - bio->bi_size, bio_data_dir(bio)); - - e->enc = (bio_data_dir(bio) == READ); - e->iv = p->cmd.id; - - if (bio_data_dir(bio) == WRITE) { - u8 *hash = e->data + e->size/2; - - err = dst_crypto_process_receiving(e, bio, hash, p->cmd.hash); - if (err) - goto err_out_exit; - - generic_make_request(bio); - } else { - err = dst_crypto_process_sending(e, bio, p->cmd.hash); - if (err) - goto err_out_exit; - - if (e->hash) { - p->cmd.csize = crypto_hash_digestsize(e->hash); - p->cmd.size += p->cmd.csize; - } - - err = dst_export_send_bio(bio); - } - return 0; - -err_out_exit: - bio_put(bio); - return err; -} - -int dst_export_crypto(struct dst_node *n, struct bio *bio) -{ - int err; - - err = thread_pool_schedule(n->pool, - dst_export_crypto_setup, dst_export_crypto_action, - bio, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_out_exit; - - return 0; - -err_out_exit: - bio_put(bio); - return err; -} diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c deleted file mode 100644 index c83ca7e3d048..000000000000 --- a/drivers/staging/dst/dcore.c +++ /dev/null @@ -1,968 +0,0 @@ -/* - * 2007+ Copyright (c) Evgeniy Polyakov - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -static int dst_major; - -static DEFINE_MUTEX(dst_hash_lock); -static struct list_head *dst_hashtable; -static unsigned int dst_hashtable_size = 128; -module_param(dst_hashtable_size, uint, 0644); - -static char dst_name[] = "Dementianting goldfish"; - -static DEFINE_IDR(dst_index_idr); -static struct cb_id cn_dst_id = { CN_DST_IDX, CN_DST_VAL }; - -/* - * DST sysfs tree for device called 'storage': - * - * /sys/bus/dst/devices/storage/ - * /sys/bus/dst/devices/storage/type : 192.168.4.80:1025 - * /sys/bus/dst/devices/storage/size : 800 - * /sys/bus/dst/devices/storage/name : storage - */ - -static int dst_dev_match(struct device *dev, struct device_driver *drv) -{ - return 1; -} - -static struct bus_type dst_dev_bus_type = { - .name = "dst", - .match = &dst_dev_match, -}; - -static void dst_node_release(struct device *dev) -{ - struct dst_info *info = container_of(dev, struct dst_info, device); - - kfree(info); -} - -static struct device dst_node_dev = { - .bus = &dst_dev_bus_type, - .release = &dst_node_release -}; - -/* - * Setting size of the node after it was changed. 
- */ -static void dst_node_set_size(struct dst_node *n) -{ - struct block_device *bdev; - - set_capacity(n->disk, n->size >> 9); - - bdev = bdget_disk(n->disk, 0); - if (bdev) { - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, n->size); - mutex_unlock(&bdev->bd_inode->i_mutex); - bdput(bdev); - } -} - -/* - * Distributed storage request processing function. - */ -static int dst_request(struct request_queue *q, struct bio *bio) -{ - struct dst_node *n = q->queuedata; - int err = -EIO; - - if (bio_empty_barrier(bio) && !blk_queue_discard(q)) { - /* - * This is a dirty^Wnice hack, but if we complete this - * operation with -EOPNOTSUPP like intended, XFS - * will stuck and freeze the machine. This may be - * not particulary XFS problem though, but it is the - * only FS which sends empty barrier at umount time - * I worked with. - * - * Empty barriers are not allowed anyway, see 51fd77bd9f512 - * for example, although later it was changed to - * bio_rw_flagged(bio, BIO_RW_DISCARD) only, which does not - * work in this case. - */ - /* err = -EOPNOTSUPP; */ - err = 0; - goto end_io; - } - - bio_get(bio); - - return dst_process_bio(n, bio); - -end_io: - bio_endio(bio, err); - return err; -} - -/* - * Open/close callbacks for appropriate block device. - */ -static int dst_bdev_open(struct block_device *bdev, fmode_t mode) -{ - struct dst_node *n = bdev->bd_disk->private_data; - - dst_node_get(n); - return 0; -} - -static int dst_bdev_release(struct gendisk *disk, fmode_t mode) -{ - struct dst_node *n = disk->private_data; - - dst_node_put(n); - return 0; -} - -static struct block_device_operations dst_blk_ops = { - .open = dst_bdev_open, - .release = dst_bdev_release, - .owner = THIS_MODULE, -}; - -/* - * Block layer binding - disk is created when array is fully configured - * by userspace request. - */ -static int dst_node_create_disk(struct dst_node *n) -{ - int err = -ENOMEM; - u32 index = 0; - - n->queue = blk_init_queue(NULL, NULL); - if (!n->queue) - goto err_out_exit; - - n->queue->queuedata = n; - blk_queue_make_request(n->queue, dst_request); - blk_queue_max_phys_segments(n->queue, n->max_pages); - blk_queue_max_hw_segments(n->queue, n->max_pages); - - err = -ENOMEM; - n->disk = alloc_disk(1); - if (!n->disk) - goto err_out_free_queue; - - if (!(n->state->permissions & DST_PERM_WRITE)) { - printk(KERN_INFO "DST node %s attached read-only.\n", n->name); - set_disk_ro(n->disk, 1); - } - - if (!idr_pre_get(&dst_index_idr, GFP_KERNEL)) - goto err_out_put; - - mutex_lock(&dst_hash_lock); - err = idr_get_new(&dst_index_idr, NULL, &index); - mutex_unlock(&dst_hash_lock); - if (err) - goto err_out_put; - - n->disk->major = dst_major; - n->disk->first_minor = index; - n->disk->fops = &dst_blk_ops; - n->disk->queue = n->queue; - n->disk->private_data = n; - snprintf(n->disk->disk_name, sizeof(n->disk->disk_name), - "dst-%s", n->name); - - return 0; - -err_out_put: - put_disk(n->disk); -err_out_free_queue: - blk_cleanup_queue(n->queue); -err_out_exit: - return err; -} - -/* - * Sysfs machinery: show device's size. - */ -static ssize_t dst_show_size(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dst_info *info = container_of(dev, struct dst_info, device); - - return sprintf(buf, "%llu\n", info->size); -} - -/* - * Show local exported device. 
- */ -static ssize_t dst_show_local(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dst_info *info = container_of(dev, struct dst_info, device); - - return sprintf(buf, "%s\n", info->local); -} - -/* - * Shows type of the remote node - device major/minor number - * for local nodes and address (af_inet ipv4/ipv6 only) for remote nodes. - */ -static ssize_t dst_show_type(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dst_info *info = container_of(dev, struct dst_info, device); - int family = info->net.addr.sa_family; - - if (family == AF_INET) { - struct sockaddr_in *sin = (struct sockaddr_in *)&info->net.addr; - return sprintf(buf, "%u.%u.%u.%u:%d\n", - NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); - } else if (family == AF_INET6) { - struct sockaddr_in6 *sin = (struct sockaddr_in6 *) - &info->net.addr; - return sprintf(buf, - "%pi6:%d\n", - &sin->sin6_addr, ntohs(sin->sin6_port)); - } else { - int i, sz = PAGE_SIZE - 2; /* 0 symbol and '\n' below */ - int size, addrlen = info->net.addr.sa_data_len; - unsigned char *a = (unsigned char *)&info->net.addr.sa_data; - char *buf_orig = buf; - - size = snprintf(buf, sz, "family: %d, addrlen: %u, addr: ", - family, addrlen); - sz -= size; - buf += size; - - for (i = 0; i < addrlen; ++i) { - if (sz < 3) - break; - - size = snprintf(buf, sz, "%02x ", a[i]); - sz -= size; - buf += size; - } - buf += sprintf(buf, "\n"); - - return buf - buf_orig; - } - return 0; -} - -static struct device_attribute dst_node_attrs[] = { - __ATTR(size, 0444, dst_show_size, NULL), - __ATTR(type, 0444, dst_show_type, NULL), - __ATTR(local, 0444, dst_show_local, NULL), -}; - -static int dst_create_node_attributes(struct dst_node *n) -{ - int err, i; - - for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i) { - err = device_create_file(&n->info->device, - &dst_node_attrs[i]); - if (err) - goto err_out_remove_all; - } - return 0; - -err_out_remove_all: - while (--i >= 0) - device_remove_file(&n->info->device, - &dst_node_attrs[i]); - - return err; -} - -static void dst_remove_node_attributes(struct dst_node *n) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i) - device_remove_file(&n->info->device, - &dst_node_attrs[i]); -} - -/* - * Sysfs cleanup and initialization. - * Shows number of useful parameters. - */ -static void dst_node_sysfs_exit(struct dst_node *n) -{ - if (n->info) { - dst_remove_node_attributes(n); - device_unregister(&n->info->device); - n->info = NULL; - } -} - -static int dst_node_sysfs_init(struct dst_node *n) -{ - int err; - - n->info = kzalloc(sizeof(struct dst_info), GFP_KERNEL); - if (!n->info) - return -ENOMEM; - - memcpy(&n->info->device, &dst_node_dev, sizeof(struct device)); - n->info->size = n->size; - - dev_set_name(&n->info->device, "dst-%s", n->name); - err = device_register(&n->info->device); - if (err) { - dprintk(KERN_ERR "Failed to register node '%s', err: %d.\n", - n->name, err); - goto err_out_exit; - } - - dst_create_node_attributes(n); - - return 0; - -err_out_exit: - kfree(n->info); - n->info = NULL; - return err; -} - -/* - * DST node hash tables machinery. 
- */ -static inline unsigned int dst_hash(char *str, unsigned int size) -{ - return jhash(str, size, 0) % dst_hashtable_size; -} - -static void dst_node_remove(struct dst_node *n) -{ - mutex_lock(&dst_hash_lock); - list_del_init(&n->node_entry); - mutex_unlock(&dst_hash_lock); -} - -static void dst_node_add(struct dst_node *n) -{ - unsigned hash = dst_hash(n->name, sizeof(n->name)); - - mutex_lock(&dst_hash_lock); - list_add_tail(&n->node_entry, &dst_hashtable[hash]); - mutex_unlock(&dst_hash_lock); -} - -/* - * Cleaning node when it is about to be freed. - * There are still users of the socket though, - * so connection cleanup should be protected. - */ -static void dst_node_cleanup(struct dst_node *n) -{ - struct dst_state *st = n->state; - - if (!st) - return; - - if (n->queue) { - blk_cleanup_queue(n->queue); - - mutex_lock(&dst_hash_lock); - idr_remove(&dst_index_idr, n->disk->first_minor); - mutex_unlock(&dst_hash_lock); - - put_disk(n->disk); - } - - if (n->bdev) { - sync_blockdev(n->bdev); - close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE); - } - - dst_state_lock(st); - st->need_exit = 1; - dst_state_exit_connected(st); - dst_state_unlock(st); - - wake_up(&st->thread_wait); - - dst_state_put(st); - n->state = NULL; -} - -/* - * Free security attributes attached to given node. - */ -static void dst_security_exit(struct dst_node *n) -{ - struct dst_secure *s, *tmp; - - list_for_each_entry_safe(s, tmp, &n->security_list, sec_entry) { - list_del(&s->sec_entry); - kfree(s); - } -} - -/* - * Free node when there are no more users. - * Actually node has to be freed on behalf od userspace process, - * since there are number of threads, which are embedded in the - * node, so they can not exit and free node from there, that is - * why there is a wakeup if reference counter is not equal to zero. - */ -void dst_node_put(struct dst_node *n) -{ - if (unlikely(!n)) - return; - - dprintk("%s: n: %p, refcnt: %d.\n", - __func__, n, atomic_read(&n->refcnt)); - - if (atomic_dec_and_test(&n->refcnt)) { - dst_node_remove(n); - n->trans_scan_timeout = 0; - dst_node_cleanup(n); - thread_pool_destroy(n->pool); - dst_node_sysfs_exit(n); - dst_node_crypto_exit(n); - dst_security_exit(n); - dst_node_trans_exit(n); - - kfree(n); - - dprintk("%s: freed n: %p.\n", __func__, n); - } else { - wake_up(&n->wait); - } -} - -/* - * Setting up export device: lookup by the name, get its size - * and setup listening socket, which will accept clients, which - * will submit IO for given storage. - */ -static int dst_setup_export(struct dst_node *n, struct dst_ctl *ctl, - struct dst_export_ctl *le) -{ - int err; - - snprintf(n->info->local, sizeof(n->info->local), "%s", le->device); - - n->bdev = open_bdev_exclusive(le->device, FMODE_READ|FMODE_WRITE, NULL); - if (IS_ERR(n->bdev)) - return PTR_ERR(n->bdev); - - if (n->size != 0) - n->size = min_t(loff_t, n->bdev->bd_inode->i_size, n->size); - else - n->size = n->bdev->bd_inode->i_size; - - n->info->size = n->size; - err = dst_node_init_listened(n, le); - if (err) - goto err_out_cleanup; - - return 0; - -err_out_cleanup: - close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE); - n->bdev = NULL; - - return err; -} - -/* Empty thread pool callbacks for the network processing threads. 
*/ -static inline void *dst_thread_network_init(void *data) -{ - dprintk("%s: data: %p.\n", __func__, data); - return data; -} - -static inline void dst_thread_network_cleanup(void *data) -{ - dprintk("%s: data: %p.\n", __func__, data); -} - -/* - * Allocate DST node and initialize some of its parameters. - */ -static struct dst_node *dst_alloc_node(struct dst_ctl *ctl, - int (*start)(struct dst_node *), - int num) -{ - struct dst_node *n; - int err; - - n = kzalloc(sizeof(struct dst_node), GFP_KERNEL); - if (!n) - return NULL; - - INIT_LIST_HEAD(&n->node_entry); - - INIT_LIST_HEAD(&n->security_list); - mutex_init(&n->security_lock); - - init_waitqueue_head(&n->wait); - - n->trans_scan_timeout = msecs_to_jiffies(ctl->trans_scan_timeout); - if (!n->trans_scan_timeout) - n->trans_scan_timeout = HZ; - - n->trans_max_retries = ctl->trans_max_retries; - if (!n->trans_max_retries) - n->trans_max_retries = 10; - - /* - * Pretty much arbitrary default numbers. - * 32 matches maximum number of pages in bio originated from ext3 (31). - */ - n->max_pages = ctl->max_pages; - if (!n->max_pages) - n->max_pages = 32; - - if (n->max_pages > 1024) - n->max_pages = 1024; - - n->start = start; - n->size = ctl->size; - - atomic_set(&n->refcnt, 1); - atomic_long_set(&n->gen, 0); - snprintf(n->name, sizeof(n->name), "%s", ctl->name); - - err = dst_node_sysfs_init(n); - if (err) - goto err_out_free; - - n->pool = thread_pool_create(num, n->name, dst_thread_network_init, - dst_thread_network_cleanup, n); - if (IS_ERR(n->pool)) { - err = PTR_ERR(n->pool); - goto err_out_sysfs_exit; - } - - dprintk("%s: n: %p, name: %s.\n", __func__, n, n->name); - - return n; - -err_out_sysfs_exit: - dst_node_sysfs_exit(n); -err_out_free: - kfree(n); - return NULL; -} - -/* - * Starting a node, connected to the remote server: - * register block device and initialize transaction mechanism. - * In revers order though. - * - * It will autonegotiate some parameters with the remote node - * and update local if needed. - * - * Transaction initialization should be the last thing before - * starting the node, since transaction should include not only - * block IO, but also crypto related data (if any), which are - * initialized separately. - */ -static int dst_start_remote(struct dst_node *n) -{ - int err; - - err = dst_node_trans_init(n, sizeof(struct dst_trans)); - if (err) - return err; - - err = dst_node_create_disk(n); - if (err) - return err; - - dst_node_set_size(n); - add_disk(n->disk); - - dprintk("DST: started remote node '%s', minor: %d.\n", - n->name, n->disk->first_minor); - - return 0; -} - -/* - * Adding remote node and initialize connection. - */ -static int dst_add_remote(struct dst_node *n, struct dst_ctl *ctl, - void *data, unsigned int size) -{ - int err; - struct dst_network_ctl *rctl = data; - - if (n) - return -EEXIST; - - if (size != sizeof(struct dst_network_ctl)) - return -EINVAL; - - n = dst_alloc_node(ctl, dst_start_remote, 1); - if (!n) - return -ENOMEM; - - memcpy(&n->info->net, rctl, sizeof(struct dst_network_ctl)); - err = dst_node_init_connected(n, rctl); - if (err) - goto err_out_free; - - dst_node_add(n); - - return 0; - -err_out_free: - dst_node_put(n); - return err; -} - -/* - * Adding export node: initializing block device and listening socket. 
- */ -static int dst_add_export(struct dst_node *n, struct dst_ctl *ctl, - void *data, unsigned int size) -{ - int err; - struct dst_export_ctl *le = data; - - if (n) - return -EEXIST; - - if (size != sizeof(struct dst_export_ctl)) - return -EINVAL; - - n = dst_alloc_node(ctl, dst_start_export, 2); - if (!n) - return -EINVAL; - - err = dst_setup_export(n, ctl, le); - if (err) - goto err_out_free; - - dst_node_add(n); - - return 0; - -err_out_free: - dst_node_put(n); - return err; -} - -static int dst_node_remove_unload(struct dst_node *n) -{ - printk(KERN_INFO "STOPPED name: '%s', size: %llu.\n", - n->name, n->size); - - if (n->disk) - del_gendisk(n->disk); - - dst_node_remove(n); - dst_node_sysfs_exit(n); - - /* - * This is not a hack. Really. - * Node's reference counter allows to implement fine grained - * node freeing, but since all transactions (which hold node's - * reference counter) are processed in the dedicated thread, - * it is possible that reference will hit zero in that thread, - * so we will not be able to exit thread and cleanup the node. - * - * So, we remove disk, so no new activity is possible, and - * wait until all pending transaction are completed (either - * in receiving thread or by timeout in workqueue), in this - * case reference counter will be less or equal to 2 (once set in - * dst_alloc_node() and then in connector message parser; - * or when we force module unloading, and connector message - * parser does not hold a reference, in this case reference - * counter will be equal to 1), - * and subsequent dst_node_put() calls will free the node. - */ - dprintk("%s: going to sleep with %d refcnt.\n", - __func__, atomic_read(&n->refcnt)); - wait_event(n->wait, atomic_read(&n->refcnt) <= 2); - - dst_node_put(n); - return 0; -} - -/* - * Remove node from the hash table. - */ -static int dst_del_node(struct dst_node *n, struct dst_ctl *ctl, - void *data, unsigned int size) -{ - if (!n) - return -ENODEV; - - return dst_node_remove_unload(n); -} - -/* - * Initialize crypto processing for given node. - */ -static int dst_crypto_init(struct dst_node *n, struct dst_ctl *ctl, - void *data, unsigned int size) -{ - struct dst_crypto_ctl *crypto = data; - - if (!n) - return -ENODEV; - - if (size != sizeof(struct dst_crypto_ctl) + crypto->hash_keysize + - crypto->cipher_keysize) - return -EINVAL; - - if (n->trans_cache) - return -EEXIST; - - return dst_node_crypto_init(n, crypto); -} - -/* - * Security attributes for given node. - */ -static int dst_security_init(struct dst_node *n, struct dst_ctl *ctl, - void *data, unsigned int size) -{ - struct dst_secure *s; - - if (!n) - return -ENODEV; - - if (size != sizeof(struct dst_secure_user)) - return -EINVAL; - - s = kmalloc(sizeof(struct dst_secure), GFP_KERNEL); - if (!s) - return -ENOMEM; - - memcpy(&s->sec, data, size); - - mutex_lock(&n->security_lock); - list_add_tail(&s->sec_entry, &n->security_list); - mutex_unlock(&n->security_lock); - - return 0; -} - -/* - * Kill'em all! - */ -static int dst_start_node(struct dst_node *n, struct dst_ctl *ctl, - void *data, unsigned int size) -{ - int err; - - if (!n) - return -ENODEV; - - if (n->trans_cache) - return 0; - - err = n->start(n); - if (err) - return err; - - printk(KERN_INFO "STARTED name: '%s', size: %llu.\n", n->name, n->size); - return 0; -} - -typedef int (*dst_command_func)(struct dst_node *n, struct dst_ctl *ctl, - void *data, unsigned int size); - -/* - * List of userspace commands. 
- */ -static dst_command_func dst_commands[] = { - [DST_ADD_REMOTE] = &dst_add_remote, - [DST_ADD_EXPORT] = &dst_add_export, - [DST_DEL_NODE] = &dst_del_node, - [DST_CRYPTO] = &dst_crypto_init, - [DST_SECURITY] = &dst_security_init, - [DST_START] = &dst_start_node, -}; - -/* - * Configuration parser. - */ -static void cn_dst_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) -{ - struct dst_ctl *ctl; - int err; - struct dst_ctl_ack ack; - struct dst_node *n = NULL, *tmp; - unsigned int hash; - - if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) { - err = -EPERM; - goto out; - } - - if (msg->len < sizeof(struct dst_ctl)) { - err = -EBADMSG; - goto out; - } - - ctl = (struct dst_ctl *)msg->data; - - if (ctl->cmd >= DST_CMD_MAX) { - err = -EINVAL; - goto out; - } - hash = dst_hash(ctl->name, sizeof(ctl->name)); - - mutex_lock(&dst_hash_lock); - list_for_each_entry(tmp, &dst_hashtable[hash], node_entry) { - if (!memcmp(tmp->name, ctl->name, sizeof(tmp->name))) { - n = tmp; - dst_node_get(n); - break; - } - } - mutex_unlock(&dst_hash_lock); - - err = dst_commands[ctl->cmd](n, ctl, msg->data + sizeof(struct dst_ctl), - msg->len - sizeof(struct dst_ctl)); - - dst_node_put(n); -out: - memcpy(&ack.msg, msg, sizeof(struct cn_msg)); - - ack.msg.ack = msg->ack + 1; - ack.msg.len = sizeof(struct dst_ctl_ack) - sizeof(struct cn_msg); - - ack.error = err; - - cn_netlink_send(&ack.msg, 0, GFP_KERNEL); -} - -/* - * Global initialization: sysfs, hash table, block device registration, - * connector and various caches. - */ -static int __init dst_sysfs_init(void) -{ - return bus_register(&dst_dev_bus_type); -} - -static void dst_sysfs_exit(void) -{ - bus_unregister(&dst_dev_bus_type); -} - -static int __init dst_hashtable_init(void) -{ - unsigned int i; - - dst_hashtable = kcalloc(dst_hashtable_size, sizeof(struct list_head), - GFP_KERNEL); - if (!dst_hashtable) - return -ENOMEM; - - for (i = 0; i < dst_hashtable_size; ++i) - INIT_LIST_HEAD(&dst_hashtable[i]); - - return 0; -} - -static void dst_hashtable_exit(void) -{ - unsigned int i; - struct dst_node *n, *tmp; - - for (i = 0; i < dst_hashtable_size; ++i) { - list_for_each_entry_safe(n, tmp, &dst_hashtable[i], node_entry) { - dst_node_remove_unload(n); - } - } - - kfree(dst_hashtable); -} - -static int __init dst_sys_init(void) -{ - int err = -ENOMEM; - - err = dst_hashtable_init(); - if (err) - goto err_out_exit; - - err = dst_export_init(); - if (err) - goto err_out_hashtable_exit; - - err = register_blkdev(dst_major, DST_NAME); - if (err < 0) - goto err_out_export_exit; - if (err) - dst_major = err; - - err = dst_sysfs_init(); - if (err) - goto err_out_unregister; - - err = cn_add_callback(&cn_dst_id, "DST", cn_dst_callback); - if (err) - goto err_out_sysfs_exit; - - printk(KERN_INFO "Distributed storage, '%s' release.\n", dst_name); - - return 0; - -err_out_sysfs_exit: - dst_sysfs_exit(); -err_out_unregister: - unregister_blkdev(dst_major, DST_NAME); -err_out_export_exit: - dst_export_exit(); -err_out_hashtable_exit: - dst_hashtable_exit(); -err_out_exit: - return err; -} - -static void __exit dst_sys_exit(void) -{ - cn_del_callback(&cn_dst_id); - unregister_blkdev(dst_major, DST_NAME); - dst_hashtable_exit(); - dst_sysfs_exit(); - dst_export_exit(); -} - -module_init(dst_sys_init); -module_exit(dst_sys_exit); - -MODULE_DESCRIPTION("Distributed storage"); -MODULE_AUTHOR("Evgeniy Polyakov "); -MODULE_LICENSE("GPL"); diff --git a/drivers/staging/dst/export.c b/drivers/staging/dst/export.c deleted file mode 100644 index c324230e8b60..000000000000 --- 
a/drivers/staging/dst/export.c +++ /dev/null @@ -1,660 +0,0 @@ -/* - * 2007+ Copyright (c) Evgeniy Polyakov - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * Export bioset is used for server block IO requests. - */ -static struct bio_set *dst_bio_set; - -int __init dst_export_init(void) -{ - int err = -ENOMEM; - - dst_bio_set = bioset_create(32, sizeof(struct dst_export_priv)); - if (!dst_bio_set) - goto err_out_exit; - - return 0; - -err_out_exit: - return err; -} - -void dst_export_exit(void) -{ - bioset_free(dst_bio_set); -} - -/* - * When client connects and autonegotiates with the server node, - * its permissions are checked in a security attributes and sent - * back. - */ -static unsigned int dst_check_permissions(struct dst_state *main, - struct dst_state *st) -{ - struct dst_node *n = main->node; - struct dst_secure *sentry; - struct dst_secure_user *s; - struct saddr *sa = &st->ctl.addr; - unsigned int perm = 0; - - mutex_lock(&n->security_lock); - list_for_each_entry(sentry, &n->security_list, sec_entry) { - s = &sentry->sec; - - if (s->addr.sa_family != sa->sa_family) - continue; - - if (s->addr.sa_data_len != sa->sa_data_len) - continue; - - /* - * This '2' below is a port field. This may be very wrong to do - * in atalk for example though. If there will be any need - * to extent protocol to something else, I can create - * per-family helpers and use them instead of this memcmp. - */ - if (memcmp(s->addr.sa_data + 2, sa->sa_data + 2, - sa->sa_data_len - 2)) - continue; - - perm = s->permissions; - } - mutex_unlock(&n->security_lock); - - return perm; -} - -/* - * Accept new client: allocate appropriate network state and check permissions. - */ -static struct dst_state *dst_accept_client(struct dst_state *st) -{ - unsigned int revents = 0; - unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP; - unsigned int mask = err_mask | POLLIN; - struct dst_node *n = st->node; - int err = 0; - struct socket *sock = NULL; - struct dst_state *new; - - while (!err && !sock) { - revents = dst_state_poll(st); - - if (!(revents & mask)) { - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait(&st->thread_wait, - &wait, TASK_INTERRUPTIBLE); - if (!n->trans_scan_timeout || st->need_exit) - break; - - revents = dst_state_poll(st); - - if (revents & mask) - break; - - if (signal_pending(current)) - break; - - /* - * Magic HZ? Polling check above is not safe in - * all cases (like socket reset in BH context), - * so it is simpler just to postpone it to the - * process context instead of implementing - * special locking there. 
- */ - schedule_timeout(HZ); - } - finish_wait(&st->thread_wait, &wait); - } - - err = -ECONNRESET; - dst_state_lock(st); - - dprintk("%s: st: %p, revents: %x [err: %d, in: %d].\n", - __func__, st, revents, revents & err_mask, - revents & POLLIN); - - if (revents & err_mask) { - dprintk("%s: revents: %x, socket: %p, err: %d.\n", - __func__, revents, st->socket, err); - err = -ECONNRESET; - } - - if (!n->trans_scan_timeout || st->need_exit) - err = -ENODEV; - - if (st->socket && (revents & POLLIN)) - err = kernel_accept(st->socket, &sock, 0); - - dst_state_unlock(st); - } - - if (err) - goto err_out_exit; - - new = dst_state_alloc(st->node); - if (IS_ERR(new)) { - err = -ENOMEM; - goto err_out_release; - } - new->socket = sock; - - new->ctl.addr.sa_data_len = sizeof(struct sockaddr); - err = kernel_getpeername(sock, (struct sockaddr *)&new->ctl.addr, - (int *)&new->ctl.addr.sa_data_len); - if (err) - goto err_out_put; - - new->permissions = dst_check_permissions(st, new); - if (new->permissions == 0) { - err = -EPERM; - dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr, - "Client is not allowed to connect"); - goto err_out_put; - } - - err = dst_poll_init(new); - if (err) - goto err_out_put; - - dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr, - "Connected client"); - - return new; - -err_out_put: - dst_state_put(new); -err_out_release: - sock_release(sock); -err_out_exit: - return ERR_PTR(err); -} - -/* - * Each server's block request sometime finishes. - * Usually it happens in hard irq context of the appropriate controller, - * so to play good with all cases we just queue BIO into the queue - * and wake up processing thread, which gets completed request and - * send (encrypting if needed) it back to the client (if it was a read - * request), or sends back reply that writing successfully completed. - */ -static int dst_export_process_request_queue(struct dst_state *st) -{ - unsigned long flags; - struct dst_export_priv *p = NULL; - struct bio *bio; - int err = 0; - - while (!list_empty(&st->request_list)) { - spin_lock_irqsave(&st->request_lock, flags); - if (!list_empty(&st->request_list)) { - p = list_first_entry(&st->request_list, - struct dst_export_priv, request_entry); - list_del(&p->request_entry); - } - spin_unlock_irqrestore(&st->request_lock, flags); - - if (!p) - break; - - bio = p->bio; - - if (dst_need_crypto(st->node) && (bio_data_dir(bio) == READ)) - err = dst_export_crypto(st->node, bio); - else - err = dst_export_send_bio(bio); - - if (err) - break; - } - - return err; -} - -/* - * Cleanup export state. - * It has to wait until all requests are finished, - * and then free them all. - */ -static void dst_state_cleanup_export(struct dst_state *st) -{ - struct dst_export_priv *p; - unsigned long flags; - - /* - * This loop waits for all pending bios to be completed and freed. 
- */ - while (atomic_read(&st->refcnt) > 1) { - dprintk("%s: st: %p, refcnt: %d, list_empty: %d.\n", - __func__, st, atomic_read(&st->refcnt), - list_empty(&st->request_list)); - wait_event_timeout(st->thread_wait, - (atomic_read(&st->refcnt) == 1) || - !list_empty(&st->request_list), - HZ/2); - - while (!list_empty(&st->request_list)) { - p = NULL; - spin_lock_irqsave(&st->request_lock, flags); - if (!list_empty(&st->request_list)) { - p = list_first_entry(&st->request_list, - struct dst_export_priv, request_entry); - list_del(&p->request_entry); - } - spin_unlock_irqrestore(&st->request_lock, flags); - - if (p) - bio_put(p->bio); - - dprintk("%s: st: %p, refcnt: %d, list_empty: %d, p: " - "%p.\n", __func__, st, atomic_read(&st->refcnt), - list_empty(&st->request_list), p); - } - } - - dst_state_put(st); -} - -/* - * Client accepting thread. - * Not only accepts new connection, but also schedules receiving thread - * and performs request completion described above. - */ -static int dst_accept(void *init_data, void *schedule_data) -{ - struct dst_state *main_st = schedule_data; - struct dst_node *n = init_data; - struct dst_state *st; - int err; - - while (n->trans_scan_timeout && !main_st->need_exit) { - dprintk("%s: main_st: %p, n: %p.\n", __func__, main_st, n); - st = dst_accept_client(main_st); - if (IS_ERR(st)) - continue; - - err = dst_state_schedule_receiver(st); - if (!err) { - while (n->trans_scan_timeout) { - err = wait_event_interruptible_timeout(st->thread_wait, - !list_empty(&st->request_list) || - !n->trans_scan_timeout || - st->need_exit, - HZ); - - if (!n->trans_scan_timeout || st->need_exit) - break; - - if (list_empty(&st->request_list)) - continue; - - err = dst_export_process_request_queue(st); - if (err) - break; - } - - st->need_exit = 1; - wake_up(&st->thread_wait); - } - - dst_state_cleanup_export(st); - } - - dprintk("%s: freeing listening socket st: %p.\n", __func__, main_st); - - dst_state_lock(main_st); - dst_poll_exit(main_st); - dst_state_socket_release(main_st); - dst_state_unlock(main_st); - dst_state_put(main_st); - dprintk("%s: freed listening socket st: %p.\n", __func__, main_st); - - return 0; -} - -int dst_start_export(struct dst_node *n) -{ - if (list_empty(&n->security_list)) { - printk(KERN_ERR "You are trying to export node '%s' " - "without security attributes.\nNo clients will " - "be allowed to connect. Exiting.\n", n->name); - return -EINVAL; - } - return dst_node_trans_init(n, sizeof(struct dst_export_priv)); -} - -/* - * Initialize listening state and schedule accepting thread. 
- */ -int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le) -{ - struct dst_state *st; - int err = -ENOMEM; - struct dst_network_ctl *ctl = &le->ctl; - - memcpy(&n->info->net, ctl, sizeof(struct dst_network_ctl)); - - st = dst_state_alloc(n); - if (IS_ERR(st)) { - err = PTR_ERR(st); - goto err_out_exit; - } - memcpy(&st->ctl, ctl, sizeof(struct dst_network_ctl)); - - err = dst_state_socket_create(st); - if (err) - goto err_out_put; - - st->socket->sk->sk_reuse = 1; - - err = kernel_bind(st->socket, (struct sockaddr *)&ctl->addr, - ctl->addr.sa_data_len); - if (err) - goto err_out_socket_release; - - err = kernel_listen(st->socket, 1024); - if (err) - goto err_out_socket_release; - n->state = st; - - err = dst_poll_init(st); - if (err) - goto err_out_socket_release; - - dst_state_get(st); - - err = thread_pool_schedule(n->pool, dst_thread_setup, - dst_accept, st, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_out_poll_exit; - - return 0; - -err_out_poll_exit: - dst_poll_exit(st); -err_out_socket_release: - dst_state_socket_release(st); -err_out_put: - dst_state_put(st); -err_out_exit: - n->state = NULL; - return err; -} - -/* - * Free bio and related private data. - * Also drop a reference counter for appropriate state, - * which waits when there are no more block IOs in-flight. - */ -static void dst_bio_destructor(struct bio *bio) -{ - struct bio_vec *bv; - struct dst_export_priv *priv = bio->bi_private; - int i; - - bio_for_each_segment(bv, bio, i) { - if (!bv->bv_page) - break; - - __free_page(bv->bv_page); - } - - if (priv) - dst_state_put(priv->state); - bio_free(bio, dst_bio_set); -} - -/* - * Block IO completion. Queue request to be sent back to - * the client (or just confirmation). - */ -static void dst_bio_end_io(struct bio *bio, int err) -{ - struct dst_export_priv *p = bio->bi_private; - struct dst_state *st = p->state; - unsigned long flags; - - spin_lock_irqsave(&st->request_lock, flags); - list_add_tail(&p->request_entry, &st->request_list); - spin_unlock_irqrestore(&st->request_lock, flags); - - wake_up(&st->thread_wait); -} - -/* - * Allocate read request for the server. - */ -static int dst_export_read_request(struct bio *bio, unsigned int total_size) -{ - unsigned int size; - struct page *page; - int err; - - while (total_size) { - err = -ENOMEM; - page = alloc_page(GFP_KERNEL); - if (!page) - goto err_out_exit; - - size = min_t(unsigned int, PAGE_SIZE, total_size); - - err = bio_add_page(bio, page, size, 0); - dprintk("%s: bio: %llu/%u, size: %u, err: %d.\n", - __func__, (u64)bio->bi_sector, bio->bi_size, - size, err); - if (err <= 0) - goto err_out_free_page; - - total_size -= size; - } - - return 0; - -err_out_free_page: - __free_page(page); -err_out_exit: - return err; -} - -/* - * Allocate write request for the server. - * Should not only get pages, but also read data from the network. 
- */ -static int dst_export_write_request(struct dst_state *st, - struct bio *bio, unsigned int total_size) -{ - unsigned int size; - struct page *page; - void *data; - int err; - - while (total_size) { - err = -ENOMEM; - page = alloc_page(GFP_KERNEL); - if (!page) - goto err_out_exit; - - data = kmap(page); - if (!data) - goto err_out_free_page; - - size = min_t(unsigned int, PAGE_SIZE, total_size); - - err = dst_data_recv(st, data, size); - if (err) - goto err_out_unmap_page; - - err = bio_add_page(bio, page, size, 0); - if (err <= 0) - goto err_out_unmap_page; - - kunmap(page); - - total_size -= size; - } - - return 0; - -err_out_unmap_page: - kunmap(page); -err_out_free_page: - __free_page(page); -err_out_exit: - return err; -} - -/* - * Groovy, we've gotten an IO request from the client. - * Allocate BIO from the bioset, private data from the mempool - * and lots of pages for IO. - */ -int dst_process_io(struct dst_state *st) -{ - struct dst_node *n = st->node; - struct dst_cmd *cmd = st->data; - struct bio *bio; - struct dst_export_priv *priv; - int err = -ENOMEM; - - if (unlikely(!n->bdev)) { - err = -EINVAL; - goto err_out_exit; - } - - bio = bio_alloc_bioset(GFP_KERNEL, - PAGE_ALIGN(cmd->size) >> PAGE_SHIFT, - dst_bio_set); - if (!bio) - goto err_out_exit; - - priv = (struct dst_export_priv *)(((void *)bio) - - sizeof (struct dst_export_priv)); - - priv->state = dst_state_get(st); - priv->bio = bio; - - bio->bi_private = priv; - bio->bi_end_io = dst_bio_end_io; - bio->bi_destructor = dst_bio_destructor; - bio->bi_bdev = n->bdev; - - /* - * Server side is only interested in two low bits: - * uptodate (set by itself actually) and rw block - */ - bio->bi_flags |= cmd->flags & 3; - - bio->bi_rw = cmd->rw; - bio->bi_size = 0; - bio->bi_sector = cmd->sector; - - dst_bio_to_cmd(bio, &priv->cmd, DST_IO_RESPONSE, cmd->id); - - priv->cmd.flags = 0; - priv->cmd.size = cmd->size; - - if (bio_data_dir(bio) == WRITE) { - err = dst_recv_cdata(st, priv->cmd.hash); - if (err) - goto err_out_free; - - err = dst_export_write_request(st, bio, cmd->size); - if (err) - goto err_out_free; - - if (dst_need_crypto(n)) - return dst_export_crypto(n, bio); - } else { - err = dst_export_read_request(bio, cmd->size); - if (err) - goto err_out_free; - } - - dprintk("%s: bio: %llu/%u, rw: %lu, dir: %lu, flags: %lx, phys: %d.\n", - __func__, (u64)bio->bi_sector, bio->bi_size, - bio->bi_rw, bio_data_dir(bio), - bio->bi_flags, bio->bi_phys_segments); - - generic_make_request(bio); - - return 0; - -err_out_free: - bio_put(bio); -err_out_exit: - return err; -} - -/* - * Ok, block IO is ready, let's send it back to the client... - */ -int dst_export_send_bio(struct bio *bio) -{ - struct dst_export_priv *p = bio->bi_private; - struct dst_state *st = p->state; - struct dst_cmd *cmd = &p->cmd; - int err; - - dprintk("%s: id: %llu, bio: %llu/%u, csize: %u, flags: %lu, rw: %lu.\n", - __func__, cmd->id, (u64)bio->bi_sector, bio->bi_size, - cmd->csize, bio->bi_flags, bio->bi_rw); - - dst_convert_cmd(cmd); - - dst_state_lock(st); - if (!st->socket) { - err = -ECONNRESET; - goto err_out_unlock; - } - - if (bio_data_dir(bio) == WRITE) { - /* ... or just confirmation that writing has completed. 
*/ - cmd->size = cmd->csize = 0; - err = dst_data_send_header(st->socket, cmd, - sizeof(struct dst_cmd), 0); - if (err) - goto err_out_unlock; - } else { - err = dst_send_bio(st, cmd, bio); - if (err) - goto err_out_unlock; - } - - dst_state_unlock(st); - - bio_put(bio); - return 0; - -err_out_unlock: - dst_state_unlock(st); - - bio_put(bio); - return err; -} diff --git a/drivers/staging/dst/state.c b/drivers/staging/dst/state.c deleted file mode 100644 index 02a05e6c48c3..000000000000 --- a/drivers/staging/dst/state.c +++ /dev/null @@ -1,844 +0,0 @@ -/* - * 2007+ Copyright (c) Evgeniy Polyakov - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * Polling machinery. - */ - -struct dst_poll_helper { - poll_table pt; - struct dst_state *st; -}; - -static int dst_queue_wake(wait_queue_t *wait, unsigned mode, - int sync, void *key) -{ - struct dst_state *st = container_of(wait, struct dst_state, wait); - - wake_up(&st->thread_wait); - return 1; -} - -static void dst_queue_func(struct file *file, wait_queue_head_t *whead, - poll_table *pt) -{ - struct dst_state *st = container_of(pt, struct dst_poll_helper, pt)->st; - - st->whead = whead; - init_waitqueue_func_entry(&st->wait, dst_queue_wake); - add_wait_queue(whead, &st->wait); -} - -void dst_poll_exit(struct dst_state *st) -{ - if (st->whead) { - remove_wait_queue(st->whead, &st->wait); - st->whead = NULL; - } -} - -int dst_poll_init(struct dst_state *st) -{ - struct dst_poll_helper ph; - - ph.st = st; - init_poll_funcptr(&ph.pt, &dst_queue_func); - - st->socket->ops->poll(NULL, st->socket, &ph.pt); - return 0; -} - -/* - * Header receiving function - may block. - */ -static int dst_data_recv_header(struct socket *sock, - void *data, unsigned int size, int block) -{ - struct msghdr msg; - struct kvec iov; - int err; - - iov.iov_base = data; - iov.iov_len = size; - - msg.msg_iov = (struct iovec *)&iov; - msg.msg_iovlen = 1; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = (block) ? MSG_WAITALL : MSG_DONTWAIT; - - err = kernel_recvmsg(sock, &msg, &iov, 1, iov.iov_len, - msg.msg_flags); - if (err != size) - return -1; - - return 0; -} - -/* - * Header sending function - may block. - */ -int dst_data_send_header(struct socket *sock, - void *data, unsigned int size, int more) -{ - struct msghdr msg; - struct kvec iov; - int err; - - iov.iov_base = data; - iov.iov_len = size; - - msg.msg_iov = (struct iovec *)&iov; - msg.msg_iovlen = 1; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = MSG_WAITALL | (more ? MSG_MORE : 0); - - err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); - if (err != size) { - dprintk("%s: size: %u, more: %d, err: %d.\n", - __func__, size, more, err); - return -1; - } - - return 0; -} - -/* - * Block autoconfiguration: request size of the storage and permissions. 
- */ -static int dst_request_remote_config(struct dst_state *st) -{ - struct dst_node *n = st->node; - int err = -EINVAL; - struct dst_cmd *cmd = st->data; - - memset(cmd, 0, sizeof(struct dst_cmd)); - cmd->cmd = DST_CFG; - - dst_convert_cmd(cmd); - - err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0); - if (err) - goto out; - - err = dst_data_recv_header(st->socket, cmd, sizeof(struct dst_cmd), 1); - if (err) - goto out; - - dst_convert_cmd(cmd); - - if (cmd->cmd != DST_CFG) { - err = -EINVAL; - dprintk("%s: checking result: cmd: %d, size reported: %llu.\n", - __func__, cmd->cmd, cmd->sector); - goto out; - } - - if (n->size != 0) - n->size = min_t(loff_t, n->size, cmd->sector); - else - n->size = cmd->sector; - - n->info->size = n->size; - st->permissions = cmd->rw; - -out: - dprintk("%s: n: %p, err: %d, size: %llu, permission: %x.\n", - __func__, n, err, n->size, st->permissions); - return err; -} - -/* - * Socket machinery. - */ - -#define DST_DEFAULT_TIMEO 20000 - -int dst_state_socket_create(struct dst_state *st) -{ - int err; - struct socket *sock; - struct dst_network_ctl *ctl = &st->ctl; - - err = sock_create(ctl->addr.sa_family, ctl->type, ctl->proto, &sock); - if (err < 0) - return err; - - sock->sk->sk_sndtimeo = sock->sk->sk_rcvtimeo = - msecs_to_jiffies(DST_DEFAULT_TIMEO); - sock->sk->sk_allocation = GFP_NOIO; - - st->socket = st->read_socket = sock; - return 0; -} - -void dst_state_socket_release(struct dst_state *st) -{ - dprintk("%s: st: %p, socket: %p, n: %p.\n", - __func__, st, st->socket, st->node); - if (st->socket) { - sock_release(st->socket); - st->socket = NULL; - st->read_socket = NULL; - } -} - -void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str) -{ - if (sk->ops->family == AF_INET) { - struct sockaddr_in *sin = (struct sockaddr_in *)sa; - printk(KERN_INFO "%s %u.%u.%u.%u:%d.\n", str, - NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); - } else if (sk->ops->family == AF_INET6) { - struct sockaddr_in6 *sin = (struct sockaddr_in6 *)sa; - printk(KERN_INFO "%s %pi6:%d", - str, &sin->sin6_addr, ntohs(sin->sin6_port)); - } -} - -void dst_state_exit_connected(struct dst_state *st) -{ - if (st->socket) { - dst_poll_exit(st); - st->socket->ops->shutdown(st->socket, 2); - - dst_dump_addr(st->socket, (struct sockaddr *)&st->ctl.addr, - "Disconnected peer"); - dst_state_socket_release(st); - } -} - -static int dst_state_init_connected(struct dst_state *st) -{ - int err; - struct dst_network_ctl *ctl = &st->ctl; - - err = dst_state_socket_create(st); - if (err) - goto err_out_exit; - - err = kernel_connect(st->socket, (struct sockaddr *)&st->ctl.addr, - st->ctl.addr.sa_data_len, 0); - if (err) - goto err_out_release; - - err = dst_poll_init(st); - if (err) - goto err_out_release; - - dst_dump_addr(st->socket, (struct sockaddr *)&ctl->addr, - "Connected to peer"); - - return 0; - -err_out_release: - dst_state_socket_release(st); -err_out_exit: - return err; -} - -/* - * State reset is used to reconnect to the remote peer. - * May fail, but who cares, we will try again later. - */ -static inline void dst_state_reset_nolock(struct dst_state *st) -{ - dst_state_exit_connected(st); - dst_state_init_connected(st); -} - -static inline void dst_state_reset(struct dst_state *st) -{ - dst_state_lock(st); - dst_state_reset_nolock(st); - dst_state_unlock(st); -} - -/* - * Basic network sending/receiving functions. - * Blocked mode is used. 
- */ -static int dst_data_recv_raw(struct dst_state *st, void *buf, u64 size) -{ - struct msghdr msg; - struct kvec iov; - int err; - - BUG_ON(!size); - - iov.iov_base = buf; - iov.iov_len = size; - - msg.msg_iov = (struct iovec *)&iov; - msg.msg_iovlen = 1; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = MSG_DONTWAIT; - - err = kernel_recvmsg(st->socket, &msg, &iov, 1, iov.iov_len, - msg.msg_flags); - if (err <= 0) { - dprintk("%s: failed to recv data: size: %llu, err: %d.\n", - __func__, size, err); - if (err == 0) - err = -ECONNRESET; - - dst_state_exit_connected(st); - } - - return err; -} - -/* - * Ping command to early detect failed nodes. - */ -static int dst_send_ping(struct dst_state *st) -{ - struct dst_cmd *cmd = st->data; - int err = -ECONNRESET; - - dst_state_lock(st); - if (st->socket) { - memset(cmd, 0, sizeof(struct dst_cmd)); - - cmd->cmd = __cpu_to_be32(DST_PING); - - err = dst_data_send_header(st->socket, cmd, - sizeof(struct dst_cmd), 0); - } - dprintk("%s: st: %p, socket: %p, err: %d.\n", __func__, - st, st->socket, err); - dst_state_unlock(st); - - return err; -} - -/* - * Receiving function, which should either return error or read - * whole block request. If there was no traffic for a one second, - * send a ping, since remote node may die. - */ -int dst_data_recv(struct dst_state *st, void *data, unsigned int size) -{ - unsigned int revents = 0; - unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP; - unsigned int mask = err_mask | POLLIN; - struct dst_node *n = st->node; - int err = 0; - - while (size && !err) { - revents = dst_state_poll(st); - - if (!(revents & mask)) { - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait(&st->thread_wait, &wait, - TASK_INTERRUPTIBLE); - if (!n->trans_scan_timeout || st->need_exit) - break; - - revents = dst_state_poll(st); - - if (revents & mask) - break; - - if (signal_pending(current)) - break; - - if (!schedule_timeout(HZ)) { - err = dst_send_ping(st); - if (err) - return err; - } - - continue; - } - finish_wait(&st->thread_wait, &wait); - } - - err = -ECONNRESET; - dst_state_lock(st); - - if (st->socket && (st->read_socket == st->socket) && - (revents & POLLIN)) { - err = dst_data_recv_raw(st, data, size); - if (err > 0) { - data += err; - size -= err; - err = 0; - } - } - - if (revents & err_mask || !st->socket) { - dprintk("%s: revents: %x, socket: %p, size: %u, " - "err: %d.\n", __func__, revents, - st->socket, size, err); - err = -ECONNRESET; - } - - dst_state_unlock(st); - - if (!n->trans_scan_timeout) - err = -ENODEV; - } - - return err; -} - -/* - * Send block autoconf reply. - */ -static int dst_process_cfg(struct dst_state *st) -{ - struct dst_node *n = st->node; - struct dst_cmd *cmd = st->data; - int err; - - cmd->sector = n->size; - cmd->rw = st->permissions; - - dst_convert_cmd(cmd); - - dst_state_lock(st); - err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0); - dst_state_unlock(st); - - return err; -} - -/* - * Receive block IO from the network. 
- */ -static int dst_recv_bio(struct dst_state *st, struct bio *bio, - unsigned int total_size) -{ - struct bio_vec *bv; - int i, err; - void *data; - unsigned int sz; - - bio_for_each_segment(bv, bio, i) { - sz = min(total_size, bv->bv_len); - - dprintk("%s: bio: %llu/%u, total: %u, len: %u, sz: %u, " - "off: %u.\n", __func__, (u64)bio->bi_sector, - bio->bi_size, total_size, bv->bv_len, sz, - bv->bv_offset); - - data = kmap(bv->bv_page) + bv->bv_offset; - err = dst_data_recv(st, data, sz); - kunmap(bv->bv_page); - - bv->bv_len = sz; - - if (err) - return err; - - total_size -= sz; - if (total_size == 0) - break; - } - - return 0; -} - -/* - * Our block IO has just completed and arrived: get it. - */ -static int dst_process_io_response(struct dst_state *st) -{ - struct dst_node *n = st->node; - struct dst_cmd *cmd = st->data; - struct dst_trans *t; - int err = 0; - struct bio *bio; - - mutex_lock(&n->trans_lock); - t = dst_trans_search(n, cmd->id); - mutex_unlock(&n->trans_lock); - - if (!t) - goto err_out_exit; - - bio = t->bio; - - dprintk("%s: bio: %llu/%u, cmd_size: %u, csize: %u, dir: %lu.\n", - __func__, (u64)bio->bi_sector, bio->bi_size, cmd->size, - cmd->csize, bio_data_dir(bio)); - - if (bio_data_dir(bio) == READ) { - if (bio->bi_size != cmd->size - cmd->csize) - goto err_out_exit; - - if (dst_need_crypto(n)) { - err = dst_recv_cdata(st, t->cmd.hash); - if (err) - goto err_out_exit; - } - - err = dst_recv_bio(st, t->bio, bio->bi_size); - if (err) - goto err_out_exit; - - if (dst_need_crypto(n)) - return dst_trans_crypto(t); - } else { - err = -EBADMSG; - if (cmd->size || cmd->csize) - goto err_out_exit; - } - - dst_trans_remove(t); - dst_trans_put(t); - - return 0; - -err_out_exit: - return err; -} - -/* - * Receive crypto data. - */ -int dst_recv_cdata(struct dst_state *st, void *cdata) -{ - struct dst_cmd *cmd = st->data; - struct dst_node *n = st->node; - struct dst_crypto_ctl *c = &n->crypto; - int err; - - if (cmd->csize != c->crypto_attached_size) { - dprintk("%s: cmd: cmd: %u, sector: %llu, size: %u, " - "csize: %u != digest size %u.\n", - __func__, cmd->cmd, cmd->sector, cmd->size, - cmd->csize, c->crypto_attached_size); - err = -EINVAL; - goto err_out_exit; - } - - err = dst_data_recv(st, cdata, cmd->csize); - if (err) - goto err_out_exit; - - cmd->size -= cmd->csize; - return 0; - -err_out_exit: - return err; -} - -/* - * Receive the command and start its processing. - */ -static int dst_recv_processing(struct dst_state *st) -{ - int err = -EINTR; - struct dst_cmd *cmd = st->data; - - /* - * If socket will be reset after this statement, then - * dst_data_recv() will just fail and loop will - * start again, so it can be done without any locks. - * - * st->read_socket is needed to prevents state machine - * breaking between this data reading and subsequent one - * in protocol specific functions during connection reset. - * In case of reset we have to read next command and do - * not expect data for old command to magically appear in - * new connection. - */ - st->read_socket = st->socket; - err = dst_data_recv(st, cmd, sizeof(struct dst_cmd)); - if (err) - goto out_exit; - - dst_convert_cmd(cmd); - - dprintk("%s: cmd: %u, size: %u, csize: %u, id: %llu, " - "sector: %llu, flags: %llx, rw: %llx.\n", - __func__, cmd->cmd, cmd->size, - cmd->csize, cmd->id, cmd->sector, - cmd->flags, cmd->rw); - - /* - * This should catch protocol breakage and random garbage - * instead of commands. 
- */ - if (unlikely(cmd->csize > st->size - sizeof(struct dst_cmd))) { - err = -EBADMSG; - goto out_exit; - } - - err = -EPROTO; - switch (cmd->cmd) { - case DST_IO_RESPONSE: - err = dst_process_io_response(st); - break; - case DST_IO: - err = dst_process_io(st); - break; - case DST_CFG: - err = dst_process_cfg(st); - break; - case DST_PING: - err = 0; - break; - default: - break; - } - -out_exit: - return err; -} - -/* - * Receiving thread. For the client node we should try to reconnect, - * for accepted client we just drop the state and expect it to reconnect. - */ -static int dst_recv(void *init_data, void *schedule_data) -{ - struct dst_state *st = schedule_data; - struct dst_node *n = init_data; - int err = 0; - - dprintk("%s: start st: %p, n: %p, scan: %lu, need_exit: %d.\n", - __func__, st, n, n->trans_scan_timeout, st->need_exit); - - while (n->trans_scan_timeout && !st->need_exit) { - err = dst_recv_processing(st); - if (err < 0) { - if (!st->ctl.type) - break; - - if (!n->trans_scan_timeout || st->need_exit) - break; - - dst_state_reset(st); - msleep(1000); - } - } - - st->need_exit = 1; - wake_up(&st->thread_wait); - - dprintk("%s: freeing receiving socket st: %p.\n", __func__, st); - dst_state_lock(st); - dst_state_exit_connected(st); - dst_state_unlock(st); - dst_state_put(st); - - dprintk("%s: freed receiving socket st: %p.\n", __func__, st); - - return err; -} - -/* - * Network state dies here and borns couple of lines below. - * This object is the main network state processing engine: - * sending, receiving, reconnections, all network related - * tasks are handled on behalf of the state. - */ -static void dst_state_free(struct dst_state *st) -{ - dprintk("%s: st: %p.\n", __func__, st); - if (st->cleanup) - st->cleanup(st); - kfree(st->data); - kfree(st); -} - -struct dst_state *dst_state_alloc(struct dst_node *n) -{ - struct dst_state *st; - int err = -ENOMEM; - - st = kzalloc(sizeof(struct dst_state), GFP_KERNEL); - if (!st) - goto err_out_exit; - - st->node = n; - st->need_exit = 0; - - st->size = PAGE_SIZE; - st->data = kmalloc(st->size, GFP_KERNEL); - if (!st->data) - goto err_out_free; - - spin_lock_init(&st->request_lock); - INIT_LIST_HEAD(&st->request_list); - - mutex_init(&st->state_lock); - init_waitqueue_head(&st->thread_wait); - - /* - * One for processing thread, another one for node itself. - */ - atomic_set(&st->refcnt, 2); - - dprintk("%s: st: %p, n: %p.\n", __func__, st, st->node); - - return st; - -err_out_free: - kfree(st); -err_out_exit: - return ERR_PTR(err); -} - -int dst_state_schedule_receiver(struct dst_state *st) -{ - return thread_pool_schedule_private(st->node->pool, dst_thread_setup, - dst_recv, st, MAX_SCHEDULE_TIMEOUT, st->node); -} - -/* - * Initialize client's connection to the remote peer: allocate state, - * connect and perform block IO autoconfiguration. 
- */ -int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r) -{ - struct dst_state *st; - int err = -ENOMEM; - - st = dst_state_alloc(n); - if (IS_ERR(st)) { - err = PTR_ERR(st); - goto err_out_exit; - } - memcpy(&st->ctl, r, sizeof(struct dst_network_ctl)); - - err = dst_state_init_connected(st); - if (err) - goto err_out_free_data; - - err = dst_request_remote_config(st); - if (err) - goto err_out_exit_connected; - n->state = st; - - err = dst_state_schedule_receiver(st); - if (err) - goto err_out_exit_connected; - - return 0; - -err_out_exit_connected: - dst_state_exit_connected(st); -err_out_free_data: - dst_state_free(st); -err_out_exit: - n->state = NULL; - return err; -} - -void dst_state_put(struct dst_state *st) -{ - dprintk("%s: st: %p, refcnt: %d.\n", - __func__, st, atomic_read(&st->refcnt)); - if (atomic_dec_and_test(&st->refcnt)) - dst_state_free(st); -} - -/* - * Send block IO to the network one by one using zero-copy ->sendpage(). - */ -int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio) -{ - struct bio_vec *bv; - struct dst_crypto_ctl *c = &st->node->crypto; - int err, i = 0; - int flags = MSG_WAITALL; - - err = dst_data_send_header(st->socket, cmd, - sizeof(struct dst_cmd) + c->crypto_attached_size, bio->bi_vcnt); - if (err) - goto err_out_exit; - - bio_for_each_segment(bv, bio, i) { - if (i < bio->bi_vcnt - 1) - flags |= MSG_MORE; - - err = kernel_sendpage(st->socket, bv->bv_page, bv->bv_offset, - bv->bv_len, flags); - if (err <= 0) - goto err_out_exit; - } - - return 0; - -err_out_exit: - dprintk("%s: %d/%d, flags: %x, err: %d.\n", - __func__, i, bio->bi_vcnt, flags, err); - return err; -} - -/* - * Send transaction to the remote peer. - */ -int dst_trans_send(struct dst_trans *t) -{ - int err; - struct dst_state *st = t->n->state; - struct bio *bio = t->bio; - - dst_convert_cmd(&t->cmd); - - dst_state_lock(st); - if (!st->socket) { - err = dst_state_init_connected(st); - if (err) - goto err_out_unlock; - } - - if (bio_data_dir(bio) == WRITE) { - err = dst_send_bio(st, &t->cmd, t->bio); - } else { - err = dst_data_send_header(st->socket, &t->cmd, - sizeof(struct dst_cmd), 0); - } - if (err) - goto err_out_reset; - - dst_state_unlock(st); - return 0; - -err_out_reset: - dst_state_reset_nolock(st); -err_out_unlock: - dst_state_unlock(st); - - return err; -} diff --git a/drivers/staging/dst/thread_pool.c b/drivers/staging/dst/thread_pool.c deleted file mode 100644 index 29a82b2602f3..000000000000 --- a/drivers/staging/dst/thread_pool.c +++ /dev/null @@ -1,348 +0,0 @@ -/* - * 2007+ Copyright (c) Evgeniy Polyakov - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include - -/* - * Thread pool abstraction allows to schedule a work to be performed - * on behalf of kernel thread. One does not operate with threads itself, - * instead user provides setup and cleanup callbacks for thread pool itself, - * and action and cleanup callbacks for each submitted work. 
- * - * Each worker has private data initialized at creation time and data, - * provided by user at scheduling time. - * - * When action is being performed, thread can not be used by other users, - * instead they will sleep until there is free thread to pick their work. - */ -struct thread_pool_worker { - struct list_head worker_entry; - - struct task_struct *thread; - - struct thread_pool *pool; - - int error; - int has_data; - int need_exit; - unsigned int id; - - wait_queue_head_t wait; - - void *private; - void *schedule_data; - - int (*action)(void *private, void *schedule_data); - void (*cleanup)(void *private); -}; - -static void thread_pool_exit_worker(struct thread_pool_worker *w) -{ - kthread_stop(w->thread); - - w->cleanup(w->private); - kfree(w); -} - -/* - * Called to mark thread as ready and allow users to schedule new work. - */ -static void thread_pool_worker_make_ready(struct thread_pool_worker *w) -{ - struct thread_pool *p = w->pool; - - mutex_lock(&p->thread_lock); - - if (!w->need_exit) { - list_move_tail(&w->worker_entry, &p->ready_list); - w->has_data = 0; - mutex_unlock(&p->thread_lock); - - wake_up(&p->wait); - } else { - p->thread_num--; - list_del(&w->worker_entry); - mutex_unlock(&p->thread_lock); - - thread_pool_exit_worker(w); - } -} - -/* - * Thread action loop: waits until there is new work. - */ -static int thread_pool_worker_func(void *data) -{ - struct thread_pool_worker *w = data; - - while (!kthread_should_stop()) { - wait_event_interruptible(w->wait, - kthread_should_stop() || w->has_data); - - if (kthread_should_stop()) - break; - - if (!w->has_data) - continue; - - w->action(w->private, w->schedule_data); - thread_pool_worker_make_ready(w); - } - - return 0; -} - -/* - * Remove single worker without specifying which one. - */ -void thread_pool_del_worker(struct thread_pool *p) -{ - struct thread_pool_worker *w = NULL; - - while (!w && p->thread_num) { - wait_event(p->wait, !list_empty(&p->ready_list) || - !p->thread_num); - - dprintk("%s: locking list_empty: %d, thread_num: %d.\n", - __func__, list_empty(&p->ready_list), - p->thread_num); - - mutex_lock(&p->thread_lock); - if (!list_empty(&p->ready_list)) { - w = list_first_entry(&p->ready_list, - struct thread_pool_worker, - worker_entry); - - dprintk("%s: deleting w: %p, thread_num: %d, " - "list: %p [%p.%p].\n", __func__, - w, p->thread_num, &p->ready_list, - p->ready_list.prev, p->ready_list.next); - - p->thread_num--; - list_del(&w->worker_entry); - } - mutex_unlock(&p->thread_lock); - } - - if (w) - thread_pool_exit_worker(w); - dprintk("%s: deleted w: %p, thread_num: %d.\n", - __func__, w, p->thread_num); -} - -/* - * Remove a worker with given ID. - */ -void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id) -{ - struct thread_pool_worker *w; - int found = 0; - - mutex_lock(&p->thread_lock); - list_for_each_entry(w, &p->ready_list, worker_entry) { - if (w->id == id) { - found = 1; - p->thread_num--; - list_del(&w->worker_entry); - break; - } - } - - if (!found) { - list_for_each_entry(w, &p->active_list, worker_entry) { - if (w->id == id) { - w->need_exit = 1; - break; - } - } - } - mutex_unlock(&p->thread_lock); - - if (found) - thread_pool_exit_worker(w); -} - -/* - * Add new worker thread with given parameters. - * If initialization callback fails, return error. 
- */ -int thread_pool_add_worker(struct thread_pool *p, - char *name, - unsigned int id, - void *(*init)(void *private), - void (*cleanup)(void *private), - void *private) -{ - struct thread_pool_worker *w; - int err = -ENOMEM; - - w = kzalloc(sizeof(struct thread_pool_worker), GFP_KERNEL); - if (!w) - goto err_out_exit; - - w->pool = p; - init_waitqueue_head(&w->wait); - w->cleanup = cleanup; - w->id = id; - - w->thread = kthread_run(thread_pool_worker_func, w, "%s", name); - if (IS_ERR(w->thread)) { - err = PTR_ERR(w->thread); - goto err_out_free; - } - - w->private = init(private); - if (IS_ERR(w->private)) { - err = PTR_ERR(w->private); - goto err_out_stop_thread; - } - - mutex_lock(&p->thread_lock); - list_add_tail(&w->worker_entry, &p->ready_list); - p->thread_num++; - mutex_unlock(&p->thread_lock); - - return 0; - -err_out_stop_thread: - kthread_stop(w->thread); -err_out_free: - kfree(w); -err_out_exit: - return err; -} - -/* - * Destroy the whole pool. - */ -void thread_pool_destroy(struct thread_pool *p) -{ - while (p->thread_num) { - dprintk("%s: num: %d.\n", __func__, p->thread_num); - thread_pool_del_worker(p); - } - - kfree(p); -} - -/* - * Create a pool with given number of threads. - * They will have sequential IDs started from zero. - */ -struct thread_pool *thread_pool_create(int num, char *name, - void *(*init)(void *private), - void (*cleanup)(void *private), - void *private) -{ - struct thread_pool_worker *w, *tmp; - struct thread_pool *p; - int err = -ENOMEM; - int i; - - p = kzalloc(sizeof(struct thread_pool), GFP_KERNEL); - if (!p) - goto err_out_exit; - - init_waitqueue_head(&p->wait); - mutex_init(&p->thread_lock); - INIT_LIST_HEAD(&p->ready_list); - INIT_LIST_HEAD(&p->active_list); - p->thread_num = 0; - - for (i = 0; i < num; ++i) { - err = thread_pool_add_worker(p, name, i, init, - cleanup, private); - if (err) - goto err_out_free_all; - } - - return p; - -err_out_free_all: - list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) { - list_del(&w->worker_entry); - thread_pool_exit_worker(w); - } - kfree(p); -err_out_exit: - return ERR_PTR(err); -} - -/* - * Schedule execution of the action on a given thread, - * provided ID pointer has to match previously stored - * private data. - */ -int thread_pool_schedule_private(struct thread_pool *p, - int (*setup)(void *private, void *data), - int (*action)(void *private, void *data), - void *data, long timeout, void *id) -{ - struct thread_pool_worker *w, *tmp, *worker = NULL; - int err = 0; - - while (!worker && !err) { - timeout = wait_event_interruptible_timeout(p->wait, - !list_empty(&p->ready_list), - timeout); - - if (!timeout) { - err = -ETIMEDOUT; - break; - } - - worker = NULL; - mutex_lock(&p->thread_lock); - list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) { - if (id && id != w->private) - continue; - - worker = w; - - list_move_tail(&w->worker_entry, &p->active_list); - - err = setup(w->private, data); - if (!err) { - w->schedule_data = data; - w->action = action; - w->has_data = 1; - wake_up(&w->wait); - } else { - list_move_tail(&w->worker_entry, - &p->ready_list); - } - - break; - } - mutex_unlock(&p->thread_lock); - } - - return err; -} - -/* - * Schedule execution on arbitrary thread from the pool. 
- */ -int thread_pool_schedule(struct thread_pool *p, - int (*setup)(void *private, void *data), - int (*action)(void *private, void *data), - void *data, long timeout) -{ - return thread_pool_schedule_private(p, setup, - action, data, timeout, NULL); -} diff --git a/drivers/staging/dst/trans.c b/drivers/staging/dst/trans.c deleted file mode 100644 index 1c36a6bc31d5..000000000000 --- a/drivers/staging/dst/trans.c +++ /dev/null @@ -1,337 +0,0 @@ -/* - * 2007+ Copyright (c) Evgeniy Polyakov - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include - -/* - * Transaction memory pool size. - */ -static int dst_mempool_num = 32; -module_param(dst_mempool_num, int, 0644); - -/* - * Transaction tree management. - */ -static inline int dst_trans_cmp(dst_gen_t gen, dst_gen_t new) -{ - if (gen < new) - return 1; - if (gen > new) - return -1; - return 0; -} - -struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen) -{ - struct rb_root *root = &node->trans_root; - struct rb_node *n = root->rb_node; - struct dst_trans *t, *ret = NULL; - int cmp; - - while (n) { - t = rb_entry(n, struct dst_trans, trans_entry); - - cmp = dst_trans_cmp(t->gen, gen); - if (cmp < 0) - n = n->rb_left; - else if (cmp > 0) - n = n->rb_right; - else { - ret = t; - break; - } - } - - dprintk("%s: %s transaction: id: %llu.\n", __func__, - (ret) ? 
"found" : "not found", gen); - - return ret; -} - -static int dst_trans_insert(struct dst_trans *new) -{ - struct rb_root *root = &new->n->trans_root; - struct rb_node **n = &root->rb_node, *parent = NULL; - struct dst_trans *ret = NULL, *t; - int cmp; - - while (*n) { - parent = *n; - - t = rb_entry(parent, struct dst_trans, trans_entry); - - cmp = dst_trans_cmp(t->gen, new->gen); - if (cmp < 0) - n = &parent->rb_left; - else if (cmp > 0) - n = &parent->rb_right; - else { - ret = t; - break; - } - } - - new->send_time = jiffies; - if (ret) { - printk(KERN_DEBUG "%s: exist: old: gen: %llu, bio: %llu/%u, " - "send_time: %lu, new: gen: %llu, bio: %llu/%u, " - "send_time: %lu.\n", __func__, - ret->gen, (u64)ret->bio->bi_sector, - ret->bio->bi_size, ret->send_time, - new->gen, (u64)new->bio->bi_sector, - new->bio->bi_size, new->send_time); - return -EEXIST; - } - - rb_link_node(&new->trans_entry, parent, n); - rb_insert_color(&new->trans_entry, root); - - dprintk("%s: inserted: gen: %llu, bio: %llu/%u, send_time: %lu.\n", - __func__, new->gen, (u64)new->bio->bi_sector, - new->bio->bi_size, new->send_time); - - return 0; -} - -int dst_trans_remove_nolock(struct dst_trans *t) -{ - struct dst_node *n = t->n; - - if (t->trans_entry.rb_parent_color) { - rb_erase(&t->trans_entry, &n->trans_root); - t->trans_entry.rb_parent_color = 0; - } - return 0; -} - -int dst_trans_remove(struct dst_trans *t) -{ - int ret; - struct dst_node *n = t->n; - - mutex_lock(&n->trans_lock); - ret = dst_trans_remove_nolock(t); - mutex_unlock(&n->trans_lock); - - return ret; -} - -/* - * When transaction is completed and there are no more users, - * we complete appriate block IO request with given error status. - */ -void dst_trans_put(struct dst_trans *t) -{ - if (atomic_dec_and_test(&t->refcnt)) { - struct bio *bio = t->bio; - - dprintk("%s: completed t: %p, gen: %llu, bio: %p.\n", - __func__, t, t->gen, bio); - - bio_endio(bio, t->error); - bio_put(bio); - - dst_node_put(t->n); - mempool_free(t, t->n->trans_pool); - } -} - -/* - * Process given block IO request: allocate transaction, insert it into the tree - * and send/schedule crypto processing. - */ -int dst_process_bio(struct dst_node *n, struct bio *bio) -{ - struct dst_trans *t; - int err = -ENOMEM; - - t = mempool_alloc(n->trans_pool, GFP_NOFS); - if (!t) - goto err_out_exit; - - t->n = dst_node_get(n); - t->bio = bio; - t->error = 0; - t->retries = 0; - atomic_set(&t->refcnt, 1); - t->gen = atomic_long_inc_return(&n->gen); - - t->enc = bio_data_dir(bio); - dst_bio_to_cmd(bio, &t->cmd, DST_IO, t->gen); - - mutex_lock(&n->trans_lock); - err = dst_trans_insert(t); - mutex_unlock(&n->trans_lock); - if (err) - goto err_out_free; - - dprintk("%s: gen: %llu, bio: %llu/%u, dir/enc: %d, need_crypto: %d.\n", - __func__, t->gen, (u64)bio->bi_sector, - bio->bi_size, t->enc, dst_need_crypto(n)); - - if (dst_need_crypto(n) && t->enc) - dst_trans_crypto(t); - else - dst_trans_send(t); - - return 0; - -err_out_free: - dst_node_put(n); - mempool_free(t, n->trans_pool); -err_out_exit: - bio_endio(bio, err); - bio_put(bio); - return err; -} - -/* - * Scan for timeout/stale transactions. - * Each transaction is being resent multiple times before error completion. 
- */ -static void dst_trans_scan(struct work_struct *work) -{ - struct dst_node *n = container_of(work, struct dst_node, - trans_work.work); - struct rb_node *rb_node; - struct dst_trans *t; - unsigned long timeout = n->trans_scan_timeout; - int num = 10 * n->trans_max_retries; - - mutex_lock(&n->trans_lock); - - for (rb_node = rb_first(&n->trans_root); rb_node; ) { - t = rb_entry(rb_node, struct dst_trans, trans_entry); - - if (timeout && time_after(t->send_time + timeout, jiffies) - && t->retries == 0) - break; -#if 0 - dprintk("%s: t: %p, gen: %llu, n: %s, retries: %u, max: %u.\n", - __func__, t, t->gen, n->name, - t->retries, n->trans_max_retries); -#endif - if (--num == 0) - break; - - dst_trans_get(t); - - rb_node = rb_next(rb_node); - - if (timeout && (++t->retries < n->trans_max_retries)) { - dst_trans_send(t); - } else { - t->error = -ETIMEDOUT; - dst_trans_remove_nolock(t); - dst_trans_put(t); - } - - dst_trans_put(t); - } - - mutex_unlock(&n->trans_lock); - - /* - * If no timeout specified then system is in the middle of exiting - * process, so no need to reschedule scanning process again. - */ - if (timeout) { - if (!num) - timeout = HZ; - schedule_delayed_work(&n->trans_work, timeout); - } -} - -/* - * Flush all transactions and mark them as timed out. - * Destroy transaction pools. - */ -void dst_node_trans_exit(struct dst_node *n) -{ - struct dst_trans *t; - struct rb_node *rb_node; - - if (!n->trans_cache) - return; - - dprintk("%s: n: %p, cancelling the work.\n", __func__, n); - cancel_delayed_work_sync(&n->trans_work); - flush_scheduled_work(); - dprintk("%s: n: %p, work has been cancelled.\n", __func__, n); - - for (rb_node = rb_first(&n->trans_root); rb_node; ) { - t = rb_entry(rb_node, struct dst_trans, trans_entry); - - dprintk("%s: t: %p, gen: %llu, n: %s.\n", - __func__, t, t->gen, n->name); - - rb_node = rb_next(rb_node); - - t->error = -ETIMEDOUT; - dst_trans_remove_nolock(t); - dst_trans_put(t); - } - - mempool_destroy(n->trans_pool); - kmem_cache_destroy(n->trans_cache); -} - -/* - * Initialize transaction storage for given node. - * Transaction stores not only control information, - * but also network command and crypto data (if needed) - * to reduce number of allocations. Thus transaction size - * differs from node to node. - */ -int dst_node_trans_init(struct dst_node *n, unsigned int size) -{ - /* - * We need this, since node with given name can be dropped from the - * hash table, but be still alive, so subsequent creation of the node - * with the same name may collide with existing cache name. 
- */ - - snprintf(n->cache_name, sizeof(n->cache_name), "%s-%p", n->name, n); - - n->trans_cache = kmem_cache_create(n->cache_name, - size + n->crypto.crypto_attached_size, - 0, 0, NULL); - if (!n->trans_cache) - goto err_out_exit; - - n->trans_pool = mempool_create_slab_pool(dst_mempool_num, - n->trans_cache); - if (!n->trans_pool) - goto err_out_cache_destroy; - - mutex_init(&n->trans_lock); - n->trans_root = RB_ROOT; - - INIT_DELAYED_WORK(&n->trans_work, dst_trans_scan); - schedule_delayed_work(&n->trans_work, n->trans_scan_timeout); - - dprintk("%s: n: %p, size: %u, crypto: %u.\n", - __func__, n, size, n->crypto.crypto_attached_size); - - return 0; - -err_out_cache_destroy: - kmem_cache_destroy(n->trans_cache); -err_out_exit: - return -ENOMEM; -} diff --git a/include/linux/dst.h b/include/linux/dst.h deleted file mode 100644 index e26fed84b1aa..000000000000 --- a/include/linux/dst.h +++ /dev/null @@ -1,587 +0,0 @@ -/* - * 2007+ Copyright (c) Evgeniy Polyakov - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __DST_H -#define __DST_H - -#include -#include - -#define DST_NAMELEN 32 -#define DST_NAME "dst" - -enum { - /* Remove node with given id from storage */ - DST_DEL_NODE = 0, - /* Add remote node with given id to the storage */ - DST_ADD_REMOTE, - /* Add local node with given id to the storage to be exported and used by remote peers */ - DST_ADD_EXPORT, - /* Crypto initialization command (hash/cipher used to protect the connection) */ - DST_CRYPTO, - /* Security attributes for given connection (permissions for example) */ - DST_SECURITY, - /* Register given node in the block layer subsystem */ - DST_START, - DST_CMD_MAX -}; - -struct dst_ctl -{ - /* Storage name */ - char name[DST_NAMELEN]; - /* Command flags */ - __u32 flags; - /* Command itself (see above) */ - __u32 cmd; - /* Maximum number of pages per single request in this device */ - __u32 max_pages; - /* Stale/error transaction scanning timeout in milliseconds */ - __u32 trans_scan_timeout; - /* Maximum number of retry sends before completing transaction as broken */ - __u32 trans_max_retries; - /* Storage size */ - __u64 size; -}; - -/* Reply command carries completion status */ -struct dst_ctl_ack -{ - struct cn_msg msg; - int error; - int unused[3]; -}; - -/* - * Unfortunaltely socket address structure is not exported to userspace - * and is redefined there. - */ -#define SADDR_MAX_DATA 128 - -struct saddr { - /* address family, AF_xxx */ - unsigned short sa_family; - /* 14 bytes of protocol address */ - char sa_data[SADDR_MAX_DATA]; - /* Number of bytes used in sa_data */ - unsigned short sa_data_len; -}; - -/* Address structure */ -struct dst_network_ctl -{ - /* Socket type: datagram, stream...*/ - unsigned int type; - /* Let me guess, is it a Jupiter diameter? */ - unsigned int proto; - /* Peer's address */ - struct saddr addr; -}; - -struct dst_crypto_ctl -{ - /* Cipher and hash names */ - char cipher_algo[DST_NAMELEN]; - char hash_algo[DST_NAMELEN]; - - /* Key sizes. 
Can be zero for digest for example */ - unsigned int cipher_keysize, hash_keysize; - /* Alignment. Calculated by the DST itself. */ - unsigned int crypto_attached_size; - /* Number of threads to perform crypto operations */ - int thread_num; -}; - -/* Export security attributes have this bits checked in when client connects */ -#define DST_PERM_READ (1<<0) -#define DST_PERM_WRITE (1<<1) - -/* - * Right now it is simple model, where each remote address - * is assigned to set of permissions it is allowed to perform. - * In real world block device does not know anything but - * reading and writing, so it should be more than enough. - */ -struct dst_secure_user -{ - unsigned int permissions; - struct saddr addr; -}; - -/* - * Export control command: device to export and network address to accept - * clients to work with given device - */ -struct dst_export_ctl -{ - char device[DST_NAMELEN]; - struct dst_network_ctl ctl; -}; - -enum { - DST_CFG = 1, /* Request remote configuration */ - DST_IO, /* IO command */ - DST_IO_RESPONSE, /* IO response */ - DST_PING, /* Keepalive message */ - DST_NCMD_MAX, -}; - -struct dst_cmd -{ - /* Network command itself, see above */ - __u32 cmd; - /* - * Size of the attached data - * (in most cases, for READ command it means how many bytes were requested) - */ - __u32 size; - /* Crypto size: number of attached bytes with digest/hmac */ - __u32 csize; - /* Here we can carry secret data */ - __u32 reserved; - /* Read/write bits, see how they are encoded in bio structure */ - __u64 rw; - /* BIO flags */ - __u64 flags; - /* Unique command id (like transaction ID) */ - __u64 id; - /* Sector to start IO from */ - __u64 sector; - /* Hash data is placed after this header */ - __u8 hash[0]; -}; - -/* - * Convert command to/from network byte order. - * We do not use hton*() functions, since there is - * no 64-bit implementation. - */ -static inline void dst_convert_cmd(struct dst_cmd *c) -{ - c->cmd = __cpu_to_be32(c->cmd); - c->csize = __cpu_to_be32(c->csize); - c->size = __cpu_to_be32(c->size); - c->sector = __cpu_to_be64(c->sector); - c->id = __cpu_to_be64(c->id); - c->flags = __cpu_to_be64(c->flags); - c->rw = __cpu_to_be64(c->rw); -} - -/* Transaction id */ -typedef __u64 dst_gen_t; - -#ifdef __KERNEL__ - -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_DST_DEBUG -#define dprintk(f, a...) printk(KERN_NOTICE f, ##a) -#else -static inline void __attribute__ ((format (printf, 1, 2))) - dprintk(const char *fmt, ...) {} -#endif - -struct dst_node; - -struct dst_trans -{ - /* DST node we are working with */ - struct dst_node *n; - - /* Entry inside transaction tree */ - struct rb_node trans_entry; - - /* Merlin kills this transaction when this memory cell equals zero */ - atomic_t refcnt; - - /* How this transaction should be processed by crypto engine */ - short enc; - /* How many times this transaction was resent */ - short retries; - /* Completion status */ - int error; - - /* When did we send it to the remote peer */ - long send_time; - - /* My name is... 
- * Well, computers does not speak, they have unique id instead */ - dst_gen_t gen; - - /* Block IO we are working with */ - struct bio *bio; - - /* Network command for above block IO request */ - struct dst_cmd cmd; -}; - -struct dst_crypto_engine -{ - /* What should we do with all block requests */ - struct crypto_hash *hash; - struct crypto_ablkcipher *cipher; - - /* Pool of pages used to encrypt data into before sending */ - int page_num; - struct page **pages; - - /* What to do with current request */ - int enc; - /* Who we are and where do we go */ - struct scatterlist *src, *dst; - - /* Maximum timeout waiting for encryption to be completed */ - long timeout; - /* IV is a 64-bit sequential counter */ - u64 iv; - - /* Secret data */ - void *private; - - /* Cached temporary data lives here */ - int size; - void *data; -}; - -struct dst_state -{ - /* The main state protection */ - struct mutex state_lock; - - /* Polling machinery for sockets */ - wait_queue_t wait; - wait_queue_head_t *whead; - /* Most of events are being waited here */ - wait_queue_head_t thread_wait; - - /* Who owns this? */ - struct dst_node *node; - - /* Network address for this state */ - struct dst_network_ctl ctl; - - /* Permissions to work with: read-only or rw connection */ - u32 permissions; - - /* Called when we need to clean private data */ - void (* cleanup)(struct dst_state *st); - - /* Used by the server: BIO completion queues BIOs here */ - struct list_head request_list; - spinlock_t request_lock; - - /* Guess what? No, it is not number of planets */ - atomic_t refcnt; - - /* This flags is set when connection should be dropped */ - int need_exit; - - /* - * Socket to work with. Second pointer is used for - * lockless check if socket was changed before performing - * next action (like working with cached polling result) - */ - struct socket *socket, *read_socket; - - /* Cached preallocated data */ - void *data; - unsigned int size; - - /* Currently processed command */ - struct dst_cmd cmd; -}; - -struct dst_info -{ - /* Device size */ - u64 size; - - /* Local device name for export devices */ - char local[DST_NAMELEN]; - - /* Network setup */ - struct dst_network_ctl net; - - /* Sysfs bits use this */ - struct device device; -}; - -struct dst_node -{ - struct list_head node_entry; - - /* Hi, my name is stored here */ - char name[DST_NAMELEN]; - /* My cache name is stored here */ - char cache_name[DST_NAMELEN]; - - /* Block device attached to given node. - * Only valid for exporting nodes */ - struct block_device *bdev; - /* Network state machine for given peer */ - struct dst_state *state; - - /* Block IO machinery */ - struct request_queue *queue; - struct gendisk *disk; - - /* Number of threads in processing pool */ - int thread_num; - /* Maximum number of pages in single IO */ - int max_pages; - - /* I'm that big in bytes */ - loff_t size; - - /* Exported to userspace node information */ - struct dst_info *info; - - /* - * Security attribute list. - * Used only by exporting node currently. - */ - struct list_head security_list; - struct mutex security_lock; - - /* - * When this unerflows below zero, university collapses. - * But this will not happen, since node will be freed, - * when reference counter reaches zero. - */ - atomic_t refcnt; - - /* How precisely should I be started? 
*/ - int (*start)(struct dst_node *); - - /* Crypto capabilities */ - struct dst_crypto_ctl crypto; - u8 *hash_key; - u8 *cipher_key; - - /* Pool of processing thread */ - struct thread_pool *pool; - - /* Transaction IDs live here */ - atomic_long_t gen; - - /* - * How frequently and how many times transaction - * tree should be scanned to drop stale objects. - */ - long trans_scan_timeout; - int trans_max_retries; - - /* Small gnomes live here */ - struct rb_root trans_root; - struct mutex trans_lock; - - /* - * Transaction cache/memory pool. - * It is big enough to contain not only transaction - * itself, but additional crypto data (digest/hmac). - */ - struct kmem_cache *trans_cache; - mempool_t *trans_pool; - - /* This entity scans transaction tree */ - struct delayed_work trans_work; - - wait_queue_head_t wait; -}; - -/* Kernel representation of the security attribute */ -struct dst_secure -{ - struct list_head sec_entry; - struct dst_secure_user sec; -}; - -int dst_process_bio(struct dst_node *n, struct bio *bio); - -int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r); -int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le); - -static inline struct dst_state *dst_state_get(struct dst_state *st) -{ - BUG_ON(atomic_read(&st->refcnt) == 0); - atomic_inc(&st->refcnt); - return st; -} - -void dst_state_put(struct dst_state *st); - -struct dst_state *dst_state_alloc(struct dst_node *n); -int dst_state_socket_create(struct dst_state *st); -void dst_state_socket_release(struct dst_state *st); - -void dst_state_exit_connected(struct dst_state *st); - -int dst_state_schedule_receiver(struct dst_state *st); - -void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str); - -static inline void dst_state_lock(struct dst_state *st) -{ - mutex_lock(&st->state_lock); -} - -static inline void dst_state_unlock(struct dst_state *st) -{ - mutex_unlock(&st->state_lock); -} - -void dst_poll_exit(struct dst_state *st); -int dst_poll_init(struct dst_state *st); - -static inline unsigned int dst_state_poll(struct dst_state *st) -{ - unsigned int revents = POLLHUP | POLLERR; - - dst_state_lock(st); - if (st->socket) - revents = st->socket->ops->poll(NULL, st->socket, NULL); - dst_state_unlock(st); - - return revents; -} - -static inline int dst_thread_setup(void *private, void *data) -{ - return 0; -} - -void dst_node_put(struct dst_node *n); - -static inline struct dst_node *dst_node_get(struct dst_node *n) -{ - atomic_inc(&n->refcnt); - return n; -} - -int dst_data_recv(struct dst_state *st, void *data, unsigned int size); -int dst_recv_cdata(struct dst_state *st, void *cdata); -int dst_data_send_header(struct socket *sock, - void *data, unsigned int size, int more); - -int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); - -int dst_process_io(struct dst_state *st); -int dst_export_crypto(struct dst_node *n, struct bio *bio); -int dst_export_send_bio(struct bio *bio); -int dst_start_export(struct dst_node *n); - -int __init dst_export_init(void); -void dst_export_exit(void); - -/* Private structure for export block IO requests */ -struct dst_export_priv -{ - struct list_head request_entry; - struct dst_state *state; - struct bio *bio; - struct dst_cmd cmd; -}; - -static inline void dst_trans_get(struct dst_trans *t) -{ - atomic_inc(&t->refcnt); -} - -struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); -int dst_trans_remove(struct dst_trans *t); -int dst_trans_remove_nolock(struct dst_trans *t); -void 
dst_trans_put(struct dst_trans *t); - -/* - * Convert bio into network command. - */ -static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, - u32 command, u64 id) -{ - cmd->cmd = command; - cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; - cmd->rw = bio->bi_rw; - cmd->size = bio->bi_size; - cmd->csize = 0; - cmd->id = id; - cmd->sector = bio->bi_sector; -}; - -int dst_trans_send(struct dst_trans *t); -int dst_trans_crypto(struct dst_trans *t); - -int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl); -void dst_node_crypto_exit(struct dst_node *n); - -static inline int dst_need_crypto(struct dst_node *n) -{ - struct dst_crypto_ctl *c = &n->crypto; - /* - * Logical OR is appropriate here, but boolean one produces - * more optimal code, so it is used instead. - */ - return (c->hash_algo[0] | c->cipher_algo[0]); -} - -int dst_node_trans_init(struct dst_node *n, unsigned int size); -void dst_node_trans_exit(struct dst_node *n); - -/* - * Pool of threads. - * Ready list contains threads currently free to be used, - * active one contains threads with some work scheduled for them. - * Caller can wait in given queue when thread is ready. - */ -struct thread_pool -{ - int thread_num; - struct mutex thread_lock; - struct list_head ready_list, active_list; - - wait_queue_head_t wait; -}; - -void thread_pool_del_worker(struct thread_pool *p); -void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id); -int thread_pool_add_worker(struct thread_pool *p, - char *name, - unsigned int id, - void *(* init)(void *data), - void (* cleanup)(void *data), - void *data); - -void thread_pool_destroy(struct thread_pool *p); -struct thread_pool *thread_pool_create(int num, char *name, - void *(* init)(void *data), - void (* cleanup)(void *data), - void *data); - -int thread_pool_schedule(struct thread_pool *p, - int (* setup)(void *stored_private, void *setup_data), - int (* action)(void *stored_private, void *setup_data), - void *setup_data, long timeout); -int thread_pool_schedule_private(struct thread_pool *p, - int (* setup)(void *private, void *data), - int (* action)(void *private, void *data), - void *data, long timeout, void *id); - -#endif /* __KERNEL__ */ -#endif /* __DST_H */ -- cgit v1.2.3 From 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 25 Dec 2009 17:30:22 -0800 Subject: net: restore ip source validation when using policy routing and the skb mark: there are cases where a back path validation requires us to use a different routing table for src ip validation than the one used for mapping ingress dst ip. One such a case is transparent proxying where we pretend to be the destination system and therefore the local table is used for incoming packets but possibly a main table would be used on outbound. Make the default behavior to allow the above and if users need to turn on the symmetry via sysctl src_valid_mark Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/linux/inetdevice.h | 1 + include/linux/sysctl.h | 1 + net/ipv4/devinet.c | 1 + net/ipv4/fib_frontend.c | 2 ++ 4 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 699e85c01a4d..b2304929434e 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -81,6 +81,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) +#define IN_DEV_SRC_VMARK(in_dev) IN_DEV_ORCONF((in_dev), SRC_VMARK) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_ACCEPT_LOCAL(in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 877ba039e6a4..bd27fbc9db62 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, NET_IPV4_CONF_ACCEPT_LOCAL=23, + NET_IPV4_CONF_SRC_VMARK=24, __NET_IPV4_CONF_MAX }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5cdbc102a418..040c4f05b653 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1397,6 +1397,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), + DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3323168ee52d..82dbf711d6d0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); + if (mark && !IN_DEV_SRC_VMARK(in_dev)) + fl.mark = 0; } rcu_read_unlock(); -- cgit v1.2.3 From 671adc93b6472eaa0142a88d096c945f7b07893a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:12:04 +0100 Subject: wireless: remove remaining qual code This removes the remaining users of the rx status 'qual' field and the field itself. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath5k/base.c | 11 --- drivers/net/wireless/iwlwifi/iwl-3945.c | 10 +- drivers/net/wireless/iwlwifi/iwl-3945.h | 1 - drivers/net/wireless/iwlwifi/iwl-rx.c | 48 +--------- drivers/net/wireless/iwlwifi/iwl3945-base.c | 41 -------- drivers/net/wireless/libertas_tf/main.c | 1 - drivers/net/wireless/rtl818x/rtl8180_dev.c | 1 - drivers/net/wireless/zd1211rw/zd_chip.c | 140 ---------------------------- drivers/net/wireless/zd1211rw/zd_chip.h | 3 - drivers/net/wireless/zd1211rw/zd_mac.c | 3 - include/net/mac80211.h | 2 - 11 files changed, 4 insertions(+), 257 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 2a446a938249..e63b7c40d0ee 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -1903,17 +1903,6 @@ accept: rxs->noise = sc->ah->ah_noise_floor; rxs->signal = rxs->noise + rs.rs_rssi; - /* An rssi of 35 indicates you should be able use - * 54 Mbps reliably. 
A more elaborate scheme can be used - * here but it requires a map of SNR/throughput for each - * possible mode used */ - rxs->qual = rs.rs_rssi * 100 / 35; - - /* rssi can be more than 35 though, anything above that - * should be considered at 100% */ - if (rxs->qual > 100) - rxs->qual = 100; - rxs->antenna = rs.rs_antenna; rxs->rate_idx = ath5k_hw_to_driver_rix(sc, rs.rs_rate); rxs->flag |= ath5k_rx_decrypted(sc, ds, skb, &rs); diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index e413bd35bc41..234891d8cc10 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -681,19 +681,13 @@ static void iwl3945_rx_reply_rx(struct iwl_priv *priv, snr = rx_stats_sig_avg / rx_stats_noise_diff; rx_status.noise = rx_status.signal - iwl3945_calc_db_from_ratio(snr); - rx_status.qual = iwl3945_calc_sig_qual(rx_status.signal, - rx_status.noise); - - /* If noise info not available, calculate signal quality indicator (%) - * using just the dBm signal level. */ } else { rx_status.noise = priv->last_rx_noise; - rx_status.qual = iwl3945_calc_sig_qual(rx_status.signal, 0); } - IWL_DEBUG_STATS(priv, "Rssi %d noise %d qual %d sig_avg %d noise_diff %d\n", - rx_status.signal, rx_status.noise, rx_status.qual, + IWL_DEBUG_STATS(priv, "Rssi %d noise %d sig_avg %d noise_diff %d\n", + rx_status.signal, rx_status.noise, rx_stats_sig_avg, rx_stats_noise_diff); header = (struct ieee80211_hdr *)IWL_RX_DATA(pkt); diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h index ecc23ec1f6a4..531fa125f5a6 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.h +++ b/drivers/net/wireless/iwlwifi/iwl-3945.h @@ -222,7 +222,6 @@ struct iwl3945_ibss_seq { * *****************************************************************************/ extern int iwl3945_calc_db_from_ratio(int sig_ratio); -extern int iwl3945_calc_sig_qual(int rssi_dbm, int noise_dbm); extern void iwl3945_rx_replenish(void *data); extern void iwl3945_rx_queue_reset(struct iwl_priv *priv, struct iwl_rx_queue *rxq); extern unsigned int iwl3945_fill_beacon_frame(struct iwl_priv *priv, diff --git a/drivers/net/wireless/iwlwifi/iwl-rx.c b/drivers/net/wireless/iwlwifi/iwl-rx.c index f5c87e72660a..6f36b6e79f5e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-rx.c +++ b/drivers/net/wireless/iwlwifi/iwl-rx.c @@ -650,47 +650,6 @@ void iwl_reply_statistics(struct iwl_priv *priv, } EXPORT_SYMBOL(iwl_reply_statistics); -#define PERFECT_RSSI (-20) /* dBm */ -#define WORST_RSSI (-95) /* dBm */ -#define RSSI_RANGE (PERFECT_RSSI - WORST_RSSI) - -/* Calculate an indication of rx signal quality (a percentage, not dBm!). - * See http://www.ces.clemson.edu/linux/signal_quality.shtml for info - * about formulas used below. */ -static int iwl_calc_sig_qual(int rssi_dbm, int noise_dbm) -{ - int sig_qual; - int degradation = PERFECT_RSSI - rssi_dbm; - - /* If we get a noise measurement, use signal-to-noise ratio (SNR) - * as indicator; formula is (signal dbm - noise dbm). - * SNR at or above 40 is a great signal (100%). - * Below that, scale to fit SNR of 0 - 40 dB within 0 - 100% indicator. - * Weakest usable signal is usually 10 - 15 dB SNR. */ - if (noise_dbm) { - if (rssi_dbm - noise_dbm >= 40) - return 100; - else if (rssi_dbm < noise_dbm) - return 0; - sig_qual = ((rssi_dbm - noise_dbm) * 5) / 2; - - /* Else use just the signal level. 
- * This formula is a least squares fit of data points collected and - * compared with a reference system that had a percentage (%) display - * for signal quality. */ - } else - sig_qual = (100 * (RSSI_RANGE * RSSI_RANGE) - degradation * - (15 * RSSI_RANGE + 62 * degradation)) / - (RSSI_RANGE * RSSI_RANGE); - - if (sig_qual > 100) - sig_qual = 100; - else if (sig_qual < 1) - sig_qual = 0; - - return sig_qual; -} - /* Calc max signal level (dBm) among 3 possible receivers */ static inline int iwl_calc_rssi(struct iwl_priv *priv, struct iwl_rx_phy_res *rx_resp) @@ -1101,11 +1060,8 @@ void iwl_rx_reply_rx(struct iwl_priv *priv, if (iwl_is_associated(priv) && !test_bit(STATUS_SCANNING, &priv->status)) { rx_status.noise = priv->last_rx_noise; - rx_status.qual = iwl_calc_sig_qual(rx_status.signal, - rx_status.noise); } else { rx_status.noise = IWL_NOISE_MEAS_NOT_AVAILABLE; - rx_status.qual = iwl_calc_sig_qual(rx_status.signal, 0); } /* Reset beacon noise level if not associated. */ @@ -1118,8 +1074,8 @@ void iwl_rx_reply_rx(struct iwl_priv *priv, iwl_dbg_report_frame(priv, phy_res, len, header, 1); #endif iwl_dbg_log_rx_data_frame(priv, len, header); - IWL_DEBUG_STATS_LIMIT(priv, "Rssi %d, noise %d, qual %d, TSF %llu\n", - rx_status.signal, rx_status.noise, rx_status.qual, + IWL_DEBUG_STATS_LIMIT(priv, "Rssi %d, noise %d, TSF %llu\n", + rx_status.signal, rx_status.noise, (unsigned long long)rx_status.mactime); /* diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index e5d8fa38432e..f8e4e4b18d02 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -1299,47 +1299,6 @@ int iwl3945_calc_db_from_ratio(int sig_ratio) return (int)ratio2dB[sig_ratio]; } -#define PERFECT_RSSI (-20) /* dBm */ -#define WORST_RSSI (-95) /* dBm */ -#define RSSI_RANGE (PERFECT_RSSI - WORST_RSSI) - -/* Calculate an indication of rx signal quality (a percentage, not dBm!). - * See http://www.ces.clemson.edu/linux/signal_quality.shtml for info - * about formulas used below. */ -int iwl3945_calc_sig_qual(int rssi_dbm, int noise_dbm) -{ - int sig_qual; - int degradation = PERFECT_RSSI - rssi_dbm; - - /* If we get a noise measurement, use signal-to-noise ratio (SNR) - * as indicator; formula is (signal dbm - noise dbm). - * SNR at or above 40 is a great signal (100%). - * Below that, scale to fit SNR of 0 - 40 dB within 0 - 100% indicator. - * Weakest usable signal is usually 10 - 15 dB SNR. */ - if (noise_dbm) { - if (rssi_dbm - noise_dbm >= 40) - return 100; - else if (rssi_dbm < noise_dbm) - return 0; - sig_qual = ((rssi_dbm - noise_dbm) * 5) / 2; - - /* Else use just the signal level. - * This formula is a least squares fit of data points collected and - * compared with a reference system that had a percentage (%) display - * for signal quality. 
*/ - } else - sig_qual = (100 * (RSSI_RANGE * RSSI_RANGE) - degradation * - (15 * RSSI_RANGE + 62 * degradation)) / - (RSSI_RANGE * RSSI_RANGE); - - if (sig_qual > 100) - sig_qual = 100; - else if (sig_qual < 1) - sig_qual = 0; - - return sig_qual; -} - /** * iwl3945_rx_handle - Main entry function for receiving responses from uCode * diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index 019431d2f8a9..26a1abd5bb03 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -495,7 +495,6 @@ int lbtf_rx(struct lbtf_private *priv, struct sk_buff *skb) stats.band = IEEE80211_BAND_2GHZ; stats.signal = prxpd->snr; stats.noise = prxpd->nf; - stats.qual = prxpd->snr - prxpd->nf; /* Marvell rate index has a hole at value 4 */ if (prxpd->rx_rate > 4) --prxpd->rx_rate; diff --git a/drivers/net/wireless/rtl818x/rtl8180_dev.c b/drivers/net/wireless/rtl818x/rtl8180_dev.c index a1a3dd15c664..8a40a1439984 100644 --- a/drivers/net/wireless/rtl818x/rtl8180_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8180_dev.c @@ -132,7 +132,6 @@ static void rtl8180_handle_rx(struct ieee80211_hw *dev) rx_status.antenna = (flags2 >> 15) & 1; /* TODO: improve signal/rssi reporting */ - rx_status.qual = flags2 & 0xFF; rx_status.signal = (flags2 >> 8) & 0x7F; /* XXX: is this correct? */ rx_status.rate_idx = (flags >> 20) & 0xF; diff --git a/drivers/net/wireless/zd1211rw/zd_chip.c b/drivers/net/wireless/zd1211rw/zd_chip.c index dfa1b9bc22c8..7ca95c414fa8 100644 --- a/drivers/net/wireless/zd1211rw/zd_chip.c +++ b/drivers/net/wireless/zd1211rw/zd_chip.c @@ -1325,151 +1325,11 @@ int zd_chip_set_basic_rates(struct zd_chip *chip, u16 cr_rates) return r; } -static int ofdm_qual_db(u8 status_quality, u8 zd_rate, unsigned int size) -{ - static const u16 constants[] = { - 715, 655, 585, 540, 470, 410, 360, 315, - 270, 235, 205, 175, 150, 125, 105, 85, - 65, 50, 40, 25, 15 - }; - - int i; - u32 x; - - /* It seems that their quality parameter is somehow per signal - * and is now transferred per bit. - */ - switch (zd_rate) { - case ZD_OFDM_RATE_6M: - case ZD_OFDM_RATE_12M: - case ZD_OFDM_RATE_24M: - size *= 2; - break; - case ZD_OFDM_RATE_9M: - case ZD_OFDM_RATE_18M: - case ZD_OFDM_RATE_36M: - case ZD_OFDM_RATE_54M: - size *= 4; - size /= 3; - break; - case ZD_OFDM_RATE_48M: - size *= 3; - size /= 2; - break; - default: - return -EINVAL; - } - - x = (10000 * status_quality)/size; - for (i = 0; i < ARRAY_SIZE(constants); i++) { - if (x > constants[i]) - break; - } - - switch (zd_rate) { - case ZD_OFDM_RATE_6M: - case ZD_OFDM_RATE_9M: - i += 3; - break; - case ZD_OFDM_RATE_12M: - case ZD_OFDM_RATE_18M: - i += 5; - break; - case ZD_OFDM_RATE_24M: - case ZD_OFDM_RATE_36M: - i += 9; - break; - case ZD_OFDM_RATE_48M: - case ZD_OFDM_RATE_54M: - i += 15; - break; - default: - return -EINVAL; - } - - return i; -} - -static int ofdm_qual_percent(u8 status_quality, u8 zd_rate, unsigned int size) -{ - int r; - - r = ofdm_qual_db(status_quality, zd_rate, size); - ZD_ASSERT(r >= 0); - if (r < 0) - r = 0; - - r = (r * 100)/29; - return r <= 100 ? 
r : 100; -} - -static unsigned int log10times100(unsigned int x) -{ - static const u8 log10[] = { - 0, - 0, 30, 47, 60, 69, 77, 84, 90, 95, 100, - 104, 107, 111, 114, 117, 120, 123, 125, 127, 130, - 132, 134, 136, 138, 139, 141, 143, 144, 146, 147, - 149, 150, 151, 153, 154, 155, 156, 157, 159, 160, - 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, - 170, 171, 172, 173, 174, 174, 175, 176, 177, 177, - 178, 179, 179, 180, 181, 181, 182, 183, 183, 184, - 185, 185, 186, 186, 187, 188, 188, 189, 189, 190, - 190, 191, 191, 192, 192, 193, 193, 194, 194, 195, - 195, 196, 196, 197, 197, 198, 198, 199, 199, 200, - 200, 200, 201, 201, 202, 202, 202, 203, 203, 204, - 204, 204, 205, 205, 206, 206, 206, 207, 207, 207, - 208, 208, 208, 209, 209, 210, 210, 210, 211, 211, - 211, 212, 212, 212, 213, 213, 213, 213, 214, 214, - 214, 215, 215, 215, 216, 216, 216, 217, 217, 217, - 217, 218, 218, 218, 219, 219, 219, 219, 220, 220, - 220, 220, 221, 221, 221, 222, 222, 222, 222, 223, - 223, 223, 223, 224, 224, 224, 224, - }; - - return x < ARRAY_SIZE(log10) ? log10[x] : 225; -} - -enum { - MAX_CCK_EVM_DB = 45, -}; - -static int cck_evm_db(u8 status_quality) -{ - return (20 * log10times100(status_quality)) / 100; -} - -static int cck_snr_db(u8 status_quality) -{ - int r = MAX_CCK_EVM_DB - cck_evm_db(status_quality); - ZD_ASSERT(r >= 0); - return r; -} - -static int cck_qual_percent(u8 status_quality) -{ - int r; - - r = cck_snr_db(status_quality); - r = (100*r)/17; - return r <= 100 ? r : 100; -} - static inline u8 zd_rate_from_ofdm_plcp_header(const void *rx_frame) { return ZD_OFDM | zd_ofdm_plcp_header_rate(rx_frame); } -u8 zd_rx_qual_percent(const void *rx_frame, unsigned int size, - const struct rx_status *status) -{ - return (status->frame_status&ZD_RX_OFDM) ? - ofdm_qual_percent(status->signal_quality_ofdm, - zd_rate_from_ofdm_plcp_header(rx_frame), - size) : - cck_qual_percent(status->signal_quality_cck); -} - /** * zd_rx_rate - report zd-rate * @rx_frame - received frame diff --git a/drivers/net/wireless/zd1211rw/zd_chip.h b/drivers/net/wireless/zd1211rw/zd_chip.h index 9fd8f3508d66..f8bbf7d302ae 100644 --- a/drivers/net/wireless/zd1211rw/zd_chip.h +++ b/drivers/net/wireless/zd1211rw/zd_chip.h @@ -929,9 +929,6 @@ static inline int zd_get_beacon_interval(struct zd_chip *chip, u32 *interval) struct rx_status; -u8 zd_rx_qual_percent(const void *rx_frame, unsigned int size, - const struct rx_status *status); - u8 zd_rx_rate(const void *rx_frame, const struct rx_status *status); struct zd_mc_hash { diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 8a243732c519..cc648efb8ede 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -828,9 +828,6 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length) stats.freq = zd_channels[_zd_chip_get_channel(&mac->chip) - 1].center_freq; stats.band = IEEE80211_BAND_2GHZ; stats.signal = status->signal_strength; - stats.qual = zd_rx_qual_percent(buffer, - length - sizeof(struct rx_status), - status); rate = zd_rx_rate(buffer, status); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 538d6b761887..0bf369752274 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -547,7 +547,6 @@ enum mac80211_rx_flags { * unspecified depending on the hardware capabilities flags * @IEEE80211_HW_SIGNAL_* * @noise: noise when receiving this frame, in dBm. - * @qual: overall signal quality indication, in percent (0-100). 
* @antenna: antenna used * @rate_idx: index of data rate into band's supported rates or MCS index if * HT rates are use (RX_FLAG_HT) @@ -559,7 +558,6 @@ struct ieee80211_rx_status { int freq; int signal; int noise; - int __deprecated qual; int antenna; int rate_idx; int flag; -- cgit v1.2.3 From 81744ee44ab2845c16ffd7d6f762f7b4a49a4750 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 29 Dec 2009 08:35:35 +0100 Subject: block: Fix incorrect alignment offset reporting and update documentation queue_sector_alignment_offset returned the wrong value which caused partitions to report an incorrect alignment_offset. Since offset alignment calculation is needed several places it has been split into a separate helper function. The topology stacking function has been updated accordingly. Furthermore, comments have been added to clarify how the stacking function works. Signed-off-by: Martin K. Petersen Tested-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-settings.c | 44 +++++++++++++++++++++++++++++++++----------- include/linux/blkdev.h | 11 +++++++++-- 2 files changed, 42 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index e14fcbcedbfa..d52d4adc440b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -505,20 +505,30 @@ static unsigned int lcm(unsigned int a, unsigned int b) /** * blk_stack_limits - adjust queue_limits for stacked devices - * @t: the stacking driver limits (top) - * @b: the underlying queue limits (bottom) + * @t: the stacking driver limits (top device) + * @b: the underlying queue limits (bottom, component device) * @offset: offset to beginning of data within component device * * Description: - * Merges two queue_limit structs. Returns 0 if alignment didn't - * change. Returns -1 if adding the bottom device caused - * misalignment. + * This function is used by stacking drivers like MD and DM to ensure + * that all component devices have compatible block sizes and + * alignments. The stacking driver must provide a queue_limits + * struct (top) and then iteratively call the stacking function for + * all component (bottom) devices. The stacking function will + * attempt to combine the values and ensure proper alignment. + * + * Returns 0 if the top and bottom queue_limits are compatible. The + * top device's block sizes and alignment offsets may be adjusted to + * ensure alignment with the bottom device. If no compatible sizes + * and alignments exist, -1 is returned and the resulting top + * queue_limits will have the misaligned flag set to indicate that + * the alignment_offset is undefined. */ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset) { sector_t alignment; - unsigned int top, bottom, granularity; + unsigned int top, bottom; t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); @@ -536,15 +546,18 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); - granularity = max(b->physical_block_size, b->io_min); - alignment = b->alignment_offset - (offset & (granularity - 1)); + alignment = queue_limit_alignment_offset(b, offset); + /* Bottom device has different alignment. Check that it is + * compatible with the current top alignment. 
+ */ if (t->alignment_offset != alignment) { top = max(t->physical_block_size, t->io_min) + t->alignment_offset; - bottom = granularity + alignment; + bottom = max(b->physical_block_size, b->io_min) + alignment; + /* Verify that top and bottom intervals line up */ if (max(top, bottom) & (min(top, bottom) - 1)) t->misaligned = 1; } @@ -561,32 +574,39 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->no_cluster |= b->no_cluster; t->discard_zeroes_data &= b->discard_zeroes_data; + /* Physical block size a multiple of the logical block size? */ if (t->physical_block_size & (t->logical_block_size - 1)) { t->physical_block_size = t->logical_block_size; t->misaligned = 1; } + /* Minimum I/O a multiple of the physical block size? */ if (t->io_min & (t->physical_block_size - 1)) { t->io_min = t->physical_block_size; t->misaligned = 1; } + /* Optimal I/O a multiple of the physical block size? */ if (t->io_opt & (t->physical_block_size - 1)) { t->io_opt = 0; t->misaligned = 1; } + /* Find lowest common alignment_offset */ t->alignment_offset = lcm(t->alignment_offset, alignment) & (max(t->physical_block_size, t->io_min) - 1); + /* Verify that new alignment_offset is on a logical block boundary */ if (t->alignment_offset & (t->logical_block_size - 1)) t->misaligned = 1; /* Discard alignment and granularity */ if (b->discard_granularity) { + unsigned int granularity = b->discard_granularity; + offset &= granularity - 1; - alignment = b->discard_alignment - - (offset & (b->discard_granularity - 1)); + alignment = (granularity + b->discard_alignment - offset) + & (granularity - 1); if (t->discard_granularity != 0 && t->discard_alignment != alignment) { @@ -598,6 +618,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->discard_misaligned = 1; } + t->max_discard_sectors = min_not_zero(t->max_discard_sectors, + b->max_discard_sectors); t->discard_granularity = max(t->discard_granularity, b->discard_granularity); t->discard_alignment = lcm(t->discard_alignment, alignment) & diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 784a919aa0d0..59b832be3044 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1116,11 +1116,18 @@ static inline int queue_alignment_offset(struct request_queue *q) return q->limits.alignment_offset; } +static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset) +{ + unsigned int granularity = max(lim->physical_block_size, lim->io_min); + + offset &= granularity - 1; + return (granularity + lim->alignment_offset - offset) & (granularity - 1); +} + static inline int queue_sector_alignment_offset(struct request_queue *q, sector_t sector) { - return ((sector << 9) - q->limits.alignment_offset) - & (q->limits.io_min - 1); + return queue_limit_alignment_offset(&q->limits, sector << 9); } static inline int bdev_alignment_offset(struct block_device *bdev) -- cgit v1.2.3 From db5d247ae811f49185a71e703b65acad845e4b18 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 24 Dec 2009 12:05:58 +0100 Subject: firewire: fix use of multiple AV/C devices, allow multiple FCP listeners Control of more than one AV/C device at once --- e.g. camcorders, tape decks, audio devices, TV tuners --- failed or worked only unreliably, depending on driver implementation. This affected kernelspace and userspace drivers alike and was caused by firewire-core's inability to accept multiple registrations of FCP listeners. 
The fix allows multiple address handlers to be registered for the FCP command and response registers. When a request for these registers is received, all handlers are invoked, and the Firewire response is generated by the core and not by any handler. The cdev API does not change, i.e., userspace is still expected to send a response for FCP requests; this response is silently ignored. Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter (changelog, rebased, whitespace) --- drivers/firewire/core-cdev.c | 26 ++++--- drivers/firewire/core-transaction.c | 118 ++++++++++++++++++++++++++------ drivers/media/dvb/firewire/firedtv-fw.c | 12 +--- include/linux/firewire-cdev.h | 3 + include/linux/firewire.h | 4 +- 5 files changed, 119 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 231e6ee5ba43..2cb22d160f6e 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -601,8 +601,9 @@ static void release_request(struct client *client, struct inbound_transaction_resource *r = container_of(resource, struct inbound_transaction_resource, resource); - fw_send_response(client->device->card, r->request, - RCODE_CONFLICT_ERROR); + if (r->request) + fw_send_response(client->device->card, r->request, + RCODE_CONFLICT_ERROR); kfree(r); } @@ -645,7 +646,8 @@ static void handle_request(struct fw_card *card, struct fw_request *request, failed: kfree(r); kfree(e); - fw_send_response(card, request, RCODE_CONFLICT_ERROR); + if (request) + fw_send_response(card, request, RCODE_CONFLICT_ERROR); } static void release_address_handler(struct client *client, @@ -715,15 +717,17 @@ static int ioctl_send_response(struct client *client, void *buffer) r = container_of(resource, struct inbound_transaction_resource, resource); - if (request->length < r->length) - r->length = request->length; - - if (copy_from_user(r->data, u64_to_uptr(request->data), r->length)) { - ret = -EFAULT; - goto out; + if (r->request) { + if (request->length < r->length) + r->length = request->length; + if (copy_from_user(r->data, u64_to_uptr(request->data), + r->length)) { + ret = -EFAULT; + goto out; + } + fw_send_response(client->device->card, r->request, + request->rcode); } - - fw_send_response(client->device->card, r->request, request->rcode); out: kfree(r); diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 842739df23e2..495849eb13cc 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -432,14 +432,20 @@ static struct fw_address_handler *lookup_overlapping_address_handler( return NULL; } +static bool is_enclosing_handler(struct fw_address_handler *handler, + unsigned long long offset, size_t length) +{ + return handler->offset <= offset && + offset + length <= handler->offset + handler->length; +} + static struct fw_address_handler *lookup_enclosing_address_handler( struct list_head *list, unsigned long long offset, size_t length) { struct fw_address_handler *handler; list_for_each_entry(handler, list, link) { - if (handler->offset <= offset && - offset + length <= handler->offset + handler->length) + if (is_enclosing_handler(handler, offset, length)) return handler; } @@ -465,6 +471,12 @@ const struct fw_address_region fw_unit_space_region = { .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, }; #endif /* 0 */ +static bool is_in_fcp_region(u64 offset, size_t length) +{ + return offset >= (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && + offset + length <= 
(CSR_REGISTER_BASE | CSR_FCP_END); +} + /** * fw_core_add_address_handler - register for incoming requests * @handler: callback @@ -477,8 +489,11 @@ const struct fw_address_region fw_unit_space_region = * give the details of the particular request. * * Return value: 0 on success, non-zero otherwise. + * * The start offset of the handler's address region is determined by * fw_core_add_address_handler() and is returned in handler->offset. + * + * Address allocations are exclusive, except for the FCP registers. */ int fw_core_add_address_handler(struct fw_address_handler *handler, const struct fw_address_region *region) @@ -498,10 +513,12 @@ int fw_core_add_address_handler(struct fw_address_handler *handler, handler->offset = region->start; while (handler->offset + handler->length <= region->end) { - other = - lookup_overlapping_address_handler(&address_handler_list, - handler->offset, - handler->length); + if (is_in_fcp_region(handler->offset, handler->length)) + other = NULL; + else + other = lookup_overlapping_address_handler + (&address_handler_list, + handler->offset, handler->length); if (other != NULL) { handler->offset += other->length; } else { @@ -668,6 +685,9 @@ static struct fw_request *allocate_request(struct fw_packet *p) void fw_send_response(struct fw_card *card, struct fw_request *request, int rcode) { + if (WARN_ONCE(!request, "invalid for FCP address handlers")) + return; + /* unified transaction or broadcast transaction: don't respond */ if (request->ack != ACK_PENDING || HEADER_DESTINATION_IS_BROADCAST(request->request_header[0])) { @@ -686,26 +706,15 @@ void fw_send_response(struct fw_card *card, } EXPORT_SYMBOL(fw_send_response); -void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) +static void handle_exclusive_region_request(struct fw_card *card, + struct fw_packet *p, + struct fw_request *request, + unsigned long long offset) { struct fw_address_handler *handler; - struct fw_request *request; - unsigned long long offset; unsigned long flags; int tcode, destination, source; - if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) - return; - - request = allocate_request(p); - if (request == NULL) { - /* FIXME: send statically allocated busy packet. 
*/ - return; - } - - offset = - ((unsigned long long) - HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | p->header[2]; tcode = HEADER_GET_TCODE(p->header[0]); destination = HEADER_GET_DESTINATION(p->header[0]); source = HEADER_GET_SOURCE(p->header[1]); @@ -732,6 +741,73 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) request->data, request->length, handler->callback_data); } + +static void handle_fcp_region_request(struct fw_card *card, + struct fw_packet *p, + struct fw_request *request, + unsigned long long offset) +{ + struct fw_address_handler *handler; + unsigned long flags; + int tcode, destination, source; + + if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && + offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) || + request->length > 0x200) { + fw_send_response(card, request, RCODE_ADDRESS_ERROR); + + return; + } + + tcode = HEADER_GET_TCODE(p->header[0]); + destination = HEADER_GET_DESTINATION(p->header[0]); + source = HEADER_GET_SOURCE(p->header[1]); + + if (tcode != TCODE_WRITE_QUADLET_REQUEST && + tcode != TCODE_WRITE_BLOCK_REQUEST) { + fw_send_response(card, request, RCODE_TYPE_ERROR); + + return; + } + + spin_lock_irqsave(&address_handler_lock, flags); + list_for_each_entry(handler, &address_handler_list, link) { + if (is_enclosing_handler(handler, offset, request->length)) + handler->address_callback(card, NULL, tcode, + destination, source, + p->generation, p->speed, + offset, request->data, + request->length, + handler->callback_data); + } + spin_unlock_irqrestore(&address_handler_lock, flags); + + fw_send_response(card, request, RCODE_COMPLETE); +} + +void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) +{ + struct fw_request *request; + unsigned long long offset; + + if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) + return; + + request = allocate_request(p); + if (request == NULL) { + /* FIXME: send statically allocated busy packet. 
*/ + return; + } + + offset = ((u64)HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | + p->header[2]; + + if (!is_in_fcp_region(offset, request->length)) + handle_exclusive_region_request(card, p, request, offset); + else + handle_fcp_region_request(card, p, request, offset); + +} EXPORT_SYMBOL(fw_core_handle_request); void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c index fe44789ab037..6223bf01efe9 100644 --- a/drivers/media/dvb/firewire/firedtv-fw.c +++ b/drivers/media/dvb/firewire/firedtv-fw.c @@ -202,14 +202,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request, unsigned long flags; int su; - if ((tcode != TCODE_WRITE_QUADLET_REQUEST && - tcode != TCODE_WRITE_BLOCK_REQUEST) || - offset != CSR_REGISTER_BASE + CSR_FCP_RESPONSE || - length == 0 || - (((u8 *)payload)[0] & 0xf0) != 0) { - fw_send_response(card, request, RCODE_TYPE_ERROR); + if (length < 2 || (((u8 *)payload)[0] & 0xf0) != 0) return; - } su = ((u8 *)payload)[1] & 0x7; @@ -230,10 +224,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request, } spin_unlock_irqrestore(&node_list_lock, flags); - if (fdtv) { + if (fdtv) avc_recv(fdtv, payload, length); - fw_send_response(card, request, RCODE_COMPLETE); - } } static struct fw_address_handler fcp_handler = { diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index c6b3ca3af6df..1f716d9f714b 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -340,6 +340,9 @@ struct fw_cdev_send_response { * The @closure field is passed back to userspace in the response event. * The @handle field is an out parameter, returning a handle to the allocated * range to be used for later deallocation of the range. + * + * The address range is allocated on all local nodes. The address allocation + * is exclusive except for the FCP command and response registers. */ struct fw_cdev_allocate { __u64 offset; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 9416a461b696..a0e67150a729 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -248,8 +248,8 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, void *data, size_t length, void *callback_data); /* - * Important note: The callback must guarantee that either fw_send_response() - * or kfree() is called on the @request. + * Important note: Except for the FCP registers, the callback must guarantee + * that either fw_send_response() or kfree() is called on the @request. */ typedef void (*fw_address_callback_t)(struct fw_card *card, struct fw_request *request, -- cgit v1.2.3 From 9bd3f98821a83041e77ee25158b80b535d02d7b4 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Wed, 30 Dec 2009 08:41:07 +0100 Subject: block: blk_rq_err_sectors cleanup blk_rq_err_sectors() seems useless, get rid of it. 
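blk_rq_err_bytes() remains available, and as the hunk below shows, the removed helper only shifted its result into 512-byte sectors. A caller that still needs a sector count can open-code that shift; a minimal sketch follows, with my_rq_err_sectors() as a hypothetical local name, not a block-layer API:

	#include <linux/blkdev.h>

	/*
	 * Hypothetical local helper: repeats what the removed
	 * blk_rq_err_sectors() did on top of blk_rq_err_bytes(),
	 * i.e. converts the remaining byte count to 512-byte sectors.
	 */
	static inline unsigned int my_rq_err_sectors(const struct request *rq)
	{
		return blk_rq_err_bytes(rq) >> 9;
	}
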
Signed-off-by: Gui Jianfeng Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 59b832be3044..9b98173a8184 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -845,7 +845,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) * blk_rq_err_bytes() : bytes left till the next error boundary * blk_rq_sectors() : sectors left in the entire request * blk_rq_cur_sectors() : sectors left in the current segment - * blk_rq_err_sectors() : sectors left till the next error boundary */ static inline sector_t blk_rq_pos(const struct request *rq) { @@ -874,11 +873,6 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } -static inline unsigned int blk_rq_err_sectors(const struct request *rq) -{ - return blk_rq_err_bytes(rq) >> 9; -} - /* * Request issue related functions. */ -- cgit v1.2.3 From e96dc9674cb597de4fee757ed005c8465072d13f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:26 +0800 Subject: tracing/syscalls: Fix typo in SYSCALL_DEFINE0 The struct syscall_metadata variable name in SYSCALL_DEFINE0 should be __syscall_meta__##sname instead of __syscall_meta_##sname to match the name that is in SYSCALL_DEFINE1/2/3/4/5/6. This error causes event_enter_##sname->data to point to the wrong location, which causes syscalls which are defined by SYSCALL_DEFINE0() not to be traced. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D2E.1010807@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65793e90d6f6..207466a49f3d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -195,7 +195,7 @@ struct perf_event_attr; static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ - __syscall_meta_##sname = { \ + __syscall_meta__##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ .enter_event = &event_enter__##sname, \ -- cgit v1.2.3 From fb7ae981cb9fe8665b9da97e8734745e030c151d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:38 +0800 Subject: tracing: Fix sign fields in ftrace_define_fields_##call() Add is_signed_type() call to trace_define_field() in ftrace macros. The code previously just passed in 0 (false), disregarding whether or not the field was actually a signed type. 
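The is_signed_type() macro added here lives in the tracing headers; purely to illustrate the idea (the in-tree definition may differ in detail), the usual C idiom is roughly:

	/*
	 * A type is signed exactly when (type)-1 compares below (type)1;
	 * for an unsigned type the cast wraps to the maximum value instead.
	 */
	#define is_signed_type(type)	(((type)(-1)) < (type)1)
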
Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D3A.6020007@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 7 ++++--- kernel/trace/trace_export.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 73523151a731..c6fe03e902ca 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -414,7 +414,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -422,8 +423,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #define __dynamic_array(type, item, len) \ ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), 0, \ - FILTER_OTHER); + sizeof(field.__data_loc_##item), \ + is_signed_type(type), FILTER_OTHER); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 458e5bfe26d0..d4fa5dc1ee4e 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), \ container.item), \ - sizeof(field.container.item), 0, \ - FILTER_OTHER); \ + sizeof(field.container.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; -- cgit v1.2.3 From 75c85a0bc13367aabb36e8208d4e373b022b43b3 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 30 Dec 2009 13:21:06 -0600 Subject: libsrp: fix compile failure commit 45465487897a1c6d508b14b904dc5777f7ec7e04 ("kfifo: move struct kfifo in place") caused a compile failure in ibmvscsitgt.c because it changed a pointer to kfifo in the libsrp.h structure to a direct inclusion without including . The fix is simple, just add the include, but how did this happen? This change, introduced at -rc2, hardly looks like a bug fix, and it clearly didn't go through linux-next, which would have picked up this compile failure (it only occurs on ppc because of the ibm virtual scsi target). [ Apparently all of -mm wasn't in linux-next.. ] Signed-off-by: James Bottomley Signed-off-by: Linus Torvalds --- include/scsi/libsrp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsrp.h b/include/scsi/libsrp.h index 07e3adde21d9..f4105c91af53 100644 --- a/include/scsi/libsrp.h +++ b/include/scsi/libsrp.h @@ -2,6 +2,7 @@ #define __LIBSRP_H__ #include +#include #include #include #include -- cgit v1.2.3 From ed656d8deccc5669afa33387568e7ec6f14e3e94 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Sat, 26 Dec 2009 17:58:11 +0100 Subject: kfifo: Fix typo in comment It's DECLARE_KFIFO, not DECLARED_KFIFO. 
Signed-off-by: Rolf Eike Beer Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 3d44e9c65a8e..7c6b32a1421c 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -81,7 +81,7 @@ union { \ } /** - * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO + * INIT_KFIFO - Initialize a kfifo declared by DECLARE_KFIFO * @name: name of the declared kfifo datatype */ #define INIT_KFIFO(name) \ -- cgit v1.2.3 From d7f0eea9e431e1b8b0742a74db1a9490730b2a25 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 30 Dec 2009 15:36:42 +0800 Subject: ACPI: introduce kernel parameter acpi_sleep=sci_force_enable Introduce kernel parameter acpi_sleep=sci_force_enable some laptop requires SCI_EN being set directly on resume, or else they hung somewhere in the resume code path. We already have a blacklist for these laptops but we still need this option, especially when debugging some suspend/resume problems, in case there are systems that need this workaround and are not yet in the blacklist. Signed-off-by: Zhang Rui Acked-by: Rafael J. Wysocki Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 5 ++++- arch/x86/kernel/acpi/sleep.c | 2 ++ drivers/acpi/sleep.c | 29 +++++++++++++++++------------ include/linux/acpi.h | 1 + 4 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5ba4d9dff113..736d45602886 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -240,7 +240,7 @@ and is between 256 and 4096 characters. It is defined in the file acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, s4_nonvs } + old_ordering, s4_nonvs, sci_force_enable } See Documentation/power/video.txt for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep @@ -253,6 +253,9 @@ and is between 256 and 4096 characters. It is defined in the file of _PTS is used by default). s4_nonvs prevents the kernel from saving/restoring the ACPI NVS memory during hibernation. + sci_force_enable causes the kernel to set SCI_EN directly + on resume from S1/S3 (which is against the ACPI spec, + but some broken systems don't work without it). acpi_use_timer_override [HW,ACPI] Use timer override. For some broken Nvidia NF5 boards diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b91..f9961034e557 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str) #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); + if (strncmp(str, "sci_force_enable", 16) == 0) + acpi_set_sci_en_on_resume(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 5f2c379ab7bf..79d33d908b5a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -80,6 +80,23 @@ static int acpi_sleep_prepare(u32 acpi_state) #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; +/* + * According to the ACPI specification the BIOS should make sure that ACPI is + * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. 
Still, + * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI + * on such systems during resume. Unfortunately that doesn't help in + * particularly pathological cases in which SCI_EN has to be set directly on + * resume, although the specification states very clearly that this flag is + * owned by the hardware. The set_sci_en_on_resume variable will be set in such + * cases. + */ +static bool set_sci_en_on_resume; + +void __init acpi_set_sci_en_on_resume(void) +{ + set_sci_en_on_resume = true; +} + /* * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the * user to request that behavior by using the 'acpi_old_suspend_ordering' @@ -170,18 +187,6 @@ static void acpi_pm_end(void) #endif /* CONFIG_ACPI_SLEEP */ #ifdef CONFIG_SUSPEND -/* - * According to the ACPI specification the BIOS should make sure that ACPI is - * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, - * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI - * on such systems during resume. Unfortunately that doesn't help in - * particularly pathological cases in which SCI_EN has to be set directly on - * resume, although the specification states very clearly that this flag is - * owned by the hardware. The set_sci_en_on_resume variable will be set in such - * cases. - */ -static bool set_sci_en_on_resume; - extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ce945d4845fc..36924255c0d5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); +void __init acpi_set_sci_en_on_resume(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3 From 2f5cb43406d0b29b96248f5328a14a6f6abf8ae6 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 30 Dec 2009 08:23:30 +0000 Subject: phylib: Properly reinitialize PHYs after hibernation Since hibernation assumes power loss, we should fully reinitialize PHYs (including platform fixups), as if PHYs were just attached. This patch factors phy_init_hw() out of phy_attach_direct(), then converts mdio_bus to dev_pm_ops and adds an appropriate restore() callback. Signed-off-by: Anton Vorontsov Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 50 +++++++++++++++++++++++++++++++++++++------- drivers/net/phy/phy_device.c | 30 +++++++++++++------------- include/linux/phy.h | 1 + 3 files changed, 59 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 49252d390903..e17b70291bbc 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -264,6 +264,8 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv) (phydev->phy_id & phydrv->phy_id_mask)); } +#ifdef CONFIG_PM + static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->dev.driver; @@ -295,10 +297,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) return true; } -/* Suspend and resume. 
Copied from platform_suspend and - * platform_resume - */ -static int mdio_bus_suspend(struct device * dev, pm_message_t state) +static int mdio_bus_suspend(struct device *dev) { struct phy_driver *phydrv = to_phy_driver(dev->driver); struct phy_device *phydev = to_phy_device(dev); @@ -318,7 +317,7 @@ static int mdio_bus_suspend(struct device * dev, pm_message_t state) return phydrv->suspend(phydev); } -static int mdio_bus_resume(struct device * dev) +static int mdio_bus_resume(struct device *dev) { struct phy_driver *phydrv = to_phy_driver(dev->driver); struct phy_device *phydev = to_phy_device(dev); @@ -338,11 +337,48 @@ no_resume: return 0; } +static int mdio_bus_restore(struct device *dev) +{ + struct phy_device *phydev = to_phy_device(dev); + struct net_device *netdev = phydev->attached_dev; + int ret; + + if (!netdev) + return 0; + + ret = phy_init_hw(phydev); + if (ret < 0) + return ret; + + /* The PHY needs to renegotiate. */ + phydev->link = 0; + phydev->state = PHY_UP; + + phy_start_machine(phydev, NULL); + + return 0; +} + +static struct dev_pm_ops mdio_bus_pm_ops = { + .suspend = mdio_bus_suspend, + .resume = mdio_bus_resume, + .freeze = mdio_bus_suspend, + .thaw = mdio_bus_resume, + .restore = mdio_bus_restore, +}; + +#define MDIO_BUS_PM_OPS (&mdio_bus_pm_ops) + +#else + +#define MDIO_BUS_PM_OPS NULL + +#endif /* CONFIG_PM */ + struct bus_type mdio_bus_type = { .name = "mdio_bus", .match = mdio_bus_match, - .suspend = mdio_bus_suspend, - .resume = mdio_bus_resume, + .pm = MDIO_BUS_PM_OPS, }; EXPORT_SYMBOL(mdio_bus_type); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b10fedd82143..8212b2b93422 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -378,6 +378,20 @@ void phy_disconnect(struct phy_device *phydev) } EXPORT_SYMBOL(phy_disconnect); +int phy_init_hw(struct phy_device *phydev) +{ + int ret; + + if (!phydev->drv || !phydev->drv->config_init) + return 0; + + ret = phy_scan_fixups(phydev); + if (ret < 0) + return ret; + + return phydev->drv->config_init(phydev); +} + /** * phy_attach_direct - attach a network device to a given PHY device pointer * @dev: network device to attach @@ -425,21 +439,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, /* Do initial configuration here, now that * we have certain key parameters * (dev_flags and interface) */ - if (phydev->drv->config_init) { - int err; - - err = phy_scan_fixups(phydev); - - if (err < 0) - return err; - - err = phydev->drv->config_init(phydev); - - if (err < 0) - return err; - } - - return 0; + return phy_init_hw(phydev); } EXPORT_SYMBOL(phy_attach_direct); diff --git a/include/linux/phy.h b/include/linux/phy.h index b1368b8f6572..7968defd2fa7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -447,6 +447,7 @@ struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_device_register(struct phy_device *phy); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +int phy_init_hw(struct phy_device *phydev); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); struct phy_device * phy_attach(struct net_device *dev, -- cgit v1.2.3 From 0719d3434747889b314a1e8add776418c4148bcf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Dec 2009 00:39:22 +0100 Subject: reiserfs: Fix reiserfs lock <-> i_xattr_sem dependency inversion i_xattr_sem depends on the reiserfs lock. 
But after we grab i_xattr_sem, we may relax/relock the reiserfs lock while waiting on a freezed filesystem, creating a dependency inversion between the two locks. In order to avoid the i_xattr_sem -> reiserfs lock dependency, let's create a reiserfs_down_read_safe() that acts like reiserfs_mutex_lock_safe(): relax the reiserfs lock while grabbing another lock to avoid undesired dependencies induced by the heivyweight reiserfs lock. This fixes the following warning: [ 990.005931] ======================================================= [ 990.012373] [ INFO: possible circular locking dependency detected ] [ 990.013233] 2.6.33-rc1 #1 [ 990.013233] ------------------------------------------------------- [ 990.013233] dbench/1891 is trying to acquire lock: [ 990.013233] (&REISERFS_SB(s)->lock){+.+.+.}, at: [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [ 990.013233] but task is already holding lock: [ 990.013233] (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [ 990.013233] which lock already depends on the new lock. [ 990.013233] [ 990.013233] [ 990.013233] the existing dependency chain (in reverse order) is: [ 990.013233] [ 990.013233] -> #1 (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}: [ 990.013233] [] __lock_acquire+0xf9c/0x1560 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] down_write+0x44/0x80 [ 990.013233] [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b [ 990.013233] [ 990.013233] -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [ 990.013233] [] __lock_acquire+0x12d0/0x1560 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] __mutex_lock_common+0x47/0x3b0 [ 990.013233] [] mutex_lock_nested+0x3e/0x50 [ 990.013233] [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [] reiserfs_prepare_write+0x45/0x180 [ 990.013233] [] reiserfs_xattr_set_handle+0x2a6/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b [ 990.013233] [ 990.013233] other info that might help us debug this: [ 990.013233] [ 990.013233] 2 locks held by dbench/1891: [ 990.013233] #0: (&sb->s_type->i_mutex_key#12){+.+.+.}, at: [] vfs_setxattr+0x78/0xc0 [ 990.013233] #1: (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [ 990.013233] stack backtrace: [ 990.013233] Pid: 1891, comm: dbench Not tainted 2.6.33-rc1 #1 [ 990.013233] Call Trace: [ 990.013233] [] print_circular_bug+0xe9/0xf0 [ 990.013233] [] __lock_acquire+0x12d0/0x1560 [ 990.013233] [] ? reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] __mutex_lock_common+0x47/0x3b0 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? mark_held_locks+0x72/0xa0 [ 990.013233] [] ? __mutex_unlock_slowpath+0xbd/0x140 [ 990.013233] [] ? 
trace_hardirqs_on_caller+0x14d/0x1a0 [ 990.013233] [] mutex_lock_nested+0x3e/0x50 [ 990.013233] [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [] reiserfs_prepare_write+0x45/0x180 [ 990.013233] [] reiserfs_xattr_set_handle+0x2a6/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] ? __mutex_lock_common+0x284/0x3b0 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] ? sched_clock_cpu+0xb8/0x100 [ 990.013233] [] ? trace_hardirqs_off+0xd/0x10 [ 990.013233] [] ? cpu_clock+0x43/0x50 [ 990.013233] [] ? fget+0xb0/0x110 [ 990.013233] [] ? fget+0x0/0x110 [ 990.013233] [] ? sysret_check+0x27/0x62 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b Reported-and-tested-by: Christian Kujau Signed-off-by: Frederic Weisbecker Cc: Alexander Beregalov Cc: Chris Mason Cc: Ingo Molnar --- fs/reiserfs/xattr.c | 2 +- include/linux/reiserfs_fs.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8891cd88a3f4..a0e2e7acdc75 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -484,7 +484,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, if (IS_ERR(dentry)) return PTR_ERR(dentry); - down_write(&REISERFS_I(inode)->i_xattr_sem); + reiserfs_down_read_safe(&REISERFS_I(inode)->i_xattr_sem, inode->i_sb); xahash = xattr_hash(buffer, buffer_size); while (buffer_pos < buffer_size || buffer_pos == 0) { diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 4351b49e2b1e..35d3f459b0ac 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -106,6 +106,14 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, reiserfs_write_lock(s); } +static inline void +reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) +{ + reiserfs_write_unlock(s); + down_read(sem); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.2.3 From c4a62ca362258d98f42efb282cfbf9b61caffdbe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Dec 2009 03:20:19 +0100 Subject: reiserfs: Warn on lock relax if taken recursively When we relax the reiserfs lock to avoid creating unwanted dependencies against others locks while grabbing these, we want to ensure it has not been taken recursively, otherwise the lock won't be really relaxed. Only its depth will be decreased. The unwanted dependency would then actually happen. To prevent from that, add a reiserfs_lock_check_recursive() call in the places that need it. 
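To make the failure mode concrete, a sketch of the call pattern the new check is meant to warn about (sb and m stand for whatever super block and mutex are involved; the depth accounting is simplified):

	reiserfs_write_lock(sb);		/* outer code path takes the lock      */
	reiserfs_write_lock(sb);		/* recursive take, lock_depth goes > 0 */
	reiserfs_mutex_lock_safe(&m, sb);	/* the relax only decreases the depth; */
						/* the lock stays held while we sleep  */
						/* on m, recreating the dependency     */
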
Signed-off-by: Frederic Weisbecker Cc: Alexander Beregalov Cc: Chris Mason Cc: Ingo Molnar --- fs/reiserfs/lock.c | 9 +++++++++ include/linux/reiserfs_fs.h | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c index ee2cfc0fd8a7..b87aa2c1afc1 100644 --- a/fs/reiserfs/lock.c +++ b/fs/reiserfs/lock.c @@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller) reiserfs_panic(sb, "%s called without kernel lock held %d", caller); } + +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *sb) +{ + struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); + + WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n"); +} +#endif diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 35d3f459b0ac..793bf8351ab8 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -62,6 +62,12 @@ void reiserfs_write_unlock(struct super_block *s); int reiserfs_write_lock_once(struct super_block *s); void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *s); +#else +static inline void reiserfs_lock_check_recursive(struct super_block *s) { } +#endif + /* * Several mutexes depend on the write lock. * However sometimes we want to relax the write lock while we hold @@ -92,6 +98,7 @@ void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); static inline void reiserfs_mutex_lock_safe(struct mutex *m, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock(m); reiserfs_write_lock(s); @@ -101,6 +108,7 @@ static inline void reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock_nested(m, subclass); reiserfs_write_lock(s); @@ -109,6 +117,7 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, static inline void reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); down_read(sem); reiserfs_write_lock(s); -- cgit v1.2.3 From 96d07d211739fd2450ac54e81d00fa40fcd4b1bd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 20 Nov 2009 14:16:33 +0100 Subject: resource: move kernel function inside __KERNEL__ It is an internal function. Move it inside __KERNEL__ ifdef, along with task_struct declaration. Then we get: --- /usr/include/linux/resource.h 2009-09-14 15:09:29.000000000 +0200 +++ usr/include/linux/resource.h 2010-01-04 11:30:54.000000000 +0100 @@ -3,8 +3,6 @@ #include -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,5 @@ */ #include -int getrusage(struct task_struct *p, int who, struct rusage *ru); #endif *********** include/linux/Kbuild is untouched, since unifdef is run even on headers-y nowadays. 
Signed-off-by: Jiri Slaby --- include/linux/resource.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/resource.h b/include/linux/resource.h index 40fc7e626082..f1e914eefeab 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -3,8 +3,6 @@ #include -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,12 @@ struct rlimit { */ #include +#ifdef __KERNEL__ + +struct task_struct; + int getrusage(struct task_struct *p, int who, struct rusage __user *ru); +#endif /* __KERNEL__ */ + #endif -- cgit v1.2.3 From 3e10e716abf3c71bdb5d86b8f507f9e72236c9cd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 19 Nov 2009 17:16:37 +0100 Subject: resource: add helpers for fetching rlimits We want to be sure that compiler fetches the limit variable only once, so add helpers for fetching current and maximal resource limits which do that. Add them to sched.h (instead of resource.h) due to circular dependency sched.h->resource.h->task_struct Alternative would be to create a separate res_access.h or similar. Signed-off-by: Jiri Slaby Cc: James Morris Cc: Heiko Carstens Cc: Andrew Morton Cc: Ingo Molnar --- include/linux/sched.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f2f842db03ce..8d4991be9d53 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2601,6 +2601,28 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 1ae861e652b5457e7fa98ccbc55abea1e207916e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 31 Dec 2009 12:15:54 +0100 Subject: PCI/PM: Use per-device D3 delays It turns out that some PCI devices require extra delays when changing power state from D3 to D0 (and the other way around). Although this is against the PCI specification, we can handle it quite easily by allowing drivers to define arbitrary D3 delays for devices known to require extra time for switching power states. Introduce additional field d3_delay in struct pci_dev and use it to store the value of the device's D0->D3 delay, in miliseconds. Make the PCI PM core code use the per-device d3_delay unless pci_pm_d3_delay is greater (in which case the latter is used). [This also allows the driver to specify d3_delay shorter than the 10 ms required by the PCI standard if the device is known to be able to handle that.] Make the sky2 driver set d3_delay to 150 for devices handled by it. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14730 which is a listed regression from 2.6.30. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Jesse Barnes --- drivers/net/sky2.c | 1 + drivers/pci/pci.c | 19 +++++++++++++++---- include/linux/pci.h | 1 + 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 1c01b96c9611..2d28d58200d0 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -4684,6 +4684,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev, INIT_WORK(&hw->restart_work, sky2_restart); pci_set_drvdata(pdev, hw); + pdev->d3_delay = 150; return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0906599ebfde..315fea47e784 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -29,7 +29,17 @@ const char *pci_power_names[] = { }; EXPORT_SYMBOL_GPL(pci_power_names); -unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT; +unsigned int pci_pm_d3_delay; + +static void pci_dev_d3_sleep(struct pci_dev *dev) +{ + unsigned int delay = dev->d3_delay; + + if (delay < pci_pm_d3_delay) + delay = pci_pm_d3_delay; + + msleep(delay); +} #ifdef CONFIG_PCI_DOMAINS int pci_domains_supported = 1; @@ -522,7 +532,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) /* Mandatory power management transition delays */ /* see PCI PM 1.1 5.6.1 table 18 */ if (state == PCI_D3hot || dev->current_state == PCI_D3hot) - msleep(pci_pm_d3_delay); + pci_dev_d3_sleep(dev); else if (state == PCI_D2 || dev->current_state == PCI_D2) udelay(PCI_PM_D2_DELAY); @@ -1409,6 +1419,7 @@ void pci_pm_init(struct pci_dev *dev) } dev->pm_cap = pm; + dev->d3_delay = PCI_PM_D3_WAIT; dev->d1_support = false; dev->d2_support = false; @@ -2247,12 +2258,12 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) csr &= ~PCI_PM_CTRL_STATE_MASK; csr |= PCI_D3hot; pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr); - msleep(pci_pm_d3_delay); + pci_dev_d3_sleep(dev); csr &= ~PCI_PM_CTRL_STATE_MASK; csr |= PCI_D0; pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr); - msleep(pci_pm_d3_delay); + pci_dev_d3_sleep(dev); return 0; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 5da0690d9cee..174e5392e51e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -243,6 +243,7 @@ struct pci_dev { unsigned int d2_support:1; /* Low power state D2 is supported */ unsigned int no_d1d2:1; /* Only allow D0 and D3 */ unsigned int wakeup_prepared:1; + unsigned int d3_delay; /* D3->D0 transition time in ms */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state. */ -- cgit v1.2.3 From 59b015133cd0034f5904a76969d73476380aac46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 5 Jan 2010 17:56:02 -0800 Subject: Input: serio - fix potential deadlock when unbinding drivers sysfs_remove_group() waits for sysfs attributes to be removed, therefore we do not need to worry about driver-specific attributes being accessed after driver has been detached from the device. In fact, attempts to take serio->drv_mutex in attribute methods may lead to the following deadlock: sysfs_read_file() fill_read_buffer() sysfs_get_active_two() psmouse_attr_show_helper() serio_pin_driver() serio_disconnect_driver() mutex_lock(&serio->drv_mutex); <--------> mutex_lock(&serio_drv_mutex); psmouse_disconnect() sysfs_remove_group(... psmouse_attr_group); .... sysfs_deactivate(); wait_for_completion(); Fix this by removing calls to serio_[un]pin_driver() and functions themselves and using driver-private mutexes to serialize access to attribute's set() methods that may change device state. Signed-off-by: Eric W. 
Biederman Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/atkbd.c | 59 ++++++++++++++++---------------------- drivers/input/mouse/psmouse-base.c | 32 ++------------------- include/linux/serio.h | 19 ------------ 3 files changed, 28 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 1f5e2ce327d6..1cf32a7814d0 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -225,8 +225,10 @@ struct atkbd { struct delayed_work event_work; unsigned long event_jiffies; - struct mutex event_mutex; unsigned long event_mask; + + /* Serializes reconnect(), attr->set() and event work */ + struct mutex mutex; }; /* @@ -577,7 +579,7 @@ static void atkbd_event_work(struct work_struct *work) { struct atkbd *atkbd = container_of(work, struct atkbd, event_work.work); - mutex_lock(&atkbd->event_mutex); + mutex_lock(&atkbd->mutex); if (!atkbd->enabled) { /* @@ -596,7 +598,7 @@ static void atkbd_event_work(struct work_struct *work) atkbd_set_repeat_rate(atkbd); } - mutex_unlock(&atkbd->event_mutex); + mutex_unlock(&atkbd->mutex); } /* @@ -612,7 +614,7 @@ static void atkbd_schedule_event_work(struct atkbd *atkbd, int event_bit) atkbd->event_jiffies = jiffies; set_bit(event_bit, &atkbd->event_mask); - wmb(); + mb(); schedule_delayed_work(&atkbd->event_work, delay); } @@ -849,12 +851,13 @@ static void atkbd_disconnect(struct serio *serio) { struct atkbd *atkbd = serio_get_drvdata(serio); + sysfs_remove_group(&serio->dev.kobj, &atkbd_attribute_group); + atkbd_disable(atkbd); /* make sure we don't have a command in flight */ cancel_delayed_work_sync(&atkbd->event_work); - sysfs_remove_group(&serio->dev.kobj, &atkbd_attribute_group); input_unregister_device(atkbd->dev); serio_close(serio); serio_set_drvdata(serio, NULL); @@ -1087,7 +1090,7 @@ static int atkbd_connect(struct serio *serio, struct serio_driver *drv) atkbd->dev = dev; ps2_init(&atkbd->ps2dev, serio); INIT_DELAYED_WORK(&atkbd->event_work, atkbd_event_work); - mutex_init(&atkbd->event_mutex); + mutex_init(&atkbd->mutex); switch (serio->id.type) { @@ -1160,19 +1163,23 @@ static int atkbd_reconnect(struct serio *serio) { struct atkbd *atkbd = serio_get_drvdata(serio); struct serio_driver *drv = serio->drv; + int retval = -1; if (!atkbd || !drv) { printk(KERN_DEBUG "atkbd: reconnect request, but serio is disconnected, ignoring...\n"); return -1; } + mutex_lock(&atkbd->mutex); + atkbd_disable(atkbd); if (atkbd->write) { if (atkbd_probe(atkbd)) - return -1; + goto out; + if (atkbd->set != atkbd_select_set(atkbd, atkbd->set, atkbd->extra)) - return -1; + goto out; atkbd_activate(atkbd); @@ -1190,8 +1197,11 @@ static int atkbd_reconnect(struct serio *serio) } atkbd_enable(atkbd); + retval = 0; - return 0; + out: + mutex_unlock(&atkbd->mutex); + return retval; } static struct serio_device_id atkbd_serio_ids[] = { @@ -1235,47 +1245,28 @@ static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf, ssize_t (*handler)(struct atkbd *, char *)) { struct serio *serio = to_serio_port(dev); - int retval; - - retval = serio_pin_driver(serio); - if (retval) - return retval; - - if (serio->drv != &atkbd_drv) { - retval = -ENODEV; - goto out; - } - - retval = handler((struct atkbd *)serio_get_drvdata(serio), buf); + struct atkbd *atkbd = serio_get_drvdata(serio); -out: - serio_unpin_driver(serio); - return retval; + return handler(atkbd, buf); } static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count, ssize_t 
(*handler)(struct atkbd *, const char *, size_t)) { struct serio *serio = to_serio_port(dev); - struct atkbd *atkbd; + struct atkbd *atkbd = serio_get_drvdata(serio); int retval; - retval = serio_pin_driver(serio); + retval = mutex_lock_interruptible(&atkbd->mutex); if (retval) return retval; - if (serio->drv != &atkbd_drv) { - retval = -ENODEV; - goto out; - } - - atkbd = serio_get_drvdata(serio); atkbd_disable(atkbd); retval = handler(atkbd, buf, count); atkbd_enable(atkbd); -out: - serio_unpin_driver(serio); + mutex_unlock(&atkbd->mutex); + return retval; } diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index 401ac6b6edd4..d59e18b24ede 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -1450,24 +1450,10 @@ ssize_t psmouse_attr_show_helper(struct device *dev, struct device_attribute *de struct serio *serio = to_serio_port(dev); struct psmouse_attribute *attr = to_psmouse_attr(devattr); struct psmouse *psmouse; - int retval; - - retval = serio_pin_driver(serio); - if (retval) - return retval; - - if (serio->drv != &psmouse_drv) { - retval = -ENODEV; - goto out; - } psmouse = serio_get_drvdata(serio); - retval = attr->show(psmouse, attr->data, buf); - -out: - serio_unpin_driver(serio); - return retval; + return attr->show(psmouse, attr->data, buf); } ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *devattr, @@ -1478,18 +1464,9 @@ ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *dev struct psmouse *psmouse, *parent = NULL; int retval; - retval = serio_pin_driver(serio); - if (retval) - return retval; - - if (serio->drv != &psmouse_drv) { - retval = -ENODEV; - goto out_unpin; - } - retval = mutex_lock_interruptible(&psmouse_mutex); if (retval) - goto out_unpin; + goto out; psmouse = serio_get_drvdata(serio); @@ -1519,8 +1496,7 @@ ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *dev out_unlock: mutex_unlock(&psmouse_mutex); - out_unpin: - serio_unpin_driver(serio); + out: return retval; } @@ -1582,9 +1558,7 @@ static ssize_t psmouse_attr_set_protocol(struct psmouse *psmouse, void *data, co } mutex_unlock(&psmouse_mutex); - serio_unpin_driver(serio); serio_unregister_child_port(serio); - serio_pin_driver_uninterruptible(serio); mutex_lock(&psmouse_mutex); if (serio->drv != &psmouse_drv) { diff --git a/include/linux/serio.h b/include/linux/serio.h index e2f3044d4a4a..813d26c247ec 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -136,25 +136,6 @@ static inline void serio_continue_rx(struct serio *serio) spin_unlock_irq(&serio->lock); } -/* - * Use the following functions to pin serio's driver in process context - */ -static inline int serio_pin_driver(struct serio *serio) -{ - return mutex_lock_interruptible(&serio->drv_mutex); -} - -static inline void serio_pin_driver_uninterruptible(struct serio *serio) -{ - mutex_lock(&serio->drv_mutex); -} - -static inline void serio_unpin_driver(struct serio *serio) -{ - mutex_unlock(&serio->drv_mutex); -} - - #endif /* -- cgit v1.2.3 From 76446cac68568fc7f5168a27deaf803ed22a4360 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 17 Dec 2009 22:05:42 -0500 Subject: drm/i915: execbuf2 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new execbuf ioctl, execbuf2, for use by clients that want to control fence register allocation more finely. 
The buffer passed in to the new ioctl includes a new relocation type to indicate whether a given object needs a fence register assigned for the command buffer in question. Compatibility with the existing execbuf ioctl is implemented in terms of the new code, preserving the assumption that fence registers are required for pre-965 rendering commands. Signed-off-by: Jesse Barnes [ickle: Remove pre-emptive clear_fence_reg()] Signed-off-by: Chris Wilson Signed-off-by: Kristian HĂžgsberg [anholt: Removed dmesg spam] Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_dma.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 5 + drivers/gpu/drm/i915/i915_gem.c | 239 +++++++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem_tiling.c | 46 +++---- include/drm/i915_drm.h | 54 ++++++++ 5 files changed, 273 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index e3e5d5094232..d67be655c532 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -813,9 +813,13 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_PAGEFLIPPING: value = 1; break; + case I915_PARAM_HAS_EXECBUF2: + /* depends on GEM */ + value = dev_priv->has_gem; + break; default: DRM_DEBUG_DRIVER("Unknown parameter %d\n", - param->param); + param->param); return -EINVAL; } @@ -1646,6 +1650,7 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF(DRM_I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), + DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH), DRM_IOCTL_DEF(DRM_I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9a05f1a01025..7eb4ad51034d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -815,6 +815,8 @@ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_execbuffer2(struct drm_device *dev, void *data, + struct drm_file *file_priv); int i915_gem_pin_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_unpin_ioctl(struct drm_device *dev, void *data, @@ -881,6 +883,9 @@ void i915_gem_shrinker_exit(void); void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj); +bool i915_tiling_ok(struct drm_device *dev, int stride, int size, + int tiling_mode); +bool i915_obj_fenceable(struct drm_device *dev, struct drm_gem_object *obj); /* i915_gem_debug.c */ void i915_gem_dump_object(struct drm_gem_object *obj, int len, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9e81a0ddafac..0330c3aa8032 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3199,7 +3199,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, static int i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_file *file_priv, - struct 
drm_i915_gem_exec_object *entry, + struct drm_i915_gem_exec_object2 *entry, struct drm_i915_gem_relocation_entry *relocs) { struct drm_device *dev = obj->dev; @@ -3207,12 +3207,35 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_i915_gem_object *obj_priv = obj->driver_private; int i, ret; void __iomem *reloc_page; + bool need_fence; + + need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && + obj_priv->tiling_mode != I915_TILING_NONE; + + /* Check fence reg constraints and rebind if necessary */ + if (need_fence && !i915_obj_fenceable(dev, obj)) + i915_gem_object_unbind(obj); /* Choose the GTT offset for our buffer and put it there. */ ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); if (ret) return ret; + /* + * Pre-965 chips need a fence register set up in order to + * properly handle blits to/from tiled surfaces. + */ + if (need_fence) { + ret = i915_gem_object_get_fence_reg(obj); + if (ret != 0) { + if (ret != -EBUSY && ret != -ERESTARTSYS) + DRM_ERROR("Failure to install fence: %d\n", + ret); + i915_gem_object_unpin(obj); + return ret; + } + } + entry->offset = obj_priv->gtt_offset; /* Apply the relocations, using the GTT aperture to avoid cache @@ -3374,7 +3397,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, */ static int i915_dispatch_gem_execbuffer(struct drm_device *dev, - struct drm_i915_gem_execbuffer *exec, + struct drm_i915_gem_execbuffer2 *exec, struct drm_clip_rect *cliprects, uint64_t exec_offset) { @@ -3464,7 +3487,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) } static int -i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, +i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list, uint32_t buffer_count, struct drm_i915_gem_relocation_entry **relocs) { @@ -3479,8 +3502,10 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, } *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); - if (*relocs == NULL) + if (*relocs == NULL) { + DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count); return -ENOMEM; + } for (i = 0; i < buffer_count; i++) { struct drm_i915_gem_relocation_entry __user *user_relocs; @@ -3504,7 +3529,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, } static int -i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, +i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, uint32_t buffer_count, struct drm_i915_gem_relocation_entry *relocs) { @@ -3537,7 +3562,7 @@ err: } static int -i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec, +i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec, uint64_t exec_offset) { uint32_t exec_start, exec_len; @@ -3590,18 +3615,18 @@ i915_gem_wait_for_pending_flip(struct drm_device *dev, } int -i915_gem_execbuffer(struct drm_device *dev, void *data, - struct drm_file *file_priv) +i915_gem_do_execbuffer(struct drm_device *dev, void *data, + struct drm_file *file_priv, + struct drm_i915_gem_execbuffer2 *args, + struct drm_i915_gem_exec_object2 *exec_list) { drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_i915_gem_execbuffer *args = data; - struct drm_i915_gem_exec_object *exec_list = NULL; struct drm_gem_object **object_list = NULL; struct drm_gem_object *batch_obj; struct drm_i915_gem_object *obj_priv; struct drm_clip_rect *cliprects = NULL; struct drm_i915_gem_relocation_entry *relocs; - int ret, ret2, i, pinned = 0; + int ret = 0, ret2, i, pinned = 0; uint64_t 
exec_offset; uint32_t seqno, flush_domains, reloc_index; int pin_tries, flips; @@ -3615,25 +3640,13 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); return -EINVAL; } - /* Copy in the exec list from userland */ - exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count); - if (exec_list == NULL || object_list == NULL) { - DRM_ERROR("Failed to allocate exec or object list " - "for %d buffers\n", + if (object_list == NULL) { + DRM_ERROR("Failed to allocate object list for %d buffers\n", args->buffer_count); ret = -ENOMEM; goto pre_mutex_err; } - ret = copy_from_user(exec_list, - (struct drm_i915_relocation_entry __user *) - (uintptr_t) args->buffers_ptr, - sizeof(*exec_list) * args->buffer_count); - if (ret != 0) { - DRM_ERROR("copy %d exec entries failed %d\n", - args->buffer_count, ret); - goto pre_mutex_err; - } if (args->num_cliprects != 0) { cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects), @@ -3885,20 +3898,6 @@ err: mutex_unlock(&dev->struct_mutex); - if (!ret) { - /* Copy the new buffer offsets back to the user's exec list. */ - ret = copy_to_user((struct drm_i915_relocation_entry __user *) - (uintptr_t) args->buffers_ptr, - exec_list, - sizeof(*exec_list) * args->buffer_count); - if (ret) { - ret = -EFAULT; - DRM_ERROR("failed to copy %d exec entries " - "back to user (%d)\n", - args->buffer_count, ret); - } - } - /* Copy the updated relocations out regardless of current error * state. Failure to update the relocs would mean that the next * time userland calls execbuf, it would do so with presumed offset @@ -3915,12 +3914,158 @@ err: pre_mutex_err: drm_free_large(object_list); - drm_free_large(exec_list); kfree(cliprects); return ret; } +/* + * Legacy execbuffer just creates an exec2 list from the original exec object + * list array and passes it to the real function. 
+ */ +int +i915_gem_execbuffer(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_execbuffer *args = data; + struct drm_i915_gem_execbuffer2 exec2; + struct drm_i915_gem_exec_object *exec_list = NULL; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + int ret, i; + +#if WATCH_EXEC + DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", + (int) args->buffers_ptr, args->buffer_count, args->batch_len); +#endif + + if (args->buffer_count < 1) { + DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); + return -EINVAL; + } + + /* Copy in the exec list from userland */ + exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); + exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); + if (exec_list == NULL || exec2_list == NULL) { + DRM_ERROR("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + drm_free_large(exec_list); + drm_free_large(exec2_list); + return -ENOMEM; + } + ret = copy_from_user(exec_list, + (struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + sizeof(*exec_list) * args->buffer_count); + if (ret != 0) { + DRM_ERROR("copy %d exec entries failed %d\n", + args->buffer_count, ret); + drm_free_large(exec_list); + drm_free_large(exec2_list); + return -EFAULT; + } + + for (i = 0; i < args->buffer_count; i++) { + exec2_list[i].handle = exec_list[i].handle; + exec2_list[i].relocation_count = exec_list[i].relocation_count; + exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; + exec2_list[i].alignment = exec_list[i].alignment; + exec2_list[i].offset = exec_list[i].offset; + if (!IS_I965G(dev)) + exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; + else + exec2_list[i].flags = 0; + } + + exec2.buffers_ptr = args->buffers_ptr; + exec2.buffer_count = args->buffer_count; + exec2.batch_start_offset = args->batch_start_offset; + exec2.batch_len = args->batch_len; + exec2.DR1 = args->DR1; + exec2.DR4 = args->DR4; + exec2.num_cliprects = args->num_cliprects; + exec2.cliprects_ptr = args->cliprects_ptr; + exec2.flags = 0; + + ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list); + if (!ret) { + /* Copy the new buffer offsets back to the user's exec list. */ + for (i = 0; i < args->buffer_count; i++) + exec_list[i].offset = exec2_list[i].offset; + /* ... 
and back out to userspace */ + ret = copy_to_user((struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + exec_list, + sizeof(*exec_list) * args->buffer_count); + if (ret) { + ret = -EFAULT; + DRM_ERROR("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + } + } else { + DRM_ERROR("i915_gem_do_execbuffer returns %d\n", ret); + } + + drm_free_large(exec_list); + drm_free_large(exec2_list); + return ret; +} + +int +i915_gem_execbuffer2(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_execbuffer2 *args = data; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + int ret; + +#if WATCH_EXEC + DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", + (int) args->buffers_ptr, args->buffer_count, args->batch_len); +#endif + + if (args->buffer_count < 1) { + DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count); + return -EINVAL; + } + + exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); + if (exec2_list == NULL) { + DRM_ERROR("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + return -ENOMEM; + } + ret = copy_from_user(exec2_list, + (struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + sizeof(*exec2_list) * args->buffer_count); + if (ret != 0) { + DRM_ERROR("copy %d exec entries failed %d\n", + args->buffer_count, ret); + drm_free_large(exec2_list); + return -EFAULT; + } + + ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list); + if (!ret) { + /* Copy the new buffer offsets back to the user's exec list. */ + ret = copy_to_user((struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + exec2_list, + sizeof(*exec2_list) * args->buffer_count); + if (ret) { + ret = -EFAULT; + DRM_ERROR("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + } + } + + drm_free_large(exec2_list); + return ret; +} + int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) { @@ -3934,19 +4079,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) if (ret) return ret; } - /* - * Pre-965 chips need a fence register set up in order to - * properly handle tiled surfaces. - */ - if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) { - ret = i915_gem_object_get_fence_reg(obj); - if (ret != 0) { - if (ret != -EBUSY && ret != -ERESTARTSYS) - DRM_ERROR("Failure to install fence: %d\n", - ret); - return ret; - } - } + obj_priv->pin_count++; /* If the object is not active and not pending a flush, diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 30d6af6c09bb..df278b2685bf 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -304,35 +304,39 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) /** - * Returns the size of the fence for a tiled object of the given size. + * Returns whether an object is currently fenceable. If not, it may need + * to be unbound and have its pitch adjusted. */ -static int -i915_get_fence_size(struct drm_device *dev, int size) +bool +i915_obj_fenceable(struct drm_device *dev, struct drm_gem_object *obj) { - int i; - int start; + struct drm_i915_gem_object *obj_priv = obj->driver_private; if (IS_I965G(dev)) { /* The 965 can have fences at any page boundary. 
*/ - return ALIGN(size, 4096); + if (obj->size & 4095) + return false; + return true; + } else if (IS_I9XX(dev)) { + if (obj_priv->gtt_offset & ~I915_FENCE_START_MASK) + return false; } else { - /* Align the size to a power of two greater than the smallest - * fence size. - */ - if (IS_I9XX(dev)) - start = 1024 * 1024; - else - start = 512 * 1024; + if (obj_priv->gtt_offset & ~I830_FENCE_START_MASK) + return false; + } - for (i = start; i < size; i <<= 1) - ; + /* Power of two sized... */ + if (obj->size & (obj->size - 1)) + return false; - return i; - } + /* Objects must be size aligned as well */ + if (obj_priv->gtt_offset & (obj->size - 1)) + return false; + return true; } /* Check pitch constriants for all chips & tiling formats */ -static bool +bool i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) { int tile_width; @@ -384,12 +388,6 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (stride & (stride - 1)) return false; - /* We don't 0handle the aperture area covered by the fence being bigger - * than the object size. - */ - if (i915_get_fence_size(dev, size) != size) - return false; - return true; } diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index ec3f5e80a5df..b64a8d7cdf6d 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -188,6 +188,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_MADVISE 0x26 #define DRM_I915_OVERLAY_PUT_IMAGE 0x27 #define DRM_I915_OVERLAY_ATTRS 0x28 +#define DRM_I915_GEM_EXECBUFFER2 0x29 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -207,6 +208,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t) #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) @@ -272,6 +274,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_NUM_FENCES_AVAIL 6 #define I915_PARAM_HAS_OVERLAY 7 #define I915_PARAM_HAS_PAGEFLIPPING 8 +#define I915_PARAM_HAS_EXECBUF2 9 typedef struct drm_i915_getparam { int param; @@ -567,6 +570,57 @@ struct drm_i915_gem_execbuffer { __u64 cliprects_ptr; }; +struct drm_i915_gem_exec_object2 { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + __u32 handle; + + /** Number of relocations to be performed on this buffer */ + __u32 relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + __u64 relocs_ptr; + + /** Required alignment in graphics aperture */ + __u64 alignment; + + /** + * Returned value of the updated offset of the object, for future + * presumed_offset writes. 
+ */ + __u64 offset; + +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) + __u64 flags; + __u64 rsvd1; + __u64 rsvd2; +}; + +struct drm_i915_gem_execbuffer2 { + /** + * List of gem_exec_object2 structs + */ + __u64 buffers_ptr; + __u32 buffer_count; + + /** Offset in the batchbuffer to start execution from. */ + __u32 batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + __u32 batch_len; + __u32 DR1; + __u32 DR4; + __u32 num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ + __u64 cliprects_ptr; + __u64 flags; /* currently unused */ + __u64 rsvd1; + __u64 rsvd2; +}; + struct drm_i915_gem_pin { /** Handle of the buffer to be pinned. */ __u32 handle; -- cgit v1.2.3 From 8558e3943df1c51c3377cb4e8a52ea484d6f357d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Jan 2010 16:11:06 -0500 Subject: x86, ACPI: delete acpi_boot_table_init() return value cleanup only. setup_arch() doesn't care if ACPI initialization succeeded or failed, so delete acpi_boot_table_init()'s return value. Signed-off-by: Len Brown --- arch/x86/kernel/acpi/boot.c | 22 ++++++---------------- include/linux/acpi.h | 6 +++--- 2 files changed, 9 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fb1035cd9a6a..036d28adf59d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1529,16 +1529,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { * if acpi_blacklisted() acpi_disabled = 1; * acpi_irq_model=... * ... - * - * return value: (currently ignored) - * 0: success - * !0: failure */ -int __init acpi_boot_table_init(void) +void __init acpi_boot_table_init(void) { - int error; - dmi_check_system(acpi_dmi_table); /* @@ -1546,15 +1540,14 @@ int __init acpi_boot_table_init(void) * One exception: acpi=ht continues far enough to enumerate LAPICs */ if (acpi_disabled && !acpi_ht) - return 1; + return; /* * Initialize the ACPI boot-time table parser.
*/ - error = acpi_table_init(); - if (error) { + if (acpi_table_init()) { disable_acpi(); - return error; + return; } acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); @@ -1562,18 +1555,15 @@ int __init acpi_boot_table_init(void) /* * blacklist may disable ACPI entirely */ - error = acpi_blacklisted(); - if (error) { + if (acpi_blacklisted()) { if (acpi_force) { printk(KERN_WARNING PREFIX "acpi=force override\n"); } else { printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); disable_acpi(); - return error; + return; } } - - return 0; } int __init early_acpi_boot_init(void) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 36924255c0d5..b926afe8c03e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -80,7 +80,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); -int acpi_boot_table_init (void); +void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); @@ -321,9 +321,9 @@ static inline int acpi_boot_init(void) return 0; } -static inline int acpi_boot_table_init(void) +static inline void acpi_boot_table_init(void) { - return 0; + return; } static inline int acpi_mps_check(void) -- cgit v1.2.3 From cfe79c00a2f4f687eed8b7534d1d3d3d35540c29 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 6 Jan 2010 17:23:23 +0000 Subject: NOMMU: Avoiding duplicate icache flushes of shared maps When working with FDPIC, there are many shared mappings of read-only code regions between applications (the C library, applet packages like busybox, etc.), but the current do_mmap_pgoff() function will issue an icache flush whenever a VMA is added to an MM instead of only doing it when the map is initially created. The flush can instead be done when a region is first mmapped PROT_EXEC. Note that we may not rely on the first mapping of a region being executable - it's possible for it to be PROT_READ only, so we have to remember whether we've flushed the region or not, and then flush the entire region when a bit of it is made executable. However, this also affects the brk area. That will no longer be executable. We can mprotect() it to PROT_EXEC on MPU-mode kernels, but for NOMMU mode kernels, when it increases the brk allocation, making sys_brk() flush the extra from the icache should suffice. The brk area probably isn't used by NOMMU programs since the brk area can only use up the leavings from the stack allocation, where the stack allocation is larger than requested. Signed-off-by: David Howells Signed-off-by: Mike Frysinger Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ mm/nommu.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 84a524afb3dc..84d020bed083 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -123,6 +123,8 @@ struct vm_region { struct file *vm_file; /* the backing file or NULL */ atomic_t vm_usage; /* region usage count */ + bool vm_icache_flushed : 1; /* true if the icache has been flushed for + * this region */ }; /* diff --git a/mm/nommu.c b/mm/nommu.c index 6f9248f89bde..a8d17521624a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -432,6 +432,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) /* * Ok, looks good - let it rip. 
*/ + flush_icache_range(mm->brk, brk); + return mm->brk = brk; } @@ -1353,10 +1354,14 @@ unsigned long do_mmap_pgoff(struct file *file, share: add_vma_to_mm(current->mm, vma); - up_write(&nommu_region_sem); + /* we flush the region from the icache only when the first executable + * mapping of it is made */ + if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { + flush_icache_range(region->vm_start, region->vm_end); + region->vm_icache_flushed = true; + } - if (prot & PROT_EXEC) - flush_icache_range(result, result + len); + up_write(&nommu_region_sem); kleave(" = %lx", result); return result; -- cgit v1.2.3 From e6be8d9d17bd44061116f601fe2609b3ace7aa69 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Tue, 5 Jan 2010 11:25:05 +0800 Subject: drm: remove address mask param for drm_pci_alloc() drm_pci_alloc() takes an address mask as input for setting the pci dma mask on the device, which should be properly set up by the drm driver. Leaving it as a param for drm_pci_alloc() causes confusion, and a mistake there can corrupt the correct dma mask setting, as seen on Intel hardware where the wrong dma mask was set for the hardware status page. So remove it from the drm_pci_alloc() function. Signed-off-by: Zhenyu Wang Signed-off-by: Dave Airlie --- drivers/gpu/drm/ati_pcigart.c | 10 ++++++++-- drivers/gpu/drm/drm_bufs.c | 4 ++-- drivers/gpu/drm/drm_pci.c | 8 +------- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 2 +- include/drm/drmP.h | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ati_pcigart.c b/drivers/gpu/drm/ati_pcigart.c index 628eae3e9b83..a1fce68e3bbe 100644 --- a/drivers/gpu/drm/ati_pcigart.c +++ b/drivers/gpu/drm/ati_pcigart.c @@ -39,8 +39,7 @@ static int drm_ati_alloc_pcigart_table(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info) { gart_info->table_handle = drm_pci_alloc(dev, gart_info->table_size, - PAGE_SIZE, - gart_info->table_mask); + PAGE_SIZE); if (gart_info->table_handle == NULL) return -ENOMEM; @@ -112,6 +111,13 @@ int drm_ati_pcigart_init(struct drm_device *dev, struct drm_ati_pcigart_info *ga if (gart_info->gart_table_location == DRM_ATI_GART_MAIN) { DRM_DEBUG("PCI: no table in VRAM: using normal RAM\n"); + if (pci_set_dma_mask(dev->pdev, gart_info->table_mask)) { + DRM_ERROR("fail to set dma mask to 0x%Lx\n", + gart_info->table_mask); + ret = 1; + goto done; + } + ret = drm_ati_alloc_pcigart_table(dev, gart_info); if (ret) { DRM_ERROR("cannot allocate PCI GART page!\n"); diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c index 3d09e304f6f4..8417cc4c43f1 100644 --- a/drivers/gpu/drm/drm_bufs.c +++ b/drivers/gpu/drm/drm_bufs.c @@ -326,7 +326,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset, * As we're limiting the address to 2^32-1 (or less), * casting it down to 32 bits is no problem, but we * need to point to a 64bit variable first. */ - dmah = drm_pci_alloc(dev, map->size, map->size, 0xffffffffUL); + dmah = drm_pci_alloc(dev, map->size, map->size); if (!dmah) { kfree(map); return -ENOMEM; @@ -885,7 +885,7 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request) while (entry->buf_count < count) { - dmah = drm_pci_alloc(dev, PAGE_SIZE << page_order, 0x1000, 0xfffffffful); + dmah = drm_pci_alloc(dev, PAGE_SIZE << page_order, 0x1000); if (!dmah) { /* Set count correctly so we free the proper amount.
*/ diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 577094fb1995..e68ebf92fa2a 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -47,8 +47,7 @@ /** * \brief Allocate a PCI consistent memory block, for DMA. */ -drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align, - dma_addr_t maxaddr) +drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align) { drm_dma_handle_t *dmah; #if 1 @@ -63,11 +62,6 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t ali if (align > size) return NULL; - if (pci_set_dma_mask(dev->pdev, maxaddr) != 0) { - DRM_ERROR("Setting pci dma mask failed\n"); - return NULL; - } - dmah = kmalloc(sizeof(drm_dma_handle_t), GFP_KERNEL); if (!dmah) return NULL; diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 701bfeac7f57..02607ed61399 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -123,7 +123,7 @@ static int i915_init_phys_hws(struct drm_device *dev) drm_i915_private_t *dev_priv = dev->dev_private; /* Program Hardware Status Page */ dev_priv->status_page_dmah = - drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE, 0xffffffff); + drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE); if (!dev_priv->status_page_dmah) { DRM_ERROR("Can not allocate hardware status page\n"); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 917b8377ae28..c7f0cbec4e84 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4708,7 +4708,7 @@ int i915_gem_init_phys_object(struct drm_device *dev, phys_obj->id = id; - phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff); + phys_obj->handle = drm_pci_alloc(dev, size, 0); if (!phys_obj->handle) { ret = -ENOMEM; goto kfree_obj; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 71dafb69cfeb..ffac157fb5b2 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1408,7 +1408,7 @@ extern int drm_ati_pcigart_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info * gart_info); extern drm_dma_handle_t *drm_pci_alloc(struct drm_device *dev, size_t size, - size_t align, dma_addr_t maxaddr); + size_t align); extern void __drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah); extern void drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah); -- cgit v1.2.3 From 7ad6848c7e81a603605fad3f3575841aab004eea Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 6 Jan 2010 20:37:01 -0800 Subject: ip: fix mc_loop checks for tunnels with multicast outer addresses When we have L3 tunnels with different inner/outer families (i.e. IPV4/IPV6) which use a multicast address as the outer tunnel destination address, multicast packets will be looped back to the sending socket even if IP*_MULTICAST_LOOP is disabled. The mc_loop flag is present in the family specific part of the socket (e.g. the IPv4 or IPv6 specific part). setsockopt sets the inner family mc_loop flag. When the packet is pushed through the L3 tunnel it will eventually be processed by the outer family, which, if different, will check the flag in a different part of the socket than the one where it was set. Signed-off-by: Octavian Purdila Signed-off-by: David S.
Miller --- include/net/ip.h | 16 ++++++++++++++++ net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 3 +-- 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 85108cfbb1ae..d9a0e74d8923 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -326,6 +326,22 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif +static inline int sk_mc_loop(struct sock *sk) +{ + if (!sk) + return 1; + switch (sk->sk_family) { + case AF_INET: + return inet_sk(sk)->mc_loop; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + return inet6_sk(sk)->mc_loop; +#endif + } + __WARN(); + return 1; +} + extern int ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e34013a78ef4..3451799e3dbf 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -254,7 +254,7 @@ int ip_mc_output(struct sk_buff *skb) */ if (rt->rt_flags&RTCF_MULTICAST) { - if ((!sk || inet_sk(sk)->mc_loop) + if (sk_mc_loop(sk) #ifdef CONFIG_IP_MROUTE /* Small optimization: do not loopback not local frames, which returned after forwarding; they will be dropped diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cd48801a8d6f..eb6d09728633 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -121,10 +121,9 @@ static int ip6_output2(struct sk_buff *skb) skb->dev = dev; if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { - struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && + if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && ((mroute6_socket(dev_net(dev)) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, -- cgit v1.2.3 From 6144a85a0e018c19bc4b24f7eb6c1f3f7431813d Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 7 Jan 2010 11:58:36 -0600 Subject: maccess,probe_kernel: Allow arch specific override probe_kernel_(read|write) Some archs such as blackfin, would like to have an arch specific probe_kernel_read() and probe_kernel_write() implementation which can fall back to the generic implementation if no special operations are needed. CC: Thomas Gleixner CC: Ingo Molnar Signed-off-by: Jason Wessel Signed-off-by: Mike Frysinger --- include/linux/uaccess.h | 4 +++- mm/maccess.c | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6b58367d145e..d512d98dfb7d 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -94,6 +94,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, * happens, handle that and return -EFAULT. */ extern long probe_kernel_read(void *dst, void *src, size_t size); +extern long __probe_kernel_read(void *dst, void *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location @@ -104,6 +105,7 @@ extern long probe_kernel_read(void *dst, void *src, size_t size); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. 
*/ -extern long probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace __probe_kernel_write(void *dst, void *src, size_t size); #endif /* __LINUX_UACCESS_H__ */ diff --git a/mm/maccess.c b/mm/maccess.c index 9073695ff25f..4e348dbaecd7 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -14,7 +14,11 @@ * Safely read from address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_kernel_read(void *dst, void *src, size_t size) + +long __weak probe_kernel_read(void *dst, void *src, size_t size) + __attribute__((alias("__probe_kernel_read"))); + +long __probe_kernel_read(void *dst, void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -39,7 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long notrace __weak probe_kernel_write(void *dst, void *src, size_t size) +long __weak probe_kernel_write(void *dst, void *src, size_t size) + __attribute__((alias("__probe_kernel_write"))); + +long __probe_kernel_write(void *dst, void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); -- cgit v1.2.3 From b11e1eca7ed9c0b5dab21a62c11acc711d9bdda0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 7 Jan 2010 11:58:37 -0600 Subject: kgdb: Fix kernel-doc format error in kgdb.h linux-next-20081022//include/linux/kgdb.h:308): duplicate section name 'Description' and fix typos in that file's kernel-doc comments. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Jason Wessel --- include/linux/kgdb.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 6adcc297e354..19ec41a183f5 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -29,8 +29,7 @@ struct pt_regs; * * On some architectures it is required to skip a breakpoint * exception when it occurs after a breakpoint has been removed. - * This can be implemented in the architecture specific portion of - * for kgdb. + * This can be implemented in the architecture specific portion of kgdb. */ extern int kgdb_skipexception(int exception, struct pt_regs *regs); @@ -65,7 +64,7 @@ struct uart_port; /** * kgdb_breakpoint - compiled in breakpoint * - * This will be impelmented a static inline per architecture. This + * This will be implemented as a static inline per architecture. This * function is called by the kgdb core to execute an architecture * specific trap to cause kgdb to enter the exception processing. * @@ -190,7 +189,7 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code, * @flags: Current IRQ state * * On SMP systems, we need to get the attention of the other CPUs - * and get them be in a known state. This should do what is needed + * and get them into a known state. This should do what is needed * to get the other CPUs to call kgdb_wait(). Note that on some arches, * the NMI approach is not used for rounding up all the CPUs. For example, * in case of MIPS, smp_call_function() is used to roundup CPUs. 
In -- cgit v1.2.3 From 7970e677accb676f15e11468c60cb93ae477a513 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 7 Jan 2010 13:47:47 -0500 Subject: drm: Add eDP connector type Add a new connector type for eDP (embedded displayport) eDP is more or less the same as DP but there are some cases when you might want to handle it separately. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 1 + include/drm/drm_mode.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 4a7bbdbedfc2..fa19c2b9820f 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -158,6 +158,7 @@ static struct drm_conn_prop_enum_list drm_connector_enum_list[] = { DRM_MODE_CONNECTOR_HDMIA, "HDMI Type A", 0 }, { DRM_MODE_CONNECTOR_HDMIB, "HDMI Type B", 0 }, { DRM_MODE_CONNECTOR_TV, "TV", 0 }, + { DRM_MODE_CONNECTOR_eDP, "Embedded DisplayPort", 0 }, }; static struct drm_prop_enum_list drm_encoder_enum_list[] = diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index 43009bc2e757..bc4fdf27bd2e 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -160,6 +160,7 @@ struct drm_mode_get_encoder { #define DRM_MODE_CONNECTOR_HDMIA 11 #define DRM_MODE_CONNECTOR_HDMIB 12 #define DRM_MODE_CONNECTOR_TV 13 +#define DRM_MODE_CONNECTOR_eDP 14 struct drm_mode_get_connector { -- cgit v1.2.3 From dd3d145d49c5816b79acc6761ebbd842bc50b0ee Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 11 Jan 2010 03:21:48 -0500 Subject: block: Fix discard alignment calculation and printing Discard alignment reporting for partitions was incorrect. Update to match the algorithm used elsewhere. The alignment can be negative (misaligned). Fix format string accordingly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- include/linux/blkdev.h | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/block/genhd.c b/block/genhd.c index b11a4ad7d571..d13ba76a169c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -867,7 +867,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%u\n", queue_discard_alignment(disk->queue)); + return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue)); } static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9b98173a8184..a41bcc8e140f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1148,8 +1148,11 @@ static inline int queue_discard_alignment(struct request_queue *q) static inline int queue_sector_discard_alignment(struct request_queue *q, sector_t sector) { - return ((sector << 9) - q->limits.discard_alignment) - & (q->limits.discard_granularity - 1); + struct queue_limits *lim = &q->limits; + unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); + + return (lim->discard_granularity + lim->discard_alignment - alignment) + & (lim->discard_granularity - 1); } static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) -- cgit v1.2.3 From 17be8c245054b9c7786545af3ba3ca4e54cd4ad9 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 11 Jan 2010 03:21:49 -0500 Subject: block: bdev_stack_limits wrapper DM does not want to know about partition offsets. Add a partition-aware wrapper that DM can use when stacking block devices. Signed-off-by: Martin K. 
Petersen Acked-by: Mike Snitzer Reviewed-by: Alasdair G Kergon Signed-off-by: Jens Axboe --- block/blk-settings.c | 22 ++++++++++++++++++++++ include/linux/blkdev.h | 2 ++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 127f82551855..5eeb9e0d256e 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -639,6 +639,28 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, } EXPORT_SYMBOL(blk_stack_limits); +/** + * bdev_stack_limits - adjust queue limits for stacked drivers + * @t: the stacking driver limits (top device) + * @bdev: the component block_device (bottom) + * @start: first data sector within component device + * + * Description: + * Merges queue limits for a top device and a block_device. Returns + * 0 if alignment didn't change. Returns -1 if adding the bottom + * device caused misalignment. + */ +int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, + sector_t start) +{ + struct request_queue *bq = bdev_get_queue(bdev); + + start += get_start_sect(bdev); + + return blk_stack_limits(t, &bq->limits, start << 9); +} +EXPORT_SYMBOL(bdev_stack_limits); + /** * disk_stack_limits - adjust queue limits for stacked drivers * @disk: MD/DM gendisk (top) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a41bcc8e140f..5c8018977efa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -938,6 +938,8 @@ extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_default_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); +extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, + sector_t offset); extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); -- cgit v1.2.3 From ce289321b7dc1eb108e3df0dec872b7429ef49f7 Mon Sep 17 00:00:00 2001 From: Kirill Afonshin Date: Fri, 8 Jan 2010 22:09:59 +0300 Subject: block: removed unused as_io_context It isn't used anymore, since AS was deleted. Signed-off-by: Jens Axboe --- block/blk-ioc.c | 5 ----- include/linux/iocontext.h | 27 --------------------------- 2 files changed, 32 deletions(-) (limited to 'include') diff --git a/block/blk-ioc.c b/block/blk-ioc.c index cbdabb0dd6d7..98e6bf61b0ac 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -39,8 +39,6 @@ int put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); - if (ioc->aic && ioc->aic->dtor) - ioc->aic->dtor(ioc->aic); cfq_dtor(ioc); rcu_read_unlock(); @@ -76,8 +74,6 @@ void exit_io_context(struct task_struct *task) task_unlock(task); if (atomic_dec_and_test(&ioc->nr_tasks)) { - if (ioc->aic && ioc->aic->exit) - ioc->aic->exit(ioc->aic); cfq_exit(ioc); } @@ -97,7 +93,6 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) ret->ioprio = 0; ret->last_waited = jiffies; /* doesn't matter... */ ret->nr_batch_requests = 0; /* because this is 0 */ - ret->aic = NULL; INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ret->cic_list); ret->ioc_data = NULL; diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index a63235996309..78ef023227d4 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -4,32 +4,6 @@ #include #include -/* - * This is the per-process anticipatory I/O scheduler state. 
- */ -struct as_io_context { - spinlock_t lock; - - void (*dtor)(struct as_io_context *aic); /* destructor */ - void (*exit)(struct as_io_context *aic); /* called on task exit */ - - unsigned long state; - atomic_t nr_queued; /* queued reads & sync writes */ - atomic_t nr_dispatched; /* number of requests gone to the drivers */ - - /* IO History tracking */ - /* Thinktime */ - unsigned long last_end_request; - unsigned long ttime_total; - unsigned long ttime_samples; - unsigned long ttime_mean; - /* Layout pattern */ - unsigned int seek_samples; - sector_t last_request_pos; - u64 seek_total; - sector_t seek_mean; -}; - struct cfq_queue; struct cfq_io_context { void *key; @@ -78,7 +52,6 @@ struct io_context { unsigned long last_waited; /* Time last woken after wait for request */ int nr_batch_requests; /* Number of requests left in the batch */ - struct as_io_context *aic; struct radix_tree_root radix_root; struct hlist_head cic_list; void *ioc_data; -- cgit v1.2.3 From 7af92f8754b87bc78cbfd447d5f4096b25c46682 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 6 Jan 2010 15:45:55 -0800 Subject: genhd: overlapping variable definition This fixes the sparse warning: fs/ext4/super.c:2390:40: warning: symbol 'i' shadows an earlier one fs/ext4/super.c:2368:22: originally declared here Using 'i' in a macro is dubious practice. Signed-off-by: Stephen Hemminger Signed-off-by: Jens Axboe --- include/linux/genhd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c6c0c41af35f..9717081c75ad 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -256,9 +256,9 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, #define part_stat_read(part, field) \ ({ \ typeof((part)->dkstats->field) res = 0; \ - int i; \ - for_each_possible_cpu(i) \ - res += per_cpu_ptr((part)->dkstats, i)->field; \ + unsigned int _cpu; \ + for_each_possible_cpu(_cpu) \ + res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ res; \ }) -- cgit v1.2.3 From 4b529401c5089cf33f7165607cbc2fde43357bfb Mon Sep 17 00:00:00 2001 From: Andreas Fenkart Date: Fri, 8 Jan 2010 14:42:31 -0800 Subject: mm: make totalhigh_pages unsigned long Makes it consistent with the extern declaration, used when CONFIG_HIGHMEM is set Removes redundant casts in printout messages Signed-off-by: Andreas Fenkart Acked-by: Russell King Cc: Ralf Baechle Cc: David Howells Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/init.c | 2 +- arch/mips/mm/init.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 2 +- arch/mn10300/mm/init.c | 3 +-- arch/score/mm/init.c | 2 +- arch/x86/mm/init_32.c | 3 +-- include/linux/highmem.h | 2 +- 7 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 52c40d155672..a04ffbbbe253 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -616,7 +616,7 @@ void __init mem_init(void) "%dK data, %dK init, %luK highmem)\n", nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10, datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); if (PAGE_SIZE >= 16384 && num_physpages <= 128) { extern int sysctl_overcommit_memory; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9e8d00389eef..1651942f7feb 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -424,7 +424,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index f61c164d1e67..bc1297109cc5 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -505,5 +505,5 @@ void __init mem_init(void) (num_physpages - tmp) << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index ec1420562dc7..dd27a9a35152 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -118,8 +118,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT - 10)) - ); + totalhigh_pages << (PAGE_SHIFT - 10)); } /* diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index 8c15b2c85d5a..dfaf458d6702 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -106,7 +106,7 @@ void __init mem_init(void) ram << (PAGE_SHIFT-10), codesize >> 10, reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c973f8e2a6cf..9a0c258a86be 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -892,8 +892,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); + totalhigh_pages << (PAGE_SHIFT-10)); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 211ff4497269..ab2cc20e21a5 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -46,7 +46,7 @@ void kmap_flush_unused(void); static inline unsigned int nr_free_highpages(void) { return 0; } -#define totalhigh_pages 0 +#define totalhigh_pages 0UL #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) -- cgit v1.2.3 From e992cd9b72a18122bd5c958715623057f110793f Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 8 Jan 2010 14:42:35 -0800 Subject: kmemcheck: make 
bitfield annotations truly no-ops when disabled It turns out that even zero-sized struct members (int foo[0];) will affect the struct layout, causing us in particular to lose 4 bytes in struct sock. This patch fixes the regression in CONFIG_KMEMCHECK=n case. Reported-by: Eric Dumazet Signed-off-by: Vegard Nossum Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemcheck.h | 110 ++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index e880d4cf9e22..08d7dc4ddf40 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -36,6 +36,56 @@ int kmemcheck_hide_addr(unsigned long address); bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); +/* + * Bitfield annotations + * + * How to use: If you have a struct using bitfields, for example + * + * struct a { + * int x:8, y:8; + * }; + * + * then this should be rewritten as + * + * struct a { + * kmemcheck_bitfield_begin(flags); + * int x:8, y:8; + * kmemcheck_bitfield_end(flags); + * }; + * + * Now the "flags_begin" and "flags_end" members may be used to refer to the + * beginning and end, respectively, of the bitfield (and things like + * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- + * fields should be annotated: + * + * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); + * kmemcheck_annotate_bitfield(a, flags); + */ +#define kmemcheck_bitfield_begin(name) \ + int name##_begin[0]; + +#define kmemcheck_bitfield_end(name) \ + int name##_end[0]; + +#define kmemcheck_annotate_bitfield(ptr, name) \ + do { \ + int _n; \ + \ + if (!ptr) \ + break; \ + \ + _n = (long) &((ptr)->name##_end) \ + - (long) &((ptr)->name##_begin); \ + MAYBE_BUILD_BUG_ON(_n < 0); \ + \ + kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ + } while (0) + +#define kmemcheck_annotate_variable(var) \ + do { \ + kmemcheck_mark_initialized(&(var), sizeof(var)); \ + } while (0) \ + #else #define kmemcheck_enabled 0 @@ -106,60 +156,16 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) return true; } -#endif /* CONFIG_KMEMCHECK */ - -/* - * Bitfield annotations - * - * How to use: If you have a struct using bitfields, for example - * - * struct a { - * int x:8, y:8; - * }; - * - * then this should be rewritten as - * - * struct a { - * kmemcheck_bitfield_begin(flags); - * int x:8, y:8; - * kmemcheck_bitfield_end(flags); - * }; - * - * Now the "flags_begin" and "flags_end" members may be used to refer to the - * beginning and end, respectively, of the bitfield (and things like - * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- - * fields should be annotated: - * - * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); - * kmemcheck_annotate_bitfield(a, flags); - * - * Note: We provide the same definitions for both kmemcheck and non- - * kmemcheck kernels. This makes it harder to introduce accidental errors. It - * is also allowed to pass NULL pointers to kmemcheck_annotate_bitfield(). 
- */ -#define kmemcheck_bitfield_begin(name) \ - int name##_begin[0]; - -#define kmemcheck_bitfield_end(name) \ - int name##_end[0]; +#define kmemcheck_bitfield_begin(name) +#define kmemcheck_bitfield_end(name) +#define kmemcheck_annotate_bitfield(ptr, name) \ + do { \ + } while (0) -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - int _n; \ - \ - if (!ptr) \ - break; \ - \ - _n = (long) &((ptr)->name##_end) \ - - (long) &((ptr)->name##_begin); \ - MAYBE_BUILD_BUG_ON(_n < 0); \ - \ - kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ +#define kmemcheck_annotate_variable(var) \ + do { \ } while (0) -#define kmemcheck_annotate_variable(var) \ - do { \ - kmemcheck_mark_initialized(&(var), sizeof(var)); \ - } while (0) \ +#endif /* CONFIG_KMEMCHECK */ #endif /* LINUX_KMEMCHECK_H */ -- cgit v1.2.3 From 7dd65feb6c603e13eba501c34c662259ab38e70e Mon Sep 17 00:00:00 2001 From: Albin Tonnerre Date: Fri, 8 Jan 2010 14:42:42 -0800 Subject: lib: add support for LZO-compressed kernels This patch series adds generic support for creating and extracting LZO-compressed kernel images, as well as support for using such images on the x86 and ARM architectures, and support for creating and using LZO-compressed initrd and initramfs images. Russell King said: : Testing on a Cortex A9 model: : - lzo decompressor is 65% of the time gzip takes to decompress a kernel : - lzo kernel is 9% larger than a gzip kernel : : which I'm happy to say confirms your figures when comparing the two. : : However, when comparing your new gzip code to the old gzip code: : - new is 99% of the size of the old code : - new takes 42% of the time to decompress than the old code : : What this means is that for a proper comparison, the results get even better: : - lzo is 7.5% larger than the old gzip'd kernel image : - lzo takes 28% of the time that the old gzip code took : : So the expense seems definitely worth the effort. The only reason I : can think of ever using gzip would be if you needed the additional : compression (eg, because you have limited flash to store the image.) : : I would argue that the default for ARM should therefore be LZO. This patch: The lzo compressor is worse than gzip at compression, but faster at extraction. Here are some figures for an ARM board I'm working on: Uncompressed size: 3.24Mo gzip 1.61Mo 0.72s lzo 1.75Mo 0.48s So for a compression ratio that is still relatively close to gzip, it's much faster to extract, at least in that case. This part contains: - Makefile routine to support lzo compression - Fixes to the existing lzo compressor so that it can be used in compressed kernels - wrapper around the existing lzo1x_decompress, as it only extracts one block at a time, while we need to extract a whole file here - config dialog for kernel compression [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: cleanup] Signed-off-by: Albin Tonnerre Tested-by: Wu Zhangjin Acked-by: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Tested-by: Russell King Acked-by: Russell King Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/decompress/unlzo.h | 10 ++ init/Kconfig | 18 +++- lib/decompress_unlzo.c | 209 +++++++++++++++++++++++++++++++++++++++ lib/lzo/lzo1x_decompress.c | 9 +- scripts/Makefile.lib | 5 + 5 files changed, 244 insertions(+), 7 deletions(-) create mode 100644 include/linux/decompress/unlzo.h create mode 100644 lib/decompress_unlzo.c (limited to 'include') diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h new file mode 100644 index 000000000000..987229752519 --- /dev/null +++ b/include/linux/decompress/unlzo.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_UNLZO_H +#define DECOMPRESS_UNLZO_H + +int unlzo(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif diff --git a/init/Kconfig b/init/Kconfig index a23da9f01803..d95ca7cd5d45 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -115,10 +115,13 @@ config HAVE_KERNEL_BZIP2 config HAVE_KERNEL_LZMA bool +config HAVE_KERNEL_LZO + bool + choice prompt "Kernel compression mode" default KERNEL_GZIP - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_LZO help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ @@ -141,9 +144,8 @@ config KERNEL_GZIP bool "Gzip" depends on HAVE_KERNEL_GZIP help - The old and tried gzip compression. Its compression ratio is - the poorest among the 3 choices; however its speed (both - compression and decompression) is the fastest. + The old and tried gzip compression. It provides a good balance + between compression ratio and decompression speed. config KERNEL_BZIP2 bool "Bzip2" @@ -164,6 +166,14 @@ config KERNEL_LZMA two. Compression is slowest. The kernel size is about 33% smaller with LZMA in comparison to gzip. +config KERNEL_LZO + bool "LZO" + depends on HAVE_KERNEL_LZO + help + Its compression ratio is the poorest among the 4. The kernel + size is about about 10% bigger than gzip; however its speed + (both compression and decompression) is the fastest. + endchoice config SWAP diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c new file mode 100644 index 000000000000..db521f45626e --- /dev/null +++ b/lib/decompress_unlzo.c @@ -0,0 +1,209 @@ +/* + * LZO decompressor for the Linux kernel. Code borrowed from the lzo + * implementation by Markus Franz Xaver Johannes Oberhumer. + * + * Linux kernel adaptation: + * Copyright (C) 2009 + * Albin Tonnerre, Free Electrons + * + * Original code: + * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer + * All Rights Reserved. + * + * lzop and the LZO library are free software; you can redistribute them + * and/or modify them under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. 
+ * If not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Markus F.X.J. Oberhumer + * + * http://www.oberhumer.com/opensource/lzop/ + */ + +#ifdef STATIC +#include "lzo/lzo1x_decompress.c" +#else +#include +#include +#endif + +#include +#include +#include + +#include +#include + +static const unsigned char lzop_magic[] = { + 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a }; + +#define LZO_BLOCK_SIZE (256*1024l) +#define HEADER_HAS_FILTER 0x00000800L + +STATIC inline int INIT parse_header(u8 *input, u8 *skip) +{ + int l; + u8 *parse = input; + u8 level = 0; + u16 version; + + /* read magic: 9 first bits */ + for (l = 0; l < 9; l++) { + if (*parse++ != lzop_magic[l]) + return 0; + } + /* get version (2bytes), skip library version (2), + * 'need to be extracted' version (2) and + * method (1) */ + version = get_unaligned_be16(parse); + parse += 7; + if (version >= 0x0940) + level = *parse++; + if (get_unaligned_be32(parse) & HEADER_HAS_FILTER) + parse += 8; /* flags + filter info */ + else + parse += 4; /* flags */ + + /* skip mode and mtime_low */ + parse += 8; + if (version >= 0x0940) + parse += 4; /* skip mtime_high */ + + l = *parse++; + /* don't care about the file name, and skip checksum */ + parse += l + 4; + + *skip = parse - input; + return 1; +} + +STATIC inline int INIT unlzo(u8 *input, int in_len, + int (*fill) (void *, unsigned int), + int (*flush) (void *, unsigned int), + u8 *output, int *posp, + void (*error_fn) (char *x)) +{ + u8 skip = 0, r = 0; + u32 src_len, dst_len; + size_t tmp; + u8 *in_buf, *in_buf_save, *out_buf; + int obytes_processed = 0; + + set_error_fn(error_fn); + + if (output) { + out_buf = output; + } else if (!flush) { + error("NULL output pointer and no flush function provided"); + goto exit; + } else { + out_buf = malloc(LZO_BLOCK_SIZE); + if (!out_buf) { + error("Could not allocate output buffer"); + goto exit; + } + } + + if (input && fill) { + error("Both input pointer and fill function provided, don't know what to do"); + goto exit_1; + } else if (input) { + in_buf = input; + } else if (!fill || !posp) { + error("NULL input pointer and missing position pointer or fill function"); + goto exit_1; + } else { + in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE)); + if (!in_buf) { + error("Could not allocate input buffer"); + goto exit_1; + } + } + in_buf_save = in_buf; + + if (posp) + *posp = 0; + + if (fill) + fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + + if (!parse_header(input, &skip)) { + error("invalid header"); + goto exit_2; + } + in_buf += skip; + + if (posp) + *posp = skip; + + for (;;) { + /* read uncompressed block size */ + dst_len = get_unaligned_be32(in_buf); + in_buf += 4; + + /* exit if last block */ + if (dst_len == 0) { + if (posp) + *posp += 4; + break; + } + + if (dst_len > LZO_BLOCK_SIZE) { + error("dest len longer than block size"); + goto exit_2; + } + + /* read compressed block size, and skip block checksum info */ + src_len = get_unaligned_be32(in_buf); + in_buf += 8; + + if (src_len <= 0 || src_len > dst_len) { + error("file corrupted"); + goto exit_2; + } + + /* decompress */ + tmp = dst_len; + r = lzo1x_decompress_safe((u8 *) in_buf, src_len, + out_buf, &tmp); + + if (r != LZO_E_OK || dst_len != tmp) { + error("Compressed data violation"); + goto exit_2; + } + + obytes_processed += dst_len; + if (flush) + flush(out_buf, dst_len); + if (output) + out_buf += dst_len; + if (posp) + *posp += src_len + 12; + if (fill) { + in_buf = in_buf_save; + 
fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + } else + in_buf += src_len; + } + +exit_2: + if (!input) + free(in_buf); +exit_1: + if (!output) + free(out_buf); +exit: + return obytes_processed; +} + +#define decompress unlzo diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c index 5dc6b29c1575..f2fd09850223 100644 --- a/lib/lzo/lzo1x_decompress.c +++ b/lib/lzo/lzo1x_decompress.c @@ -11,11 +11,13 @@ * Richard Purdie */ +#ifndef STATIC #include #include -#include -#include +#endif + #include +#include #include "lzodefs.h" #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) @@ -244,9 +246,10 @@ lookbehind_overrun: *out_len = op - out; return LZO_E_LOOKBEHIND_OVERRUN; } - +#ifndef STATIC EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X Decompressor"); +#endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index cd815ac2a50b..0fe48cd91ffa 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -235,3 +235,8 @@ quiet_cmd_lzma = LZMA $@ cmd_lzma = (cat $(filter-out FORCE,$^) | \ lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ (rm -f $@ ; false) + +quiet_cmd_lzo = LZO $@ +cmd_lzo = (cat $(filter-out FORCE,$^) | \ + lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) -- cgit v1.2.3 From 80884094e34456887ecdbd107d40e72c4a40f9c9 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Fri, 8 Jan 2010 14:43:08 -0800 Subject: gpio: adp5588-gpio: new driver for ADP5588 GPIO expanders Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Cc: Jean Delvare Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 9 ++ drivers/gpio/Makefile | 1 + drivers/gpio/adp5588-gpio.c | 266 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/i2c/adp5588.h | 12 ++ 4 files changed, 288 insertions(+) create mode 100644 drivers/gpio/adp5588-gpio.c (limited to 'include') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index a019b49ecc9b..1f1d88ae68d6 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -172,6 +172,15 @@ config GPIO_ADP5520 To compile this driver as a module, choose M here: the module will be called adp5520-gpio. +config GPIO_ADP5588 + tristate "ADP5588 I2C GPIO expander" + depends on I2C + help + This option enables support for 18 GPIOs found + on Analog Devices ADP5588 GPIO Expanders. + To compile this driver as a module, choose M here: the module will be + called adp5588-gpio. + comment "PCI GPIO expanders:" config GPIO_CS5535 diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 52fe4cf734c7..48687238edb1 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -5,6 +5,7 @@ ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIO_ADP5520) += adp5520-gpio.o +obj-$(CONFIG_GPIO_ADP5588) += adp5588-gpio.o obj-$(CONFIG_GPIO_LANGWELL) += langwell_gpio.o obj-$(CONFIG_GPIO_MAX7301) += max7301.o obj-$(CONFIG_GPIO_MAX732X) += max732x.o diff --git a/drivers/gpio/adp5588-gpio.c b/drivers/gpio/adp5588-gpio.c new file mode 100644 index 000000000000..afc097a16b33 --- /dev/null +++ b/drivers/gpio/adp5588-gpio.c @@ -0,0 +1,266 @@ +/* + * GPIO Chip driver for Analog Devices + * ADP5588 I/O Expander and QWERTY Keypad Controller + * + * Copyright 2009 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#include +#include +#include +#include +#include + +#include + +#define DRV_NAME "adp5588-gpio" +#define MAXGPIO 18 +#define ADP_BANK(offs) ((offs) >> 3) +#define ADP_BIT(offs) (1u << ((offs) & 0x7)) + +struct adp5588_gpio { + struct i2c_client *client; + struct gpio_chip gpio_chip; + struct mutex lock; /* protect cached dir, dat_out */ + unsigned gpio_start; + uint8_t dat_out[3]; + uint8_t dir[3]; +}; + +static int adp5588_gpio_read(struct i2c_client *client, u8 reg) +{ + int ret = i2c_smbus_read_byte_data(client, reg); + + if (ret < 0) + dev_err(&client->dev, "Read Error\n"); + + return ret; +} + +static int adp5588_gpio_write(struct i2c_client *client, u8 reg, u8 val) +{ + int ret = i2c_smbus_write_byte_data(client, reg, val); + + if (ret < 0) + dev_err(&client->dev, "Write Error\n"); + + return ret; +} + +static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off) +{ + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + return !!(adp5588_gpio_read(dev->client, GPIO_DAT_STAT1 + ADP_BANK(off)) + & ADP_BIT(off)); +} + +static void adp5588_gpio_set_value(struct gpio_chip *chip, + unsigned off, int val) +{ + unsigned bank, bit; + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + bank = ADP_BANK(off); + bit = ADP_BIT(off); + + mutex_lock(&dev->lock); + if (val) + dev->dat_out[bank] |= bit; + else + dev->dat_out[bank] &= ~bit; + + adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank, + dev->dat_out[bank]); + mutex_unlock(&dev->lock); +} + +static int adp5588_gpio_direction_input(struct gpio_chip *chip, unsigned off) +{ + int ret; + unsigned bank; + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + bank = ADP_BANK(off); + + mutex_lock(&dev->lock); + dev->dir[bank] &= ~ADP_BIT(off); + ret = adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, dev->dir[bank]); + mutex_unlock(&dev->lock); + + return ret; +} + +static int adp5588_gpio_direction_output(struct gpio_chip *chip, + unsigned off, int val) +{ + int ret; + unsigned bank, bit; + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + bank = ADP_BANK(off); + bit = ADP_BIT(off); + + mutex_lock(&dev->lock); + dev->dir[bank] |= bit; + + if (val) + dev->dat_out[bank] |= bit; + else + dev->dat_out[bank] &= ~bit; + + ret = adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank, + dev->dat_out[bank]); + ret |= adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, + dev->dir[bank]); + mutex_unlock(&dev->lock); + + return ret; +} + +static int __devinit adp5588_gpio_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adp5588_gpio_platform_data *pdata = client->dev.platform_data; + struct adp5588_gpio *dev; + struct gpio_chip *gc; + int ret, i, revid; + + if (pdata == NULL) { + dev_err(&client->dev, "missing platform data\n"); + return -ENODEV; + } + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "SMBUS Byte Data not Supported\n"); + return -EIO; + } + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (dev == NULL) { + dev_err(&client->dev, "failed to alloc memory\n"); + return -ENOMEM; + } + + dev->client = client; + + gc = &dev->gpio_chip; + gc->direction_input = adp5588_gpio_direction_input; + gc->direction_output = adp5588_gpio_direction_output; + gc->get = adp5588_gpio_get_value; + gc->set = adp5588_gpio_set_value; + gc->can_sleep = 1; + + gc->base = pdata->gpio_start; + gc->ngpio = MAXGPIO; + gc->label = client->name; + 
gc->owner = THIS_MODULE; + + mutex_init(&dev->lock); + + + ret = adp5588_gpio_read(dev->client, DEV_ID); + if (ret < 0) + goto err; + + revid = ret & ADP5588_DEVICE_ID_MASK; + + for (i = 0, ret = 0; i <= ADP_BANK(MAXGPIO); i++) { + dev->dat_out[i] = adp5588_gpio_read(client, GPIO_DAT_OUT1 + i); + dev->dir[i] = adp5588_gpio_read(client, GPIO_DIR1 + i); + ret |= adp5588_gpio_write(client, KP_GPIO1 + i, 0); + ret |= adp5588_gpio_write(client, GPIO_PULL1 + i, + (pdata->pullup_dis_mask >> (8 * i)) & 0xFF); + + if (ret) + goto err; + } + + ret = gpiochip_add(&dev->gpio_chip); + if (ret) + goto err; + + dev_info(&client->dev, "gpios %d..%d on a %s Rev. %d\n", + gc->base, gc->base + gc->ngpio - 1, + client->name, revid); + + if (pdata->setup) { + ret = pdata->setup(client, gc->base, gc->ngpio, pdata->context); + if (ret < 0) + dev_warn(&client->dev, "setup failed, %d\n", ret); + } + + i2c_set_clientdata(client, dev); + return 0; + +err: + kfree(dev); + return ret; +} + +static int __devexit adp5588_gpio_remove(struct i2c_client *client) +{ + struct adp5588_gpio_platform_data *pdata = client->dev.platform_data; + struct adp5588_gpio *dev = i2c_get_clientdata(client); + int ret; + + if (pdata->teardown) { + ret = pdata->teardown(client, + dev->gpio_chip.base, dev->gpio_chip.ngpio, + pdata->context); + if (ret < 0) { + dev_err(&client->dev, "teardown failed %d\n", ret); + return ret; + } + } + + ret = gpiochip_remove(&dev->gpio_chip); + if (ret) { + dev_err(&client->dev, "gpiochip_remove failed %d\n", ret); + return ret; + } + + kfree(dev); + return 0; +} + +static const struct i2c_device_id adp5588_gpio_id[] = { + {DRV_NAME, 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, adp5588_gpio_id); + +static struct i2c_driver adp5588_gpio_driver = { + .driver = { + .name = DRV_NAME, + }, + .probe = adp5588_gpio_probe, + .remove = __devexit_p(adp5588_gpio_remove), + .id_table = adp5588_gpio_id, +}; + +static int __init adp5588_gpio_init(void) +{ + return i2c_add_driver(&adp5588_gpio_driver); +} + +module_init(adp5588_gpio_init); + +static void __exit adp5588_gpio_exit(void) +{ + i2c_del_driver(&adp5588_gpio_driver); +} + +module_exit(adp5588_gpio_exit); + +MODULE_AUTHOR("Michael Hennerich "); +MODULE_DESCRIPTION("GPIO ADP5588 Driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index fc5db826b48e..02c9af374741 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -89,4 +89,16 @@ struct adp5588_kpad_platform_data { unsigned short unlock_key2; /* Unlock Key 2 */ }; +struct adp5588_gpio_platform_data { + unsigned gpio_start; /* GPIO Chip base # */ + unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ + int (*setup)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + void *context; +}; + #endif -- cgit v1.2.3 From a29815a333c6c6e677294bbe5958e771d0aad3fd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 10 Jan 2010 16:28:09 +0200 Subject: core, x86: make LIST_POISON less deadly The list macros use LIST_POISON1 and LIST_POISON2 as undereferencable pointers in order to trap erronous use of freed list_heads. Unfortunately userspace can arrange for those pointers to actually be dereferencable, potentially turning an oops to an expolit. To avoid this allow architectures (currently x86_64 only) to override the default values for these pointers with truly-undereferencable values. 
This is easy on x86_64 as the virtual address space is large and contains areas that cannot be mapped. Other 64-bit architectures will likely find similar unmapped ranges. [ingo: switch to 0xdead000000000000 as the unmapped area] [ingo: add comments, cleanup] [jaswinder: eliminate sparse warnings] Acked-by: Linus Torvalds Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar Signed-off-by: Avi Kivity Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 5 +++++ include/linux/poison.h | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6bf1f1ac478c..cbcbfdee3ee0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1247,6 +1247,11 @@ config ARCH_MEMORY_PROBE def_bool X86_64 depends on MEMORY_HOTPLUG +config ILLEGAL_POINTER_VALUE + hex + default 0 if X86_32 + default 0xdead000000000000 if X86_64 + source "mm/Kconfig" config HIGHPTE diff --git a/include/linux/poison.h b/include/linux/poison.h index 7fc194aef8c2..2110a81c5e2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -2,13 +2,25 @@ #define _LINUX_POISON_H /********** include/linux/list.h **********/ + +/* + * Architectures might want to move the poison pointer offset + * into some well-recognized area such as 0xdead000000000000, + * that is also not mappable by user-space exploits: + */ +#ifdef CONFIG_ILLEGAL_POINTER_VALUE +# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL) +#else +# define POISON_POINTER_DELTA 0 +#endif + /* * These are non-NULL pointers that will result in page faults * under normal circumstances, used to verify that nobody uses * non-initialized list entries. */ -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) +#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ /* -- cgit v1.2.3 From a393db6f10ef2d4f28257234cfc730e744dfb6a4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Dec 2009 13:35:52 +0100 Subject: drbd: Allow online resizing of DRBD devices while peer not reachable (needs to be explicitly forced) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 ++--- drivers/block/drbd/drbd_nl.c | 17 +++++++++++------ drivers/block/drbd/drbd_receiver.c | 4 ++-- include/linux/drbd_nl.h | 1 + 4 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 79d8e22c4d0d..2bf3a6ef3684 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1371,10 +1371,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); -extern sector_t drbd_new_dev_size(struct drbd_conf *, - struct drbd_backing_dev *); +extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; -extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *) __must_hold(local); +extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, int force) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); extern void drbd_setup_queue_param(struct drbd_conf *mdev, 
unsigned int) __must_hold(local); extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3313901a4861..1292e0620663 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -510,7 +510,7 @@ void drbd_resume_io(struct drbd_conf *mdev) * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. */ -enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local) +enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, int force) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ sector_t la_size; @@ -541,7 +541,7 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_ho /* TODO: should only be some assert here, not (re)init... */ drbd_md_set_sector_offsets(mdev, mdev->ldev); - size = drbd_new_dev_size(mdev, mdev->ldev); + size = drbd_new_dev_size(mdev, mdev->ldev, force); if (drbd_get_capacity(mdev->this_bdev) != size || drbd_bm_capacity(mdev) != size) { @@ -596,7 +596,7 @@ out: } sector_t -drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space) { sector_t p_size = mdev->p_size; /* partner's disk size. */ sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */ @@ -606,6 +606,11 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) m_size = drbd_get_max_capacity(bdev); + if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) { + dev_warn(DEV, "Resize while not connected was forced by the user!\n"); + p_size = m_size; + } + if (p_size && m_size) { size = min_t(sector_t, p_size, m_size); } else { @@ -965,7 +970,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* Prevent shrinking of consistent devices ! */ if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && - drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) { + drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) { dev_warn(DEV, "refusing to truncate a consistent device\n"); retcode = ERR_DISK_TO_SMALL; goto force_diskless_dec; @@ -1052,7 +1057,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) set_bit(USE_DEGR_WFC_T, &mdev->flags); - dd = drbd_determin_dev_size(mdev); + dd = drbd_determin_dev_size(mdev, 0); if (dd == dev_size_error) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; @@ -1504,7 +1509,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; - dd = drbd_determin_dev_size(mdev); + dd = drbd_determin_dev_size(mdev, rs.resize_force); drbd_md_sync(mdev); put_ldev(mdev); if (dd == dev_size_error) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e3716fadc6a5..f22a5283128a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2870,7 +2870,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) /* Never shrink a device with usable data during connect. But allow online shrinking if we are connected. 
*/ - if (drbd_new_dev_size(mdev, mdev->ldev) < + if (drbd_new_dev_size(mdev, mdev->ldev, 0) < drbd_get_capacity(mdev->this_bdev) && mdev->state.disk >= D_OUTDATED && mdev->state.conn < C_CONNECTED) { @@ -2885,7 +2885,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) #undef min_not_zero if (get_ldev(mdev)) { - dd = drbd_determin_dev_size(mdev); + dd = drbd_determin_dev_size(mdev, 0); put_ldev(mdev); if (dd == dev_size_error) return FALSE; diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index db5721ad50d1..a4d82f895994 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -69,6 +69,7 @@ NL_PACKET(disconnect, 6, ) NL_PACKET(resize, 7, NL_INT64( 29, T_MAY_IGNORE, resize_size) + NL_BIT( 68, T_MAY_IGNORE, resize_force) ) NL_PACKET(syncer_conf, 8, -- cgit v1.2.3 From 2ebccd71a71e6078920bc65b40f120e72b71c2b6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 12 Jan 2010 10:09:07 +0100 Subject: drbd: The kernel code is now equivalent to out of tree release 8.3.7 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index e84f4733cb55..78962272338a 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.6" +#define REL_VERSION "8.3.7" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 91 -- cgit v1.2.3 From 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 11 Jan 2010 11:14:44 +0900 Subject: libata: retry link resume if necessary Interestingly, when SIDPR is used in ata_piix, writes to DET in SControl sometimes get ignored leading to detection failure. Update sata_link_resume() such that it reads back SControl after clearing DET and retry if it's not clear. Signed-off-by: Tejun Heo Reported-by: fengxiangjun Reported-by: Jim Faulkner Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 38 +++++++++++++++++++++++++++++++------- include/linux/libata.h | 3 +++ 2 files changed, 34 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 22ff51bdbc8a..6728328f3bea 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3790,21 +3790,45 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params, int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline) { + int tries = ATA_LINK_RESUME_TRIES; u32 scontrol, serror; int rc; if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol))) return rc; - scontrol = (scontrol & 0x0f0) | 0x300; + /* + * Writes to SControl sometimes get ignored under certain + * controllers (ata_piix SIDPR). Make sure DET actually is + * cleared. + */ + do { + scontrol = (scontrol & 0x0f0) | 0x300; + if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol))) + return rc; + /* + * Some PHYs react badly if SStatus is pounded + * immediately after resuming. Delay 200ms before + * debouncing. + */ + msleep(200); - if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol))) - return rc; + /* is SControl restored correctly? */ + if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol))) + return rc; + } while ((scontrol & 0xf0f) != 0x300 && --tries); - /* Some PHYs react badly if SStatus is pounded immediately - * after resuming. Delay 200ms before debouncing. 
- */ - msleep(200); + if ((scontrol & 0xf0f) != 0x300) { + ata_link_printk(link, KERN_ERR, + "failed to resume link (SControl %X)\n", + scontrol); + return 0; + } + + if (tries < ATA_LINK_RESUME_TRIES) + ata_link_printk(link, KERN_WARNING, + "link resume succeeded after %d retries\n", + ATA_LINK_RESUME_TRIES - tries); if ((rc = sata_link_debounce(link, params, deadline))) return rc; diff --git a/include/linux/libata.h b/include/linux/libata.h index 6a9c4ddd3d95..73112250862c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -354,6 +354,9 @@ enum { /* max tries if error condition is still set after ->error_handler */ ATA_EH_MAX_TRIES = 5, + /* sometimes resuming a link requires several retries */ + ATA_LINK_RESUME_TRIES = 5, + /* how hard are we gonna try to probe/recover devices */ ATA_PROBE_MAX_TRIES = 3, ATA_EH_DEV_TRIES = 3, -- cgit v1.2.3 From 2c761270d5520dd84ab0b4e47c24d99ff8503c38 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 12 Jan 2010 17:39:16 +1100 Subject: lib: Introduce generic list_sort function There are two copies of list_sort() in the tree already, one in the DRM code, another in ubifs. Now XFS needs this as well. Create a generic list_sort() function from the ubifs version and convert existing users to it so we don't end up with yet another copy in the tree. Signed-off-by: Dave Chinner Acked-by: Dave Airlie Acked-by: Artem Bityutskiy Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_modes.c | 90 ++------------------------------------ fs/ubifs/gc.c | 96 +---------------------------------------- include/linux/list_sort.h | 11 +++++ lib/Makefile | 2 +- lib/list_sort.c | 102 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 119 insertions(+), 182 deletions(-) create mode 100644 include/linux/list_sort.h create mode 100644 lib/list_sort.c (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 6d81a02463a3..76d63394c776 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1,9 +1,4 @@ /* - * The list_sort function is (presumably) licensed under the GPL (see the - * top level "COPYING" file for details). - * - * The remainder of this file is: - * * Copyright © 1997-2003 by The XFree86 Project, Inc. * Copyright © 2007 Dave Airlie * Copyright © 2007-2008 Intel Corporation @@ -36,6 +31,7 @@ */ #include +#include #include "drmP.h" #include "drm.h" #include "drm_crtc.h" @@ -855,6 +851,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid); /** * drm_mode_compare - compare modes for favorability + * @priv: unused * @lh_a: list_head for first mode * @lh_b: list_head for second mode * @@ -868,7 +865,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid); * Negative if @lh_a is better than @lh_b, zero if they're equivalent, or * positive if @lh_b is better than @lh_a. */ -static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b) +static int drm_mode_compare(void *priv, struct list_head *lh_a, struct list_head *lh_b) { struct drm_display_mode *a = list_entry(lh_a, struct drm_display_mode, head); struct drm_display_mode *b = list_entry(lh_b, struct drm_display_mode, head); @@ -885,85 +882,6 @@ static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b) return diff; } -/* FIXME: what we don't have a list sort function? 
*/ -/* list sort from Mark J Roberts (mjr@znex.org) */ -void list_sort(struct list_head *head, - int (*cmp)(struct list_head *a, struct list_head *b)) -{ - struct list_head *p, *q, *e, *list, *tail, *oldhead; - int insize, nmerges, psize, qsize, i; - - list = head->next; - list_del(head); - insize = 1; - for (;;) { - p = oldhead = list; - list = tail = NULL; - nmerges = 0; - - while (p) { - nmerges++; - q = p; - psize = 0; - for (i = 0; i < insize; i++) { - psize++; - q = q->next == oldhead ? NULL : q->next; - if (!q) - break; - } - - qsize = insize; - while (psize > 0 || (qsize > 0 && q)) { - if (!psize) { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } else if (!qsize || !q) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else if (cmp(p, q) <= 0) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } - if (tail) - tail->next = e; - else - list = e; - e->prev = tail; - tail = e; - } - p = q; - } - - tail->next = list; - list->prev = tail; - - if (nmerges <= 1) - break; - - insize *= 2; - } - - head->next = list; - head->prev = list->prev; - list->prev->next = head; - list->prev = head; -} - /** * drm_mode_sort - sort mode list * @mode_list: list to sort @@ -975,7 +893,7 @@ void list_sort(struct list_head *head, */ void drm_mode_sort(struct list_head *mode_list) { - list_sort(mode_list, drm_mode_compare); + list_sort(NULL, mode_list, drm_mode_compare); } EXPORT_SYMBOL(drm_mode_sort); diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 618c2701d3a7..e5a3d8e96bb7 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -54,6 +54,7 @@ */ #include +#include #include "ubifs.h" /* @@ -107,101 +108,6 @@ static int switch_gc_head(struct ubifs_info *c) return err; } -/** - * list_sort - sort a list. - * @priv: private data, passed to @cmp - * @head: the list to sort - * @cmp: the elements comparison function - * - * This function has been implemented by Mark J Roberts . It - * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted - * in ascending order. - * - * The comparison function @cmp is supposed to return a negative value if @a is - * than @b, and a positive value if @a is greater than @b. If @a and @b are - * equivalent, then it does not matter what this function returns. - */ -static void list_sort(void *priv, struct list_head *head, - int (*cmp)(void *priv, struct list_head *a, - struct list_head *b)) -{ - struct list_head *p, *q, *e, *list, *tail, *oldhead; - int insize, nmerges, psize, qsize, i; - - if (list_empty(head)) - return; - - list = head->next; - list_del(head); - insize = 1; - for (;;) { - p = oldhead = list; - list = tail = NULL; - nmerges = 0; - - while (p) { - nmerges++; - q = p; - psize = 0; - for (i = 0; i < insize; i++) { - psize++; - q = q->next == oldhead ? 
NULL : q->next; - if (!q) - break; - } - - qsize = insize; - while (psize > 0 || (qsize > 0 && q)) { - if (!psize) { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } else if (!qsize || !q) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else if (cmp(priv, p, q) <= 0) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } - if (tail) - tail->next = e; - else - list = e; - e->prev = tail; - tail = e; - } - p = q; - } - - tail->next = list; - list->prev = tail; - - if (nmerges <= 1) - break; - - insize *= 2; - } - - head->next = list; - head->prev = list->prev; - list->prev->next = head; - list->prev = head; -} - /** * data_nodes_cmp - compare 2 data nodes. * @priv: UBIFS file-system description object diff --git a/include/linux/list_sort.h b/include/linux/list_sort.h new file mode 100644 index 000000000000..1a2df2efb771 --- /dev/null +++ b/include/linux/list_sort.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_LIST_SORT_H +#define _LINUX_LIST_SORT_H + +#include + +struct list_head; + +void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)); +#endif diff --git a/lib/Makefile b/lib/Makefile index 911b25aed1e7..3b0b4a696db9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o + string_helpers.o gcd.o list_sort.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/list_sort.c b/lib/list_sort.c new file mode 100644 index 000000000000..19d11e0bb958 --- /dev/null +++ b/lib/list_sort.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include + +/** + * list_sort - sort a list. + * @priv: private data, passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function + * + * This function has been implemented by Mark J Roberts . It + * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted + * in ascending order. + * + * The comparison function @cmp is supposed to return a negative value if @a is + * less than @b, and a positive value if @a is greater than @b. If @a and @b + * are equivalent, then it does not matter what this function returns. + */ +void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)) +{ + struct list_head *p, *q, *e, *list, *tail, *oldhead; + int insize, nmerges, psize, qsize, i; + + if (list_empty(head)) + return; + + list = head->next; + list_del(head); + insize = 1; + for (;;) { + p = oldhead = list; + list = tail = NULL; + nmerges = 0; + + while (p) { + nmerges++; + q = p; + psize = 0; + for (i = 0; i < insize; i++) { + psize++; + q = q->next == oldhead ? 
NULL : q->next; + if (!q) + break; + } + + qsize = insize; + while (psize > 0 || (qsize > 0 && q)) { + if (!psize) { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } else if (!qsize || !q) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else if (cmp(priv, p, q) <= 0) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } + if (tail) + tail->next = e; + else + list = e; + e->prev = tail; + tail = e; + } + p = q; + } + + tail->next = list; + list->prev = tail; + + if (nmerges <= 1) + break; + + insize *= 2; + } + + head->next = list; + head->prev = list->prev; + list->prev->next = head; + list->prev = head; +} + +EXPORT_SYMBOL(list_sort); -- cgit v1.2.3 From f6a8c60960bbea378142d1fa1b3d111555ee41c7 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Nov 2009 15:23:51 +0000 Subject: mtd: Really add ARM pismo support (Commit 7cb777a3d71f9d1f7eb149c7a504d21f24219ae8 (mtd: add ARM pismo support) intended to add this, but seems only to have patched the Makefile without touching Kconfig or providing any code...) The following patch adds support for PISMO modules found on ARM Ltd development platforms. These are MTD modules, and can have a selection of SRAM, flash or DOC devices as described by an on-board I2C EEPROM. We support SRAM and NOR flash devices only by registering appropriate conventional MTD platform devices as children of the 'pismo' device. Signed-off-by: Russell King Signed-off-by: David Woodhouse --- drivers/mtd/maps/Kconfig | 17 +++ drivers/mtd/maps/pismo.c | 320 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/pismo.h | 17 +++ 3 files changed, 354 insertions(+) create mode 100644 drivers/mtd/maps/pismo.c create mode 100644 include/linux/mtd/pismo.h (limited to 'include') diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 4c364d44ad59..2de0cc823d60 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -549,4 +549,21 @@ config MTD_VMU To build this as a module select M here, the module will be called vmu-flash. +config MTD_PISMO + tristate "MTD discovery driver for PISMO modules" + depends on I2C + depends on ARCH_VERSATILE + help + This driver allows for discovery of PISMO modules - see + . These are small modules containing + up to five memory devices (eg, SRAM, flash, DOC) described by an + I2C EEPROM. + + This driver does not create any MTD maps itself; instead it + creates MTD physmap and MTD SRAM platform devices. If you + enable this option, you should consider enabling MTD_PHYSMAP + and/or MTD_PLATRAM according to the devices on your module. + + When built as a module, it will be called pismo.ko + endmenu diff --git a/drivers/mtd/maps/pismo.c b/drivers/mtd/maps/pismo.c new file mode 100644 index 000000000000..c48cad271f5d --- /dev/null +++ b/drivers/mtd/maps/pismo.c @@ -0,0 +1,320 @@ +/* + * PISMO memory driver - http://www.pismoworld.org/ + * + * For ARM Realview and Versatile platforms + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PISMO_NUM_CS 5 + +struct pismo_cs_block { + u8 type; + u8 width; + __le16 access; + __le32 size; + u32 reserved[2]; + char device[32]; +} __packed; + +struct pismo_eeprom { + struct pismo_cs_block cs[PISMO_NUM_CS]; + char board[15]; + u8 sum; +} __packed; + +struct pismo_mem { + phys_addr_t base; + u32 size; + u16 access; + u8 width; + u8 type; +}; + +struct pismo_data { + struct i2c_client *client; + void (*vpp)(void *, int); + void *vpp_data; + struct platform_device *dev[PISMO_NUM_CS]; +}; + +/* FIXME: set_vpp could do with a better calling convention */ +static struct pismo_data *vpp_pismo; +static DEFINE_MUTEX(pismo_mutex); + +static int pismo_setvpp_probe_fix(struct pismo_data *pismo) +{ + mutex_lock(&pismo_mutex); + if (vpp_pismo) { + mutex_unlock(&pismo_mutex); + kfree(pismo); + return -EBUSY; + } + vpp_pismo = pismo; + mutex_unlock(&pismo_mutex); + return 0; +} + +static void pismo_setvpp_remove_fix(struct pismo_data *pismo) +{ + mutex_lock(&pismo_mutex); + if (vpp_pismo == pismo) + vpp_pismo = NULL; + mutex_unlock(&pismo_mutex); +} + +static void pismo_set_vpp(struct map_info *map, int on) +{ + struct pismo_data *pismo = vpp_pismo; + + pismo->vpp(pismo->vpp_data, on); +} +/* end of hack */ + + +static unsigned int __devinit pismo_width_to_bytes(unsigned int width) +{ + width &= 15; + if (width > 2) + return 0; + return 1 << width; +} + +static int __devinit pismo_eeprom_read(struct i2c_client *client, void *buf, + u8 addr, size_t size) +{ + int ret; + struct i2c_msg msg[] = { + { + .addr = client->addr, + .len = sizeof(addr), + .buf = &addr, + }, { + .addr = client->addr, + .flags = I2C_M_RD, + .len = size, + .buf = buf, + }, + }; + + ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg)); + + return ret == ARRAY_SIZE(msg) ? 
size : -EIO; +} + +static int __devinit pismo_add_device(struct pismo_data *pismo, int i, + struct pismo_mem *region, const char *name, void *pdata, size_t psize) +{ + struct platform_device *dev; + struct resource res = { }; + phys_addr_t base = region.base; + int ret; + + if (base == ~0) + return -ENXIO; + + res.start = base; + res.end = base + region->size - 1; + res.flags = IORESOURCE_MEM; + + dev = platform_device_alloc(name, i); + if (!dev) + return -ENOMEM; + dev->dev.parent = &pismo->client->dev; + + do { + ret = platform_device_add_resources(dev, &res, 1); + if (ret) + break; + + ret = platform_device_add_data(dev, pdata, psize); + if (ret) + break; + + ret = platform_device_add(dev); + if (ret) + break; + + pismo->dev[i] = dev; + return 0; + } while (0); + + platform_device_put(dev); + return ret; +} + +static int __devinit pismo_add_nor(struct pismo_data *pismo, int i, + struct pismo_mem *region) +{ + struct physmap_flash_data data = { + .width = region->width, + }; + + if (pismo->vpp) + data.set_vpp = pismo_set_vpp; + + return pismo_add_device(pismo, i, region, "physmap-flash", + &data, sizeof(data)); +} + +static int __devinit pismo_add_sram(struct pismo_data *pismo, int i, + struct pismo_mem *region) +{ + struct platdata_mtd_ram data = { + .bankwidth = region->width, + }; + + return pismo_add_device(pismo, i, region, "mtd-ram", + &data, sizeof(data)); +} + +static void __devinit pismo_add_one(struct pismo_data *pismo, int i, + const struct pismo_cs_block *cs, phys_addr_t base) +{ + struct device *dev = &pismo->client->dev; + struct pismo_mem region; + + region.base = base; + region.type = cs->type; + region.width = pismo_width_to_bytes(cs->width); + region.access = le16_to_cpu(cs->access); + region.size = le32_to_cpu(cs->size); + + if (region.width == 0) { + dev_err(dev, "cs%u: bad width: %02x, ignoring\n", i, cs->width); + return; + } + + /* + * FIXME: may need to the platforms memory controller here, but at + * the moment we assume that it has already been correctly setup. + * The memory controller can also tell us the base address as well. 
+ */ + + dev_info(dev, "cs%u: %.32s: type %02x access %u00ps size %uK\n", + i, cs->device, region.type, region.access, region.size / 1024); + + switch (region.type) { + case 0: + break; + case 1: + /* static DOC */ + break; + case 2: + /* static NOR */ + pismo_add_nor(pismo, i, ®ion); + break; + case 3: + /* static RAM */ + pismo_add_sram(pismo, i, ®ion); + break; + } +} + +static int __devexit pismo_remove(struct i2c_client *client) +{ + struct pismo_data *pismo = i2c_get_clientdata(client); + int i; + + for (i = 0; i < ARRAY_SIZE(pismo->dev); i++) + platform_device_unregister(pismo->dev[i]); + + /* FIXME: set_vpp needs saner arguments */ + pismo_setvpp_remove_fix(pismo); + + kfree(pismo); + + return 0; +} + +static int __devinit pismo_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct pismo_pdata *pdata = client->dev.platform_data; + struct pismo_eeprom eeprom; + struct pismo_data *pismo; + int ret, i; + + if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) { + dev_err(&client->dev, "functionality mismatch\n"); + return -EIO; + } + + pismo = kzalloc(sizeof(*pismo), GFP_KERNEL); + if (!pismo) + return -ENOMEM; + + /* FIXME: set_vpp needs saner arguments */ + ret = pismo_setvpp_probe_fix(pismo); + if (ret) + return ret; + + pismo->client = client; + if (pdata) { + pismo->vpp = pdata->set_vpp; + pismo->vpp_data = pdata->vpp_data; + } + i2c_set_clientdata(client, pismo); + + ret = pismo_eeprom_read(client, &eeprom, 0, sizeof(eeprom)); + if (ret < 0) { + dev_err(&client->dev, "error reading EEPROM: %d\n", ret); + return ret; + } + + dev_info(&client->dev, "%.15s board found\n", eeprom.board); + + for (i = 0; i < ARRAY_SIZE(eeprom.cs); i++) + if (eeprom.cs[i].type != 0xff) + pismo_add_one(pismo, i, &eeprom.cs[i], + pdata->cs_addrs[i]); + + return 0; +} + +static const struct i2c_device_id pismo_id[] = { + { "pismo" }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, pismo_id); + +static struct i2c_driver pismo_driver = { + .driver = { + .name = "pismo", + .owner = THIS_MODULE, + }, + .probe = pismo_probe, + .remove = __devexit_p(pismo_remove), + .id_table = pismo_id, +}; + +static int __init pismo_init(void) +{ + BUILD_BUG_ON(sizeof(struct pismo_cs_block) != 48); + BUILD_BUG_ON(sizeof(struct pismo_eeprom) != 256); + + return i2c_add_driver(&pismo_driver); +} +module_init(pismo_init); + +static void __exit pismo_exit(void) +{ + i2c_del_driver(&pismo_driver); +} +module_exit(pismo_exit); + +MODULE_AUTHOR("Russell King "); +MODULE_DESCRIPTION("PISMO memory driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mtd/pismo.h b/include/linux/mtd/pismo.h new file mode 100644 index 000000000000..8dfb7e1421c5 --- /dev/null +++ b/include/linux/mtd/pismo.h @@ -0,0 +1,17 @@ +/* + * PISMO memory driver - http://www.pismoworld.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. 
+ */ +#ifndef __LINUX_MTD_PISMO_H +#define __LINUX_MTD_PISMO_H + +struct pismo_pdata { + void (*set_vpp)(void *, int); + void *vpp_data; + phys_addr_t cs_addrs[5]; +}; + +#endif -- cgit v1.2.3 From cd65c3c7d1081290b7365897c2290a84aa967d4d Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 13 Jan 2010 18:10:36 -0800 Subject: net: fix build erros with CONFIG_BUG=n, CONFIG_GENERIC_BUG=n Fixed build errors introduced by commit 7ad6848c (ip: fix mc_loop checks for tunnels with multicast outer addresses) Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index d9a0e74d8923..fb63371c07a8 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -338,7 +338,7 @@ static inline int sk_mc_loop(struct sock *sk) return inet6_sk(sk)->mc_loop; #endif } - __WARN(); + WARN_ON(1); return 1; } -- cgit v1.2.3 From 3f09ea4ecdcbcea05541f83e557d6ce2e56626d8 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 13 Jan 2010 22:28:40 +0100 Subject: drm/ttm: Add a swap_notify callback. This is needed for a bugfix in the vmwgfx driver. Drivers may have GPU bindings on buffers that core TTM is not aware of, and TTM may view those buffers as ordinary system memory buffers. Add a notifier to such drivers when TTM is about to move the buffer contents out to swappable memory. The driver must then release any private GPU bindings on those buffers. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 3 +++ include/drm/ttm/ttm_bo_driver.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 2920f9a279e1..e10a04e7c4e9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1844,6 +1844,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) * anyone tries to access a ttm page. */ + if (bo->bdev->driver->swap_notify) + bo->bdev->driver->swap_notify(bo); + ret = ttm_tt_swapout(bo->ttm, bo->persistant_swap_storage); out: diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index ff7664e0c3cd..4c4e0f8375b3 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -353,6 +353,11 @@ struct ttm_bo_driver { /* notify the driver we are taking a fault on this BO * and have reserved it */ void (*fault_reserve_notify)(struct ttm_buffer_object *bo); + + /** + * notify the driver that we're about to swap out this bo + */ + void (*swap_notify) (struct ttm_buffer_object *bo); }; /** -- cgit v1.2.3 From 6d125529c6cbfe570ce3bf9a0728548f087499da Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Dec 2009 06:58:56 -0500 Subject: Fix ACC_MODE() for real commit 5300990c0370e804e49d9a59d928c5d53fb73487 had stepped on a rather nasty mess: definitions of ACC_MODE used to be different. Fixed the resulting breakage, converting them to variant that takes O_... value; all callers have that and it actually simplifies life (see tomoyo part of changes). 
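A quick sanity check of the new table (illustrative only, using the usual Linux values O_RDONLY=0, O_WRONLY=1, O_RDWR=2, O_ACCMODE=3, MAY_WRITE=2, MAY_READ=4):

/* Sketch: the new ACC_MODE() is indexed by the raw O_* value, so callers
 * no longer need the open_to_namei_flags() "+1" adjustment. */
#include <stdio.h>
#include <fcntl.h>

#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])

int main(void)
{
        printf("O_RDONLY -> %o\n", ACC_MODE(O_RDONLY)); /* 4 = MAY_READ */
        printf("O_WRONLY -> %o\n", ACC_MODE(O_WRONLY)); /* 2 = MAY_WRITE */
        printf("O_RDWR   -> %o\n", ACC_MODE(O_RDWR));   /* 6 = MAY_READ|MAY_WRITE */
        return 0;
}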
Signed-off-by: Al Viro --- fs/namei.c | 2 +- include/linux/fs.h | 2 +- security/tomoyo/tomoyo.c | 7 +------ 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index 1b26b1620664..d930f1856ed2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1620,7 +1620,7 @@ struct file *do_filp_open(int dfd, const char *pathname, open_flag |= O_DSYNC; if (!acc_mode) - acc_mode = MAY_OPEN | ACC_MODE(flag); + acc_mode = MAY_OPEN | ACC_MODE(open_flag); /* O_TRUNC implies we need access checks for write permissions */ if (flag & O_TRUNC) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9147ca88f253..b1bcb275b596 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2463,7 +2463,7 @@ int proc_nr_files(struct ctl_table *table, int write, int __init get_filesystem_list(char *buf); -#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) +#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) #define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE)) #endif /* __KERNEL__ */ diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 8a00ade85166..2aceebf5f354 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -80,9 +80,8 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm) return tomoyo_find_next_domain(bprm); /* * Read permission is checked against interpreters using next domain. - * '1' is the result of open_to_namei_flags(O_RDONLY). */ - return tomoyo_check_open_permission(domain, &bprm->file->f_path, 1); + return tomoyo_check_open_permission(domain, &bprm->file->f_path, O_RDONLY); } static int tomoyo_path_truncate(struct path *path, loff_t length, @@ -184,10 +183,6 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, static int tomoyo_dentry_open(struct file *f, const struct cred *cred) { int flags = f->f_flags; - - if ((flags + 1) & O_ACCMODE) - flags++; - flags |= f->f_flags & (O_APPEND | O_TRUNC); /* Don't check read permission here if called from do_execve(). */ if (current->in_execve) return 0; -- cgit v1.2.3 From d5f1fb53353edc38da326445267c1df0c9676df2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:53:55 +0800 Subject: lib: Introduce strnstr() It differs strstr() in that it limits the length to be searched in the first string. 
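A minimal usage sketch (not from the patch) of the new helper as it might appear in kernel code; only the first @len bytes of the haystack are considered:

#include <linux/kernel.h>
#include <linux/string.h>

static void strnstr_demo(void)
{
        const char *buf = "kernel";

        /* "nel" lies within the first 6 bytes -> found */
        WARN_ON(strnstr(buf, "nel", 6) == NULL);
        /* only the first 4 bytes ("kern") are searched -> not found */
        WARN_ON(strnstr(buf, "nel", 4) != NULL);
}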
Signed-off-by: Li Zefan LKML-Reference: <4B4E8743.6030805@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/string.h | 5 ++++- lib/string.c | 27 ++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 651839a2a755..a716ee2a8adb 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -72,7 +72,10 @@ static inline __must_check char *strstrip(char *str) } #ifndef __HAVE_ARCH_STRSTR -extern char * strstr(const char *,const char *); +extern char * strstr(const char *, const char *); +#endif +#ifndef __HAVE_ARCH_STRNSTR +extern char * strnstr(const char *, const char *, size_t); #endif #ifndef __HAVE_ARCH_STRLEN extern __kernel_size_t strlen(const char *); diff --git a/lib/string.c b/lib/string.c index 9f75b4ec50b8..a1cdcfcc42d0 100644 --- a/lib/string.c +++ b/lib/string.c @@ -667,7 +667,7 @@ EXPORT_SYMBOL(memscan); */ char *strstr(const char *s1, const char *s2) { - int l1, l2; + size_t l1, l2; l2 = strlen(s2); if (!l2) @@ -684,6 +684,31 @@ char *strstr(const char *s1, const char *s2) EXPORT_SYMBOL(strstr); #endif +#ifndef __HAVE_ARCH_STRNSTR +/** + * strnstr - Find the first substring in a length-limited string + * @s1: The string to be searched + * @s2: The string to search for + * @len: the maximum number of characters to search + */ +char *strnstr(const char *s1, const char *s2, size_t len) +{ + size_t l1 = len, l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} +EXPORT_SYMBOL(strnstr); +#endif + #ifndef __HAVE_ARCH_MEMCHR /** * memchr - Find a character in an area of memory. -- cgit v1.2.3 From c084ca704a3661bf77690a05bc6bd2c305d87c34 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Thu, 10 Dec 2009 19:56:45 +0800 Subject: ACPI: don't cond_resched if irq is disabled commit 8bd108d adds preemption point after each opcode parse, then a sleeping function called from invalid context bug was founded during suspend/resume stage. this was fixed in commit abe1dfa by don't cond_resched when irq_disabled. But recent commit 138d156 changes the behaviour to don't cond_resched when in_atomic. This makes the sleeping function called from invalid context bug happen again, which is reported in http://lkml.org/lkml/2009/12/1/371. This patch also fixes http://bugzilla.kernel.org/show_bug.cgi?id=14483 Reported-and-bisected-by: Larry Finger Reported-and-bisected-by: Justin P. Mattock Signed-off-by: Xiaotian Feng Acked-by: Alexey Starikovskiy Signed-off-by: Len Brown --- include/acpi/platform/aclinux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 9d7febde10a1..09469971472f 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -152,7 +152,7 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache) #include #define ACPI_PREEMPTION_POINT() \ do { \ - if (!in_atomic_preempt_off()) \ + if (!in_atomic_preempt_off() && !irqs_disabled()) \ cond_resched(); \ } while (0) -- cgit v1.2.3 From d00c362f1b0ff54161e0a42b4554ac621a9ef92d Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sat, 16 Jan 2010 01:04:04 -0800 Subject: ax25: netrom: rose: Fix timer oopses Wrong ax25_cb refcounting in ax25_send_frame() and by its callers can cause timer oopses (first reported with 2.6.29.6 kernel). 
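The underlying pattern, roughly (illustrative only; cache_replace() is a made-up helper, not something added by this patch): ax25_send_frame() now returns the new ax25_cb with a reference held for the caller, and the caller must drop the reference it held on whatever pointer it is overwriting:

static void cache_replace(ax25_cb **slot, ax25_cb *new_cb)
{
        ax25_cb *old = *slot;

        *slot = new_cb;            /* new_cb arrives with a reference for us */
        if (old)
                ax25_cb_put(old);  /* drop the reference held on the old entry */
}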
Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=14905 Reported-by: Bernard Pidoux Tested-by: Bernard Pidoux Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/netrom.h | 2 ++ net/ax25/ax25_out.c | 6 ++++++ net/netrom/nr_route.c | 11 ++++++----- net/rose/rose_link.c | 8 ++++++++ net/rose/rose_route.c | 5 +++++ 5 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netrom.h b/include/net/netrom.h index 15696b1fd30f..ab170a60e7d3 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -132,6 +132,8 @@ static __inline__ void nr_node_put(struct nr_node *nr_node) static __inline__ void nr_neigh_put(struct nr_neigh *nr_neigh) { if (atomic_dec_and_test(&nr_neigh->refcount)) { + if (nr_neigh->ax25) + ax25_cb_put(nr_neigh->ax25); kfree(nr_neigh->digipeat); kfree(nr_neigh); } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index bf706f83a5c9..14912600ec57 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -92,6 +92,12 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax2 #endif } + /* + * There is one ref for the state machine; a caller needs + * one more to put it back, just like with the existing one. + */ + ax25_cb_hold(ax25); + ax25_cb_add(ax25); ax25->state = AX25_STATE_1; diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index aacba76070fc..e2e2d33cafdf 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -843,12 +843,13 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) dptr = skb_push(skb, 1); *dptr = AX25_P_NETROM; - ax25s = ax25_send_frame(skb, 256, (ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->digipeat, nr_neigh->dev); - if (nr_neigh->ax25 && ax25s) { - /* We were already holding this ax25_cb */ + ax25s = nr_neigh->ax25; + nr_neigh->ax25 = ax25_send_frame(skb, 256, + (ax25_address *)dev->dev_addr, + &nr_neigh->callsign, + nr_neigh->digipeat, nr_neigh->dev); + if (ax25s) ax25_cb_put(ax25s); - } - nr_neigh->ax25 = ax25s; dev_put(dev); ret = (nr_neigh->ax25 != NULL); diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index bd86a63960ce..5ef5f6988a2e 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -101,13 +101,17 @@ static void rose_t0timer_expiry(unsigned long param) static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) { ax25_address *rose_call; + ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) rose_call = (ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; + ax25s = neigh->ax25; neigh->ax25 = ax25_send_frame(skb, 260, rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); + if (ax25s) + ax25_cb_put(ax25s); return (neigh->ax25 != NULL); } @@ -120,13 +124,17 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) static int rose_link_up(struct rose_neigh *neigh) { ax25_address *rose_call; + ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) rose_call = (ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; + ax25s = neigh->ax25; neigh->ax25 = ax25_find_cb(rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); + if (ax25s) + ax25_cb_put(ax25s); return (neigh->ax25 != NULL); } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 795c4b025e31..70a0b3b4b4d2 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -235,6 +235,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) if ((s = rose_neigh_list) == rose_neigh) { rose_neigh_list = 
rose_neigh->next; + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; @@ -243,6 +245,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) while (s != NULL && s->next != NULL) { if (s->next == rose_neigh) { s->next = rose_neigh->next; + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; @@ -812,6 +816,7 @@ void rose_link_failed(ax25_cb *ax25, int reason) if (rose_neigh != NULL) { rose_neigh->ax25 = NULL; + ax25_cb_put(ax25); rose_del_route_by_neigh(rose_neigh); rose_kill_by_neigh(rose_neigh); -- cgit v1.2.3 From 7e105057a34c83cea542dacc55ff0528bce67afa Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Fri, 15 Jan 2010 17:01:02 -0800 Subject: kfifo: fix kfifo_out_locked race bug Fix a wrong optimization in include/linux/kfifo.h which could cause a race in kfifo_out_locked. Signed-off-by: Stefani Seibold Reported-by: Johan Hovold Cc: Pete Zaitcev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 7c6b32a1421c..c4ac88b3c302 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -228,13 +228,6 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, ret = kfifo_out(fifo, to, n); - /* - * optimization: if the FIFO is empty, set the indices to 0 - * so we don't wrap the next time - */ - if (kfifo_is_empty(fifo)) - kfifo_reset(fifo); - spin_unlock_irqrestore(lock, flags); return ret; -- cgit v1.2.3 From 2427b8e3eaea3719e53bbed7b3375382c3aa6f13 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 15 Jan 2010 17:01:11 -0800 Subject: tty.h: make tty_port_get() static inline I get a few dozen of these warnings when using gcc (GCC) 4.4.1 20090725 (Red Hat 4.4.1-2): In file included from mmotm-2010-0113-1217/init/do_mounts.c:5: mmotm-2010-0113-1217/include/linux/tty.h: In function 'tty_port_get': mmotm-2010-0113-1217/include/linux/tty.h:469: warning: '______f' is static but declared in inline function 'tty_port_get' which is not static so make the function static inline. 
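For reference (illustrative, not part of the patch), the difference is purely one of linkage for functions defined in a header:

/* extern inline: the function keeps external linkage, so its body must
 * not refer to per-file static data -- which is exactly what the
 * branch-profiling variant of unlikely() introduces, hence the
 * '______f' warning. */
extern inline int demo_extern(int x) { return x + 1; }

/* static inline: every file including the header gets its own
 * internal-linkage copy, so such references are fine. */
static inline int demo_static(int x) { return x + 1; }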
[akpm@linux-foundation.org: may as well convert tty_port_users() also] Signed-off-by: Randy Dunlap Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tty.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index ef3a2947b102..6abfcf5b5887 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -464,7 +464,7 @@ extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern void tty_port_put(struct tty_port *port); -extern inline struct tty_port *tty_port_get(struct tty_port *port) +static inline struct tty_port *tty_port_get(struct tty_port *port) { if (port) kref_get(&port->kref); @@ -486,7 +486,7 @@ extern void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern int tty_port_open(struct tty_port *port, struct tty_struct *tty, struct file *filp); -extern inline int tty_port_users(struct tty_port *port) +static inline int tty_port_users(struct tty_port *port) { return port->count + port->blocked_open; } -- cgit v1.2.3 From 8ecc2951534af10e04ddb5e5ff5c6d217b79f5c2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:12 -0800 Subject: kfifo: use void * pointers for user buffers The pointers to user buffers are currently unsigned char *, which requires a lot of casting in the caller for any non-char typed buffers. Use void * instead. Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 10 +++++----- kernel/kfifo.c | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c4ac88b3c302..6fb495ea956a 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -104,15 +104,15 @@ union { \ #undef __kfifo_initializer -extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, +extern void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size); extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask); extern void kfifo_free(struct kfifo *fifo); extern unsigned int kfifo_in(struct kfifo *fifo, - const unsigned char *from, unsigned int len); + const void *from, unsigned int len); extern __must_check unsigned int kfifo_out(struct kfifo *fifo, - unsigned char *to, unsigned int len); + void *to, unsigned int len); /** * kfifo_reset - removes the entire FIFO contents @@ -194,7 +194,7 @@ static inline __must_check unsigned int kfifo_avail(struct kfifo *fifo) * bytes copied. */ static inline unsigned int kfifo_in_locked(struct kfifo *fifo, - const unsigned char *from, unsigned int n, spinlock_t *lock) + const void *from, unsigned int n, spinlock_t *lock) { unsigned long flags; unsigned int ret; @@ -219,7 +219,7 @@ static inline unsigned int kfifo_in_locked(struct kfifo *fifo, * @to buffer and returns the number of copied bytes. 
*/ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, - unsigned char *to, unsigned int n, spinlock_t *lock) + void *to, unsigned int n, spinlock_t *lock) { unsigned long flags; unsigned int ret; diff --git a/kernel/kfifo.c b/kernel/kfifo.c index e92d519f93b1..ab615e695052 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -28,7 +28,7 @@ #include #include -static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, +static void _kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size) { fifo->buffer = buffer; @@ -44,7 +44,7 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, * @size: the size of the internal buffer, this have to be a power of 2. * */ -void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size) +void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size) { /* size must be a power of 2 */ BUG_ON(!is_power_of_2(size)); @@ -235,7 +235,7 @@ EXPORT_SYMBOL(__kfifo_in_n); * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from, +unsigned int kfifo_in(struct kfifo *fifo, const void *from, unsigned int len) { len = min(kfifo_avail(fifo), len); @@ -277,7 +277,7 @@ EXPORT_SYMBOL(__kfifo_out_n); * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len) +unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len) { len = min(kfifo_len(fifo), len); -- cgit v1.2.3 From 64ce1037c5434b1d036cd99ecaee6e00496bc2e9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:15 -0800 Subject: kfifo: sanitize *_user error handling Right now for kfifo_*_user it's not easily possible to distingush between a user copy failing and the FIFO not containing enough data. The problem is that both conditions are multiplexed into the same return code. Avoid this by moving the "copy length" into a separate output parameter and only return 0/-EFAULT in the main return value. I didn't fully adapt the weird "record" variants, those seem to be unused anyways and were rather messy (should they be just removed?) I would appreciate some double checking if I did all the conversions correctly. 
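With the new convention the caller checks the return value only for -EFAULT and reads the byte count from the extra output argument; a sketch (demo_fifo and demo_write are hypothetical, not from the patch):

static struct kfifo demo_fifo;          /* assumed set up elsewhere */

static ssize_t demo_write(struct file *file, const char __user *buf,
                          size_t count, loff_t *ppos)
{
        unsigned int copied;
        int ret;

        ret = kfifo_from_user(&demo_fifo, buf, count, &copied);
        if (ret)                /* -EFAULT: the user-space copy failed */
                return ret;

        return copied;          /* may be short if the FIFO lacked space */
}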
Signed-off-by: Andi Kleen Cc: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 8 +++--- kernel/kfifo.c | 76 +++++++++++++++++++++++++++++++++------------------ 2 files changed, 53 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 6fb495ea956a..86ad50a900c8 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -235,11 +235,11 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, extern void kfifo_skip(struct kfifo *fifo, unsigned int len); -extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo, - const void __user *from, unsigned int n); +extern __must_check int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned *lenout); -extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo, - void __user *to, unsigned int n); +extern __must_check int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int n, unsigned *lenout); /* * __kfifo_add_out internal helper function for updating the out offset diff --git a/kernel/kfifo.c b/kernel/kfifo.c index ab615e695052..b50bb622e8b0 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -159,8 +159,9 @@ static inline void __kfifo_out_data(struct kfifo *fifo, memcpy(to + l, fifo->buffer, len - l); } -static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo, - const void __user *from, unsigned int len, unsigned int off) +static inline int __kfifo_from_user_data(struct kfifo *fifo, + const void __user *from, unsigned int len, unsigned int off, + unsigned *lenout) { unsigned int l; int ret; @@ -177,16 +178,20 @@ static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo, /* first put the data starting from fifo->in to buffer end */ l = min(len, fifo->size - off); ret = copy_from_user(fifo->buffer + off, from, l); - - if (unlikely(ret)) - return ret + len - l; + if (unlikely(ret)) { + *lenout = ret; + return -EFAULT; + } + *lenout = l; /* then put the rest (if any) at the beginning of the buffer */ - return copy_from_user(fifo->buffer, from + l, len - l); + ret = copy_from_user(fifo->buffer, from + l, len - l); + *lenout += ret ? ret : len - l; + return ret ? 
-EFAULT : 0; } -static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo, - void __user *to, unsigned int len, unsigned int off) +static inline int __kfifo_to_user_data(struct kfifo *fifo, + void __user *to, unsigned int len, unsigned int off, unsigned *lenout) { unsigned int l; int ret; @@ -203,12 +208,21 @@ static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo, /* first get the data from fifo->out until the end of the buffer */ l = min(len, fifo->size - off); ret = copy_to_user(to, fifo->buffer + off, l); - - if (unlikely(ret)) - return ret + len - l; + *lenout = l; + if (unlikely(ret)) { + *lenout -= ret; + return -EFAULT; + } /* then get the rest (if any) from the beginning of the buffer */ - return copy_to_user(to + l, fifo->buffer, len - l); + len -= l; + ret = copy_to_user(to + l, fifo->buffer, len); + if (unlikely(ret)) { + *lenout += len - ret; + return -EFAULT; + } + *lenout += len; + return 0; } unsigned int __kfifo_in_n(struct kfifo *fifo, @@ -299,10 +313,13 @@ EXPORT_SYMBOL(__kfifo_out_generic); unsigned int __kfifo_from_user_n(struct kfifo *fifo, const void __user *from, unsigned int len, unsigned int recsize) { + unsigned total; + if (kfifo_avail(fifo) < len + recsize) return len + 1; - return __kfifo_from_user_data(fifo, from, len, recsize); + __kfifo_from_user_data(fifo, from, len, recsize, &total); + return total; } EXPORT_SYMBOL(__kfifo_from_user_n); @@ -313,18 +330,21 @@ EXPORT_SYMBOL(__kfifo_from_user_n); * @len: the length of the data to be added. * * This function copies at most @len bytes from the @from into the - * FIFO depending and returns the number of copied bytes. + * FIFO depending and returns -EFAULT/0. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_from_user(struct kfifo *fifo, - const void __user *from, unsigned int len) +int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int len, unsigned *total) { + int ret; len = min(kfifo_avail(fifo), len); - len -= __kfifo_from_user_data(fifo, from, len, 0); + ret = __kfifo_from_user_data(fifo, from, len, 0, total); + if (ret) + return ret; __kfifo_add_in(fifo, len); - return len; + return 0; } EXPORT_SYMBOL(kfifo_from_user); @@ -339,17 +359,17 @@ unsigned int __kfifo_to_user_n(struct kfifo *fifo, void __user *to, unsigned int len, unsigned int reclen, unsigned int recsize) { - unsigned int ret; + unsigned int ret, total; if (kfifo_len(fifo) < reclen + recsize) return len; - ret = __kfifo_to_user_data(fifo, to, reclen, recsize); + ret = __kfifo_to_user_data(fifo, to, reclen, recsize, &total); if (likely(ret == 0)) __kfifo_add_out(fifo, reclen + recsize); - return ret; + return total; } EXPORT_SYMBOL(__kfifo_to_user_n); @@ -358,20 +378,22 @@ EXPORT_SYMBOL(__kfifo_to_user_n); * @fifo: the fifo to be used. * @to: where the data must be copied. * @len: the size of the destination buffer. + @ @lenout: pointer to output variable with copied data * * This function copies at most @len bytes from the FIFO into the - * @to buffer and returns the number of copied bytes. + * @to buffer and 0 or -EFAULT. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. 
*/ -unsigned int kfifo_to_user(struct kfifo *fifo, - void __user *to, unsigned int len) +int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int len, unsigned *lenout) { + int ret; len = min(kfifo_len(fifo), len); - len -= __kfifo_to_user_data(fifo, to, len, 0); - __kfifo_add_out(fifo, len); - return len; + ret = __kfifo_to_user_data(fifo, to, len, 0, lenout); + __kfifo_add_out(fifo, *lenout); + return ret; } EXPORT_SYMBOL(kfifo_to_user); -- cgit v1.2.3 From a5b9e2c1063046421ce01dcf5ddd7ec12567f3e1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:16 -0800 Subject: kfifo: add kfifo_out_peek In some upcoming code it's useful to peek into a FIFO without permanentely removing data. This patch implements a new kfifo_out_peek() to do this. Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 3 +++ kernel/kfifo.c | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 86ad50a900c8..7ad6d32dd673 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -113,6 +113,9 @@ extern unsigned int kfifo_in(struct kfifo *fifo, const void *from, unsigned int len); extern __must_check unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len); +extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo, + void *to, unsigned int len, unsigned offset); + /** * kfifo_reset - removes the entire FIFO contents diff --git a/kernel/kfifo.c b/kernel/kfifo.c index b50bb622e8b0..7384f120be87 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -302,6 +302,27 @@ unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len) } EXPORT_SYMBOL(kfifo_out); +/** + * kfifo_out_peek - copy some data from the FIFO, but do not remove it + * @fifo: the fifo to be used. + * @to: where the data must be copied. + * @len: the size of the destination buffer. + * @offset: offset into the fifo + * + * This function copies at most @len bytes at @offset from the FIFO + * into the @to buffer and returns the number of copied bytes. + * The data is not removed from the FIFO. + */ +unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len, + unsigned offset) +{ + len = min(kfifo_len(fifo), len + offset); + + __kfifo_out_data(fifo, to, len, offset); + return len; +} +EXPORT_SYMBOL(kfifo_out_peek); + unsigned int __kfifo_out_generic(struct kfifo *fifo, void *to, unsigned int len, unsigned int recsize, unsigned int *total) -- cgit v1.2.3 From d994ffc247f7c4a48b848f10c4c01c9b06411ada Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:17 -0800 Subject: kfifo: add kfifo_initialized Simple inline that checks if kfifo_init() has been executed on a fifo. This is useful for walking all per CPU fifos, when some of them might not have been brought up yet. 
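For example (illustrative only, with a hypothetical per-CPU fifo):

static DEFINE_PER_CPU(struct kfifo, demo_fifo);

static void demo_drain_all(void)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                struct kfifo *f = &per_cpu(demo_fifo, cpu);

                if (!kfifo_initialized(f))   /* kfifo_alloc()/kfifo_init() not run yet */
                        continue;
                /* ... drain *f ... */
        }
}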
Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 7ad6d32dd673..c8618243ca5a 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -116,6 +116,16 @@ extern __must_check unsigned int kfifo_out(struct kfifo *fifo, extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len, unsigned offset); +/** + * kfifo_initialized - Check if kfifo is initialized. + * @fifo: fifo to check + * Return %true if FIFO is initialized, otherwise %false. + * Assumes the fifo was 0 before. + */ +static inline bool kfifo_initialized(struct kfifo *fifo) +{ + return fifo->buffer != 0; +} /** * kfifo_reset - removes the entire FIFO contents -- cgit v1.2.3 From 5dab600e6a153ceb64832f608069e6c08185411a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:17 -0800 Subject: kfifo: document everywhere that size has to be power of two On my first try using them I missed that the fifos need to be power of two, resulting in a runtime bug. Document that requirement everywhere (and fix one grammar bug) Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 4 ++-- kernel/kfifo.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c8618243ca5a..6f6c5f300af6 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -67,7 +67,7 @@ struct kfifo { /** * DECLARE_KFIFO - macro to declare a kfifo and the associated buffer * @name: name of the declared kfifo datatype - * @size: size of the fifo buffer + * @size: size of the fifo buffer. Must be a power of two. * * Note1: the macro can be used inside struct or union declaration * Note2: the macro creates two objects: @@ -91,7 +91,7 @@ union { \ /** * DEFINE_KFIFO - macro to define and initialize a kfifo * @name: name of the declared kfifo datatype - * @size: size of the fifo buffer + * @size: size of the fifo buffer. Must be a power of two. * * Note1: the macro can be used for global and local kfifo data type variables * Note2: the macro creates two objects: diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 7384f120be87..32c5c15d750d 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -41,7 +41,7 @@ static void _kfifo_init(struct kfifo *fifo, void *buffer, * kfifo_init - initialize a FIFO using a preallocated buffer * @fifo: the fifo to assign the buffer * @buffer: the preallocated buffer to be used. - * @size: the size of the internal buffer, this have to be a power of 2. + * @size: the size of the internal buffer, this has to be a power of 2. 
* */ void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size) -- cgit v1.2.3 From cc8ef6eb21e964b1c5eb97b2d0e8ac9893e1bf86 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 15 Jan 2010 17:01:22 -0800 Subject: kernel.h: add BUILD_BUG_ON_NOT_POWER_OF_2() Add BUILD_BUG_ON_NOT_POWER_OF_2() When code relies on a constant being a power of 2: #define FOO 512 /* must be a power of 2 */ it would be nice to be able to do: BUILD_BUG_ON(!is_power_of_2(FOO)); However applying an inline function does not result in a compile-time constant that can be used with BUILD_BUG_ON(), so trying that gives results in: error: bit-field '' width not an integer constant As suggested by akpm, rather than monkeying around with is_power_of_2() and risking gcc warts about constant expressions, just create a macro BUILD_BUG_ON_NOT_POWER_OF_2() to encapsulate this common requirement. Signed-off-by: Roland Dreier Cc: Bart Van Assche Cc: David Dillow Cc: "Robert P. J. Day" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3fc9f5aab5f8..328bca609b9b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -734,6 +734,10 @@ struct sysinfo { /* Force a compilation error if condition is constant and true */ #define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)])) +/* Force a compilation error if a constant expression is not a power of 2 */ +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) + /* Force a compilation error if condition is true, but also produce a result (of value 0 and type size_t), so the expression can be used e.g. in a structure initializer (or where-ever else comma expressions -- cgit v1.2.3 From 1e2ae599d37e60958c03ca5e46b1f657619a30cd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Jan 2010 17:01:33 -0800 Subject: nommu: struct vm_region's vm_usage count need not be atomic The vm_usage count field in struct vm_region does not need to be atomic as it's only even modified whilst nommu_region_sem is write locked. 
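Tying the two changes above together, a usage sketch of BUILD_BUG_ON_NOT_POWER_OF_2(): the constant and function names are invented, but this is the intended pattern for enforcing the kfifo power-of-two rule at compile time rather than discovering it as a runtime bug.

#include <linux/kernel.h>

#define MY_FIFO_SIZE 512	/* kfifo requirement: must be a power of two */

static inline void my_fifo_size_check(void)
{
	/* The build fails here if someone changes MY_FIFO_SIZE to, say, 768. */
	BUILD_BUG_ON_NOT_POWER_OF_2(MY_FIFO_SIZE);
}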
Signed-off-by: David Howells Acked-by: Al Viro Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- mm/nommu.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 84d020bed083..80cfa78a8cf6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -122,7 +122,7 @@ struct vm_region { unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ - atomic_t vm_usage; /* region usage count */ + int vm_usage; /* region usage count (access under nommu_region_sem) */ bool vm_icache_flushed : 1; /* true if the icache has been flushed for * this region */ }; diff --git a/mm/nommu.c b/mm/nommu.c index 17773862619b..5e39294f8ea8 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -552,11 +552,11 @@ static void free_page_series(unsigned long from, unsigned long to) static void __put_nommu_region(struct vm_region *region) __releases(nommu_region_sem) { - kenter("%p{%d}", region, atomic_read(®ion->vm_usage)); + kenter("%p{%d}", region, region->vm_usage); BUG_ON(!nommu_region_tree.rb_node); - if (atomic_dec_and_test(®ion->vm_usage)) { + if (--region->vm_usage == 0) { if (region->vm_top > region->vm_start) delete_nommu_region(region); up_write(&nommu_region_sem); @@ -1205,7 +1205,7 @@ unsigned long do_mmap_pgoff(struct file *file, if (!vma) goto error_getting_vma; - atomic_set(®ion->vm_usage, 1); + region->vm_usage = 1; region->vm_flags = vm_flags; region->vm_pgoff = pgoff; @@ -1272,7 +1272,7 @@ unsigned long do_mmap_pgoff(struct file *file, } /* we've found a region we can share */ - atomic_inc(&pregion->vm_usage); + pregion->vm_usage++; vma->vm_region = pregion; start = pregion->vm_start; start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; @@ -1289,7 +1289,7 @@ unsigned long do_mmap_pgoff(struct file *file, vma->vm_region = NULL; vma->vm_start = 0; vma->vm_end = 0; - atomic_dec(&pregion->vm_usage); + pregion->vm_usage--; pregion = NULL; goto error_just_free; } @@ -1444,7 +1444,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, /* we're only permitted to split anonymous regions that have a single * owner */ if (vma->vm_file || - atomic_read(&vma->vm_region->vm_usage) != 1) + vma->vm_region->vm_usage != 1) return -ENOMEM; if (mm->map_count >= sysctl_max_map_count) @@ -1518,7 +1518,7 @@ static int shrink_vma(struct mm_struct *mm, /* cut the backing region down to size */ region = vma->vm_region; - BUG_ON(atomic_read(®ion->vm_usage) != 1); + BUG_ON(region->vm_usage != 1); down_write(&nommu_region_sem); delete_nommu_region(region); -- cgit v1.2.3 From efc1a3b16930c41d64ffefde16b87d82f603a8a0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Jan 2010 17:01:35 -0800 Subject: nommu: don't need get_unmapped_area() for NOMMU get_unmapped_area() is unnecessary for NOMMU as no-one calls it. 
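As a standalone illustration of the vm_usage conversion in the previous patch (all names below are made up): a reference count that is only ever modified while an rwsem is held for writing can be a plain int, with no atomic operations needed.

#include <linux/rwsem.h>

static DECLARE_RWSEM(example_region_sem);
static int example_usage;	/* modified only under example_region_sem held for write */

static void example_region_get(void)
{
	down_write(&example_region_sem);
	example_usage++;
	up_write(&example_region_sem);
}

static int example_region_put(void)
{
	int last;

	down_write(&example_region_sem);
	last = (--example_usage == 0);
	up_write(&example_region_sem);

	return last;	/* caller frees the object when this returns true */
}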
Signed-off-by: David Howells Acked-by: Al Viro Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ include/linux/sched.h | 7 +++++-- mm/nommu.c | 21 --------------------- mm/util.c | 2 +- 4 files changed, 8 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 80cfa78a8cf6..36f96271306c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -205,10 +205,12 @@ struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; struct vm_area_struct * mmap_cache; /* last find_vma result */ +#ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); void (*unmap_area) (struct mm_struct *mm, unsigned long addr); +#endif unsigned long mmap_base; /* base of mmap area */ unsigned long task_size; /* size of task vm space */ unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d4991be9d53..6f7bba93929b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -377,6 +377,8 @@ extern int sysctl_max_map_count; #include +#ifdef CONFIG_MMU +extern void arch_pick_mmap_layout(struct mm_struct *mm); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); @@ -386,6 +388,9 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long flags); extern void arch_unmap_area(struct mm_struct *, unsigned long); extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); +#else +static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} +#endif #if USE_SPLIT_PTLOCKS /* @@ -2491,8 +2496,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ -extern void arch_pick_mmap_layout(struct mm_struct *mm); - #ifdef CONFIG_TRACING extern void __trace_special(void *__tr, void *__data, diff --git a/mm/nommu.c b/mm/nommu.c index d6dd656264a2..32be0cf51ba6 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1760,27 +1760,6 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); -/* - * ask for an unmapped area at which to create a mapping on a file - */ -unsigned long get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - unsigned long (*get_area)(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); - - get_area = current->mm->get_unmapped_area; - if (file && file->f_op && file->f_op->get_unmapped_area) - get_area = file->f_op->get_unmapped_area; - - if (!get_area) - return -ENOSYS; - - return get_area(file, addr, len, pgoff, flags); -} -EXPORT_SYMBOL(get_unmapped_area); - /* * Check that a process has enough memory to allocate a new virtual * mapping. 
0 means there is enough memory for the allocation to diff --git a/mm/util.c b/mm/util.c index 7c35ad95f927..834db7be240f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -220,7 +220,7 @@ char *strndup_user(const char __user *s, long n) } EXPORT_SYMBOL(strndup_user); -#ifndef HAVE_ARCH_PICK_MMAP_LAYOUT +#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) void arch_pick_mmap_layout(struct mm_struct *mm) { mm->mmap_base = TASK_UNMAPPED_BASE; -- cgit v1.2.3 From 7e6608724c640924aad1d556d17df33ebaa6124d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Jan 2010 17:01:39 -0800 Subject: nommu: fix shared mmap after truncate shrinkage problems Fix a problem in NOMMU mmap with ramfs whereby a shared mmap can happen over the end of a truncation. The problem is that ramfs_nommu_check_mappings() checks that the reduced file size against the VMA tree, but not the vm_region tree. The following sequence of events can cause the problem: fd = open("/tmp/x", O_RDWR|O_TRUNC|O_CREAT, 0600); ftruncate(fd, 32 * 1024); a = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); b = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); munmap(a, 32 * 1024); ftruncate(fd, 16 * 1024); c = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); Mapping 'a' creates a vm_region covering 32KB of the file. Mapping 'b' sees that the vm_region from 'a' is covering the region it wants and so shares it, pinning it in memory. Mapping 'a' then goes away and the file is truncated to the end of VMA 'b'. However, the region allocated by 'a' is still in effect, and has _not_ been reduced. Mapping 'c' is then created, and because there's a vm_region covering the desired region, get_unmapped_area() is _not_ called to repeat the check, and the mapping is granted, even though the pages from the latter half of the mapping have been discarded. However: d = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); Mapping 'd' should work, and should end up sharing the region allocated by 'a'. To deal with this, we shrink the vm_region struct during the truncation, lest do_mmap_pgoff() take it as licence to share the full region automatically without calling the get_unmapped_area() file op again. 
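The reproducer above, written out as a compilable user-space program (a NOMMU kernel with a ramfs-backed file is assumed; error handling omitted):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	void *a, *b, *c;
	int fd = open("/tmp/x", O_RDWR | O_TRUNC | O_CREAT, 0600);

	ftruncate(fd, 32 * 1024);
	a = mmap(NULL, 32 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	/* 'b' shares and pins the 32KB vm_region created for 'a' */
	b = mmap(NULL, 16 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	munmap(a, 32 * 1024);
	ftruncate(fd, 16 * 1024);
	/* Without the fix this mapping is granted although its tail pages are gone. */
	c = mmap(NULL, 32 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	(void)b;
	return c == MAP_FAILED ? 0 : 1;	/* non-zero exit means the bug is present */
}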
Signed-off-by: David Howells Acked-by: Al Viro Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ramfs/file-nommu.c | 31 +------------------------- include/linux/mm.h | 1 + mm/nommu.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 266531343aae..1739a4aba25f 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -121,35 +121,6 @@ add_error: return ret; } -/*****************************************************************************/ -/* - * check that file shrinkage doesn't leave any VMAs dangling in midair - */ -static int ramfs_nommu_check_mappings(struct inode *inode, - size_t newsize, size_t size) -{ - struct vm_area_struct *vma; - struct prio_tree_iter iter; - - down_write(&nommu_region_sem); - - /* search for VMAs that fall within the dead zone */ - vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, - newsize >> PAGE_SHIFT, - (size + PAGE_SIZE - 1) >> PAGE_SHIFT - ) { - /* found one - only interested if it's shared out of the page - * cache */ - if (vma->vm_flags & VM_SHARED) { - up_write(&nommu_region_sem); - return -ETXTBSY; /* not quite true, but near enough */ - } - } - - up_write(&nommu_region_sem); - return 0; -} - /*****************************************************************************/ /* * @@ -169,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) /* check that a decrease in size doesn't cut off any shared mappings */ if (newsize < size) { - ret = ramfs_nommu_check_mappings(inode, newsize, size); + ret = nommu_shrink_inode_mappings(inode, size, newsize); if (ret < 0) return ret; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 2265f28eb47a..60c467bfbabd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1089,6 +1089,7 @@ extern void zone_pcp_update(struct zone *zone); /* nommu.c */ extern atomic_long_t mmap_pages_allocated; +extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); diff --git a/mm/nommu.c b/mm/nommu.c index 32be0cf51ba6..48a2ecfaf059 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1914,3 +1914,65 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in mmput(mm); return len; } + +/** + * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode + * @inode: The inode to check + * @size: The current filesize of the inode + * @newsize: The proposed filesize of the inode + * + * Check the shared mappings on an inode on behalf of a shrinking truncate to + * make sure that that any outstanding VMAs aren't broken and then shrink the + * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't + * automatically grant mappings that are too large. 
+ */ +int nommu_shrink_inode_mappings(struct inode *inode, size_t size, + size_t newsize) +{ + struct vm_area_struct *vma; + struct prio_tree_iter iter; + struct vm_region *region; + pgoff_t low, high; + size_t r_size, r_top; + + low = newsize >> PAGE_SHIFT; + high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + + down_write(&nommu_region_sem); + + /* search for VMAs that fall within the dead zone */ + vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, + low, high) { + /* found one - only interested if it's shared out of the page + * cache */ + if (vma->vm_flags & VM_SHARED) { + up_write(&nommu_region_sem); + return -ETXTBSY; /* not quite true, but near enough */ + } + } + + /* reduce any regions that overlap the dead zone - if in existence, + * these will be pointed to by VMAs that don't overlap the dead zone + * + * we don't check for any regions that start beyond the EOF as there + * shouldn't be any + */ + vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, + 0, ULONG_MAX) { + if (!(vma->vm_flags & VM_SHARED)) + continue; + + region = vma->vm_region; + r_size = region->vm_top - region->vm_start; + r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; + + if (r_top > newsize) { + region->vm_top -= r_top - newsize; + if (region->vm_end > region->vm_top) + region->vm_end = region->vm_top; + } + } + + up_write(&nommu_region_sem); + return 0; +} -- cgit v1.2.3 From eda05a28ec52be40086400a1b606d211276f0e41 Mon Sep 17 00:00:00 2001 From: Harish Zunjarrao Date: Tue, 12 Jan 2010 12:59:50 -0800 Subject: [SCSI] fc-transport: Use packed modifier for fc_bsg_request structure. The 32bit kernel does not add padding bytes in the fc_bsg_request structure whereas the 64bit kernel adds padding bytes in the fc_bsg_request structure. Due to this, structure elements gets mismatched with 32bit application and 64bit kernel.To resolve this, used packed modifier to avoid adding padding bytes. Signed-off-by: Giridhar Malavali Signed-off-by: James Bottomley --- include/scsi/scsi_bsg_fc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_bsg_fc.h b/include/scsi/scsi_bsg_fc.h index a4b233318179..91a4e4ff9a9b 100644 --- a/include/scsi/scsi_bsg_fc.h +++ b/include/scsi/scsi_bsg_fc.h @@ -292,7 +292,7 @@ struct fc_bsg_request { struct fc_bsg_rport_els r_els; struct fc_bsg_rport_ct r_ct; } rqst_data; -}; +} __attribute__((packed)); /* response (request sense data) structure of the sg_io_v4 */ -- cgit v1.2.3 From 9dffe2a32b0deef52605d50527c0d240b15cabf7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 Jan 2010 18:05:00 +0000 Subject: mfd: Correct WM835x ISINK ramp time defines The constants used to specify ISINK ramp times for WM835x had the wrong shifts so that the on times applied to the off ramp and vice versa. The masks for the bitfields are correct. 
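Returning to the fc_bsg_request packing fix earlier in this series: a small standalone program (invented struct names, not the real fc_bsg_request, which holds a union of request types) showing how a 64-bit member gains alignment padding on x86-64 but not on i386 -- exactly the 32-bit-userspace/64-bit-kernel layout mismatch that the __attribute__((packed)) annotation removes.

#include <stdio.h>
#include <stdint.h>

struct example_unpacked {
	uint32_t msgcode;
	uint64_t payload;	/* 64-bit ABIs insert 4 padding bytes before this */
};

struct example_packed {
	uint32_t msgcode;
	uint64_t payload;
} __attribute__((packed));	/* identical layout on 32-bit and 64-bit builds */

int main(void)
{
	printf("unpacked %zu bytes, packed %zu bytes\n",
	       sizeof(struct example_unpacked), sizeof(struct example_packed));
	return 0;
}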
Signed-off-by: Mark Brown Cc: stable@kernel.org Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/pmic.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h index be3264e286e0..e786fe9841ef 100644 --- a/include/linux/mfd/wm8350/pmic.h +++ b/include/linux/mfd/wm8350/pmic.h @@ -666,20 +666,20 @@ #define WM8350_ISINK_FLASH_DUR_64MS (1 << 8) #define WM8350_ISINK_FLASH_DUR_96MS (2 << 8) #define WM8350_ISINK_FLASH_DUR_1024MS (3 << 8) -#define WM8350_ISINK_FLASH_ON_INSTANT (0 << 4) -#define WM8350_ISINK_FLASH_ON_0_25S (1 << 4) -#define WM8350_ISINK_FLASH_ON_0_50S (2 << 4) -#define WM8350_ISINK_FLASH_ON_1_00S (3 << 4) -#define WM8350_ISINK_FLASH_ON_1_95S (1 << 4) -#define WM8350_ISINK_FLASH_ON_3_91S (2 << 4) -#define WM8350_ISINK_FLASH_ON_7_80S (3 << 4) -#define WM8350_ISINK_FLASH_OFF_INSTANT (0 << 0) -#define WM8350_ISINK_FLASH_OFF_0_25S (1 << 0) -#define WM8350_ISINK_FLASH_OFF_0_50S (2 << 0) -#define WM8350_ISINK_FLASH_OFF_1_00S (3 << 0) -#define WM8350_ISINK_FLASH_OFF_1_95S (1 << 0) -#define WM8350_ISINK_FLASH_OFF_3_91S (2 << 0) -#define WM8350_ISINK_FLASH_OFF_7_80S (3 << 0) +#define WM8350_ISINK_FLASH_ON_INSTANT (0 << 0) +#define WM8350_ISINK_FLASH_ON_0_25S (1 << 0) +#define WM8350_ISINK_FLASH_ON_0_50S (2 << 0) +#define WM8350_ISINK_FLASH_ON_1_00S (3 << 0) +#define WM8350_ISINK_FLASH_ON_1_95S (1 << 0) +#define WM8350_ISINK_FLASH_ON_3_91S (2 << 0) +#define WM8350_ISINK_FLASH_ON_7_80S (3 << 0) +#define WM8350_ISINK_FLASH_OFF_INSTANT (0 << 4) +#define WM8350_ISINK_FLASH_OFF_0_25S (1 << 4) +#define WM8350_ISINK_FLASH_OFF_0_50S (2 << 4) +#define WM8350_ISINK_FLASH_OFF_1_00S (3 << 4) +#define WM8350_ISINK_FLASH_OFF_1_95S (1 << 4) +#define WM8350_ISINK_FLASH_OFF_3_91S (2 << 4) +#define WM8350_ISINK_FLASH_OFF_7_80S (3 << 4) /* * Regulator Interrupts. -- cgit v1.2.3 From 64e8867ba8098b69889c1af94997a5ba2348fb26 Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Wed, 6 Jan 2010 13:51:48 +0100 Subject: mfd: tmio_mmc hardware abstraction for CNF area This patch abstracts out the CNF area code from tmio_mmc which is not present in all hardware that can use this driver. This is required so that we can support non-toshiba based hardware. 
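A sketch of how a non-Toshiba MFD driver might use the hooks this patch adds to struct tmio_mmc_data (all names below are hypothetical glue code, not taken from the patch). The MMC host driver then calls these through its set_pwr()/set_clk_div() callbacks instead of poking CNF registers directly.

#include <linux/platform_device.h>
#include <linux/mfd/tmio.h>

static void my_mmc_pwr(struct platform_device *mmc, int state)
{
	/* switch the SD bus supply on (state != 0) or off */
}

static void my_mmc_clk_div(struct platform_device *mmc, int state)
{
	/* program the board's SD clock divider */
}

static struct tmio_mmc_data my_mmc_data = {
	.hclk        = 24000000,
	.set_pwr     = my_mmc_pwr,
	.set_clk_div = my_mmc_clk_div,
};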
ASIC3 support by Philipp Zabel Signed-off-by: Ian Molton Signed-off-by: Magnus Damm Signed-off-by: Samuel Ortiz --- drivers/mfd/Makefile | 6 +-- drivers/mfd/asic3.c | 40 ++++++++++++--- drivers/mfd/t7l66xb.c | 55 +++++++++++++------- drivers/mfd/tc6387xb.c | 119 ++++++++++++++++++++++++++++++++------------ drivers/mfd/tc6393xb.c | 56 +++++++++++++++++---- drivers/mfd/tmio_core.c | 52 +++++++++++++++++++ drivers/mmc/host/tmio_mmc.c | 59 +++++++--------------- drivers/mmc/host/tmio_mmc.h | 46 +++-------------- include/linux/mfd/tmio.h | 39 +++++++++++++++ 9 files changed, 323 insertions(+), 149 deletions(-) create mode 100644 drivers/mfd/tmio_core.c (limited to 'include') diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index ca2f2c4ff05e..8f0d18409ede 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -11,9 +11,9 @@ obj-$(CONFIG_HTC_PASIC3) += htc-pasic3.o obj-$(CONFIG_MFD_DM355EVM_MSP) += dm355evm_msp.o -obj-$(CONFIG_MFD_T7L66XB) += t7l66xb.o -obj-$(CONFIG_MFD_TC6387XB) += tc6387xb.o -obj-$(CONFIG_MFD_TC6393XB) += tc6393xb.o +obj-$(CONFIG_MFD_T7L66XB) += t7l66xb.o tmio_core.o +obj-$(CONFIG_MFD_TC6387XB) += tc6387xb.o tmio_core.o +obj-$(CONFIG_MFD_TC6393XB) += tc6393xb.o tmio_core.o obj-$(CONFIG_MFD_WM8400) += wm8400-core.o wm831x-objs := wm831x-core.o wm831x-irq.o wm831x-otp.o diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index e22128c3e9a8..95c1e6bd1729 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -80,6 +80,7 @@ struct asic3 { u16 irq_bothedge[4]; struct gpio_chip gpio; struct device *dev; + void __iomem *tmio_cnf; struct asic3_clk clocks[ARRAY_SIZE(asic3_clk_init)]; }; @@ -685,8 +686,24 @@ static struct mfd_cell asic3_cell_ds1wm = { .resources = ds1wm_resources, }; +static void asic3_mmc_pwr(struct platform_device *pdev, int state) +{ + struct asic3 *asic = dev_get_drvdata(pdev->dev.parent); + + tmio_core_mmc_pwr(asic->tmio_cnf, 1 - asic->bus_shift, state); +} + +static void asic3_mmc_clk_div(struct platform_device *pdev, int state) +{ + struct asic3 *asic = dev_get_drvdata(pdev->dev.parent); + + tmio_core_mmc_clk_div(asic->tmio_cnf, 1 - asic->bus_shift, state); +} + static struct tmio_mmc_data asic3_mmc_data = { - .hclk = 24576000, + .hclk = 24576000, + .set_pwr = asic3_mmc_pwr, + .set_clk_div = asic3_mmc_clk_div, }; static struct resource asic3_mmc_resources[] = { @@ -695,11 +712,6 @@ static struct resource asic3_mmc_resources[] = { .end = ASIC3_SD_CTRL_BASE + 0x3ff, .flags = IORESOURCE_MEM, }, - { - .start = ASIC3_SD_CONFIG_BASE, - .end = ASIC3_SD_CONFIG_BASE + 0x1ff, - .flags = IORESOURCE_MEM, - }, { .start = 0, .end = 0, @@ -743,6 +755,10 @@ static int asic3_mmc_enable(struct platform_device *pdev) asic3_set_register(asic, ASIC3_OFFSET(SDHWCTRL, SDCONF), ASIC3_SDHWCTRL_SDPWR, 1); + /* ASIC3_SD_CTRL_BASE assumes 32-bit addressing, TMIO is 16-bit */ + tmio_core_mmc_enable(asic->tmio_cnf, 1 - asic->bus_shift, + ASIC3_SD_CTRL_BASE >> 1); + return 0; } @@ -797,10 +813,15 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, asic3_cell_ds1wm.data_size = sizeof(asic3_cell_ds1wm); /* MMC */ + asic->tmio_cnf = ioremap((ASIC3_SD_CONFIG_BASE >> asic->bus_shift) + + mem_sdio->start, 0x400 >> asic->bus_shift); + if (!asic->tmio_cnf) { + ret = -ENOMEM; + dev_dbg(asic->dev, "Couldn't ioremap SD_CONFIG\n"); + goto out; + } asic3_mmc_resources[0].start >>= asic->bus_shift; asic3_mmc_resources[0].end >>= asic->bus_shift; - asic3_mmc_resources[1].start >>= asic->bus_shift; - asic3_mmc_resources[1].end >>= asic->bus_shift; 
asic3_cell_mmc.platform_data = &asic3_cell_mmc; asic3_cell_mmc.data_size = sizeof(asic3_cell_mmc); @@ -820,7 +841,10 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, static void asic3_mfd_remove(struct platform_device *pdev) { + struct asic3 *asic = platform_get_drvdata(pdev); + mfd_remove_devices(&pdev->dev); + iounmap(asic->tmio_cnf); } /* Core */ diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c index 0a255c1f1ce7..bcf4687d4af5 100644 --- a/drivers/mfd/t7l66xb.c +++ b/drivers/mfd/t7l66xb.c @@ -38,6 +38,19 @@ enum { T7L66XB_CELL_MMC, }; +static const struct resource t7l66xb_mmc_resources[] = { + { + .start = 0x800, + .end = 0x9ff, + .flags = IORESOURCE_MEM, + }, + { + .start = IRQ_T7L66XB_MMC, + .end = IRQ_T7L66XB_MMC, + .flags = IORESOURCE_IRQ, + }, +}; + #define SCR_REVID 0x08 /* b Revision ID */ #define SCR_IMR 0x42 /* b Interrupt Mask */ #define SCR_DEV_CTL 0xe0 /* b Device control */ @@ -83,6 +96,9 @@ static int t7l66xb_mmc_enable(struct platform_device *mmc) spin_unlock_irqrestore(&t7l66xb->lock, flags); + tmio_core_mmc_enable(t7l66xb->scr + 0x200, 0, + t7l66xb_mmc_resources[0].start & 0xfffe); + return 0; } @@ -106,28 +122,28 @@ static int t7l66xb_mmc_disable(struct platform_device *mmc) return 0; } +static void t7l66xb_mmc_pwr(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct t7l66xb *t7l66xb = platform_get_drvdata(dev); + + tmio_core_mmc_pwr(t7l66xb->scr + 0x200, 0, state); +} + +static void t7l66xb_mmc_clk_div(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct t7l66xb *t7l66xb = platform_get_drvdata(dev); + + tmio_core_mmc_clk_div(t7l66xb->scr + 0x200, 0, state); +} + /*--------------------------------------------------------------------------*/ static struct tmio_mmc_data t7166xb_mmc_data = { .hclk = 24000000, -}; - -static const struct resource t7l66xb_mmc_resources[] = { - { - .start = 0x800, - .end = 0x9ff, - .flags = IORESOURCE_MEM, - }, - { - .start = 0x200, - .end = 0x2ff, - .flags = IORESOURCE_MEM, - }, - { - .start = IRQ_T7L66XB_MMC, - .end = IRQ_T7L66XB_MMC, - .flags = IORESOURCE_IRQ, - }, + .set_pwr = t7l66xb_mmc_pwr, + .set_clk_div = t7l66xb_mmc_clk_div, }; static const struct resource t7l66xb_nand_resources[] = { @@ -282,6 +298,9 @@ static int t7l66xb_resume(struct platform_device *dev) if (pdata && pdata->resume) pdata->resume(dev); + tmio_core_mmc_enable(t7l66xb->scr + 0x200, 0, + t7l66xb_mmc_resources[0].start & 0xfffe); + return 0; } #else diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c index 3280ab33f88a..5c7f04343d5c 100644 --- a/drivers/mfd/tc6387xb.c +++ b/drivers/mfd/tc6387xb.c @@ -22,28 +22,52 @@ enum { TC6387XB_CELL_MMC, }; +struct tc6387xb { + void __iomem *scr; + struct clk *clk32k; + struct resource rscr; +}; + +static struct resource tc6387xb_mmc_resources[] = { + { + .start = 0x800, + .end = 0x9ff, + .flags = IORESOURCE_MEM, + }, + { + .start = 0, + .end = 0, + .flags = IORESOURCE_IRQ, + }, +}; + +/*--------------------------------------------------------------------------*/ + #ifdef CONFIG_PM static int tc6387xb_suspend(struct platform_device *dev, pm_message_t state) { - struct clk *clk32k = platform_get_drvdata(dev); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); struct tc6387xb_platform_data *pdata = dev->dev.platform_data; if (pdata && pdata->suspend) pdata->suspend(dev); - clk_disable(clk32k); + clk_disable(tc6387xb->clk32k); return 0; } static int 
tc6387xb_resume(struct platform_device *dev) { - struct clk *clk32k = platform_get_drvdata(dev); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); struct tc6387xb_platform_data *pdata = dev->dev.platform_data; - clk_enable(clk32k); + clk_enable(tc6387xb->clk32k); if (pdata && pdata->resume) pdata->resume(dev); + tmio_core_mmc_resume(tc6387xb->scr + 0x200, 0, + tc6387xb_mmc_resources[0].start & 0xfffe); + return 0; } #else @@ -53,12 +77,32 @@ static int tc6387xb_resume(struct platform_device *dev) /*--------------------------------------------------------------------------*/ +static void tc6387xb_mmc_pwr(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); + + tmio_core_mmc_pwr(tc6387xb->scr + 0x200, 0, state); +} + +static void tc6387xb_mmc_clk_div(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); + + tmio_core_mmc_clk_div(tc6387xb->scr + 0x200, 0, state); +} + + static int tc6387xb_mmc_enable(struct platform_device *mmc) { struct platform_device *dev = to_platform_device(mmc->dev.parent); - struct clk *clk32k = platform_get_drvdata(dev); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); - clk_enable(clk32k); + clk_enable(tc6387xb->clk32k); + + tmio_core_mmc_enable(tc6387xb->scr + 0x200, 0, + tc6387xb_mmc_resources[0].start & 0xfffe); return 0; } @@ -66,36 +110,20 @@ static int tc6387xb_mmc_enable(struct platform_device *mmc) static int tc6387xb_mmc_disable(struct platform_device *mmc) { struct platform_device *dev = to_platform_device(mmc->dev.parent); - struct clk *clk32k = platform_get_drvdata(dev); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); - clk_disable(clk32k); + clk_disable(tc6387xb->clk32k); return 0; } -/*--------------------------------------------------------------------------*/ - static struct tmio_mmc_data tc6387xb_mmc_data = { .hclk = 24000000, + .set_pwr = tc6387xb_mmc_pwr, + .set_clk_div = tc6387xb_mmc_clk_div, }; -static struct resource tc6387xb_mmc_resources[] = { - { - .start = 0x800, - .end = 0x9ff, - .flags = IORESOURCE_MEM, - }, - { - .start = 0x200, - .end = 0x2ff, - .flags = IORESOURCE_MEM, - }, - { - .start = 0, - .end = 0, - .flags = IORESOURCE_IRQ, - }, -}; +/*--------------------------------------------------------------------------*/ static struct mfd_cell tc6387xb_cells[] = { [TC6387XB_CELL_MMC] = { @@ -111,8 +139,9 @@ static struct mfd_cell tc6387xb_cells[] = { static int tc6387xb_probe(struct platform_device *dev) { struct tc6387xb_platform_data *pdata = dev->dev.platform_data; - struct resource *iomem; + struct resource *iomem, *rscr; struct clk *clk32k; + struct tc6387xb *tc6387xb; int irq, ret; iomem = platform_get_resource(dev, IORESOURCE_MEM, 0); @@ -120,18 +149,40 @@ static int tc6387xb_probe(struct platform_device *dev) return -EINVAL; } + tc6387xb = kzalloc(sizeof *tc6387xb, GFP_KERNEL); + if (!tc6387xb) + return -ENOMEM; + ret = platform_get_irq(dev, 0); if (ret >= 0) irq = ret; else - goto err_resource; + goto err_no_irq; clk32k = clk_get(&dev->dev, "CLK_CK32K"); if (IS_ERR(clk32k)) { ret = PTR_ERR(clk32k); + goto err_no_clk; + } + + rscr = &tc6387xb->rscr; + rscr->name = "tc6387xb-core"; + rscr->start = iomem->start; + rscr->end = iomem->start + 0xff; + rscr->flags = IORESOURCE_MEM; + + ret = request_resource(iomem, rscr); + if (ret) goto err_resource; + + tc6387xb->scr = 
ioremap(rscr->start, rscr->end - rscr->start + 1); + if (!tc6387xb->scr) { + ret = -ENOMEM; + goto err_ioremap; } - platform_set_drvdata(dev, clk32k); + + tc6387xb->clk32k = clk32k; + platform_set_drvdata(dev, tc6387xb); if (pdata && pdata->enable) pdata->enable(dev); @@ -149,8 +200,13 @@ static int tc6387xb_probe(struct platform_device *dev) if (!ret) return 0; - clk_put(clk32k); +err_ioremap: + release_resource(&tc6387xb->rscr); err_resource: + clk_put(clk32k); +err_no_clk: +err_no_irq: + kfree(tc6387xb); return ret; } @@ -195,3 +251,4 @@ MODULE_DESCRIPTION("Toshiba TC6387XB core driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Ian Molton"); MODULE_ALIAS("platform:tc6387xb"); + diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 1429a7341a9a..4bc5a08a2b09 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -136,10 +136,6 @@ static int tc6393xb_nand_enable(struct platform_device *nand) return 0; } -static struct tmio_mmc_data tc6393xb_mmc_data = { - .hclk = 24000000, -}; - static struct resource __devinitdata tc6393xb_nand_resources[] = { { .start = 0x1000, @@ -164,11 +160,6 @@ static struct resource __devinitdata tc6393xb_mmc_resources[] = { .end = 0x9ff, .flags = IORESOURCE_MEM, }, - { - .start = 0x200, - .end = 0x2ff, - .flags = IORESOURCE_MEM, - }, { .start = IRQ_TC6393_MMC, .end = IRQ_TC6393_MMC, @@ -346,6 +337,50 @@ int tc6393xb_lcd_mode(struct platform_device *fb, } EXPORT_SYMBOL(tc6393xb_lcd_mode); +static int tc6393xb_mmc_enable(struct platform_device *mmc) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6393xb *tc6393xb = platform_get_drvdata(dev); + + tmio_core_mmc_enable(tc6393xb->scr + 0x200, 0, + tc6393xb_mmc_resources[0].start & 0xfffe); + + return 0; +} + +static int tc6393xb_mmc_resume(struct platform_device *mmc) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6393xb *tc6393xb = platform_get_drvdata(dev); + + tmio_core_mmc_resume(tc6393xb->scr + 0x200, 0, + tc6393xb_mmc_resources[0].start & 0xfffe); + + return 0; +} + +static void tc6393xb_mmc_pwr(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6393xb *tc6393xb = platform_get_drvdata(dev); + + tmio_core_mmc_pwr(tc6393xb->scr + 0x200, 0, state); +} + +static void tc6393xb_mmc_clk_div(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6393xb *tc6393xb = platform_get_drvdata(dev); + + tmio_core_mmc_clk_div(tc6393xb->scr + 0x200, 0, state); +} + +static struct tmio_mmc_data tc6393xb_mmc_data = { + .hclk = 24000000, + .set_pwr = tc6393xb_mmc_pwr, + .set_clk_div = tc6393xb_mmc_clk_div, +}; + static struct mfd_cell __devinitdata tc6393xb_cells[] = { [TC6393XB_CELL_NAND] = { .name = "tmio-nand", @@ -355,6 +390,8 @@ static struct mfd_cell __devinitdata tc6393xb_cells[] = { }, [TC6393XB_CELL_MMC] = { .name = "tmio-mmc", + .enable = tc6393xb_mmc_enable, + .resume = tc6393xb_mmc_resume, .driver_data = &tc6393xb_mmc_data, .num_resources = ARRAY_SIZE(tc6393xb_mmc_resources), .resources = tc6393xb_mmc_resources, @@ -836,3 +873,4 @@ MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Ian Molton, Dmitry Baryshkov and Dirk Opfer"); MODULE_DESCRIPTION("tc6393xb Toshiba Mobile IO Controller"); MODULE_ALIAS("platform:tc6393xb"); + diff --git a/drivers/mfd/tmio_core.c b/drivers/mfd/tmio_core.c new file mode 100644 index 000000000000..eddc19ae464b --- /dev/null +++ b/drivers/mfd/tmio_core.c @@ -0,0 +1,52 @@ 
+/* + * Copyright(c) 2009 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base) +{ + /* Enable the MMC/SD Control registers */ + sd_config_write16(cnf, shift, CNF_CMD, SDCREN); + sd_config_write32(cnf, shift, CNF_CTL_BASE, base & 0xfffe); + + /* Disable SD power during suspend */ + sd_config_write8(cnf, shift, CNF_PWR_CTL_3, 0x01); + + /* The below is required but why? FIXME */ + sd_config_write8(cnf, shift, CNF_STOP_CLK_CTL, 0x1f); + + /* Power down SD bus */ + sd_config_write8(cnf, shift, CNF_PWR_CTL_2, 0x00); + + return 0; +} +EXPORT_SYMBOL(tmio_core_mmc_enable); + +int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base) +{ + + /* Enable the MMC/SD Control registers */ + sd_config_write16(cnf, shift, CNF_CMD, SDCREN); + sd_config_write32(cnf, shift, CNF_CTL_BASE, base & 0xfffe); + + return 0; +} +EXPORT_SYMBOL(tmio_core_mmc_resume); + +void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state) +{ + sd_config_write8(cnf, shift, CNF_PWR_CTL_2, state ? 0x02 : 0x00); +} +EXPORT_SYMBOL(tmio_core_mmc_pwr); + +void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state) +{ + sd_config_write8(cnf, shift, CNF_SD_CLK_MODE, state ? 1 : 0); +} +EXPORT_SYMBOL(tmio_core_mmc_clk_div); + diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index 7cccc8523747..e22c3fa3516a 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -46,7 +46,9 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock) clk |= 0x100; } - sd_config_write8(host, CNF_SD_CLK_MODE, clk >> 22); + if (host->set_clk_div) + host->set_clk_div(host->pdev, (clk>>22) & 1); + sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & 0x1ff); } @@ -427,12 +429,13 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* Power sequence - OFF -> ON -> UP */ switch (ios->power_mode) { case MMC_POWER_OFF: /* power down SD bus */ - sd_config_write8(host, CNF_PWR_CTL_2, 0x00); + if (host->set_pwr) + host->set_pwr(host->pdev, 0); tmio_mmc_clk_stop(host); break; case MMC_POWER_ON: /* power up SD bus */ - - sd_config_write8(host, CNF_PWR_CTL_2, 0x02); + if (host->set_pwr) + host->set_pwr(host->pdev, 1); break; case MMC_POWER_UP: /* start bus clock */ tmio_mmc_clk_start(host); @@ -485,21 +488,15 @@ static int tmio_mmc_resume(struct platform_device *dev) { struct mfd_cell *cell = (struct mfd_cell *)dev->dev.platform_data; struct mmc_host *mmc = platform_get_drvdata(dev); - struct tmio_mmc_host *host = mmc_priv(mmc); int ret = 0; /* Tell the MFD core we are ready to be enabled */ - if (cell->enable) { - ret = cell->enable(dev); + if (cell->resume) { + ret = cell->resume(dev); if (ret) goto out; } - /* Enable the MMC/SD Control registers */ - sd_config_write16(host, CNF_CMD, SDCREN); - sd_config_write32(host, CNF_CTL_BASE, - (dev->resource[0].start >> host->bus_shift) & 0xfffe); - mmc_resume_host(mmc); out: @@ -514,17 +511,16 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) { struct mfd_cell *cell = (struct mfd_cell *)dev->dev.platform_data; struct tmio_mmc_data *pdata; - struct resource *res_ctl, *res_cnf; + struct resource *res_ctl; struct tmio_mmc_host *host; struct mmc_host *mmc; int ret = -EINVAL; - if (dev->num_resources != 3) + if (dev->num_resources != 2) goto out; res_ctl = 
platform_get_resource(dev, IORESOURCE_MEM, 0); - res_cnf = platform_get_resource(dev, IORESOURCE_MEM, 1); - if (!res_ctl || !res_cnf) + if (!res_ctl) goto out; pdata = cell->driver_data; @@ -539,8 +535,12 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) host = mmc_priv(mmc); host->mmc = mmc; + host->pdev = dev; platform_set_drvdata(dev, mmc); + host->set_pwr = pdata->set_pwr; + host->set_clk_div = pdata->set_clk_div; + /* SD control register space size is 0x200, 0x400 for bus_shift=1 */ host->bus_shift = resource_size(res_ctl) >> 10; @@ -548,10 +548,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) if (!host->ctl) goto host_free; - host->cnf = ioremap(res_cnf->start, resource_size(res_cnf)); - if (!host->cnf) - goto unmap_ctl; - mmc->ops = &tmio_mmc_ops; mmc->caps = MMC_CAP_4_BIT_DATA; mmc->f_max = pdata->hclk; @@ -562,23 +558,9 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) if (cell->enable) { ret = cell->enable(dev); if (ret) - goto unmap_cnf; + goto unmap_ctl; } - /* Enable the MMC/SD Control registers */ - sd_config_write16(host, CNF_CMD, SDCREN); - sd_config_write32(host, CNF_CTL_BASE, - (dev->resource[0].start >> host->bus_shift) & 0xfffe); - - /* Disable SD power during suspend */ - sd_config_write8(host, CNF_PWR_CTL_3, 0x01); - - /* The below is required but why? FIXME */ - sd_config_write8(host, CNF_STOP_CLK_CTL, 0x1f); - - /* Power down SD bus*/ - sd_config_write8(host, CNF_PWR_CTL_2, 0x00); - tmio_mmc_clk_stop(host); reset(host); @@ -586,14 +568,14 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) if (ret >= 0) host->irq = ret; else - goto unmap_cnf; + goto unmap_ctl; disable_mmc_irqs(host, TMIO_MASK_ALL); ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED | IRQF_TRIGGER_FALLING, dev_name(&dev->dev), host); if (ret) - goto unmap_cnf; + goto unmap_ctl; mmc_add_host(mmc); @@ -605,8 +587,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) return 0; -unmap_cnf: - iounmap(host->cnf); unmap_ctl: iounmap(host->ctl); host_free: @@ -626,7 +606,6 @@ static int __devexit tmio_mmc_remove(struct platform_device *dev) mmc_remove_host(mmc); free_irq(host->irq, host); iounmap(host->ctl); - iounmap(host->cnf); mmc_free_host(mmc); } diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 9fa998594974..692dc23363b9 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -11,26 +11,6 @@ #include -#define CNF_CMD 0x04 -#define CNF_CTL_BASE 0x10 -#define CNF_INT_PIN 0x3d -#define CNF_STOP_CLK_CTL 0x40 -#define CNF_GCLK_CTL 0x41 -#define CNF_SD_CLK_MODE 0x42 -#define CNF_PIN_STATUS 0x44 -#define CNF_PWR_CTL_1 0x48 -#define CNF_PWR_CTL_2 0x49 -#define CNF_PWR_CTL_3 0x4a -#define CNF_CARD_DETECT_MODE 0x4c -#define CNF_SD_SLOT 0x50 -#define CNF_EXT_GCLK_CTL_1 0xf0 -#define CNF_EXT_GCLK_CTL_2 0xf1 -#define CNF_EXT_GCLK_CTL_3 0xf9 -#define CNF_SD_LED_EN_1 0xfa -#define CNF_SD_LED_EN_2 0xfe - -#define SDCREN 0x2 /* Enable access to MMC CTL regs. 
(flag in COMMAND_REG)*/ - #define CTL_SD_CMD 0x00 #define CTL_ARG_REG 0x04 #define CTL_STOP_INTERNAL_ACTION 0x08 @@ -110,7 +90,6 @@ struct tmio_mmc_host { - void __iomem *cnf; void __iomem *ctl; unsigned long bus_shift; struct mmc_command *cmd; @@ -119,10 +98,16 @@ struct tmio_mmc_host { struct mmc_host *mmc; int irq; + /* Callbacks for clock / power control */ + void (*set_pwr)(struct platform_device *host, int state); + void (*set_clk_div)(struct platform_device *host, int state); + /* pio related stuff */ struct scatterlist *sg_ptr; unsigned int sg_len; unsigned int sg_off; + + struct platform_device *pdev; }; #include @@ -163,25 +148,6 @@ static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); } -static inline void sd_config_write8(struct tmio_mmc_host *host, int addr, - u8 val) -{ - writeb(val, host->cnf + (addr << host->bus_shift)); -} - -static inline void sd_config_write16(struct tmio_mmc_host *host, int addr, - u16 val) -{ - writew(val, host->cnf + (addr << host->bus_shift)); -} - -static inline void sd_config_write32(struct tmio_mmc_host *host, int addr, - u32 val) -{ - writew(val, host->cnf + (addr << host->bus_shift)); - writew(val >> 16, host->cnf + ((addr + 2) << host->bus_shift)); -} - #include #include diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 6b9c5d06690c..9cb1834deffa 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -2,6 +2,8 @@ #define MFD_TMIO_H #include +#include +#include #define tmio_ioread8(addr) readb(addr) #define tmio_ioread16(addr) readw(addr) @@ -18,11 +20,48 @@ writew((val) >> 16, (addr) + 2); \ } while (0) +#define CNF_CMD 0x04 +#define CNF_CTL_BASE 0x10 +#define CNF_INT_PIN 0x3d +#define CNF_STOP_CLK_CTL 0x40 +#define CNF_GCLK_CTL 0x41 +#define CNF_SD_CLK_MODE 0x42 +#define CNF_PIN_STATUS 0x44 +#define CNF_PWR_CTL_1 0x48 +#define CNF_PWR_CTL_2 0x49 +#define CNF_PWR_CTL_3 0x4a +#define CNF_CARD_DETECT_MODE 0x4c +#define CNF_SD_SLOT 0x50 +#define CNF_EXT_GCLK_CTL_1 0xf0 +#define CNF_EXT_GCLK_CTL_2 0xf1 +#define CNF_EXT_GCLK_CTL_3 0xf9 +#define CNF_SD_LED_EN_1 0xfa +#define CNF_SD_LED_EN_2 0xfe + +#define SDCREN 0x2 /* Enable access to MMC CTL regs. (flag in COMMAND_REG)*/ + +#define sd_config_write8(base, shift, reg, val) \ + tmio_iowrite8((val), (base) + ((reg) << (shift))) +#define sd_config_write16(base, shift, reg, val) \ + tmio_iowrite16((val), (base) + ((reg) << (shift))) +#define sd_config_write32(base, shift, reg, val) \ + do { \ + tmio_iowrite16((val), (base) + ((reg) << (shift))); \ + tmio_iowrite16((val) >> 16, (base) + ((reg + 2) << (shift))); \ + } while (0) + +int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); +int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); +void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); +void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); + /* * data for the MMC controller */ struct tmio_mmc_data { const unsigned int hclk; + void (*set_pwr)(struct platform_device *host, int state); + void (*set_clk_div)(struct platform_device *host, int state); }; /* -- cgit v1.2.3 From ec51b7f538c440bfa5a4d538133c659071c02155 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Tue, 19 Jan 2010 00:27:58 -0800 Subject: Input: ad7879 - support auxiliary GPIOs via gpiolib Drop the simple fancy sysfs hooks for the aux GPIOs and expose these via the gpiolib interface so that other drivers can use them. 
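For reference, board code requests the new behaviour through the reworked platform data; a minimal sketch using only the fields this patch touches (values are examples, other touchscreen tuning fields omitted). A consumer can then claim the exported pin with gpio_request()/gpio_direction_output() like any other GPIO.

#include <linux/spi/ad7879.h>

static struct ad7879_platform_data board_ts_info = {
	.gpio_export	= true,	/* expose the AUX/VBAT/GPIO pin via gpiolib */
	.gpio_base	= -1,	/* negative: request dynamic GPIO numbering */
};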
Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ad7879.c | 197 ++++++++++++++++++++++++++----------- include/linux/spi/ad7879.h | 12 ++- 2 files changed, 149 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index c21e6d3a8844..794d070c6900 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -47,6 +47,7 @@ #include #include #include +#include #include @@ -132,7 +133,9 @@ struct ad7879 { struct input_dev *input; struct work_struct work; struct timer_list timer; - +#ifdef CONFIG_GPIOLIB + struct gpio_chip gc; +#endif struct mutex mutex; unsigned disabled:1; /* P: mutex */ @@ -150,11 +153,9 @@ struct ad7879 { u8 median; u16 x_plate_ohms; u16 pressure_max; - u16 gpio_init; u16 cmd_crtl1; u16 cmd_crtl2; u16 cmd_crtl3; - unsigned gpio:1; }; static int ad7879_read(bus_device *, u8); @@ -237,24 +238,6 @@ static irqreturn_t ad7879_irq(int irq, void *handle) static void ad7879_setup(struct ad7879 *ts) { - ts->cmd_crtl3 = AD7879_YPLUS_BIT | - AD7879_XPLUS_BIT | - AD7879_Z2_BIT | - AD7879_Z1_BIT | - AD7879_TEMPMASK_BIT | - AD7879_AUXVBATMASK_BIT | - AD7879_GPIOALERTMASK_BIT; - - ts->cmd_crtl2 = AD7879_PM(AD7879_PM_DYN) | AD7879_DFR | - AD7879_AVG(ts->averaging) | - AD7879_MFS(ts->median) | - AD7879_FCD(ts->first_conversion_delay) | - ts->gpio_init; - - ts->cmd_crtl1 = AD7879_MODE_INT | AD7879_MODE_SEQ1 | - AD7879_ACQ(ts->acquisition_time) | - AD7879_TMR(ts->pen_down_acc_interval); - ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2); ad7879_write(ts->bus, AD7879_REG_CTRL3, ts->cmd_crtl3); ad7879_write(ts->bus, AD7879_REG_CTRL1, ts->cmd_crtl1); @@ -324,48 +307,132 @@ static ssize_t ad7879_disable_store(struct device *dev, static DEVICE_ATTR(disable, 0664, ad7879_disable_show, ad7879_disable_store); -static ssize_t ad7879_gpio_show(struct device *dev, - struct device_attribute *attr, char *buf) +static struct attribute *ad7879_attributes[] = { + &dev_attr_disable.attr, + NULL +}; + +static const struct attribute_group ad7879_attr_group = { + .attrs = ad7879_attributes, +}; + +#ifdef CONFIG_GPIOLIB +static int ad7879_gpio_direction_input(struct gpio_chip *chip, + unsigned gpio) { - struct ad7879 *ts = dev_get_drvdata(dev); + struct ad7879 *ts = container_of(chip, struct ad7879, gc); + int err; - return sprintf(buf, "%u\n", ts->gpio); + mutex_lock(&ts->mutex); + ts->cmd_crtl2 |= AD7879_GPIO_EN | AD7879_GPIODIR | AD7879_GPIOPOL; + err = ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2); + mutex_unlock(&ts->mutex); + + return err; } -static ssize_t ad7879_gpio_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static int ad7879_gpio_direction_output(struct gpio_chip *chip, + unsigned gpio, int level) { - struct ad7879 *ts = dev_get_drvdata(dev); - unsigned long val; - int error; + struct ad7879 *ts = container_of(chip, struct ad7879, gc); + int err; - error = strict_strtoul(buf, 10, &val); - if (error) - return error; + mutex_lock(&ts->mutex); + ts->cmd_crtl2 &= ~AD7879_GPIODIR; + ts->cmd_crtl2 |= AD7879_GPIO_EN | AD7879_GPIOPOL; + if (level) + ts->cmd_crtl2 |= AD7879_GPIO_DATA; + else + ts->cmd_crtl2 &= ~AD7879_GPIO_DATA; + + err = ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2); + mutex_unlock(&ts->mutex); + + return err; +} + +static int ad7879_gpio_get_value(struct gpio_chip *chip, unsigned gpio) +{ + struct ad7879 *ts = container_of(chip, 
struct ad7879, gc); + u16 val; mutex_lock(&ts->mutex); - ts->gpio = !!val; - error = ad7879_write(ts->bus, AD7879_REG_CTRL2, - ts->gpio ? - ts->cmd_crtl2 & ~AD7879_GPIO_DATA : - ts->cmd_crtl2 | AD7879_GPIO_DATA); + val = ad7879_read(ts->bus, AD7879_REG_CTRL2); mutex_unlock(&ts->mutex); - return error ? : count; + return !!(val & AD7879_GPIO_DATA); } -static DEVICE_ATTR(gpio, 0664, ad7879_gpio_show, ad7879_gpio_store); +static void ad7879_gpio_set_value(struct gpio_chip *chip, + unsigned gpio, int value) +{ + struct ad7879 *ts = container_of(chip, struct ad7879, gc); -static struct attribute *ad7879_attributes[] = { - &dev_attr_disable.attr, - &dev_attr_gpio.attr, - NULL -}; + mutex_lock(&ts->mutex); + if (value) + ts->cmd_crtl2 |= AD7879_GPIO_DATA; + else + ts->cmd_crtl2 &= ~AD7879_GPIO_DATA; -static const struct attribute_group ad7879_attr_group = { - .attrs = ad7879_attributes, -}; + ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2); + mutex_unlock(&ts->mutex); +} + +static int __devinit ad7879_gpio_add(struct device *dev) +{ + struct ad7879 *ts = dev_get_drvdata(dev); + struct ad7879_platform_data *pdata = dev->platform_data; + int ret = 0; + + if (pdata->gpio_export) { + ts->gc.direction_input = ad7879_gpio_direction_input; + ts->gc.direction_output = ad7879_gpio_direction_output; + ts->gc.get = ad7879_gpio_get_value; + ts->gc.set = ad7879_gpio_set_value; + ts->gc.can_sleep = 1; + ts->gc.base = pdata->gpio_base; + ts->gc.ngpio = 1; + ts->gc.label = "AD7879-GPIO"; + ts->gc.owner = THIS_MODULE; + ts->gc.dev = dev; + + ret = gpiochip_add(&ts->gc); + if (ret) + dev_err(dev, "failed to register gpio %d\n", + ts->gc.base); + } + + return ret; +} + +/* + * We mark ad7879_gpio_remove inline so there is a chance the code + * gets discarded when not needed. We can't do __devinit/__devexit + * markup since it is used in both probe and remove methods. + */ +static inline void ad7879_gpio_remove(struct device *dev) +{ + struct ad7879 *ts = dev_get_drvdata(dev); + struct ad7879_platform_data *pdata = dev->platform_data; + int ret; + + if (pdata->gpio_export) { + ret = gpiochip_remove(&ts->gc); + if (ret) + dev_err(dev, "failed to remove gpio %d\n", + ts->gc.base); + } +} +#else +static inline int ad7879_gpio_add(struct device *dev) +{ + return 0; +} + +static inline void ad7879_gpio_remove(struct device *dev) +{ +} +#endif static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts) { @@ -403,12 +470,6 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts) ts->pen_down_acc_interval = pdata->pen_down_acc_interval; ts->median = pdata->median; - if (pdata->gpio_output) - ts->gpio_init = AD7879_GPIO_EN | - (pdata->gpio_default ? 
0 : AD7879_GPIO_DATA); - else - ts->gpio_init = AD7879_GPIO_EN | AD7879_GPIODIR; - snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(&bus->dev)); input_dev->name = "AD7879 Touchscreen"; @@ -446,6 +507,23 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts) goto err_free_mem; } + ts->cmd_crtl3 = AD7879_YPLUS_BIT | + AD7879_XPLUS_BIT | + AD7879_Z2_BIT | + AD7879_Z1_BIT | + AD7879_TEMPMASK_BIT | + AD7879_AUXVBATMASK_BIT | + AD7879_GPIOALERTMASK_BIT; + + ts->cmd_crtl2 = AD7879_PM(AD7879_PM_DYN) | AD7879_DFR | + AD7879_AVG(ts->averaging) | + AD7879_MFS(ts->median) | + AD7879_FCD(ts->first_conversion_delay); + + ts->cmd_crtl1 = AD7879_MODE_INT | AD7879_MODE_SEQ1 | + AD7879_ACQ(ts->acquisition_time) | + AD7879_TMR(ts->pen_down_acc_interval); + ad7879_setup(ts); err = request_irq(bus->irq, ad7879_irq, @@ -460,15 +538,21 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts) if (err) goto err_free_irq; - err = input_register_device(input_dev); + err = ad7879_gpio_add(&bus->dev); if (err) goto err_remove_attr; + err = input_register_device(input_dev); + if (err) + goto err_remove_gpio; + dev_info(&bus->dev, "Rev.%d touchscreen, irq %d\n", revid >> 8, bus->irq); return 0; +err_remove_gpio: + ad7879_gpio_remove(&bus->dev); err_remove_attr: sysfs_remove_group(&bus->dev.kobj, &ad7879_attr_group); err_free_irq: @@ -481,6 +565,7 @@ err_free_mem: static int __devexit ad7879_destroy(bus_device *bus, struct ad7879 *ts) { + ad7879_gpio_remove(&bus->dev); ad7879_disable(ts); sysfs_remove_group(&ts->bus->dev.kobj, &ad7879_attr_group); free_irq(ts->bus->irq, ts); diff --git a/include/linux/spi/ad7879.h b/include/linux/spi/ad7879.h index 4231104c9afa..6334cee1a3be 100644 --- a/include/linux/spi/ad7879.h +++ b/include/linux/spi/ad7879.h @@ -28,8 +28,12 @@ struct ad7879_platform_data { * 1 = 4, 2 = 8, 3 = 16 (median > averaging) */ u8 median; - /* 1 = AUX/VBAT/GPIO set to GPIO Output */ - u8 gpio_output; - /* Initial GPIO pin state (valid if gpio_output = 1) */ - u8 gpio_default; + /* 1 = AUX/VBAT/GPIO export GPIO to gpiolib + * requires CONFIG_GPIOLIB + */ + bool gpio_export; + /* identifies the first GPIO number handled by this chip; + * or, if negative, requests dynamic ID allocation. + */ + s32 gpio_base; }; -- cgit v1.2.3 From 4f9c85a1b03bfa5c0a0d8488a3a7766f3c9fb756 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 18 Jan 2010 05:37:16 +0000 Subject: phylib: Move workqueue initialization to a proper place commit 541cd3ee00a4fe975b22fac6a3bc846bacef37f7 ("phylib: Fix deadlock on resume") caused TI DaVinci EMAC ethernet driver to oops upon resume: PM: resume of devices complete after 237.098 msecs Restarting tasks ... done. kernel BUG at kernel/workqueue.c:354! Unable to handle kernel NULL pointer dereference at virtual address 00000000 [...] Backtrace: [] (__bug+0x0/0x2c) from [] (queue_delayed_work_on+0x74/0xf8) [] (queue_delayed_work_on+0x0/0xf8) from [] (queue_delayed_work+0x2c/0x30) The oops pops up because TI DaVinci EMAC driver detaches PHY on suspend and attaches it back on resume. Attaching makes phylib call phy_start_machine() that initializes a workqueue. On the other hand, PHY's resume routine will call phy_start_machine() again, and that will cause the oops since we just destroyed the already scheduled workqueue. This patch fixes the issue by moving workqueue initialization to phy_device_create(). p.s. We don't see this oops with ucc_geth and gianfar drivers because they perform a fine-grained suspend, i.e. 
they just stop the PHYs without detaching. Reported-by: Sekhar Nori Signed-off-by: Anton Vorontsov Tested-by: Sekhar Nori Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 4 +--- drivers/net/phy/phy_device.c | 1 + include/linux/phy.h | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index b0e9f9c51721..0295097d6c44 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -410,7 +410,6 @@ EXPORT_SYMBOL(phy_start_aneg); static void phy_change(struct work_struct *work); -static void phy_state_machine(struct work_struct *work); /** * phy_start_machine - start PHY state machine tracking @@ -430,7 +429,6 @@ void phy_start_machine(struct phy_device *phydev, { phydev->adjust_state = handler; - INIT_DELAYED_WORK(&phydev->state_queue, phy_state_machine); schedule_delayed_work(&phydev->state_queue, HZ); } @@ -761,7 +759,7 @@ EXPORT_SYMBOL(phy_start); * phy_state_machine - Handle the state machine * @work: work_struct that describes the work to be done */ -static void phy_state_machine(struct work_struct *work) +void phy_state_machine(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct phy_device *phydev = diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8212b2b93422..adbc0fded130 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -177,6 +177,7 @@ struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id) dev->state = PHY_DOWN; mutex_init(&dev->lock); + INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine); return dev; } diff --git a/include/linux/phy.h b/include/linux/phy.h index 7968defd2fa7..6a7eb402165d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -485,6 +485,7 @@ void phy_driver_unregister(struct phy_driver *drv); int phy_driver_register(struct phy_driver *new_driver); void phy_prepare_link(struct phy_device *phydev, void (*adjust_link)(struct net_device *)); +void phy_state_machine(struct work_struct *work); void phy_start_machine(struct phy_device *phydev, void (*handler)(struct net_device *)); void phy_stop_machine(struct phy_device *phydev); -- cgit v1.2.3 From 04a723ea9c53ba608b0411aa36948bb57c51a08e Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Wed, 6 Jan 2010 10:16:51 -0800 Subject: USB: Fix duplicate sysfs problem after device reset. Borislav Petkov reports issues with duplicate sysfs endpoint files after a resume from a hibernate. It turns out that the code to support alternate settings under xHCI has issues when a device with a non-default alternate setting is reset during the hibernate: [ 427.681810] Restarting tasks ... [ 427.681995] hub 1-0:1.0: state 7 ports 6 chg 0004 evt 0000 [ 427.682019] usb usb3: usb resume [ 427.682030] ohci_hcd 0000:00:12.0: wakeup root hub [ 427.682191] hub 1-0:1.0: port 2, status 0501, change 0000, 480 Mb/s [ 427.682205] usb 1-2: usb wakeup-resume [ 427.682226] usb 1-2: finish reset-resume [ 427.682886] done. 
[ 427.734658] ehci_hcd 0000:00:12.2: port 2 high speed [ 427.734663] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT [ 427.746682] hub 3-0:1.0: hub_reset_resume [ 427.746693] hub 3-0:1.0: trying to enable port power on non-switchable hub [ 427.786715] usb 1-2: reset high speed USB device using ehci_hcd and address 2 [ 427.839653] ehci_hcd 0000:00:12.2: port 2 high speed [ 427.839666] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT [ 427.847717] ohci_hcd 0000:00:12.0: GetStatus roothub.portstatus [1] = 0x00010100 CSC PPS [ 427.915497] hub 1-2:1.0: remove_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 1 [ 427.915774] hub 1-2:1.0: remove_intf_ep_devs: bNumEndpoints: 1 [ 427.915934] hub 1-2:1.0: if: ffff88022f9e8800: endpoint devs removed. [ 427.916158] hub 1-2:1.0: create_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 0, ->unregistering: 0 [ 427.916434] hub 1-2:1.0: create_intf_ep_devs: bNumEndpoints: 1 [ 427.916609] ep_81: create, parent hub [ 427.916632] ------------[ cut here ]------------ [ 427.916644] WARNING: at fs/sysfs/dir.c:477 sysfs_add_one+0x82/0x96() [ 427.916649] Hardware name: System Product Name [ 427.916653] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:12.2/usb1/1-2/1-2:1.0/ep_81' [ 427.916658] Modules linked in: binfmt_misc kvm_amd kvm powernow_k8 cpufreq_ondemand cpufreq_powersave cpufreq_userspace freq_table cpufreq_conservative ipv6 vfat fat +8250_pnp 8250 pcspkr ohci_hcd serial_core k10temp edac_core [ 427.916694] Pid: 278, comm: khubd Not tainted 2.6.33-rc2-00187-g08d869a-dirty #13 [ 427.916699] Call Trace: The problem is caused by a mismatch between the USB core's view of the device state and the USB device and xHCI host's view of the device state. After the device reset and re-configuration, the device and the xHCI host think they are using alternate setting 0 of all interfaces. However, the USB core keeps track of the old state, which may include non-zero alternate settings. It uses intf->cur_altsetting to keep the endpoint sysfs files for the old state across the reset. The bandwidth allocation functions need to know what the xHCI host thinks the current alternate settings are, so original patch set intf->cur_altsetting to the alternate setting 0. This caused duplicate endpoint files to be created. The solution is to not set intf->cur_altsetting before calling usb_set_interface() in usb_reset_and_verify_device(). Instead, we add a new flag to struct usb_interface to tell usb_hcd_alloc_bandwidth() to use alternate setting 0 as the currently installed alternate setting. Signed-off-by: Sarah Sharp Tested-by: Borislav Petkov Cc: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 18 ++++++++++++++++++ drivers/usb/core/hub.c | 15 +++++---------- include/linux/usb.h | 1 + 3 files changed, 24 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 0495fa651225..80995ef0868c 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1684,6 +1684,24 @@ int usb_hcd_alloc_bandwidth(struct usb_device *udev, } } if (cur_alt && new_alt) { + struct usb_interface *iface = usb_ifnum_to_if(udev, + cur_alt->desc.bInterfaceNumber); + + if (iface->resetting_device) { + /* + * The USB core just reset the device, so the xHCI host + * and the device will think alt setting 0 is installed. + * However, the USB core will pass in the alternate + * setting installed before the reset as cur_alt. 
Dig + * out the alternate setting 0 structure, or the first + * alternate setting if a broken device doesn't have alt + * setting 0. + */ + cur_alt = usb_altnum_to_altsetting(iface, 0); + if (!cur_alt) + cur_alt = &iface->altsetting[0]; + } + /* Drop all the endpoints in the current alt setting */ for (i = 0; i < cur_alt->desc.bNumEndpoints; i++) { ret = hcd->driver->drop_endpoint(hcd, udev, diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b9f5fcd713e2..35cc8b9ba1f5 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3695,19 +3695,14 @@ static int usb_reset_and_verify_device(struct usb_device *udev) usb_enable_interface(udev, intf, true); ret = 0; } else { - /* We've just reset the device, so it will think alt - * setting 0 is installed. For usb_set_interface() to - * work properly, we need to set the current alternate - * interface setting to 0 (or the first alt setting, if - * the device doesn't have alt setting 0). + /* Let the bandwidth allocation function know that this + * device has been reset, and it will have to use + * alternate setting 0 as the current alternate setting. */ - intf->cur_altsetting = - usb_find_alt_setting(config, i, 0); - if (!intf->cur_altsetting) - intf->cur_altsetting = - &config->intf_cache[i]->altsetting[0]; + intf->resetting_device = 1; ret = usb_set_interface(udev, desc->bInterfaceNumber, desc->bAlternateSetting); + intf->resetting_device = 0; } if (ret < 0) { dev_err(&udev->dev, "failed to restore interface %d " diff --git a/include/linux/usb.h b/include/linux/usb.h index e101a2d04d75..d7ace1b80f09 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -192,6 +192,7 @@ struct usb_interface { unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ unsigned needs_binding:1; /* needs delayed unbind/rebind */ unsigned reset_running:1; + unsigned resetting_device:1; /* true: bandwidth alloc after reset */ struct device dev; /* interface specific device info */ struct device *usb_dev; -- cgit v1.2.3 From 50b926e439620c469565e8be0f28be78f5fca1ce Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 4 Jan 2010 14:44:56 +0100 Subject: sched: Fix vmark regression on big machines SD_PREFER_SIBLING is set at the CPU domain level if power saving isn't enabled, leading to many cache misses on large machines as we traverse looking for an idle shared cache to wake to. Change the enabler of select_idle_sibling() to SD_SHARE_PKG_RESOURCES, and enable same at the sibling domain level. 
Reported-by: Lin Ming Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1262612696.15495.15.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- include/linux/topology.h | 2 +- kernel/sched_fair.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/topology.h b/include/linux/topology.h index 57e63579bfdd..5b81156780b1 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -99,7 +99,7 @@ int arch_update_cpu_topology(void); | 1*SD_WAKE_AFFINE \ | 1*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ - | 0*SD_SHARE_PKG_RESOURCES \ + | 1*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ | 0*SD_PREFER_SIBLING \ , \ diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 42ac3c9f66f6..8fe7ee81c552 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1508,7 +1508,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag * If there's an idle sibling in this domain, make that * the wake_affine target instead of the current cpu. */ - if (tmp->flags & SD_PREFER_SIBLING) + if (tmp->flags & SD_SHARE_PKG_RESOURCES) target = select_idle_sibling(p, tmp, target); if (target >= 0) { -- cgit v1.2.3 From 92b6759857ea3ad19bc6871044e373f6251841d3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Jan 2010 14:02:16 +0100 Subject: perf: Change the is_software_event() definition The is_software_event() definition always confuses me because its an exclusive expression, make it an inclusive one. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c66b34f75eea..8fa71874113f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -814,9 +814,14 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - return (event->attr.type != PERF_TYPE_RAW) && - (event->attr.type != PERF_TYPE_HARDWARE) && - (event->attr.type != PERF_TYPE_HW_CACHE); + switch (event->attr.type) { + case PERF_TYPE_SOFTWARE: + case PERF_TYPE_TRACEPOINT: + /* for now the breakpoint stuff also works as software event */ + case PERF_TYPE_BREAKPOINT: + return 1; + } + return 0; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; -- cgit v1.2.3 From e071041be037eca208b62b84469a06bdfc692bea Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Jan 2010 13:37:10 +0000 Subject: netns xfrm: fix "ip xfrm state|policy count" misreport "ip xfrm state|policy count" report SA/SP count from init_net, not from netns of caller process. Signed-off-by: Alexey Dobriyan Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 4 ++-- net/xfrm/xfrm_policy.c | 16 ++++++++-------- net/xfrm/xfrm_state.c | 6 +++--- net/xfrm/xfrm_user.c | 14 ++++++++------ 4 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6d85861ab990..60c27706e7b9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1367,8 +1367,8 @@ struct xfrmk_spdinfo { extern struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info); -extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si); -extern void xfrm_spd_getinfo(struct xfrmk_spdinfo *si); +extern void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); +extern void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); extern int xfrm_replay_check(struct xfrm_state *x, struct sk_buff *skb, __be32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4725a549ad4d..d2c8cb57ee4c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -469,16 +469,16 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) return 0; } -void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) +void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); - si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN]; - si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT]; - si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD]; - si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; - si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; - si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; - si->spdhcnt = init_net.xfrm.policy_idx_hmask; + si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; read_unlock_bh(&xfrm_policy_lock); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d847f1a52b44..b36cc344474b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -641,11 +641,11 @@ out: } EXPORT_SYMBOL(xfrm_state_flush); -void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) +void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { spin_lock_bh(&xfrm_state_lock); - si->sadcnt = init_net.xfrm.state_num; - si->sadhcnt = init_net.xfrm.state_hmask; + si->sadcnt = net->xfrm.state_num; + si->sadhcnt = net->xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&xfrm_state_lock); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 1ada6186933c..d5a712976004 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -781,7 +781,8 @@ static inline size_t xfrm_spdinfo_msgsize(void) + nla_total_size(sizeof(struct xfrmu_spdhinfo)); } -static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +static int build_spdinfo(struct sk_buff *skb, struct net *net, + u32 pid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; @@ -795,7 +796,7 @@ static int 
build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) f = nlmsg_data(nlh); *f = flags; - xfrm_spd_getinfo(&si); + xfrm_spd_getinfo(net, &si); spc.incnt = si.incnt; spc.outcnt = si.outcnt; spc.fwdcnt = si.fwdcnt; @@ -828,7 +829,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, spid, seq, *flags) < 0) + if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0) BUG(); return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); @@ -841,7 +842,8 @@ static inline size_t xfrm_sadinfo_msgsize(void) + nla_total_size(4); /* XFRMA_SAD_CNT */ } -static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +static int build_sadinfo(struct sk_buff *skb, struct net *net, + u32 pid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; @@ -854,7 +856,7 @@ static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) f = nlmsg_data(nlh); *f = flags; - xfrm_sad_getinfo(&si); + xfrm_sad_getinfo(net, &si); sh.sadhmcnt = si.sadhmcnt; sh.sadhcnt = si.sadhcnt; @@ -882,7 +884,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, spid, seq, *flags) < 0) + if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0) BUG(); return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); -- cgit v1.2.3 From d7c7544c3d5f59033d1bf3236bc7b289f5f26b75 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 24 Jan 2010 22:47:53 -0800 Subject: netns xfrm: deal with dst entries in netns GC is non-existent in netns, so after you hit GC threshold, no new dst entries will be created until someone triggers cleanup in init_net. Make xfrm4_dst_ops and xfrm6_dst_ops per-netns. This is not done in a generic way, because it woule waste (AF_MAX - 2) * sizeof(struct dst_ops) bytes per-netns. Reorder GC threshold initialization so it'd be done before registering XFRM policies. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/net/netns/xfrm.h | 6 +++++ net/ipv4/xfrm4_policy.c | 14 +++++++----- net/ipv6/xfrm6_policy.c | 25 +++++++++++--------- net/xfrm/xfrm_policy.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 84 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 56f8e5585df7..74f119a2829a 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -5,6 +5,7 @@ #include #include #include +#include struct ctl_table_header; @@ -42,6 +43,11 @@ struct netns_xfrm { unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; + struct dst_ops xfrm4_dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct dst_ops xfrm6_dst_ops; +#endif + struct sock *nlsk; struct sock *nlsk_stash; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8c08a28d8f83..67107d63c1cd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -15,7 +15,6 @@ #include #include -static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, @@ -190,8 +189,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm4_garbage_collect(struct dst_ops *ops) { - xfrm4_policy_afinfo.garbage_collect(&init_net); - return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); + struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); + + xfrm4_policy_afinfo.garbage_collect(net); + return (atomic_read(&ops->entries) > ops->gc_thresh * 2); } static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -268,7 +269,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { static struct ctl_table xfrm4_policy_table[] = { { .procname = "xfrm4_gc_thresh", - .data = &xfrm4_dst_ops.gc_thresh, + .data = &init_net.xfrm.xfrm4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -295,8 +296,6 @@ static void __exit xfrm4_policy_fini(void) void __init xfrm4_init(int rt_max_size) { - xfrm4_state_init(); - xfrm4_policy_init(); /* * Select a default value for the gc_thresh based on the main route * table hash size. 
It seems to me the worst case scenario is when @@ -308,6 +307,9 @@ void __init xfrm4_init(int rt_max_size) * and start cleaning when were 1/2 full */ xfrm4_dst_ops.gc_thresh = rt_max_size/2; + + xfrm4_state_init(); + xfrm4_policy_init(); #ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, xfrm4_policy_table); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 7254e3f899a7..dbdc696f5fc5 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -24,7 +24,6 @@ #include #endif -static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, @@ -224,8 +223,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm6_garbage_collect(struct dst_ops *ops) { - xfrm6_policy_afinfo.garbage_collect(&init_net); - return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); + struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); + + xfrm6_policy_afinfo.garbage_collect(net); + return (atomic_read(&ops->entries) > ops->gc_thresh * 2); } static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -310,7 +311,7 @@ static void xfrm6_policy_fini(void) static struct ctl_table xfrm6_policy_table[] = { { .procname = "xfrm6_gc_thresh", - .data = &xfrm6_dst_ops.gc_thresh, + .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -326,13 +327,6 @@ int __init xfrm6_init(void) int ret; unsigned int gc_thresh; - ret = xfrm6_policy_init(); - if (ret) - goto out; - - ret = xfrm6_state_init(); - if (ret) - goto out_policy; /* * We need a good default value for the xfrm6 gc threshold. * In ipv4 we set it to the route hash table size * 8, which @@ -346,6 +340,15 @@ int __init xfrm6_init(void) */ gc_thresh = FIB6_TABLE_HASHSZ * 8; xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 
1024 : gc_thresh; + + ret = xfrm6_policy_init(); + if (ret) + goto out; + + ret = xfrm6_state_init(); + if (ret) + goto out_policy; + #ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path, xfrm6_policy_table); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d2c8cb57ee4c..0ecb16a9a883 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1309,15 +1309,28 @@ static inline int xfrm_get_tos(struct flowi *fl, int family) return tos; } -static inline struct xfrm_dst *xfrm_alloc_dst(int family) +static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_ops *dst_ops; struct xfrm_dst *xdst; if (!afinfo) return ERR_PTR(-EINVAL); - xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS); + switch (family) { + case AF_INET: + dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); xfrm_policy_put_afinfo(afinfo); @@ -1366,6 +1379,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct flowi *fl, struct dst_entry *dst) { + struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; struct dst_entry *dst_prev = NULL; @@ -1389,7 +1403,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_hold(dst); for (; i < nx; i++) { - struct xfrm_dst *xdst = xfrm_alloc_dst(family); + struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); struct dst_entry *dst1 = &xdst->u.dst; err = PTR_ERR(xdst); @@ -2279,6 +2293,7 @@ EXPORT_SYMBOL(xfrm_bundle_ok); int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { + struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; @@ -2302,6 +2317,27 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) xfrm_policy_afinfo[afinfo->family] = afinfo; } write_unlock_bh(&xfrm_policy_afinfo_lock); + + rtnl_lock(); + for_each_net(net) { + struct dst_ops *xfrm_dst_ops; + + switch (afinfo->family) { + case AF_INET: + xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + *xfrm_dst_ops = *afinfo->dst_ops; + } + rtnl_unlock(); + return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2332,6 +2368,22 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); +static void __net_init xfrm_dst_ops_init(struct net *net) +{ + struct xfrm_policy_afinfo *afinfo; + + read_lock_bh(&xfrm_policy_afinfo_lock); + afinfo = xfrm_policy_afinfo[AF_INET]; + if (afinfo) + net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + afinfo = xfrm_policy_afinfo[AF_INET6]; + if (afinfo) + net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; +#endif + read_unlock_bh(&xfrm_policy_afinfo_lock); +} + static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { struct xfrm_policy_afinfo *afinfo; @@ -2494,6 +2546,7 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; + xfrm_dst_ops_init(net); rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; -- cgit v1.2.3 From cb289d6244a37cf932c571d6deb0daa8030f931b Mon Sep 17 
00:00:00 2001 From: Davide Libenzi Date: Wed, 13 Jan 2010 09:34:36 -0800 Subject: eventfd - allow atomic read and waitqueue remove KVM needs a wait to atomically remove themselves from the eventfd ->poll() wait queue head, in order to handle correctly their IRQfd deassign operation. This patch introduces such API, plus a way to read an eventfd from its context. Signed-off-by: Davide Libenzi Signed-off-by: Avi Kivity --- fs/eventfd.c | 89 ++++++++++++++++++++++++++++++++++++++++--------- include/linux/eventfd.h | 16 +++++++++ 2 files changed, 90 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/eventfd.c b/fs/eventfd.c index d26402ff06ea..7758cc382ef0 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait) return events; } -static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +{ + *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; + ctx->count -= *cnt; +} + +/** + * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. + * @ctx: [in] Pointer to eventfd context. + * @wait: [in] Wait queue to be removed. + * @cnt: [out] Pointer to the 64bit conter value. + * + * Returns zero if successful, or the following error codes: + * + * -EAGAIN : The operation would have blocked. + * + * This is used to atomically remove a wait queue entry from the eventfd wait + * queue head, and read/reset the counter value. + */ +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, + __u64 *cnt) +{ + unsigned long flags; + + spin_lock_irqsave(&ctx->wqh.lock, flags); + eventfd_ctx_do_read(ctx, cnt); + __remove_wait_queue(&ctx->wqh, wait); + if (*cnt != 0 && waitqueue_active(&ctx->wqh)) + wake_up_locked_poll(&ctx->wqh, POLLOUT); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return *cnt != 0 ? 0 : -EAGAIN; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); + +/** + * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. + * @ctx: [in] Pointer to eventfd context. + * @no_wait: [in] Different from zero if the operation should not block. + * @cnt: [out] Pointer to the 64bit conter value. + * + * Returns zero if successful, or the following error codes: + * + * -EAGAIN : The operation would have blocked but @no_wait was nonzero. + * -ERESTARTSYS : A signal interrupted the wait operation. + * + * If @no_wait is zero, the function might sleep until the eventfd internal + * counter becomes greater than zero. + */ +ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt) { - struct eventfd_ctx *ctx = file->private_data; ssize_t res; - __u64 ucnt = 0; DECLARE_WAITQUEUE(wait, current); - if (count < sizeof(ucnt)) - return -EINVAL; spin_lock_irq(&ctx->wqh.lock); + *cnt = 0; res = -EAGAIN; if (ctx->count > 0) - res = sizeof(ucnt); - else if (!(file->f_flags & O_NONBLOCK)) { + res = 0; + else if (!no_wait) { __add_wait_queue(&ctx->wqh, &wait); - for (res = 0;;) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (ctx->count > 0) { - res = sizeof(ucnt); + res = 0; break; } if (signal_pending(current)) { @@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (likely(res > 0)) { - ucnt = (ctx->flags & EFD_SEMAPHORE) ? 
1 : ctx->count; - ctx->count -= ucnt; + if (likely(res == 0)) { + eventfd_ctx_do_read(ctx, cnt); if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, POLLOUT); } spin_unlock_irq(&ctx->wqh.lock); - if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) - return -EFAULT; return res; } +EXPORT_SYMBOL_GPL(eventfd_ctx_read); + +static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct eventfd_ctx *ctx = file->private_data; + ssize_t res; + __u64 cnt; + + if (count < sizeof(cnt)) + return -EINVAL; + res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt); + if (res < 0) + return res; + + return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt); +} static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 94dd10366a78..91bb4f27238c 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -10,6 +10,7 @@ #include #include +#include /* * CAREFUL: Check include/asm-generic/fcntl.h when defining @@ -34,6 +35,9 @@ struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); int eventfd_signal(struct eventfd_ctx *ctx, int n); +ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt); +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, + __u64 *cnt); #else /* CONFIG_EVENTFD */ @@ -61,6 +65,18 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) } +static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, + __u64 *cnt) +{ + return -ENOSYS; +} + +static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, + wait_queue_t *wait, __u64 *cnt) +{ + return -ENOSYS; +} + #endif #endif /* _LINUX_EVENTFD_H */ -- cgit v1.2.3 From 6d3faf6f431bafb25f4b9926c50a7e5c267738c6 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 24 Jan 2010 14:48:00 +0100 Subject: firewire: cdev: add_descriptor documentation fix struct fw_cdev_add_descriptor.length is in quadlets, not in bytes. Also remove any doubts about the endianess of descriptor data. Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 1f716d9f714b..520ecf86cbb3 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -380,7 +380,7 @@ struct fw_cdev_initiate_bus_reset { * @immediate: If non-zero, immediate key to insert before pointer * @key: Upper 8 bits of root directory pointer * @data: Userspace pointer to contents of descriptor block - * @length: Length of descriptor block data, in bytes + * @length: Length of descriptor block data, in quadlets * @handle: Handle to the descriptor, written by the kernel * * Add a descriptor block and optionally a preceding immediate key to the local @@ -394,6 +394,8 @@ struct fw_cdev_initiate_bus_reset { * If not 0, the @immediate field specifies an immediate key which will be * inserted before the root directory pointer. * + * @immediate, @key, and @data array elements are CPU-endian quadlets. + * * If successful, the kernel adds the descriptor and writes back a handle to the * kernel-side object to be used for later removal of the descriptor block and * immediate key. 
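A hedged usage sketch of what the corrected units imply for a userspace caller (illustrative only; the descriptor contents, key value, and error handling are assumptions, not part of this patch): the block size is passed in quadlets and the quadlets themselves are filled in CPU byte order, via the FW_CDEV_IOC_ADD_DESCRIPTOR ioctl declared in the same header.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/firewire-cdev.h>

/* Illustrative sketch: add a three-quadlet descriptor block to the local
 * node's config ROM.  Note that .length counts quadlets, not bytes, and
 * the quadlets are given in CPU endianness. */
static int add_example_descriptor(int fw_fd, uint32_t *handle)
{
	uint32_t block[3] = { 0x0002cafe, 0x12345678, 0x9abcdef0 }; /* made-up data */
	struct fw_cdev_add_descriptor d;
	int err;

	memset(&d, 0, sizeof(d));
	d.immediate = 0;                  /* no immediate key in this sketch */
	d.key = 0xd1;                     /* upper 8 bits of the directory pointer */
	d.data = (uintptr_t)block;        /* userspace pointer to CPU-endian quadlets */
	d.length = sizeof(block) / 4;     /* 3 quadlets, not 12 bytes */

	err = ioctl(fw_fd, FW_CDEV_IOC_ADD_DESCRIPTOR, &d);
	if (err == 0)
		*handle = d.handle;       /* written back by the kernel on success */
	return err;
}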
-- cgit v1.2.3 From 0531b2aac59c2296570ac52bfc032ef2ace7d5e1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 27 Jan 2010 09:20:03 -0800 Subject: mm: add new 'read_cache_page_gfp()' helper function It's a simplified 'read_cache_page()' which takes a page allocation flag, so that different paths can control how aggressive the memory allocations are that populate a address space. In particular, the intel GPU object mapping code wants to be able to do a certain amount of own internal memory management by automatically shrinking the address space when memory starts getting tight. This allows it to dynamically use different memory allocation policies on a per-allocation basis, rather than depend on the (static) address space gfp policy. The actual new function is a one-liner, but re-organizing the helper functions to the point where you can do this with a single line of code is what most of the patch is all about. Tested-by: Chris Wilson Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 + mm/filemap.c | 100 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 70 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ed5d7501e181..3c62ed408492 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -253,6 +253,8 @@ extern struct page * read_cache_page_async(struct address_space *mapping, extern struct page * read_cache_page(struct address_space *mapping, pgoff_t index, filler_t *filler, void *data); +extern struct page * read_cache_page_gfp(struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); extern int read_cache_pages(struct address_space *mapping, struct list_head *pages, filler_t *filler, void *data); diff --git a/mm/filemap.c b/mm/filemap.c index 96ac6b0eb6cb..e3736923220e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap); static struct page *__read_cache_page(struct address_space *mapping, pgoff_t index, int (*filler)(void *,struct page*), - void *data) + void *data, + gfp_t gfp) { struct page *page; int err; repeat: page = find_get_page(mapping, index); if (!page) { - page = page_cache_alloc_cold(mapping); + page = __page_cache_alloc(gfp | __GFP_COLD); if (!page) return ERR_PTR(-ENOMEM); err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); @@ -1661,31 +1662,18 @@ repeat: return page; } -/** - * read_cache_page_async - read into page cache, fill it if needed - * @mapping: the page's address_space - * @index: the page index - * @filler: function to perform the read - * @data: destination for read data - * - * Same as read_cache_page, but don't wait for page to become unlocked - * after submitting it to the filler. - * - * Read into the page cache. If a page already exists, and PageUptodate() is - * not set, try to fill the page but don't wait for it to become unlocked. - * - * If the page does not get brought uptodate, return -EIO. 
- */ -struct page *read_cache_page_async(struct address_space *mapping, +static struct page *do_read_cache_page(struct address_space *mapping, pgoff_t index, int (*filler)(void *,struct page*), - void *data) + void *data, + gfp_t gfp) + { struct page *page; int err; retry: - page = __read_cache_page(mapping, index, filler, data); + page = __read_cache_page(mapping, index, filler, data, gfp); if (IS_ERR(page)) return page; if (PageUptodate(page)) @@ -1710,8 +1698,67 @@ out: mark_page_accessed(page); return page; } + +/** + * read_cache_page_async - read into page cache, fill it if needed + * @mapping: the page's address_space + * @index: the page index + * @filler: function to perform the read + * @data: destination for read data + * + * Same as read_cache_page, but don't wait for page to become unlocked + * after submitting it to the filler. + * + * Read into the page cache. If a page already exists, and PageUptodate() is + * not set, try to fill the page but don't wait for it to become unlocked. + * + * If the page does not get brought uptodate, return -EIO. + */ +struct page *read_cache_page_async(struct address_space *mapping, + pgoff_t index, + int (*filler)(void *,struct page*), + void *data) +{ + return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping)); +} EXPORT_SYMBOL(read_cache_page_async); +static struct page *wait_on_page_read(struct page *page) +{ + if (!IS_ERR(page)) { + wait_on_page_locked(page); + if (!PageUptodate(page)) { + page_cache_release(page); + page = ERR_PTR(-EIO); + } + } + return page; +} + +/** + * read_cache_page_gfp - read into page cache, using specified page allocation flags. + * @mapping: the page's address_space + * @index: the page index + * @gfp: the page allocator flags to use if allocating + * + * This is the same as "read_mapping_page(mapping, index, NULL)", but with + * any new page allocations done using the specified allocation flags. Note + * that the Radix tree operations will still use GFP_KERNEL, so you can't + * expect to do this atomically or anything like that - but you can pass in + * other page requirements. + * + * If the page does not get brought uptodate, return -EIO. 
+ */ +struct page *read_cache_page_gfp(struct address_space *mapping, + pgoff_t index, + gfp_t gfp) +{ + filler_t *filler = (filler_t *)mapping->a_ops->readpage; + + return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp)); +} +EXPORT_SYMBOL(read_cache_page_gfp); + /** * read_cache_page - read into page cache, fill it if needed * @mapping: the page's address_space @@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping, int (*filler)(void *,struct page*), void *data) { - struct page *page; - - page = read_cache_page_async(mapping, index, filler, data); - if (IS_ERR(page)) - goto out; - wait_on_page_locked(page); - if (!PageUptodate(page)) { - page_cache_release(page); - page = ERR_PTR(-EIO); - } - out: - return page; + return wait_on_page_read(read_cache_page_async(mapping, index, filler, data)); } EXPORT_SYMBOL(read_cache_page); -- cgit v1.2.3 From bb209c8287d2d55ec4a67e3933346e0a3ee0da76 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 26 Jan 2010 17:10:03 +0000 Subject: powerpc/pci: Add calls to set_pcie_port_type() and set_pcie_hotplug_bridge() We are missing these when building the pci_dev from scratch off the Open Firmware device-tree Signed-off-by: Benjamin Herrenschmidt Acked-by: Jesse Barnes --- arch/powerpc/kernel/pci_of_scan.c | 2 ++ drivers/pci/probe.c | 4 ++-- include/linux/pci.h | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 7311fdfb9bf8..693eb9a25bfa 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -140,6 +140,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->devfn = devfn; dev->multifunction = 0; /* maybe a lie? 
*/ dev->needs_freset = 0; /* pcie fundamental reset required */ + set_pcie_port_type(dev); dev->vendor = get_int_prop(node, "vendor-id", 0xffff); dev->device = get_int_prop(node, "device-id", 0xffff); @@ -164,6 +165,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, /* a PCI-PCI bridge */ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; dev->rom_base_reg = PCI_ROM_ADDRESS1; + set_pcie_hotplug_bridge(dev); } else if (!strcmp(type, "cardbus")) { dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; } else { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 98ffb2de22e9..446e4a94d7d3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -681,7 +681,7 @@ static void pci_read_irq(struct pci_dev *dev) dev->irq = irq; } -static void set_pcie_port_type(struct pci_dev *pdev) +void set_pcie_port_type(struct pci_dev *pdev) { int pos; u16 reg16; @@ -695,7 +695,7 @@ static void set_pcie_port_type(struct pci_dev *pdev) pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; } -static void set_pcie_hotplug_bridge(struct pci_dev *pdev) +void set_pcie_hotplug_bridge(struct pci_dev *pdev) { int pos; u16 reg16; diff --git a/include/linux/pci.h b/include/linux/pci.h index 174e5392e51e..c1968f464c38 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -756,6 +756,10 @@ pci_power_t pci_target_state(struct pci_dev *dev); int pci_prepare_to_sleep(struct pci_dev *dev); int pci_back_from_sleep(struct pci_dev *dev); +/* For use by arch with custom probe code */ +void set_pcie_port_type(struct pci_dev *pdev); +void set_pcie_hotplug_bridge(struct pci_dev *pdev); + /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); #ifdef CONFIG_HOTPLUG -- cgit v1.2.3 From cb6ecf6f7afece066265e243657b0ac28150a7b2 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 28 Jan 2010 22:28:27 -0800 Subject: Input: add the ABS_MT_PRESSURE event For pressure-based multi-touch devices, a direct way to send sensor intensity data per finger is needed. This patch adds the ABS_MT_PRESSURE event to the MT protocol. Requested-by: Yoonyoung Shim Requested-by: Mika Kuoppala Requested-by: Peter Hutterer Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 1 + include/linux/input.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/drivers/input/input.c b/drivers/input/input.c index 30b503b8d67b..86cb2d2196ff 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -46,6 +46,7 @@ static unsigned int input_abs_bypass_init_data[] __initdata = { ABS_MT_TOOL_TYPE, ABS_MT_BLOB_ID, ABS_MT_TRACKING_ID, + ABS_MT_PRESSURE, 0 }; static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)]; diff --git a/include/linux/input.h b/include/linux/input.h index 7be8a6537b57..735ceaf1bc2d 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -660,6 +660,7 @@ struct input_absinfo { #define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ #define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ #define ABS_MT_TRACKING_ID 0x39 /* Unique ID of initiated contact */ +#define ABS_MT_PRESSURE 0x3a /* Pressure on contact area */ #define ABS_MAX 0x3f #define ABS_CNT (ABS_MAX+1) -- cgit v1.2.3 From 221af7f87b97431e3ee21ce4b0e77d5411cf1549 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jan 2010 22:14:42 -0800 Subject: Split 'flush_old_exec' into two functions 'flush_old_exec()' is the point of no return when doing an execve(), and it is pretty badly misnamed. 
It doesn't just flush the old executable environment, it also starts up the new one. Which is very inconvenient for things like setting up the new personality, because we want the new personality to affect the starting of the new environment, but at the same time we do _not_ want the new personality to take effect if flushing the old one fails. As a result, the x86-64 '32-bit' personality is actually done using this insane "I'm going to change the ABI, but I haven't done it yet" bit (TIF_ABI_PENDING), with SET_PERSONALITY() not actually setting the personality, but just the "pending" bit, so that "flush_thread()" can do the actual personality magic. This patch in no way changes any of that insanity, but it does split the 'flush_old_exec()' function up into a preparatory part that can fail (still called flush_old_exec()), and a new part that will actually set up the new exec environment (setup_new_exec()). All callers are changed to trivially comply with the new world order. Signed-off-by: H. Peter Anvin Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/sh/kernel/process_64.c | 2 +- arch/x86/ia32/ia32_aout.c | 10 ++++++---- fs/binfmt_aout.c | 1 + fs/binfmt_elf.c | 27 ++------------------------- fs/binfmt_elf_fdpic.c | 3 +++ fs/binfmt_flat.c | 1 + fs/binfmt_som.c | 1 + fs/exec.c | 26 ++++++++++++++++---------- include/linux/binfmts.h | 1 + include/linux/sched.h | 2 +- 10 files changed, 33 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 31f80c61b031..ec79faf6f021 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -368,7 +368,7 @@ void exit_thread(void) void flush_thread(void) { - /* Called by fs/exec.c (flush_old_exec) to remove traces of a + /* Called by fs/exec.c (setup_new_exec) to remove traces of a * previously running executable. 
*/ #ifdef CONFIG_SH_FPU if (last_task_used_math == current) { diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 2a4d073d2cf1..435d2a5323da 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -308,15 +308,17 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval) return retval; - regs->cs = __USER32_CS; - regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = - regs->r13 = regs->r14 = regs->r15 = 0; - /* OK, This is the point of no return */ set_personality(PER_LINUX); set_thread_flag(TIF_IA32); clear_thread_flag(TIF_ABI_PENDING); + setup_new_exec(bprm); + + regs->cs = __USER32_CS; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = + regs->r13 = regs->r14 = regs->r15 = 0; + current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); current->mm->end_data = ex.a_data + diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 346b69405363..fdd397099172 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) #else set_personality(PER_LINUX); #endif + setup_new_exec(bprm); current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index edd90c49003c..fd5b2ea5d299 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') goto out_free_interp; - /* - * The early SET_PERSONALITY here is so that the lookup - * for the interpreter happens in the namespace of the - * to-be-execed image. SET_PERSONALITY can select an - * alternate root. - * - * However, SET_PERSONALITY is NOT allowed to switch - * this task into the new images's memory mapping - * policy - that is, TASK_SIZE must still evaluate to - * that which is appropriate to the execing application. - * This is because exit_mmap() needs to have TASK_SIZE - * evaluate to the size of the old image. - * - * So if (say) a 64-bit application is execing a 32-bit - * application it is the architecture's responsibility - * to defer changing the value of TASK_SIZE until the - * switch really is going to happen - do this in - * flush_thread(). - akpm - */ - SET_PERSONALITY(loc->elf_ex); - interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) @@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Verify the interpreter has a valid arch */ if (!elf_check_arch(&loc->interp_elf_ex)) goto out_free_dentry; - } else { - /* Executables without an interpreter also need a personality */ - SET_PERSONALITY(loc->elf_ex); } /* Flush all traces of the currently running executable */ @@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; - arch_pick_mmap_layout(current->mm); + + setup_new_exec(bprm); /* Do this so that we can load the interpreter, if need be. 
We will change some of these later */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c57d9ce5ff7e..18d77297ccc8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, set_personality(PER_LINUX_FDPIC); if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; + + setup_new_exec(bprm); + set_binfmt(&elf_fdpic_format); current->mm->start_code = 0; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index d4a00ea1054c..42c6b4a54445 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* OK, This is the point of no return */ set_personality(PER_LINUX_32BIT); + setup_new_exec(bprm); } /* diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 2a9b5330cc5e..cc8560f6c9b0 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* OK, This is the point of no return */ current->flags &= ~PF_FORKNOEXEC; current->personality = PER_HPUX; + setup_new_exec(bprm); /* Set the task size for HP-UX processes such that * the gateway page is outside the address space. diff --git a/fs/exec.c b/fs/exec.c index 632b02e34ec7..675c3f44c2ea 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) int flush_old_exec(struct linux_binprm * bprm) { - char * name; - int i, ch, retval; - char tcomm[sizeof(current->comm)]; + int retval; /* * Make sure we have a private signal table and that @@ -963,6 +961,20 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; bprm->mm = NULL; /* We're using it now */ + return 0; + +out: + return retval; +} +EXPORT_SYMBOL(flush_old_exec); + +void setup_new_exec(struct linux_binprm * bprm) +{ + int i, ch; + char * name; + char tcomm[sizeof(current->comm)]; + + arch_pick_mmap_layout(current->mm); /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm) flush_signal_handlers(current, 0); flush_old_files(current->files); - - return 0; - -out: - return retval; } - -EXPORT_SYMBOL(flush_old_exec); +EXPORT_SYMBOL(setup_new_exec); /* * Prepare credentials and lock ->cred_guard_mutex. 
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index cd4349bdc34e..89c6249fc561 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -109,6 +109,7 @@ extern int prepare_binprm(struct linux_binprm *); extern int __must_check remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *,struct pt_regs *); extern int flush_old_exec(struct linux_binprm * bprm); +extern void setup_new_exec(struct linux_binprm * bprm); extern int suid_dumpable; #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f7bba93929b..abdfacc58653 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1369,7 +1369,7 @@ struct task_struct { char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) - - initialized normally by flush_old_exec */ + - initialized normally by setup_new_exec */ /* file system info */ int link_count, total_link_count; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3 From 5352ae638e2d7d5c9b2e4d528676bbf2af6fd6f3 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 28 Jan 2010 17:04:43 -0600 Subject: perf, hw_breakpoint, kgdb: Do not take mutex for kernel debugger This patch fixes the regression in functionality where the kernel debugger and the perf API do not nicely share hw breakpoint reservations. The kernel debugger cannot use any mutex_lock() calls because it can start the kernel running from an invalid context. A mutex free version of the reservation API needed to get created for the kernel debugger to safely update hw breakpoint reservations. The possibility for a breakpoint reservation to be concurrently processed at the time that kgdb interrupts the system is improbable. Should this corner case occur the end user is warned, and the kernel debugger will prohibit updating the hardware breakpoint reservations. Any time the kernel debugger reserves a hardware breakpoint it will be a system wide reservation. 
Signed-off-by: Jason Wessel Acked-by: Frederic Weisbecker Cc: kgdb-bugreport@lists.sourceforge.net Cc: K.Prasad Cc: Peter Zijlstra Cc: Alan Stern Cc: torvalds@linux-foundation.org LKML-Reference: <1264719883-7285-3-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 51 ++++++++++++++++++++++++++++++++++++++++++ include/linux/hw_breakpoint.h | 2 ++ kernel/hw_breakpoint.c | 52 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 95 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 62bea7307eaa..bfba6019d762 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -239,6 +239,49 @@ static void kgdb_correct_hw_break(void) hw_breakpoint_restore(); } +static int hw_break_reserve_slot(int breakno) +{ + int cpu; + int cnt = 0; + struct perf_event **pevent; + + for_each_online_cpu(cpu) { + cnt++; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_reserve_bp_slot(*pevent)) + goto fail; + } + + return 0; + +fail: + for_each_online_cpu(cpu) { + cnt--; + if (!cnt) + break; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + dbg_release_bp_slot(*pevent); + } + return -1; +} + +static int hw_break_release_slot(int breakno) +{ + struct perf_event **pevent; + int cpu; + + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_release_bp_slot(*pevent)) + /* + * The debugger is responisble for handing the retry on + * remove failure. + */ + return -1; + } + return 0; +} + static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) { @@ -250,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) if (i == 4) return -1; + if (hw_break_release_slot(i)) { + printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr); + return -1; + } breakinfo[i].enabled = 0; return 0; @@ -316,6 +363,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) return -1; } breakinfo[i].addr = addr; + if (hw_break_reserve_slot(i)) { + breakinfo[i].addr = 0; + return -1; + } breakinfo[i].enabled = 1; return 0; diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 41235c93e4e9..070ba0621738 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -75,6 +75,8 @@ extern int __register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); +extern int dbg_reserve_bp_slot(struct perf_event *bp); +extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c030ae657f20..8a5c7d55ac9f 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM */ -int reserve_bp_slot(struct perf_event *bp) +static int __reserve_bp_slot(struct perf_event *bp) { struct bp_busy_slots slots = {0}; - int ret = 0; - - mutex_lock(&nr_bp_mutex); fetch_bp_busy_slots(&slots, bp); /* Flexible counters need to keep at least one slot */ - if (slots.pinned + (!!slots.flexible) == HBP_NUM) { - ret = -ENOSPC; - goto end; - } + if (slots.pinned + (!!slots.flexible) == 
HBP_NUM) + return -ENOSPC; toggle_bp_slot(bp, true); -end: + return 0; +} + +int reserve_bp_slot(struct perf_event *bp) +{ + int ret; + + mutex_lock(&nr_bp_mutex); + + ret = __reserve_bp_slot(bp); + mutex_unlock(&nr_bp_mutex); return ret; } +static void __release_bp_slot(struct perf_event *bp) +{ + toggle_bp_slot(bp, false); +} + void release_bp_slot(struct perf_event *bp) { mutex_lock(&nr_bp_mutex); - toggle_bp_slot(bp, false); + __release_bp_slot(bp); mutex_unlock(&nr_bp_mutex); } +/* + * Allow the kernel debugger to reserve breakpoint slots without + * taking a lock using the dbg_* variant of for the reserve and + * release breakpoint slots. + */ +int dbg_reserve_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + return __reserve_bp_slot(bp); +} + +int dbg_release_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + __release_bp_slot(bp); + + return 0; +} int register_perf_hw_breakpoint(struct perf_event *bp) { -- cgit v1.2.3 From fa5829b36539067f3c675f5d437531dedcfc4ad8 Mon Sep 17 00:00:00 2001 From: Marcin Koƛcielnicki Date: Sat, 23 Jan 2010 10:25:28 +1000 Subject: drm/kms: Remove incorrect comment in struct drm_mode_modeinfo Signed-off-by: Dave Airlie --- include/drm/drm_mode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index bc4fdf27bd2e..c5ba1636613c 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -85,7 +85,7 @@ struct drm_mode_modeinfo { __u16 hdisplay, hsync_start, hsync_end, htotal, hskew; __u16 vdisplay, vsync_start, vsync_end, vtotal, vscan; - __u32 vrefresh; /* vertical refresh * 1000 */ + __u32 vrefresh; __u32 flags; __u32 type; -- cgit v1.2.3 From d6ad3e286d2c075a60b9f11075a2c55aeeeca2ad Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 27 Jan 2010 16:25:22 -0600 Subject: softlockup: Add sched_clock_tick() to avoid kernel warning on kgdb resume When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, sched_clock() gets the time from hardware such as the TSC on x86. In this configuration kgdb will report a softlock warning message on resuming or detaching from a debug session. Sequence of events in the problem case: 1) "cpu sched clock" and "hardware time" are at 100 sec prior to a call to kgdb_handle_exception() 2) Debugger waits in kgdb_handle_exception() for 80 sec and on exit the following is called ... touch_softlockup_watchdog() --> __raw_get_cpu_var(touch_timestamp) = 0; 3) "cpu sched clock" = 100s (it was not updated, because the interrupt was disabled in kgdb) but the "hardware time" = 180 sec 4) The first timer interrupt after resuming from kgdb_handle_exception updates the watchdog from the "cpu sched clock" update_process_times() { ... run_local_timers() --> softlockup_tick() --> check (touch_timestamp == 0) (it is "YES" here, we have set "touch_timestamp = 0" at kgdb) --> __touch_softlockup_watchdog() ***(A)--> reset "touch_timestamp" to "get_timestamp()" (Here, the "touch_timestamp" will still be set to 100s.) ... scheduler_tick() ***(B)--> sched_clock_tick() (update "cpu sched clock" to "hardware time" = 180s) ... } 5) The Second timer interrupt handler appears to have a large jump and trips the softlockup warning. update_process_times() { ... run_local_timers() --> softlockup_tick() --> "cpu sched clock" - "touch_timestamp" = 180s-100s > 60s --> printk "soft lockup error messages" ... 
} note: ***(A) reset "touch_timestamp" to "get_timestamp(this_cpu)" Why is "touch_timestamp" 100 sec, instead of 180 sec? When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, the call trace of get_timestamp() is: get_timestamp(this_cpu) -->cpu_clock(this_cpu) -->sched_clock_cpu(this_cpu) -->__update_sched_clock(sched_clock_data, now) The __update_sched_clock() function uses the GTOD tick value to create a window to normalize the "now" values. So if "now" value is too big for sched_clock_data, it will be ignored. The fix is to invoke sched_clock_tick() to update "cpu sched clock" in order to recover from this state. This is done by introducing the function touch_softlockup_watchdog_sync(). This allows kgdb to request that the sched clock is updated when the watchdog thread runs the first time after a resume from kgdb. [yong.zhang0@gmail.com: Use per cpu instead of an array] Signed-off-by: Jason Wessel Signed-off-by: Dongdong Deng Cc: kgdb-bugreport@lists.sourceforge.net Cc: peterz@infradead.org LKML-Reference: <1264631124-4837-2-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ kernel/kgdb.c | 6 +++--- kernel/softlockup.c | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f7bba93929b..89232151a9d0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -310,6 +310,7 @@ extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_DETECT_SOFTLOCKUP extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); +extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, void __user *buffer, @@ -323,6 +324,9 @@ static inline void softlockup_tick(void) static inline void touch_softlockup_watchdog(void) { } +static inline void touch_softlockup_watchdog_sync(void) +{ +} static inline void touch_all_softlockup_watchdogs(void) { } diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 2eb517e23514..87f2cc557553 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -596,7 +596,7 @@ static void kgdb_wait(struct pt_regs *regs) /* Signal the primary CPU that we are done: */ atomic_set(&cpu_in_kgdb[cpu], 0); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); } @@ -1450,7 +1450,7 @@ acquirelock: (kgdb_info[cpu].task && kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); @@ -1550,7 +1550,7 @@ kgdb_restore: } /* Free kgdb_active */ atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index d22579087e27..0d4c7898ab80 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock); static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); +static DEFINE_PER_CPU(bool, softlock_touch_sync); static int __read_mostly did_panic; int __read_mostly softlockup_thresh = 60; @@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void) } 
EXPORT_SYMBOL(touch_softlockup_watchdog); +void touch_softlockup_watchdog_sync(void) +{ + __raw_get_cpu_var(softlock_touch_sync) = true; + __raw_get_cpu_var(softlockup_touch_ts) = 0; +} + void touch_all_softlockup_watchdogs(void) { int cpu; @@ -118,6 +125,14 @@ void softlockup_tick(void) } if (touch_ts == 0) { + if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) { + /* + * If the time stamp was touched atomically + * make sure the scheduler tick is up to date. + */ + per_cpu(softlock_touch_sync, this_cpu) = false; + sched_clock_tick(); + } __touch_softlockup_watchdog(); return; } -- cgit v1.2.3 From f98bfbd78c37c5946cc53089da32a5f741efdeb7 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Tue, 2 Feb 2010 15:58:48 -0800 Subject: connector: Delete buggy notification code. On Tue, Feb 02, 2010 at 02:57:14PM -0800, Greg KH (gregkh@suse.de) wrote: > > There are at least two ways to fix it: using a big cannon and a small > > one. The former way is to disable notification registration, since it is > > not used by anyone at all. Second way is to check whether calling > > process is root and its destination group is -1 (kind of priveledged > > one) before command is dispatched to workqueue. > > Well if no one is using it, removing it makes the most sense, right? > > No objection from me, care to make up a patch either way for this? Getting it is not used, let's drop support for notifications about (un)registered events from connector. Another option was to check credentials on receiving, but we can always restore it without bugs if needed, but genetlink has a wider code base and none complained, that userspace can not get notification when some other clients were (un)registered. Kudos for Sebastian Krahmer , who found a bug in the code. Signed-off-by: Evgeniy Polyakov Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/connector/connector.c | 175 ------------------------------------------ include/linux/connector.h | 32 -------- 2 files changed, 207 deletions(-) (limited to 'include') diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index f06024668f99..537c29ac4487 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -36,17 +36,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Evgeniy Polyakov "); MODULE_DESCRIPTION("Generic userspace <-> kernelspace connector."); -static u32 cn_idx = CN_IDX_CONNECTOR; -static u32 cn_val = CN_VAL_CONNECTOR; - -module_param(cn_idx, uint, 0); -module_param(cn_val, uint, 0); -MODULE_PARM_DESC(cn_idx, "Connector's main device idx."); -MODULE_PARM_DESC(cn_val, "Connector's main device val."); - -static DEFINE_MUTEX(notify_lock); -static LIST_HEAD(notify_list); - static struct cn_dev cdev; static int cn_already_initialized; @@ -209,54 +198,6 @@ static void cn_rx_skb(struct sk_buff *__skb) } } -/* - * Notification routing. - * - * Gets id and checks if there are notification request for it's idx - * and val. If there are such requests notify the listeners with the - * given notify event. 
- * - */ -static void cn_notify(struct cb_id *id, u32 notify_event) -{ - struct cn_ctl_entry *ent; - - mutex_lock(¬ify_lock); - list_for_each_entry(ent, ¬ify_list, notify_entry) { - int i; - struct cn_notify_req *req; - struct cn_ctl_msg *ctl = ent->msg; - int idx_found, val_found; - - idx_found = val_found = 0; - - req = (struct cn_notify_req *)ctl->data; - for (i = 0; i < ctl->idx_notify_num; ++i, ++req) { - if (id->idx >= req->first && - id->idx < req->first + req->range) { - idx_found = 1; - break; - } - } - - for (i = 0; i < ctl->val_notify_num; ++i, ++req) { - if (id->val >= req->first && - id->val < req->first + req->range) { - val_found = 1; - break; - } - } - - if (idx_found && val_found) { - struct cn_msg m = { .ack = notify_event, }; - - memcpy(&m.id, id, sizeof(m.id)); - cn_netlink_send(&m, ctl->group, GFP_KERNEL); - } - } - mutex_unlock(¬ify_lock); -} - /* * Callback add routing - adds callback with given ID and name. * If there is registered callback with the same ID it will not be added. @@ -276,8 +217,6 @@ int cn_add_callback(struct cb_id *id, char *name, if (err) return err; - cn_notify(id, 0); - return 0; } EXPORT_SYMBOL_GPL(cn_add_callback); @@ -295,111 +234,9 @@ void cn_del_callback(struct cb_id *id) struct cn_dev *dev = &cdev; cn_queue_del_callback(dev->cbdev, id); - cn_notify(id, 1); } EXPORT_SYMBOL_GPL(cn_del_callback); -/* - * Checks two connector's control messages to be the same. - * Returns 1 if they are the same or if the first one is corrupted. - */ -static int cn_ctl_msg_equals(struct cn_ctl_msg *m1, struct cn_ctl_msg *m2) -{ - int i; - struct cn_notify_req *req1, *req2; - - if (m1->idx_notify_num != m2->idx_notify_num) - return 0; - - if (m1->val_notify_num != m2->val_notify_num) - return 0; - - if (m1->len != m2->len) - return 0; - - if ((m1->idx_notify_num + m1->val_notify_num) * sizeof(*req1) != - m1->len) - return 1; - - req1 = (struct cn_notify_req *)m1->data; - req2 = (struct cn_notify_req *)m2->data; - - for (i = 0; i < m1->idx_notify_num; ++i) { - if (req1->first != req2->first || req1->range != req2->range) - return 0; - req1++; - req2++; - } - - for (i = 0; i < m1->val_notify_num; ++i) { - if (req1->first != req2->first || req1->range != req2->range) - return 0; - req1++; - req2++; - } - - return 1; -} - -/* - * Main connector device's callback. - * - * Used for notification of a request's processing. - */ -static void cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) -{ - struct cn_ctl_msg *ctl; - struct cn_ctl_entry *ent; - u32 size; - - if (msg->len < sizeof(*ctl)) - return; - - ctl = (struct cn_ctl_msg *)msg->data; - - size = (sizeof(*ctl) + ((ctl->idx_notify_num + - ctl->val_notify_num) * - sizeof(struct cn_notify_req))); - - if (msg->len != size) - return; - - if (ctl->len + sizeof(*ctl) != msg->len) - return; - - /* - * Remove notification. 
- */ - if (ctl->group == 0) { - struct cn_ctl_entry *n; - - mutex_lock(¬ify_lock); - list_for_each_entry_safe(ent, n, ¬ify_list, notify_entry) { - if (cn_ctl_msg_equals(ent->msg, ctl)) { - list_del(&ent->notify_entry); - kfree(ent); - } - } - mutex_unlock(¬ify_lock); - - return; - } - - size += sizeof(*ent); - - ent = kzalloc(size, GFP_KERNEL); - if (!ent) - return; - - ent->msg = (struct cn_ctl_msg *)(ent + 1); - - memcpy(ent->msg, ctl, size - sizeof(*ent)); - - mutex_lock(¬ify_lock); - list_add(&ent->notify_entry, ¬ify_list); - mutex_unlock(¬ify_lock); -} - static int cn_proc_show(struct seq_file *m, void *v) { struct cn_queue_dev *dev = cdev.cbdev; @@ -437,11 +274,8 @@ static const struct file_operations cn_file_ops = { static int __devinit cn_init(void) { struct cn_dev *dev = &cdev; - int err; dev->input = cn_rx_skb; - dev->id.idx = cn_idx; - dev->id.val = cn_val; dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, CN_NETLINK_USERS + 0xf, @@ -457,14 +291,6 @@ static int __devinit cn_init(void) cn_already_initialized = 1; - err = cn_add_callback(&dev->id, "connector", &cn_callback); - if (err) { - cn_already_initialized = 0; - cn_queue_free_dev(dev->cbdev); - netlink_kernel_release(dev->nls); - return -EINVAL; - } - proc_net_fops_create(&init_net, "connector", S_IRUGO, &cn_file_ops); return 0; @@ -478,7 +304,6 @@ static void __devexit cn_fini(void) proc_net_remove(&init_net, "connector"); - cn_del_callback(&dev->id); cn_queue_free_dev(dev->cbdev); netlink_kernel_release(dev->nls); } diff --git a/include/linux/connector.h b/include/linux/connector.h index 72ba63eb83c5..3a779ffba60b 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -24,9 +24,6 @@ #include -#define CN_IDX_CONNECTOR 0xffffffff -#define CN_VAL_CONNECTOR 0xffffffff - /* * Process Events connector unique ids -- used for message routing */ @@ -75,30 +72,6 @@ struct cn_msg { __u8 data[0]; }; -/* - * Notify structure - requests notification about - * registering/unregistering idx/val in range [first, first+range]. - */ -struct cn_notify_req { - __u32 first; - __u32 range; -}; - -/* - * Main notification control message - * *_notify_num - number of appropriate cn_notify_req structures after - * this struct. - * group - notification receiver's idx. - * len - total length of the attached data. - */ -struct cn_ctl_msg { - __u32 idx_notify_num; - __u32 val_notify_num; - __u32 group; - __u32 len; - __u8 data[0]; -}; - #ifdef __KERNEL__ #include @@ -151,11 +124,6 @@ struct cn_callback_entry { u32 seq, group; }; -struct cn_ctl_entry { - struct list_head notify_entry; - struct cn_ctl_msg *msg; -}; - struct cn_dev { struct cb_id id; -- cgit v1.2.3 From cd757645fbdc34a8343c04bb0e74e06fccc2cb10 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Sat, 30 Jan 2010 10:25:18 +0530 Subject: perf: Make bp_len type to u64 generic across the arch Change 'bp_len' type to __u64 to make it work across archs as the s390 architecture watch point length can be upto 2^64. reference: http://lkml.org/lkml/2010/1/25/212 This is an ABI change that is not backward compatible with the previous hardware breakpoint info layout integrated in this development cycle, a rebuilt of perf tools is necessary for versions based on 2.6.33-rc1 - 2.6.33-rc6 to work with a kernel based on this patch. Signed-off-by: Mahesh Salgaonkar Acked-by: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: "K. 
Prasad" Cc: Maneesh Soni Cc: Heiko Carstens Cc: Martin LKML-Reference: <20100130045518.GA20776@in.ibm.com> Signed-off-by: Frederic Weisbecker --- include/linux/hw_breakpoint.h | 2 +- include/linux/perf_event.h | 6 ++---- kernel/hw_breakpoint.c | 2 +- kernel/perf_event.c | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 070ba0621738..5977b724f7c6 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -44,7 +44,7 @@ static inline int hw_breakpoint_type(struct perf_event *bp) return bp->attr.bp_type; } -static inline int hw_breakpoint_len(struct perf_event *bp) +static inline unsigned long hw_breakpoint_len(struct perf_event *bp) { return bp->attr.bp_len; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8fa71874113f..a177698d95e2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -211,11 +211,9 @@ struct perf_event_attr { __u32 wakeup_watermark; /* bytes before wakeup */ }; - __u32 __reserved_2; - - __u64 bp_addr; __u32 bp_type; - __u32 bp_len; + __u64 bp_addr; + __u64 bp_len; }; /* diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 8a5c7d55ac9f..967e66143e11 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -360,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { u64 old_addr = bp->attr.bp_addr; + u64 old_len = bp->attr.bp_len; int old_type = bp->attr.bp_type; - int old_len = bp->attr.bp_len; int err = 0; perf_event_disable(bp); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index d27746bd3a06..2b19297742cb 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4580,7 +4580,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (attr->type >= PERF_TYPE_MAX) return -EINVAL; - if (attr->__reserved_1 || attr->__reserved_2) + if (attr->__reserved_1) return -EINVAL; if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) -- cgit v1.2.3 From f7acede65d6b65919aee5b6a360a17cedb11f2f7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Jan 2010 13:30:11 +0100 Subject: libata: fix ata_id_logical_per_physical_sectors The value we get from the low byte of the ATA_ID_SECTOR_SIZE word is not not a plain multiple, but the log of it, so fix the helper to give the correct answer. Without this we'll get an incorrect minimal I/O size in the block limits VPD page for 4k sector drives. Also change the return value of ata_id_logical_per_physical_sectors to u16 for the unlikely case of very large logical sectors. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jeff Garzik --- include/linux/ata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index 38a6948ce0c2..20f31567ccee 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -647,9 +647,9 @@ static inline int ata_id_has_large_logical_sectors(const u16 *id) return id[ATA_ID_SECTOR_SIZE] & (1 << 13); } -static inline u8 ata_id_logical_per_physical_sectors(const u16 *id) +static inline u16 ata_id_logical_per_physical_sectors(const u16 *id) { - return id[ATA_ID_SECTOR_SIZE] & 0xf; + return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf); } static inline int ata_id_has_lba48(const u16 *id) -- cgit v1.2.3 From 2938429501b73f6aeb312236eac7ed0416a07cd5 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 5 Feb 2010 16:09:11 +1100 Subject: percpu: add __percpu for sparse This is to make the annotation of percpu variables during the next merge window less painfull. Extracted from a patch by Rusty Russell. Signed-off-by: Stephen Rothwell Acked-by: Tejun Heo Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 5be3dab4a695..188fcae10a99 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -15,6 +15,7 @@ # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) +# define __percpu __attribute__((noderef, address_space(3))) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else @@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __acquire(x) (void)0 # define __release(x) (void)0 # define __cond_lock(x,c) (c) +# define __percpu #endif #ifdef __KERNEL__ -- cgit v1.2.3 From 8eb988c70e7709b7bd1a69f0ec53d19ac20dea84 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 20 Jan 2010 15:35:41 -0500 Subject: fix ima breakage The "Untangling ima mess, part 2 with counters" patch messed up the counters. Based on conversations with Al Viro, this patch streamlines ima_path_check() by removing the counter maintaince. The counters are now updated independently, from measuring the file, in __dentry_open() and alloc_file() by calling ima_counts_get(). ima_path_check() is called from nfsd and do_filp_open(). It also did not measure all files that should have been measured. Reason: ima_path_check() got bogus value passed as mask. 
[AV: mea culpa] [AV: add missing nfsd bits] Signed-off-by: Mimi Zohar Signed-off-by: Al Viro --- fs/namei.c | 6 +- fs/nfsd/vfs.c | 3 +- include/linux/ima.h | 4 +- security/integrity/ima/ima_main.c | 236 +++++++++++++++----------------------- 4 files changed, 97 insertions(+), 152 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index 94a5e60779f9..cd77b6375efd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1736,8 +1736,7 @@ do_last: if (nd.root.mnt) path_put(&nd.root); if (!IS_ERR(filp)) { - error = ima_path_check(&filp->f_path, filp->f_mode & - (MAY_READ | MAY_WRITE | MAY_EXEC)); + error = ima_path_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); @@ -1797,8 +1796,7 @@ ok: } filp = nameidata_to_filp(&nd); if (!IS_ERR(filp)) { - error = ima_path_check(&filp->f_path, filp->f_mode & - (MAY_READ | MAY_WRITE | MAY_EXEC)); + error = ima_path_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 325959e264ce..32477e3a645c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -752,8 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); - host_err = ima_path_check(&(*filp)->f_path, - access & (MAY_READ | MAY_WRITE | MAY_EXEC)); + host_err = ima_path_check(*filp, access); out_nfserr: err = nfserrno(host_err); out: diff --git a/include/linux/ima.h b/include/linux/ima.h index 99dc6d5cf7e5..aa55a8f1f5b9 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -17,7 +17,7 @@ struct linux_binprm; extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct path *path, int mask); +extern int ima_path_check(struct file *file, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern void ima_counts_get(struct file *file); @@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct path *path, int mask) +static inline int ima_path_check(struct file *file, int mask) { return 0; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index a89f44d5e030..75aee18f6163 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -84,6 +84,36 @@ out: return found; } +/* ima_read_write_check - reflect possible reading/writing errors in the PCR. + * + * When opening a file for read, if the file is already open for write, + * the file could change, resulting in a file measurement error. + * + * Opening a file for write, if the file is already open for read, results + * in a time of measure, time of use (ToMToU) error. + * + * In either case invalidate the PCR. 
+ */ +enum iint_pcr_error { TOMTOU, OPEN_WRITERS }; +static void ima_read_write_check(enum iint_pcr_error error, + struct ima_iint_cache *iint, + struct inode *inode, + const unsigned char *filename) +{ + switch (error) { + case TOMTOU: + if (iint->readcount > 0) + ima_add_violation(inode, filename, "invalid_pcr", + "ToMToU"); + break; + case OPEN_WRITERS: + if (iint->writecount > 0) + ima_add_violation(inode, filename, "invalid_pcr", + "open_writers"); + break; + } +} + /* * Update the counts given an fmode_t */ @@ -98,6 +128,47 @@ static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode) iint->writecount++; } +/* + * ima_counts_get - increment file counts + * + * Maintain read/write counters for all files, but only + * invalidate the PCR for measured files: + * - Opening a file for write when already open for read, + * results in a time of measure, time of use (ToMToU) error. + * - Opening a file for read when already open for write, + * could result in a file measurement error. + * + */ +void ima_counts_get(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + fmode_t mode = file->f_mode; + struct ima_iint_cache *iint; + int rc; + + if (!ima_initialized || !S_ISREG(inode->i_mode)) + return; + iint = ima_iint_find_get(inode); + if (!iint) + return; + mutex_lock(&iint->mutex); + rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); + if (rc < 0) + goto out; + + if (mode & FMODE_WRITE) { + ima_read_write_check(TOMTOU, iint, inode, dentry->d_name.name); + goto out; + } + ima_read_write_check(OPEN_WRITERS, iint, inode, dentry->d_name.name); +out: + ima_inc_counts(iint, file->f_mode); + mutex_unlock(&iint->mutex); + + kref_put(&iint->refcount, iint_free); +} + /* * Decrement ima counts */ @@ -153,123 +224,6 @@ void ima_file_free(struct file *file) kref_put(&iint->refcount, iint_free); } -/* ima_read_write_check - reflect possible reading/writing errors in the PCR. - * - * When opening a file for read, if the file is already open for write, - * the file could change, resulting in a file measurement error. - * - * Opening a file for write, if the file is already open for read, results - * in a time of measure, time of use (ToMToU) error. - * - * In either case invalidate the PCR. - */ -enum iint_pcr_error { TOMTOU, OPEN_WRITERS }; -static void ima_read_write_check(enum iint_pcr_error error, - struct ima_iint_cache *iint, - struct inode *inode, - const unsigned char *filename) -{ - switch (error) { - case TOMTOU: - if (iint->readcount > 0) - ima_add_violation(inode, filename, "invalid_pcr", - "ToMToU"); - break; - case OPEN_WRITERS: - if (iint->writecount > 0) - ima_add_violation(inode, filename, "invalid_pcr", - "open_writers"); - break; - } -} - -static int get_path_measurement(struct ima_iint_cache *iint, struct file *file, - const unsigned char *filename) -{ - int rc = 0; - - ima_inc_counts(iint, file->f_mode); - - rc = ima_collect_measurement(iint, file); - if (!rc) - ima_store_measurement(iint, file, filename); - return rc; -} - -/** - * ima_path_check - based on policy, collect/store measurement. - * @path: contains a pointer to the path to be measured - * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE - * - * Measure the file being open for readonly, based on the - * ima_must_measure() policy decision. 
- * - * Keep read/write counters for all files, but only - * invalidate the PCR for measured files: - * - Opening a file for write when already open for read, - * results in a time of measure, time of use (ToMToU) error. - * - Opening a file for read when already open for write, - * could result in a file measurement error. - * - * Always return 0 and audit dentry_open failures. - * (Return code will be based upon measurement appraisal.) - */ -int ima_path_check(struct path *path, int mask) -{ - struct inode *inode = path->dentry->d_inode; - struct ima_iint_cache *iint; - struct file *file = NULL; - int rc; - - if (!ima_initialized || !S_ISREG(inode->i_mode)) - return 0; - iint = ima_iint_find_get(inode); - if (!iint) - return 0; - - mutex_lock(&iint->mutex); - - rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); - if (rc < 0) - goto out; - - if ((mask & MAY_WRITE) || (mask == 0)) - ima_read_write_check(TOMTOU, iint, inode, - path->dentry->d_name.name); - - if ((mask & (MAY_WRITE | MAY_READ | MAY_EXEC)) != MAY_READ) - goto out; - - ima_read_write_check(OPEN_WRITERS, iint, inode, - path->dentry->d_name.name); - if (!(iint->flags & IMA_MEASURED)) { - struct dentry *dentry = dget(path->dentry); - struct vfsmount *mnt = mntget(path->mnt); - - file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE, - current_cred()); - if (IS_ERR(file)) { - int audit_info = 0; - - integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, - dentry->d_name.name, - "add_measurement", - "dentry_open failed", - 1, audit_info); - file = NULL; - goto out; - } - rc = get_path_measurement(iint, file, dentry->d_name.name); - } -out: - mutex_unlock(&iint->mutex); - if (file) - fput(file); - kref_put(&iint->refcount, iint_free); - return 0; -} -EXPORT_SYMBOL_GPL(ima_path_check); - static int process_measurement(struct file *file, const unsigned char *filename, int mask, int function) { @@ -297,33 +251,6 @@ out: return rc; } -/* - * ima_counts_get - increment file counts - * - * - for IPC shm and shmat file. - * - for nfsd exported files. - * - * Increment the counts for these files to prevent unnecessary - * imbalance messages. - */ -void ima_counts_get(struct file *file) -{ - struct inode *inode = file->f_dentry->d_inode; - struct ima_iint_cache *iint; - - if (!ima_initialized || !S_ISREG(inode->i_mode)) - return; - iint = ima_iint_find_get(inode); - if (!iint) - return; - mutex_lock(&iint->mutex); - ima_inc_counts(iint, file->f_mode); - mutex_unlock(&iint->mutex); - - kref_put(&iint->refcount, iint_free); -} -EXPORT_SYMBOL_GPL(ima_counts_get); - /** * ima_file_mmap - based on policy, collect/store measurement. * @file: pointer to the file to be measured (May be NULL) @@ -369,6 +296,27 @@ int ima_bprm_check(struct linux_binprm *bprm) return 0; } +/** + * ima_path_check - based on policy, collect/store measurement. + * @file: pointer to the file to be measured + * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE + * + * Measure files based on the ima_must_measure() policy decision. + * + * Always return 0 and audit dentry_open failures. + * (Return code will be based upon measurement appraisal.) 
+ */ +int ima_path_check(struct file *file, int mask) +{ + int rc; + + rc = process_measurement(file, file->f_dentry->d_name.name, + mask & (MAY_READ | MAY_WRITE | MAY_EXEC), + PATH_CHECK); + return 0; +} +EXPORT_SYMBOL_GPL(ima_path_check); + static int __init init_ima(void) { int error; -- cgit v1.2.3 From 9bbb6cad0173e6220f3ac609e26beb48dab3b7cd Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 26 Jan 2010 17:02:40 -0500 Subject: ima: rename ima_path_check to ima_file_check ima_path_check actually deals with files! call it ima_file_check instead. Signed-off-by: Eric Paris Acked-by: Mimi Zohar Signed-off-by: Al Viro --- fs/namei.c | 4 ++-- fs/nfsd/vfs.c | 2 +- include/linux/ima.h | 4 ++-- security/integrity/ima/ima_main.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index cd77b6375efd..d62fdc875f22 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1736,7 +1736,7 @@ do_last: if (nd.root.mnt) path_put(&nd.root); if (!IS_ERR(filp)) { - error = ima_path_check(filp, acc_mode); + error = ima_file_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); @@ -1796,7 +1796,7 @@ ok: } filp = nameidata_to_filp(&nd); if (!IS_ERR(filp)) { - error = ima_path_check(filp, acc_mode); + error = ima_file_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 32477e3a645c..97d79eff6b7f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -752,7 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); - host_err = ima_path_check(*filp, access); + host_err = ima_file_check(*filp, access); out_nfserr: err = nfserrno(host_err); out: diff --git a/include/linux/ima.h b/include/linux/ima.h index aa55a8f1f5b9..975837e7d6c0 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -17,7 +17,7 @@ struct linux_binprm; extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct file *file, int mask); +extern int ima_file_check(struct file *file, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern void ima_counts_get(struct file *file); @@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct file *file, int mask) +static inline int ima_file_check(struct file *file, int mask) { return 0; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index eb1cf6498cc9..b76e1f03ea2b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -14,7 +14,7 @@ * * File: ima_main.c * implements the IMA hooks: ima_bprm_check, ima_file_mmap, - * and ima_path_check. + * and ima_file_check. */ #include #include @@ -306,7 +306,7 @@ int ima_bprm_check(struct linux_binprm *bprm) * Always return 0 and audit dentry_open failures. * (Return code will be based upon measurement appraisal.) 
*/ -int ima_path_check(struct file *file, int mask) +int ima_file_check(struct file *file, int mask) { int rc; @@ -315,7 +315,7 @@ int ima_path_check(struct file *file, int mask) PATH_CHECK); return 0; } -EXPORT_SYMBOL_GPL(ima_path_check); +EXPORT_SYMBOL_GPL(ima_file_check); static int __init init_ima(void) { -- cgit v1.2.3 From 5b3501faa8741d50617ce4191c20061c6ef36cb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 11:16:56 -0800 Subject: netfilter: nf_conntrack: per netns nf_conntrack_cachep nf_conntrack_cachep is currently shared by all netns instances, but because of SLAB_DESTROY_BY_RCU special semantics, this is wrong. If we use a shared slab cache, one object can instantly flight between one hash table (netns ONE) to another one (netns TWO), and concurrent reader (doing a lookup in netns ONE, 'finding' an object of netns TWO) can be fooled without notice, because no RCU grace period has to be observed between object freeing and its reuse. We dont have this problem with UDP/TCP slab caches because TCP/UDP hashtables are global to the machine (and each object has a pointer to its netns). If we use per netns conntrack hash tables, we also *must* use per netns conntrack slab caches, to guarantee an object can not escape from one namespace to another one. Signed-off-by: Eric Dumazet [Patrick: added unique slab name allocation] Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- include/net/netns/conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 39 +++++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index ba1ba0c5efd1..aed23b6c8478 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; @@ -28,5 +29,6 @@ struct netns_ct { #endif int hash_vmalloc; int expect_vmalloc; + char *slabname; }; #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 37e2b88313f2..9de4bd4c0dd7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -63,8 +63,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -static struct kmem_cache *nf_conntrack_cachep __read_mostly; - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -572,7 +570,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, * Do not use kmem_cache_zalloc(), as this cache uses * SLAB_DESTROY_BY_RCU. 
*/ - ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); + ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&net->ct.count); @@ -611,7 +609,7 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); atomic_dec(&net->ct.count); nf_ct_ext_free(ct); - kmem_cache_free(nf_conntrack_cachep, ct); + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -1119,7 +1117,6 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); - kmem_cache_destroy(nf_conntrack_cachep); } static void nf_conntrack_cleanup_net(struct net *net) @@ -1137,6 +1134,8 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); free_percpu(net->ct.stat); } @@ -1272,15 +1271,6 @@ static int nf_conntrack_init_init_net(void) NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); - nf_conntrack_cachep = kmem_cache_create("nf_conntrack", - sizeof(struct nf_conn), - 0, SLAB_DESTROY_BY_RCU, NULL); - if (!nf_conntrack_cachep) { - printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - ret = -ENOMEM; - goto err_cache; - } - ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -1302,8 +1292,6 @@ static int nf_conntrack_init_init_net(void) err_helper: nf_conntrack_proto_fini(); err_proto: - kmem_cache_destroy(nf_conntrack_cachep); -err_cache: return ret; } @@ -1325,6 +1313,21 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_stat; } + + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + if (!net->ct.slabname) { + ret = -ENOMEM; + goto err_slabname; + } + + net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, + sizeof(struct nf_conn), 0, + SLAB_DESTROY_BY_RCU, NULL); + if (!net->ct.nf_conntrack_cachep) { + printk(KERN_ERR "Unable to create nf_conn slab cache\n"); + ret = -ENOMEM; + goto err_cache; + } net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { @@ -1352,6 +1355,10 @@ err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); err_hash: + kmem_cache_destroy(net->ct.nf_conntrack_cachep); +err_cache: + kfree(net->ct.slabname); +err_slabname: free_percpu(net->ct.stat); err_stat: return ret; -- cgit v1.2.3 From d696c7bdaa55e2208e56c6f98e6bc1599f34286d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:18:07 -0800 Subject: netfilter: nf_conntrack: fix hash resizing with namespaces As noticed by Jon Masters , the conntrack hash size is global and not per namespace, but modifiable at runtime through /sys/module/nf_conntrack/hashsize. Changing the hash size will only resize the hash in the current namespace however, so other namespaces will use an invalid hash size. This can cause crashes when enlarging the hashsize, or false negative lookups when shrinking it. Move the hash size into the per-namespace data and only use the global hash size to initialize the per-namespace value when instanciating a new namespace. Additionally restrict hash resizing to init_net for now as other namespaces are not handled currently. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/net/netns/conntrack.h | 1 + include/net/netns/ipv4.h | 1 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/ipv4/netfilter/nf_nat_core.c | 22 ++++----- net/netfilter/nf_conntrack_core.c | 53 ++++++++++++---------- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 7 +-- 10 files changed, 49 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index aed23b6c8478..63d449807d9b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + unsigned int htable_size; struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2eb3814d6258..9a4b8b714079 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,7 @@ struct netns_ipv4 { struct xt_table *iptable_security; struct xt_table *nat_table; struct hlist_head *nat_bysource; + unsigned int nat_htable_size; int nat_vmalloced; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d171b123a656..d1ea38a7c490 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = { }, { .procname = "ip_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8668a3defda6..2fb7b76da94f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe1a64479dd0..26066a2327ad 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; -/* Calculated at init based on memory size */ -static unsigned int nf_nat_htable_size __read_mostly; - #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; @@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. 
*/ static inline unsigned int -hash_by_src(const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) { unsigned int hash; @@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple) hash = jhash_3words((__force u32)tuple->src.u3.ip, (__force u32)tuple->src.u.all, tuple->dst.protonum, 0); - return ((u64)hash * nf_nat_htable_size) >> 32; + return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } /* Is this tuple already taken? (not by us) */ @@ -147,7 +144,7 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(tuple); + unsigned int h = hash_by_src(net, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); @@ -679,8 +676,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int __net_init nf_nat_net_init(struct net *net) { - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &net->ipv4.nat_vmalloced, 0); + /* Leave them the same for the moment. */ + net->ipv4.nat_htable_size = net->ct.htable_size; + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, + &net->ipv4.nat_vmalloced, 0); if (!net->ipv4.nat_bysource) return -ENOMEM; return 0; @@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) nf_ct_iterate_cleanup(net, &clean_nat, NULL); synchronize_rcu(); nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, - nf_nat_htable_size); + net->ipv4.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { @@ -724,9 +723,6 @@ static int __init nf_nat_init(void) return ret; } - /* Leave them the same for the moment. */ - nf_nat_htable_size = nf_conntrack_htable_size; - ret = register_pernet_subsys(&nf_nat_net_ops); if (ret < 0) goto cleanup_extend; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9de4bd4c0dd7..4d79e3c1616c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -84,9 +85,10 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) +static inline u_int32_t hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, nf_conntrack_htable_size, + return __hash_conntrack(tuple, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -294,7 +296,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. 
@@ -364,10 +366,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, void nf_conntrack_hash_insert(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -395,8 +398,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -466,7 +469,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. @@ -501,7 +504,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) int dropped = 0; rcu_read_lock(); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); @@ -521,7 +524,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) if (cnt >= NF_CT_EVICTION_RANGE) break; - hash = (hash + 1) % nf_conntrack_htable_size; + hash = (hash + 1) % net->ct.htable_size; } rcu_read_unlock(); @@ -555,7 +558,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(orig); + unsigned int hash = hash_conntrack(net, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -1012,7 +1015,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), struct hlist_nulls_node *n; spin_lock_bh(&nf_conntrack_lock); - for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + for (; *bucket < net->ct.htable_size; (*bucket)++) { hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) @@ -1130,7 +1133,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); @@ -1190,10 +1193,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) { int i, bucket, vmalloced, old_vmalloced; unsigned int hashsize, old_size; - int rnd; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + if (current->nsproxy->net_ns != &init_net) + return -EOPNOTSUPP; + /* On boot, we can set this without any fancy locking. 
*/ if (!nf_conntrack_htable_size) return param_set_uint(val, kp); @@ -1206,33 +1211,29 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; - /* We have to rehahs for the new table anyway, so we also can - * use a newrandom seed */ - get_random_bytes(&rnd, sizeof(rnd)); - /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections * created because of a false negative won't make it into the hash * though since that required taking the lock. */ spin_lock_bh(&nf_conntrack_lock); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < init_net.ct.htable_size; i++) { while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + bucket = __hash_conntrack(&h->tuple, hashsize, + nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } - old_size = nf_conntrack_htable_size; + old_size = init_net.ct.htable_size; old_vmalloced = init_net.ct.hash_vmalloc; old_hash = init_net.ct.hash; - nf_conntrack_htable_size = hashsize; + init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; init_net.ct.hash_vmalloc = vmalloced; init_net.ct.hash = hash; - nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); @@ -1328,7 +1329,9 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_cache; } - net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + + net->ct.htable_size = nf_conntrack_htable_size; + net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { ret = -ENOMEM; @@ -1353,7 +1356,7 @@ err_acct: nf_conntrack_expect_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); err_hash: kmem_cache_destroy(net->ct.nf_conntrack_cachep); err_cache: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4ad7d1d809af..2f25ff610982 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -577,7 +577,7 @@ int nf_conntrack_expect_init(struct net *net) if (net_eq(net, &init_net)) { if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + nf_ct_expect_hsize = net->ct.htable_size / 256; if (!nf_ct_expect_hsize) nf_ct_expect_hsize = 1; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 65c2a7bc3afc..4b1a56bd074c 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -192,7 +192,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, /* Get rid of expecteds, set helpers to NULL. 
*/ hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) unhelp(h, me); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 42f21c01a93e..0ffe689dfe97 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -594,7 +594,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); last = (struct nf_conn *)cb->args[1]; - for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { + for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) { restart: hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnnode) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 028aba667ef7..e310f1561bb2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -51,7 +51,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -69,7 +69,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); @@ -355,7 +355,7 @@ static ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, @@ -421,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; + table[2].data = &net->ct.htable_size; table[3].data = &net->ct.sysctl_checksum; table[4].data = &net->ct.sysctl_log_invalid; -- cgit v1.2.3 From 69c9700b544e496dc3ccf472a4f3a76dcf4abaf7 Mon Sep 17 00:00:00 2001 From: Marcin Koƛcielnicki Date: Tue, 26 Jan 2010 18:39:20 +0000 Subject: drm/nouveau: Add getparam to get available PGRAPH units. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On nv50, this will be needed by applications using CUDA to know how much stack/local memory to allocate. Signed-off-by: Marcin Koƛcielnicki Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_reg.h | 1 + drivers/gpu/drm/nouveau/nouveau_state.c | 9 +++++++++ include/drm/nouveau_drm.h | 1 + 3 files changed, 11 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h index 251f1b3b38b9..aa9b310e41be 100644 --- a/drivers/gpu/drm/nouveau/nouveau_reg.h +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h @@ -99,6 +99,7 @@ * the card will hang early on in the X init process. 
*/ # define NV_PMC_ENABLE_UNK13 (1<<13) +#define NV40_PMC_GRAPH_UNITS 0x00001540 #define NV40_PMC_BACKLIGHT 0x000015f0 # define NV40_PMC_BACKLIGHT_MASK 0x001f0000 #define NV40_PMC_1700 0x00001700 diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index 241e24d60eb4..fcd7610817a1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -825,6 +825,15 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data, case NOUVEAU_GETPARAM_VM_VRAM_BASE: getparam->value = dev_priv->vm_vram_base; break; + case NOUVEAU_GETPARAM_GRAPH_UNITS: + /* NV40 and NV50 versions are quite different, but register + * address is the same. User is supposed to know the card + * family anyway... */ + if (dev_priv->chipset >= 0x40) { + getparam->value = nv_rd32(dev, NV40_PMC_GRAPH_UNITS); + break; + } + /* FALLTHRU */ default: NV_ERROR(dev, "unknown parameter %lld\n", getparam->param); return -EINVAL; diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h index 1e67c441ea82..f745948b61e4 100644 --- a/include/drm/nouveau_drm.h +++ b/include/drm/nouveau_drm.h @@ -77,6 +77,7 @@ struct drm_nouveau_gpuobj_free { #define NOUVEAU_GETPARAM_PCI_PHYSICAL 10 #define NOUVEAU_GETPARAM_CHIPSET_ID 11 #define NOUVEAU_GETPARAM_VM_VRAM_BASE 12 +#define NOUVEAU_GETPARAM_GRAPH_UNITS 13 struct drm_nouveau_getparam { uint64_t param; uint64_t value; -- cgit v1.2.3 From f77cef3db357aeea22d82a2aa4f0ef8fbae41d47 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 9 Feb 2010 19:41:55 +0000 Subject: drm/vmwgfx: Update the user-space interface. When time-based throttling is implemented, we need to bump minor. When the old way of detecting scanout is removed, we need to bump major. In the meantime, this change should not break existing user-space. 
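[Illustration only, not part of the patch: user space typically guards against such version bumps by querying the driver version before using new parameters. A hedged sketch using standard libdrm calls; the helper name and the /dev/dri/card0 path are placeholders:]

#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
#include <xf86drm.h>

/* Sketch: accept the driver only if the major matches exactly and the
 * minor is at least the required one. */
static bool driver_compatible(int fd, int want_major, int want_minor)
{
	drmVersionPtr v = drmGetVersion(fd);
	bool ok;

	if (!v)
		return false;
	ok = v->version_major == want_major &&
	     v->version_minor >= want_minor;
	drmFreeVersion(v);
	return ok;
}

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);

	if (fd < 0)
		return 1;
	/* 0.9 is what this patch advertises; the follow-up patch further
	 * below moves the requirement to 1.0. */
	printf("compatible: %d\n", driver_compatible(fd, 0, 9));
	close(fd);
	return 0;
}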
Signed-off-by: Thomas Hellstrom Signed-off-by: Jakob Bornecrantz Signed-off-by: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 6 +++--- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 6 ++++++ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 5 +++-- include/drm/vmwgfx_drm.h | 12 +++++++++--- 4 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 135be9688c90..0eaf68273eaf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -39,10 +39,10 @@ #include "ttm/ttm_execbuf_util.h" #include "ttm/ttm_module.h" -#define VMWGFX_DRIVER_DATE "20090724" +#define VMWGFX_DRIVER_DATE "20100118" #define VMWGFX_DRIVER_MAJOR 0 -#define VMWGFX_DRIVER_MINOR 1 -#define VMWGFX_DRIVER_PATCHLEVEL 2 +#define VMWGFX_DRIVER_MINOR 9 +#define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) #define VMWGFX_MAX_RELOCATIONS 2048 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index 778851f9f1d6..1c7a316454d8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -48,6 +48,12 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, case DRM_VMW_PARAM_FIFO_OFFSET: param->value = dev_priv->mmio_start; break; + case DRM_VMW_PARAM_HW_CAPS: + param->value = dev_priv->capabilities; + break; + case DRM_VMW_PARAM_FIFO_CAPS: + param->value = dev_priv->fifo.capabilities; + break; default: DRM_ERROR("Illegal vmwgfx get param request: %d\n", param->param); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index c7efbd47ab84..933e90d82866 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -610,9 +610,10 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, */ srf->flags &= ~SVGA3D_SURFACE_HINT_SCANOUT; srf->scanout = true; - } else { + } else if (req->scanout) + srf->scanout = true; + else srf->scanout = false; - } if (srf->scanout && srf->num_sizes == 1 && diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h index 2be7e1249b6f..dfaf3c2d2c8e 100644 --- a/include/drm/vmwgfx_drm.h +++ b/include/drm/vmwgfx_drm.h @@ -68,7 +68,8 @@ #define DRM_VMW_PARAM_NUM_FREE_STREAMS 1 #define DRM_VMW_PARAM_3D 2 #define DRM_VMW_PARAM_FIFO_OFFSET 3 - +#define DRM_VMW_PARAM_HW_CAPS 4 +#define DRM_VMW_PARAM_FIFO_CAPS 5 /** * struct drm_vmw_getparam_arg @@ -181,6 +182,8 @@ struct drm_vmw_context_arg { * The size of the array should equal the total number of mipmap levels. * @shareable: Boolean whether other clients (as identified by file descriptors) * may reference this surface. + * @scanout: Boolean whether the surface is intended to be used as a + * scanout. * * Input data to the DRM_VMW_CREATE_SURFACE Ioctl. * Output data from the DRM_VMW_REF_SURFACE Ioctl. @@ -192,7 +195,7 @@ struct drm_vmw_surface_create_req { uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES]; uint64_t size_addr; int32_t shareable; - uint32_t pad64; + int32_t scanout; }; /** @@ -295,6 +298,9 @@ union drm_vmw_surface_reference_arg { * * @commands: User-space address of a command buffer cast to an uint64_t. * @command-size: Size in bytes of the command buffer. + * @throttle-us: Sleep until software is less than @throttle_us + * microseconds ahead of hardware. The driver may round this value + * to the nearest kernel tick. 
* @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an * uint64_t. * @@ -304,7 +310,7 @@ union drm_vmw_surface_reference_arg { struct drm_vmw_execbuf_arg { uint64_t commands; uint32_t command_size; - uint32_t pad64; + uint32_t throttle_us; uint64_t fence_rep; }; -- cgit v1.2.3 From a87897edbae2d60db7bcb6bb0a75e82013d68305 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 9 Feb 2010 21:29:47 +0000 Subject: drm/vmwgfx: Drop scanout flag compat and add execbuf ioctl parameter members. Bumps major. Even if this bumps the version to 1 it does not mean the driver is out of staging. From what we know this is the last backwards incompatible change to the driver. Signed-off-by: Jakob Bornecrantz Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 6 +++--- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 17 +---------------- include/drm/vmwgfx_drm.h | 8 ++++++++ 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 0eaf68273eaf..3e4e670d3216 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -39,9 +39,9 @@ #include "ttm/ttm_execbuf_util.h" #include "ttm/ttm_module.h" -#define VMWGFX_DRIVER_DATE "20100118" -#define VMWGFX_DRIVER_MAJOR 0 -#define VMWGFX_DRIVER_MINOR 9 +#define VMWGFX_DRIVER_DATE "20100209" +#define VMWGFX_DRIVER_MAJOR 1 +#define VMWGFX_DRIVER_MINOR 0 #define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 933e90d82866..f8fbbc67a406 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -35,11 +35,6 @@ #define VMW_RES_SURFACE ttm_driver_type1 #define VMW_RES_STREAM ttm_driver_type2 -/* XXX: This isn't a real hardware flag, but just a hack for kernel to - * know about primary surfaces. Find a better way to accomplish this. - */ -#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9) - struct vmw_user_context { struct ttm_base_object base; struct vmw_resource res; @@ -579,6 +574,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, srf->flags = req->flags; srf->format = req->format; + srf->scanout = req->scanout; memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels)); srf->num_sizes = 0; for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) @@ -604,17 +600,6 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, if (unlikely(ret != 0)) goto out_err1; - if (srf->flags & SVGA3D_SURFACE_HINT_SCANOUT) { - /* we should not send this flag down to hardware since - * its not a official one - */ - srf->flags &= ~SVGA3D_SURFACE_HINT_SCANOUT; - srf->scanout = true; - } else if (req->scanout) - srf->scanout = true; - else - srf->scanout = false; - if (srf->scanout && srf->num_sizes == 1 && srf->sizes[0].width == 64 && diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h index dfaf3c2d2c8e..c7645f480d12 100644 --- a/include/drm/vmwgfx_drm.h +++ b/include/drm/vmwgfx_drm.h @@ -303,15 +303,23 @@ union drm_vmw_surface_reference_arg { * to the nearest kernel tick. * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an * uint64_t. + * @version: Allows expanding the execbuf ioctl parameters without breaking + * backwards compatibility, since user-space will always tell the kernel + * which version it uses. 
+ * @flags: Execbuf flags. None currently. * * Argument to the DRM_VMW_EXECBUF Ioctl. */ +#define DRM_VMW_EXECBUF_VERSION 0 + struct drm_vmw_execbuf_arg { uint64_t commands; uint32_t command_size; uint32_t throttle_us; uint64_t fence_rep; + uint32_t version; + uint32_t flags; }; /** -- cgit v1.2.3 From 5a5e0f4c7038168e38d1db6af09d1ac715ee9888 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Jan 2010 17:09:38 +0300 Subject: kfifo: Don't use integer as NULL pointer This patch fixes following sparse warnings: include/linux/kfifo.h:127:25: warning: Using plain integer as NULL pointer kernel/kfifo.c:83:21: warning: Using plain integer as NULL pointer Signed-off-by: Anton Vorontsov Acked-by: Stefani Seibold Signed-off-by: Greg Kroah-Hartman --- include/linux/kfifo.h | 2 +- kernel/kfifo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 6f6c5f300af6..bc0fc795bd35 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -124,7 +124,7 @@ extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo, */ static inline bool kfifo_initialized(struct kfifo *fifo) { - return fifo->buffer != 0; + return fifo->buffer != NULL; } /** diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 559fb5582b60..35edbe22e9a9 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask) buffer = kmalloc(size, gfp_mask); if (!buffer) { - _kfifo_init(fifo, 0, 0); + _kfifo_init(fifo, NULL, 0); return -ENOMEM; } -- cgit v1.2.3 From 3b77fd8ee6a8ae34e349651e9d5f5000d1cc206e Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 17 Feb 2010 12:21:45 -0800 Subject: Input: add KEY_RFKILL Most laptops have keys that are intended to toggle all device state, not just wifi. These are currently generally mapped to KEY_WLAN. As a result, rfkill will only kill or enable wifi in response to the key press. This confuses users and can make it difficult for them to enable bluetooth and wwan devices. This patch adds a new keycode, KEY_RFKILL. It indicates that the system should toggle the state of all rfkillable devices. Signed-off-by: Matthew Garrett Acked-by: Marcel Holtmann Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 735ceaf1bc2d..663208afb64c 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -376,6 +376,7 @@ struct input_absinfo { #define KEY_DISPLAY_OFF 245 /* display device to off state */ #define KEY_WIMAX 246 +#define KEY_RFKILL 247 /* Key that controls all radios */ /* Range 248 - 255 is reserved for special needs of AT keyboard driver */ -- cgit v1.2.3 From 4e70af56319e56423d6eb1ce25fc321cdf8cd41d Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 15 Feb 2010 11:16:11 +0000 Subject: fs: inode - remove 8 bytes of padding on 64bits allowing 1 more objects/slab under slub This removes 8 bytes of padding from struct inode on 64bit builds, and so allows 1 more object/slab in the inode_cache when using slub. 
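[Illustration only, not kernel code: the 8 bytes come purely from alignment holes. On an LP64 target the first layout below is 32 bytes because each 4-byte int is followed by 4 bytes of padding, while grouping the two ints lets them share one 8-byte slot and shrinks the struct to 24 bytes, the same mechanism as moving i_blkbits next to i_rdev:]

#include <stdio.h>

/* Sketch: same members, different order.  'spread' leaves a 4-byte
 * hole after each int; 'packed_pair' puts the two ints side by side. */
struct spread      { long a; int b; long c; int d; };
struct packed_pair { long a; int b; int d; long c; };

int main(void)
{
	printf("spread=%zu packed_pair=%zu\n",
	       sizeof(struct spread), sizeof(struct packed_pair));
	/* Typical LP64 output: spread=32 packed_pair=24 */
	return 0;
}

[Tools such as pahole report these holes directly on the real struct inode.]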
Signed-off-by: Richard Kennedy
----
patch against 2.6.33-rc8
compiled & tested on x86_64 AMDX2

I've been running this patch for over a week with no obvious problems
regards
Richard

Signed-off-by: Al Viro
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b1bcb275b596..ebb1cd5bc241 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -729,6 +729,7 @@ struct inode {
 	uid_t i_uid;
 	gid_t i_gid;
 	dev_t i_rdev;
+	unsigned int i_blkbits;
 	u64 i_version;
 	loff_t i_size;
 #ifdef __NEED_I_SIZE_ORDERED
@@ -738,7 +739,6 @@ struct inode {
 	struct timespec i_mtime;
 	struct timespec i_ctime;
 	blkcnt_t i_blocks;
-	unsigned int i_blkbits;
 	unsigned short i_bytes;
 	umode_t i_mode;
 	spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
-- cgit v1.2.3


From 4e10ae11317b238609fc3ec9d50a5dee9473e045 Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Sat, 20 Feb 2010 09:41:30 +0100
Subject: ARM: 5951/1: ARM: fix documentation of the PrimeCell bus

This fixes the filepath encoded in <linux/amba/bus.h> and adds some
documentation as to what this bus really means.

Signed-off-by: Linus Walleij
Signed-off-by: Russell King
---
 include/linux/amba/bus.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index ab94335b4bb9..6816be6c3f77 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -1,5 +1,9 @@
 /*
- * linux/include/asm-arm/hardware/amba.h
+ * linux/include/amba/bus.h
+ *
+ * This device type deals with ARM PrimeCells and anything else that
+ * presents a proper CID (0xB105F00D) at the end of the I/O register
+ * region or that is derived from a PrimeCell.
 *
 * Copyright (C) 2003 Deep Blue Solutions Ltd, All Rights Reserved.
 *
-- cgit v1.2.3


From 79da0644a8e0838522828f106e4049639eea6baf Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 23 Feb 2010 08:40:43 +0100
Subject: Revert "block: improve queue_should_plug() by looking at IO depths"

This reverts commit fb1e75389bd06fd5987e9cda1b4e0305c782f854.

"Benjamin S." reports that the patch in question causes a big drop in
sequential throughput for him, from 200MB/sec down to only 70MB/sec.
This needs to be investigated more fully; for now, let's just revert
the offending commit.

Conflicts:
	include/linux/blkdev.h

Signed-off-by: Jens Axboe
---
 block/blk-core.c | 11 ++---------
 include/linux/blkdev.h | 4 +---
 2 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e6d37f..d1a9a0a64f95 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1147,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 */
 static inline bool queue_should_plug(struct request_queue *q)
 {
-	return !(blk_queue_nonrot(q) && blk_queue_queuing(q));
+	return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
 }

 static int __make_request(struct request_queue *q, struct bio *bio)
@@ -1859,15 +1859,8 @@ void blk_dequeue_request(struct request *rq)
 	 * and to it is freed is accounted as io that is in progress at
 	 * the driver side.
 	 */
-	if (blk_account_rq(rq)) {
+	if (blk_account_rq(rq))
 		q->in_flight[rq_is_sync(rq)]++;
-		/*
-		 * Mark this device as supporting hardware queuing, if
-		 * we have more IOs in flight than 4.
-		 */
-		if (!blk_queue_queuing(q) && queue_in_flight(q) > 4)
-			set_bit(QUEUE_FLAG_CQ, &q->queue_flags);
-	}
 }

 /**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c8018977efa..1896e868854f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -461,8 +461,7 @@ struct request_queue
 #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
-#define QUEUE_FLAG_CQ 16 /* hardware does queuing */
-#define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */
+#define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */

 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
 				 (1 << QUEUE_FLAG_CLUSTER) | \
@@ -586,7 +585,6 @@ enum {

 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
-#define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
-- cgit v1.2.3
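
[Editor's note] For context on the regression report in the revert above: sequential
throughput drops of this kind are typically confirmed with large back-to-back reads
against the block device. A rough user-space sketch of such a measurement follows;
the device path, chunk size, and total volume are illustrative assumptions, not taken
from the report.

/*
 * Reads a device or large file in 1 MiB sequential chunks and reports
 * the observed throughput.  Running it against a raw block device needs
 * root; the default path below is only an example.
 */
#define _POSIX_C_SOURCE 199309L
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

#define CHUNK (1024 * 1024)		/* 1 MiB per read() */
#define TOTAL (1024LL * CHUNK)		/* stop after 1 GiB */

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/dev/sda";	/* example path */
	char *buf = malloc(CHUNK);
	long long done = 0;
	struct timespec t0, t1;
	double secs;
	int fd;

	if (!buf)
		return 1;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	clock_gettime(CLOCK_MONOTONIC, &t0);
	while (done < TOTAL) {
		ssize_t n = read(fd, buf, CHUNK);
		if (n <= 0)
			break;
		done += n;
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);

	secs = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
	printf("%lld bytes in %.2f s = %.1f MB/s\n", done, secs, done / secs / 1e6);

	close(fd);
	free(buf);
	return 0;
}

Note that without O_DIRECT this goes through the page cache, so only a cold-cache run
is meaningful; the workload actually used by the reporter is not known from the commit
message.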